DataONEorg / rdataone

R package for reading and writing data at DataONE data repositories
http://doi.org/10.5063/F1M61H5X
36 stars 19 forks source link

getDataPackage produces a hard to decipher error message #279

Open amoeba opened 3 years ago

amoeba commented 3 years ago

In https://github.com/DataONEorg/rdataone/issues/203, @earnaud reported this situation:

> library(dataone)
> d1c <- D1Client("STAGING", "urn:node:mnTestARCTIC")
> dp <- getDataPackage(d1c, "urn:uuid:5374d070-e834-4690-987f-d0d0789360b5")
Error in (function (cl, name, valueClass)  : 
  assignment of an object of class “NULL” is not valid for @‘resmapId’ in an object of class “DataPackage”; is(value, "character") is not TRUE

At the time of writing this issue, the Solr doc for urn:uuid:5374d070-e834-4690-987f-d0d0789360b5 doesn't have a resourceMap field.

We should probably:

earnaud commented 3 years ago

I would add how the data object matching urn:uuid:5374d070-e834-4690-987f-d0d0789360b5 was uploaded: following the recommended {dataone} method, I wrote the uploadDP() function:

# Build a DataPackage from an EML metadata file, data files and optional
# script files, then upload it to the member node described by `endpoint`.
#
# Arguments:
#   endpoint  one-row data.frame with columns name, mn, cn, description
#   token     authentication token matching the selected cn and mn
#   eml       path of the EML file to upload
#   data      path(s) of the data file(s) to upload
#   scripts   optional path(s) of script file(s) to upload
#   use.doi   WIP: when TRUE a DOI is requested, but it is not used yet
#
# Returns the identifier generated for the metadata object.
# Signals an error when the upload fails.
function(
  # connection arguments
  endpoint, # data.frame of 1 row and 4 columns: name, mn, cn, description
  token, # metacat token matching the selected cn and mn
  # content
  eml, # eml file to upload
  data, # data file(s) to upload
  scripts = NULL, # script file(s) to upload
  # options
  use.doi = FALSE # WIP
) {
  # Register the token for the duration of this call only. The function is
  # meant for a multiuser shiny app, so the token is cleared on every exit
  # path (including errors), not just after a successful upload.
  options(dataone_test_token = token, dataone_token = token)
  on.exit(
    options(dataone_test_token = NULL, dataone_token = NULL),
    add = TRUE
  )

  # Set variables ----
  # devmsg: function combining message() and sprintf()
  devmsg(tag = "upload", "* Sending data package to %s", endpoint$name)

  # Extract the columns directly: as.character() on a one-column data.frame
  # yields the integer code instead of the label when the column is a factor.
  d1c <- dataone::D1Client(
    as.character(endpoint[["cn"]]),
    as.character(endpoint[["mn"]])
  )

  # unused
  if (use.doi) {
    doi <- dataone::generateIdentifier(d1c@mn, "DOI")
  } # TODO check this feature

  # Adds one DataObject per file to `pkg`, each documented by `metadata`.
  add_files <- function(pkg, files, metadata) {
    formats <- mime::guess_type(files)
    for (i in seq_along(files)) {
      file_obj <- methods::new(
        "DataObject",
        format = formats[i],
        filename = files[i]
      )
      pkg <- datapack::addMember(pkg, do = file_obj, mo = metadata)
    }
    pkg
  }

  # Write DP ----
  # set data package
  dp <- methods::new("DataPackage")

  # * metadata ----
  # Add metadata to the data package
  devmsg(tag = "upload", "* Set metadata")

  metadata_id <- dataone::generateIdentifier(d1c@mn, scheme = "uuid")
  doc <- EML::read_eml(eml)
  # Keep the schema location up to and including the "eml-x.y.z" version tag
  # and use that as the format of the metadata object.
  eml.format <- gsub(
    pattern = "(eml-[0-9]+\\.[0-9]+\\.[0-9]+).+$",
    replacement = "\\1",
    x = doc$schemaLocation
  )
  metadataObj <- methods::new(
    "DataObject",
    id = metadata_id,
    format = eml.format,
    filename = eml
  )
  dp <- datapack::addMember(dp, metadataObj)

  # * data ----
  # Add data to the data package
  devmsg(tag = "upload", "* Set data")
  dp <- add_files(dp, data, metadataObj)

  # * scripts ----
  # Add scripts to the data package
  if (length(scripts) != 0) {
    devmsg(tag = "upload", "* Set scripts")
    dp <- add_files(dp, scripts, metadataObj)
  }

  # Access rules ----
  # TODO allow customized access rules

  # Upload ----
  devmsg(tag = "upload", "* Upload")

  # Fail loudly instead of dropping into browser(): an interactive debugger
  # would block a shiny session, and returning an identifier after a failed
  # upload would mislead the caller.
  packageId <- tryCatch(
    dataone::uploadDataPackage(d1c, dp, public = TRUE, quiet = FALSE),
    error = function(e) {
      stop(
        sprintf(
          "Upload of data package to %s failed: %s",
          endpoint$name,
          conditionMessage(e)
        ),
        call. = FALSE
      )
    }
  )
  devmsg(tag = "upload", "* Success (resource map ID: %s)", packageId)

  # The metadata identifier is returned rather than packageId (the resource
  # map identifier reported above).
  metadata_id
}

I am targeting the metadata_id since it is the one displayed here: image. I thought this was a complete data package ID.

earnaud commented 3 years ago

Little update: here is the state of my dp as built with this function:

> str(dp)
Formal class 'DataPackage' [package "datapack"] with 5 slots
  ..@ relations  :Formal class 'hash' [package "hash"] with 1 slot
  .. .. ..@ .xData:<environment: 0x55e169a76500> 
  ..@ objects    :Formal class 'hash' [package "hash"] with 1 slot
  .. .. ..@ .xData:<environment: 0x55e164f41150> 
  ..@ sysmeta    :Formal class 'SystemMetadata' [package "datapack"] with 24 slots
  .. .. ..@ serialVersion          : num 1
  .. .. ..@ identifier             : chr NA
  .. .. ..@ formatId               : chr NA
  .. .. ..@ size                   : num NA
  .. .. ..@ checksum               : chr NA
  .. .. ..@ checksumAlgorithm      : chr "SHA-256"
  .. .. ..@ submitter              : chr NA
  .. .. ..@ rightsHolder           : chr NA
  .. .. ..@ accessPolicy           :'data.frame':   0 obs. of  2 variables:
  .. .. .. ..$ subject   : chr(0) 
  .. .. .. ..$ permission: chr(0) 
  .. .. ..@ replicationAllowed     : logi TRUE
  .. .. ..@ numberReplicas         : num 3
  .. .. ..@ preferredNodes         : list()
  .. .. ..@ blockedNodes           : list()
  .. .. ..@ obsoletes              : chr NA
  .. .. ..@ obsoletedBy            : chr NA
  .. .. ..@ archived               : logi FALSE
  .. .. ..@ dateUploaded           : chr NA
  .. .. ..@ dateSysMetadataModified: chr "2021-08-17T09:28:13Z"
  .. .. ..@ originMemberNode       : chr NA
  .. .. ..@ authoritativeMemberNode: chr NA
  .. .. ..@ seriesId               : chr NA
  .. .. ..@ mediaType              : chr NA
  .. .. ..@ fileName               : chr NA
  .. .. ..@ mediaTypeProperty      : list()
  ..@ externalIds: list()
  ..@ resmapId   : chr NA
> dp@relations
<hash> containing 2 key-value pair(s).
  relations : c("urn:uuid:3fe994bf-122d-4de2-9f35-956a442b7506", "urn:uuid:d385b802-a22b-4260-8cc8-e1694507f037", "urn:uuid:3fe994bf-122d-4de2-9f35-956a442b7506", "urn:uuid:eefea122-750d-469e-aefd-820bcab0b211") c("http://purl.org/spar/cito/documents", "http://purl.org/spar/cito/isDocumentedBy", "http://purl.org/spar/cito/documents", "http://purl.org/spar/cito/isDocumentedBy") c("urn:uuid:d385b802-a22b-4260-8cc8-e1694507f037", "urn:uuid:3fe994bf-122d-4de2-9f35-956a442b7506", "urn:uuid:eefea122-750d-469e-aefd-820bcab0b211", "urn:uuid:3fe994bf-122d-4de2-9f35-956a442b7506") c("NA", "NA", "NA", "NA") c("NA", "NA", "NA", "NA") c("NA", "NA", "NA", "NA")
  updated : TRUE
> dp@objects
<hash> containing 3 key-value pair(s).
  urn:uuid:3fe994bf-122d-4de2-9f35-956a442b7506 : <S4 class ‘DataObject’ [package “datapack”] with 7 slots>
  urn:uuid:d385b802-a22b-4260-8cc8-e1694507f037 : <S4 class ‘DataObject’ [package “datapack”] with 7 slots>
  urn:uuid:eefea122-750d-469e-aefd-820bcab0b211 : <S4 class ‘DataObject’ [package “datapack”] with 7 slots>