pbs-assess / gfdata

:floppy_disk: An R package for data extraction of BC groundfish data
https://pbs-assess.github.io/gfdata/
1 stars 0 forks source link

Add sablefish head-only sampling program data #19

Open seananderson opened 3 months ago

seananderson commented 3 months ago

From Leah:

SELECT 
Sablefish.dbo.Commercial_Heads_FL.Specimen AS SPECIMEN_ID, 
Sablefish.dbo.Commercial_Heads_FL.Sex, 
Sablefish.dbo.Commercial_Heads_FL.InterOrbital, 
Sablefish.dbo.Commercial_Heads_FL.LengthPred AS LENGTH,
GFBioSQL.dbo.SPECIES.SPECIES_COMMON_NAME,
GFBIOSQL.dbo.B21_Samples.TRIP_ID,
GFBIOSQL.dbo.B21_Samples.FISHING_EVENT_ID, 
GFBIOSQL.dbo.B21_Samples.MAJOR_STAT_AREA_CODE,
GFBIOSQL.dbo.MAJOR_STAT_AREA.MAJOR_STAT_AREA_NAME,
CAST(GFBIOSQL.dbo.B21_Samples.TRIP_START_DATE AS Date) as TRIP_START_DATE,
GFBIOSQL.dbo.B21_Samples.SAMPLE_SOURCE_CODE,
GFBIOSQL.dbo.B21_Samples.SPECIES_CATEGORY_CODE,
GFBIOSQL.dbo.B21_Samples.SAMPLE_TYPE_CODE,
GFBioSQL.dbo.TRIP_SUB_TYPE.TRIP_SUB_TYPE_DESC,
GFBIOSQL.dbo.B21_Samples.GEAR_CODE,
GFBIOSQL.dbo.GEAR.GEAR_DESC,
GFBIOSQL.dbo.B22_Specimens.ROUND_WEIGHT AS WEIGHT,
GFBIOSQL.dbo.B22_Specimens.SPECIMEN_AGE AS AGE,
GFBIOSQL.dbo.B22_Specimens.MATURITY_CODE,
GFBIOSQL.dbo.B22_Specimens.SAMPLE_ID,
CASE WHEN GFBIOSQL.dbo.SPECIMEN_COLLECTED.SPECIMEN_COLLECTED_IND = 'Y' OR GFBIOSQL.dbo.SPECIMEN_COLLECTED.SPECIMEN_COLLECTED_IND = 'y' THEN 1 ELSE 0 END AS AGE_SPECIMEN_COLLECTED,
CASE WHEN GFBIOSQL.dbo.B21_Samples.GEAR_CODE IN (1, 6, 8, 11) THEN ISNULL(GFBioSQL.dbo.TRAWL_SPECS.USABILITY_CODE, 0)
     WHEN GFBIOSQL.dbo.B21_Samples.GEAR_CODE IN (2) THEN ISNULL(GFBioSQL.dbo.TRAP_SPECS.USABILITY_CODE, 0)
     WHEN GFBIOSQL.dbo.B21_Samples.GEAR_CODE IN (5) THEN ISNULL(GFBioSQL.dbo.LONGLINE_SPECS.USABILITY_CODE, 0)
     WHEN GFBIOSQL.dbo.B21_Samples.GEAR_CODE IN (4) THEN ISNULL(GFBioSQL.dbo.HANDLINE_SPECS.USABILITY_CODE, 0)
     ELSE 0 END AS USABILITY_CODE,
CASE WHEN SPECIES_CATEGORY_CODE IN (0, 1, 5, 6) AND (SAMPLE_SOURCE_CODE IS NULL OR SAMPLE_SOURCE_CODE = 1)
        THEN 'UNSORTED'
    WHEN SPECIES_CATEGORY_CODE = 1 AND SAMPLE_SOURCE_CODE = 0
        THEN 'UNSORTED'
    WHEN SPECIES_CATEGORY_CODE IN(1, 2) AND SAMPLE_SOURCE_CODE = 2
        THEN 'KEEPERS'
    WHEN SPECIES_CATEGORY_CODE = 3 AND (SAMPLE_SOURCE_CODE IS NULL OR SAMPLE_SOURCE_CODE IN(1, 2))
        THEN 'KEEPERS'
    WHEN SPECIES_CATEGORY_CODE = 1 AND SAMPLE_SOURCE_CODE = 3
        THEN 'DISCARDS'
    WHEN SPECIES_CATEGORY_CODE = 4 AND SAMPLE_SOURCE_CODE IN(1, 3)
        THEN 'DISCARDS'
    ELSE 'UNKNOWN' END AS SAMPLING_DESC,
YEAR(TRIP_START_DATE) AS YEAR

FROM Sablefish.dbo.Commercial_Heads_FL
JOIN GFBIOSQL.dbo.B22_Specimens ON (Sablefish.dbo.Commercial_Heads_FL.Specimen = GFBIOSQL.dbo.B22_Specimens.SPECIMEN_ID)
JOIN GFBIOSQL.dbo.B21_Samples ON (GFBIOSQL.dbo.B22_Specimens.SAMPLE_ID = GFBIOSQL.dbo.B21_Samples.SAMPLE_ID)
JOIN GFBIOSQL.dbo.SPECIMEN_COLLECTED ON (Sablefish.dbo.Commercial_Heads_FL.Specimen = GFBIOSQL.dbo.SPECIMEN_COLLECTED.SPECIMEN_ID)
JOIN GFBIOSQL.dbo.GEAR ON (GFBIOSQL.dbo.B21_Samples.GEAR_CODE = GFBIOSQL.dbo.GEAR.GEAR_CODE)
JOIN GFBIOSQL.dbo.MAJOR_STAT_AREA ON (GFBIOSQL.dbo.B21_Samples.MAJOR_STAT_AREA_CODE = GFBIOSQL.dbo.MAJOR_STAT_AREA.MAJOR_STAT_AREA_CODE)
JOIN GFBioSQL.dbo.SPECIES ON (GFBIOSQL.dbo.B21_Samples.SPECIES_CODE = GFBioSQL.dbo.SPECIES.SPECIES_CODE)
JOIN GFBioSQL.dbo.TRIP_SUB_TYPE ON (GFBIOSQL.dbo.B21_Samples.TRIP_SUB_TYPE_CODE = GFBioSQL.dbo.TRIP_SUB_TYPE.TRIP_SUB_TYPE_CODE)
LEFT JOIN GFBioSQL.dbo.TRAWL_SPECS ON (GFBIOSQL.dbo.B21_Samples.FISHING_EVENT_ID = GFBioSQL.dbo.TRAWL_SPECS.FISHING_EVENT_ID)
LEFT JOIN GFBioSQL.dbo.TRAP_SPECS ON (GFBIOSQL.dbo.B21_Samples.FISHING_EVENT_ID = GFBioSQL.dbo.TRAP_SPECS.FISHING_EVENT_ID)
LEFT JOIN GFBioSQL.dbo.LONGLINE_SPECS ON (GFBIOSQL.dbo.B21_Samples.FISHING_EVENT_ID = GFBioSQL.dbo.LONGLINE_SPECS.FISHING_EVENT_ID)
LEFT JOIN GFBioSQL.dbo.HANDLINE_SPECS ON (GFBIOSQL.dbo.B21_Samples.FISHING_EVENT_ID = GFBioSQL.dbo.HANDLINE_SPECS.FISHING_EVENT_ID)
sablefish_heads <- function (fl_path = paste0(here::here("data")),
                             fl_name = "sablefish_heads.csv") {

  sablefish <- read.csv(paste0(fl_path, "/", fl_name))

  # Adjust column names
  names(sablefish) <- tolower(names(sablefish))
  names(sablefish)[names(sablefish) == "lengthpred"] <- "length"
  names(sablefish)[names(sablefish) == "specimen"] <- "specimen_id"

  sablefish <- sablefish %>%
    dplyr::select(-interorbital)

  sablefish <- sablefish %>%
    dplyr::mutate(trip_start_date = as.POSIXct(trip_start_date, format = "%m/%d/%Y"),
                  species_common_name = tolower(species_common_name),
                  age = as.numeric(age),
                  weight = as.numeric(weight),
                  maturity_code = as.numeric(maturity_code),
                  major_stat_area_code = as.character(major_stat_area_code),
                  length = round_any(length, accuracy = 10)*0.1,
                  length_type = "fork_length"
                  )

}