Weiming-Hu / AnalogsEnsemble

The C++ and R packages for parallel ensemble forecasts using Analog Ensemble
https://weiming-hu.github.io/AnalogsEnsemble/
MIT License
18 stars 5 forks source link

AnEn Search Space Extension: Discreet output #18

Closed lec170 closed 5 years ago

lec170 commented 5 years ago

Describe the bug When performing verification on the AnEn SSE, histograms show the output to be discreet. See sample plot below: image

Code to reproduce: (Or check out /AnalogsSpatial/SSE_test1.R on svn)

# Generate Analogs

source('~/geolab/projects/AnalogsSpatial/code/SSE_CoreSetUp.R')
# Variables
members.size    <- 25
# Identify the day to use
dayi <- 735
# For two year search history:
search.ID.start <- 1
search.ID.end <- 730
paramBeingForecasted <- 2
#############################################################################
# # Set up parameters to compute analogs
# # weights            <- c(1,1,0,0,0) #  "wdir","ws","2T","2DPT","MSLP"
xs <- as.numeric(lon)
ys <- as.numeric(lat)
nx <- 51; ny <- 49
icounter1 <- dayi
# Generate AnEn output, one day at a time.
# for ( icounter1 in 735:735 ) {  # Now, 730; Eventually do this through to 1095 for the 3rd year. So this is training on the first year and testing on the second two years
test.ID.start <- icounter1
test.ID.end <- test.ID.start+20# Generate the analogs for one day at a time.
# A sampling of 100 stations
stations.ID <- stations_ID <- sort(sample(1:2499,100))
config2 <- generateConfiguration('independentSearch')
config2$observation_id <- paramBeingForecasted
config2$test_forecasts <- fcst.aligned[,stations.ID,test.ID.start:test.ID.end,, drop = F]
config2$search_forecasts <- fcst.aligned[,stations.ID,search.ID.start:search.ID.end, , drop = F]
config2$search_times <- as.vector(times[search.ID.start:search.ID.end])
config2$search_flts <- flts[1:dim(config2$search_forecasts)[4]]
tmp.search.observations2 <- obsv.aligned[,,search.ID.start:search.ID.end,,drop=F]  # create a new copy of it
search.observations2 <- aperm(tmp.search.observations2, c(4, 3, 2, 1)) # Reorganizing the structure
search.observations2 <- array(search.observations2,
dim = c(dim(tmp.search.observations2)[3]
* dim(tmp.search.observations2)[4],
dim(tmp.search.observations2)[2],
dim(tmp.search.observations2)[1]))
search.observations2 <- aperm(search.observations2, c(3, 2, 1))
config2$search_observations <- search.observations2
config2$observation_times = rep(config2$search_times, each = length(config2$search_flts)) + config2$search_flts
config2$num_members <- members.size
num.parameters <- dim(config2$search_forecasts)[1]
config2$weights <- rep(1, num.parameters)
config2$test_stations_x <- xs[stations.ID]
config2$test_stations_y <- ys[stations.ID]
config2$search_stations_x <- xs[stations.ID]
config2$search_stations_y <- ys[stations.ID]
config2$preserve_mapping <- T
config2$verbose <- 3
config2$max_flt_nan <- 1
config2$max_par_nan <- 0
config2$extend_observations <- T
# Validate first before using
validateConfiguration(config2)
# Generate analogs
AnEn.ind <- generateAnalogs(config2)
config <- generateConfiguration('extendedSearch')
config$test_forecasts <- fcst.aligned[,stations.ID,test.ID.start:test.ID.end,, drop = F]
config$observation_id <- paramBeingForecasted
config$search_forecasts <- fcst.aligned[,stations.ID,search.ID.start:search.ID.end, , drop = F]
config$search_times <- as.vector(times[search.ID.start:search.ID.end])
config$search_flts <- flts[1:dim(config$search_forecasts)[4]]  # Want this to match with config$search_forecasts
# # We need to convert obsv.aligned from 4 dimensions to 3 dimensions:
tmp.search.observations <- obsv.aligned[,,search.ID.start:search.ID.end,,drop=F]  # create a new copy of it
search.observations <- aperm(tmp.search.observations, c(4, 3, 2, 1)) # Reorganizing the structure
search.observations <- array(search.observations,
dim = c(dim(tmp.search.observations)[3]
* dim(tmp.search.observations)[4],
dim(tmp.search.observations)[2],
dim(tmp.search.observations)[1]))
# Combined (collapsed) the first two dimensions (multiplied the first two dimensions to bring them together )
search.observations <- aperm(search.observations, c(3, 2, 1))  # Flip the location back
# search.observations <- aperm(search.observations, c(3, 2, 1))
#
config$search_observations <- search.observations   # search_observations[parameter, stations, time]
#
# # Need to go do the dim(config$search_observations)[3]
config$observation_times = rep(config$search_times, each = length(config$search_flts)) + config$search_flts
#
#
config$num_members <- members.size
#
num.parameters <- dim(config$search_forecasts)[1]
config$weights <- rep(1, num.parameters)
#
# # Right now, these first four lines are all the same. (because the test stations are the search stations but in other examples, the test and search stations could be different )
config$test_stations_x <- xs[stations.ID]
config$test_stations_y <- ys[stations.ID]
#
# # config$search_stations_x <- xs
# # config$search_stations_y <- ys
config$search_stations_x <- xs[stations.ID]
config$search_stations_y <- ys[stations.ID]
#
config$preserve_mapping <- T
config$verbose <- 3
config$max_flt_nan <- 1
config$max_par_nan <- 0
config$extend_observations <- T  # added on 20181212 <- analog from the point (target) being forecasted for
# # save search stations in the output
config$preserve_search_stations <- T  # Tells you waht the search stations are that you're looking into
# # save metrics in the output
config$preserve_similarity <- T  # Tells you which statiosn were most similar
#
config$num_nearest <- 8  # This is an option you can vary
config$max_num_search_stations <- 10  # Can decrease this if you want. # Can set this to be the same as number of nearest if/when using # nearest.
# # config$max_num_search_stations <- config$num_nearest
# # config$distance <- 1
#
# # Validate first before using
validateConfiguration(config)
#
# # w/ search extension
AnEn <- generateAnalogs(config)

# Look at anen.ver for SSE and IS: 
anen.ver.sse <- array(AnEn$analogs[,,,,1], dim=dim(AnEn$analogs)[1:4])

anen.ver.ind <- array(AnEn.ind$analogs[,,,,1], dim=dim(AnEn.ind$analogs)[1:4])

# Observations 
# # Observations (Analysis Fields)
nc.analy.file <- '~/geolab_storage_V3/data/Analogs/ECMWF_Italy/ItalyAnalysis.nc'
nc.analysis   <- nc_open(nc.analy.file)
obsv          <- ncvar_get(nc.analysis, 'Data')
# dim(obsv) -- 3 parameters   2499 stations   1102 days    4 flt
# Parameter 3 is temperature 
parameter <- 2
dir <- UVtoDir(obsv[1,,,], obsv[2,,,])
spd  <- UVtoSpd(obsv[1,,,], obsv[2,,,])
obs  <- array(spd,dim=dim(obsv)[2:4])

# Source Verification Functions 
source('~/geolab/projects/ExtremeHeat/code/Verification_Functions.R')
source('~/geolab/projects/ExtremeHeat/code/AnEn_functions.R')
library(ncdf4)

# # 2 days every 6 hours
time      <- (seq(0,48,6) )*60*60   ; time <- time[-length(time)]
# rhist.ver=function(anen.ver, obs.ver)

# anen.ver <- array(AnEn.ind$analogs[,,,,1], dim=dim(AnEn.ind$analogs)[1:4])
obs.ver  <- array( NA, dim=c(nrow(obs), dim(anen.ver.ind)[2], dim(obs)[3]*2 ))
for ( d in 1:dim(anen.ver.ind)[2] ) {
  obs.ver[,d,] = array( cbind( obs[ , test.ID.start+d-1, ], obs[ , test.ID.start+d, ] ), dim=c(nrow(obs), 1, dim(obs)[3]*2 ))
}
# Subselect down to the stations chosen. 
# stations.ID are the stations that are randomly kept 
obs.ver <- obs.ver[stations.ID,,]

rankhist.ind <- rhist.ver(anen.ver = anen.ver.ind ,obs.ver = obs.ver )
barplot(rankhist.ind, main = "AnEn.ind")

rankhist.sse <- rhist.ver(anen.ver = anen.ver.sse ,obs.ver = obs.ver )
barplot(rankhist.sse, main = "AnEn SSE")
lec170 commented 5 years ago

I should also note. I am using RAnEn version 3.1.0. Thanks

Weiming-Hu commented 5 years ago

Hi Luara, I added ``` around your code. This makes it easier to read the code.

lec170 commented 5 years ago

Great, thanks. I'll do that in the future.

Weiming-Hu commented 5 years ago

Resolved a small bug in the commit 07c46b67bf0fd6b9dec8de8c23f84711c9bb4369. Waiting for an updated result from the user.

Weiming-Hu commented 5 years ago

Issue closed due to inactivity.