climate-resource / CMIP-GHG-Concentration-Generation

Generation of historical concentrations for CMIP (and other) experiments
BSD 3-Clause "New" or "Revised" License
2 stars 0 forks source link

Interpolation #20

Closed znichollscr closed 6 months ago

znichollscr commented 8 months ago

Meinshausen 2017 had their own custom interpolation. Consider re-using that here.

Full package: https://drive.google.com/drive/u/1/folders/19I8VMDAwG1JJ_K_Wzw05qrv9ENpFhRD8

Previous R time interpolation code

```R ################################################################################ # This script interpolates the annual means to monthly values while being mean-preserving. # The input is a txt file with with two columns: # * column 1: years # * column 2: annual means # The output is a csv file with year, month, lat, data colums, where data are the values at the latitudes lat. # # This script can be called from the terminal as: # Rscript getinter_annualmeans2.R # e.g.: # Rscript getinter_annualmeans.R # Rscript getinter_annualmeans.R AnnualMeans.txt # Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 # Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 0.0001 # Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 0.0001 5.0 # spline interpolation function spline_interpolation_annual_means = function(y, alpha, auto_increase_by, alpha_max) { n = length(y) m = n*12 B = cbind(rep(1, n), bs((1:m)/12, df = ceiling(alpha*n), intercept = FALSE)) yearvec = rep(1:n, each = 12) p = ncol(B) BM = matrix(nrow = n, ncol = p) for (i in 1:n) BM[ i, ] = apply(B[ which(yearvec == i), ], 2, mean) DB = apply(B, 2, diff) md = nrow(DB) cvec = c(rep(1, md), rep(0, 2*p)) Amat = cbind( matrix(0, nrow = n, ncol = md), BM, -BM) bvec = y const.dir = rep("==", n) Amat = rbind( Amat, cbind(-diag(md) , DB, -DB) , cbind(-diag(md), -DB, DB)) bvec = c(bvec, rep(0, 2*md)) const.dir = c(const.dir, rep("<=", 2*md)) sol = solveLP(cvec, bvec, Amat, const.dir = const.dir, lpSolve = TRUE)$solution # if sol is NULL, interpolation could not be carried out (because function is not sufficiently continuous) if (!is.null(sol)) { sol = sol[-(1:md)] sol = sol[1:p] - sol[-(1:p)] values_interpolated = as.numeric(B %*% sol) return(values_interpolated) } else if (is.null(sol) && auto_increase_by > 0) { alpha_new = alpha + auto_increase_by if (alpha_new > alpha_max) { stop("Eek! Alpha is greater than alpha_max. Could not solve spline interpolation.") } else { print(sprintf("Could not solve spline interpolation. Auto increasing alpha parameter to %s.", alpha_new)) return(spline_interpolation_annual_means(y, alpha_new, auto_increase_by)) } } else if (is.null(sol) && auto_increase_by == 0) { stop("Could not solve spline interpolation. Consider changing alpha or setting auto_increase_by to greater than 0.") } } ################################################################################ # start of the script # read in file_name from command line args = commandArgs(trailingOnly = TRUE) # print(args) # print(length(args)) if (length(args) == 0) { file_name = "AnnualMeans.txt" alpha = 1.00001 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 1) { file_name = args[1] alpha = 1.00001 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 2) { file_name = args[1] alpha = as.numeric(args[2]) auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 3) { file_name = args[1] alpha = as.numeric(args[2]) auto_increase_by = as.numeric(args[3]) alpha_max = alpha + (100*auto_increase_by) } else if (length(args) == 4) { file_name = args[1] alpha = as.numeric(args[2]) auto_increase_by = as.numeric(args[3]) alpha_max = as.numeric(args[4]) } else { stop("Please provide the input arguments as: 1) filename (optional, default: AnnualMeans.txt), 2) alpha (optional), 3) auto_increase_alpha_by (optional), 4) alpha_max (optional).") } # print(sprintf("filename: %s, alpha: %s, auto_increase_by: %s, alpha_max: %s", file_name, alpha, auto_increase_by, alpha_max)) # temp = strsplit(file_name, split = ".", fixed = TRUE) # file_name1 = temp[[1]][1] # file_type = temp[[1]][2] # read in file dat = read.table(file_name) x_original = dat[, 1] y_original = dat[, 2] start_index = min(which(y_original > 0)) x = x_original[start_index : length(x_original)] y = y_original[start_index : length((y_original))] yearvec = rep(x_original, each = 12) monthvec = rep(1:12, length(x_original)) x_new = yearvec + (monthvec-0.5)/12 y_new = spline_interpolation_annual_means(y, alpha, auto_increase_by, alpha_max) y_new = c(rep(0, 12*(start_index - 1)), y_new) output_data = cbind(x_new, y_new) colnames(output_data) = c("year", "smoothTrend") write.table(output_data, file = "SmoothTrend.txt", row.names = FALSE) ```

Previous R spatial interpolation code

```R ################################################################################ # This script interpolates the latitude values from a coarse to a fine grid while being mean-preserving. # The input is a csv file with year, month, lat, data columns, with: # * lat: mid-points of the original grid # * data: mean values for the region: lat +/- gridsize/2 # The output is a csv file with year, month, lat, data colums, where data are the values at the latitudes lat. # # This script can be called from the terminal as: # Rscript getinter_lat2.R < # e.g.: # Rscript getinter_lat2.R test.csv # Rscript getinter_lat2.R test.csv 0.1 # Rscript getinter_lat2.R test.csv 0.1 1.75 # load libraries library(splines) library(linprog) library(plyr); library(dplyr) # define the magic box spline interpolation function spline_interpolation = function(latitudes_old, values_old, latitudes_new, alpha = 1.75, auto_increase_by = 0, alpha_max = alpha + (50 * auto_increase_by)) { if (auto_increase_by < 0) stop("Please set auto_increase_by to a value >= 0.") length_vector_old = length(latitudes_old) length_vector_new = length(latitudes_new) B = cbind(rep(1, length_vector_new), bs(latitudes_new, df = ceiling(alpha * length_vector_old), intercept = FALSE)) boxvec = rep(0, length_vector_new) weightvec = cos(latitudes_new / 180 * pi) for (yc in 1: length_vector_new) boxvec[yc] = which.min(abs(latitudes_old - latitudes_new[yc])) p = ncol(B) BM = matrix(nrow = length_vector_old, ncol = p) BW = sweep(B, 1, weightvec, FUN = "*") for (i in 1:length_vector_old) BM[i,] = apply(BW[which(boxvec == i), ], 2, sum) / sum(weightvec[ which(boxvec == i)]) DB = apply(B, 2, diff) md = nrow(DB) cvec = c(rep(1, md), rep(0, 2 * p)) Amat = cbind( matrix(0, nrow = length_vector_old, ncol = md), BM, -BM) bvec = values_old const.dir = rep("==", length_vector_old) Amat = rbind( Amat, cbind(-diag(md) , DB, -DB) , cbind(-diag(md), -DB, DB)) bvec = c(bvec, rep(0, 2 * md)) const.dir = c(const.dir, rep("<=", 2*md)) sol = solveLP(cvec, bvec, Amat, const.dir = const.dir, lpSolve = TRUE)$solution # if sol is NULL, interpolation could not be carried out (because function is not sufficiently continuous) if (!is.null(sol)) { sol = sol[-(1:md)] sol = sol[1:p] - sol[-(1:p)] values_interpolated = as.numeric(B %*% sol) return(values_interpolated) } else if (is.null(sol) && auto_increase_by > 0) { alpha_new = alpha + auto_increase_by if (alpha_new > alpha_max) { stop("Eek! Alpha is greater than alpha_max. Could not solve spline interpolation.") } else { print(sprintf("Could not solve spline interpolation. Auto increasing alpha parameter to %s.", alpha_new)) return(spline_interpolation(latitudes_old, values_old, latitudes_new, alpha_new, auto_increase_by)) } } else if (is.null(sol) && auto_increase_by == 0) { stop("Could not solve spline interpolation. Consider changing alpha or setting auto_increase_by to greater than 0.") } } ################################################################################ # start of the script # read in file_name from command line args <- commandArgs(trailingOnly = TRUE) # print(args) # print(length(args)) if (length(args) == 1) { file_name = args[1] grid_size = 0.5 alpha = 1.75 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 2) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = 1.75 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 3) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = as.numeric(args[3]) auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 4) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = as.numeric(args[3]) auto_increase_by = as.numeric(args[4]) alpha_max = alpha + (100 * auto_increase_by) } else if (length(args) == 5) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = as.numeric(args[3]) auto_increase_by = as.numeric(args[4]) alpha_max = as.numeric(args[5]) } else { stop("Please provide the input arguments as: 1) filename, 2) grid size (optional), 3) alpha (optional), 4) auto_increase_alpha_by (optional), 5) alpha_max (optional).") } # print(sprintf("gridsize: %s, alpha: %s, auto_increase_by: %s, alpha_max: %s", grid_size, alpha, auto_increase_by, alpha_max)) # define new latitudes latitudes_new = seq(-90+grid_size/2, 90-grid_size/2, grid_size) # file_name = "/Volumes/Seagate/Work/GHG Interpolations/HISTGHGPACKAGE/code/interpolation_R/test.csv" temp = strsplit(file_name, split = ".", fixed = TRUE) file_name1 = temp[[1]][1] file_type = temp[[1]][2] if (file_type != "csv") { print(sprintf("Error: File is not a csv file: %s", file_name)) return(NULL) } # read in file data = read.csv(file_name, row.names = NULL) # for each year/month, carry out the interpolation data_new = ddply(.data = data, .variables = c("year", "month"), .fun = function(x) { # comment this out, if it's too annoying: print(sprintf("Interpolating year: %s, month: %s", unique(x$year), unique(x$month))) latitudes_old = x$lat values_old = x$data # check if all values are the same if (length(unique(values_old)) == 1) { data_interpolated = rep(unique(values_old), times = length(latitudes_new)) } else { data_interpolated = spline_interpolation(latitudes_old = latitudes_old, values_old = values_old, latitudes_new = latitudes_new, alpha = alpha, auto_increase_by = auto_increase_by, alpha_max = alpha_max) } return(data.frame(lat = latitudes_new, data = data_interpolated)) }) # }, .progress = "text") # use this progress text, if year/month print out is too annoying # write out results #write.csv(data_new, sprintf("%s_%sdeg.%s", file_name1, grid_size, file_type)) write.csv(data_new, sprintf("%s_regridded.%s", file_name1, file_type)) ```

Required to close

znichollscr commented 6 months ago

Closed by #27