Closed znichollscr closed 6 months ago
Meinshausen 2017 had their own custom interpolation. Consider re-using that here.
Full package: https://drive.google.com/drive/u/1/folders/19I8VMDAwG1JJ_K_Wzw05qrv9ENpFhRD8
```R ################################################################################ # This script interpolates the annual means to monthly values while being mean-preserving. # The input is a txt file with with two columns: # * column 1: years # * column 2: annual means # The output is a csv file with year, month, lat, data colums, where data are the values at the latitudes lat. # # This script can be called from the terminal as: # Rscript getinter_annualmeans2.R # e.g.: # Rscript getinter_annualmeans.R # Rscript getinter_annualmeans.R AnnualMeans.txt # Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 # Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 0.0001 # Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 0.0001 5.0 # spline interpolation function spline_interpolation_annual_means = function(y, alpha, auto_increase_by, alpha_max) { n = length(y) m = n*12 B = cbind(rep(1, n), bs((1:m)/12, df = ceiling(alpha*n), intercept = FALSE)) yearvec = rep(1:n, each = 12) p = ncol(B) BM = matrix(nrow = n, ncol = p) for (i in 1:n) BM[ i, ] = apply(B[ which(yearvec == i), ], 2, mean) DB = apply(B, 2, diff) md = nrow(DB) cvec = c(rep(1, md), rep(0, 2*p)) Amat = cbind( matrix(0, nrow = n, ncol = md), BM, -BM) bvec = y const.dir = rep("==", n) Amat = rbind( Amat, cbind(-diag(md) , DB, -DB) , cbind(-diag(md), -DB, DB)) bvec = c(bvec, rep(0, 2*md)) const.dir = c(const.dir, rep("<=", 2*md)) sol = solveLP(cvec, bvec, Amat, const.dir = const.dir, lpSolve = TRUE)$solution # if sol is NULL, interpolation could not be carried out (because function is not sufficiently continuous) if (!is.null(sol)) { sol = sol[-(1:md)] sol = sol[1:p] - sol[-(1:p)] values_interpolated = as.numeric(B %*% sol) return(values_interpolated) } else if (is.null(sol) && auto_increase_by > 0) { alpha_new = alpha + auto_increase_by if (alpha_new > alpha_max) { stop("Eek! Alpha is greater than alpha_max. Could not solve spline interpolation.") } else { print(sprintf("Could not solve spline interpolation. Auto increasing alpha parameter to %s.", alpha_new)) return(spline_interpolation_annual_means(y, alpha_new, auto_increase_by)) } } else if (is.null(sol) && auto_increase_by == 0) { stop("Could not solve spline interpolation. Consider changing alpha or setting auto_increase_by to greater than 0.") } } ################################################################################ # start of the script # read in file_name from command line args = commandArgs(trailingOnly = TRUE) # print(args) # print(length(args)) if (length(args) == 0) { file_name = "AnnualMeans.txt" alpha = 1.00001 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 1) { file_name = args[1] alpha = 1.00001 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 2) { file_name = args[1] alpha = as.numeric(args[2]) auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 3) { file_name = args[1] alpha = as.numeric(args[2]) auto_increase_by = as.numeric(args[3]) alpha_max = alpha + (100*auto_increase_by) } else if (length(args) == 4) { file_name = args[1] alpha = as.numeric(args[2]) auto_increase_by = as.numeric(args[3]) alpha_max = as.numeric(args[4]) } else { stop("Please provide the input arguments as: 1) filename (optional, default: AnnualMeans.txt), 2) alpha (optional), 3) auto_increase_alpha_by (optional), 4) alpha_max (optional).") } # print(sprintf("filename: %s, alpha: %s, auto_increase_by: %s, alpha_max: %s", file_name, alpha, auto_increase_by, alpha_max)) # temp = strsplit(file_name, split = ".", fixed = TRUE) # file_name1 = temp[[1]][1] # file_type = temp[[1]][2] # read in file dat = read.table(file_name) x_original = dat[, 1] y_original = dat[, 2] start_index = min(which(y_original > 0)) x = x_original[start_index : length(x_original)] y = y_original[start_index : length((y_original))] yearvec = rep(x_original, each = 12) monthvec = rep(1:12, length(x_original)) x_new = yearvec + (monthvec-0.5)/12 y_new = spline_interpolation_annual_means(y, alpha, auto_increase_by, alpha_max) y_new = c(rep(0, 12*(start_index - 1)), y_new) output_data = cbind(x_new, y_new) colnames(output_data) = c("year", "smoothTrend") write.table(output_data, file = "SmoothTrend.txt", row.names = FALSE) ```
```R ################################################################################ # This script interpolates the latitude values from a coarse to a fine grid while being mean-preserving. # The input is a csv file with year, month, lat, data columns, with: # * lat: mid-points of the original grid # * data: mean values for the region: lat +/- gridsize/2 # The output is a csv file with year, month, lat, data colums, where data are the values at the latitudes lat. # # This script can be called from the terminal as: # Rscript getinter_lat2.R < # e.g.: # Rscript getinter_lat2.R test.csv # Rscript getinter_lat2.R test.csv 0.1 # Rscript getinter_lat2.R test.csv 0.1 1.75 # load libraries library(splines) library(linprog) library(plyr); library(dplyr) # define the magic box spline interpolation function spline_interpolation = function(latitudes_old, values_old, latitudes_new, alpha = 1.75, auto_increase_by = 0, alpha_max = alpha + (50 * auto_increase_by)) { if (auto_increase_by < 0) stop("Please set auto_increase_by to a value >= 0.") length_vector_old = length(latitudes_old) length_vector_new = length(latitudes_new) B = cbind(rep(1, length_vector_new), bs(latitudes_new, df = ceiling(alpha * length_vector_old), intercept = FALSE)) boxvec = rep(0, length_vector_new) weightvec = cos(latitudes_new / 180 * pi) for (yc in 1: length_vector_new) boxvec[yc] = which.min(abs(latitudes_old - latitudes_new[yc])) p = ncol(B) BM = matrix(nrow = length_vector_old, ncol = p) BW = sweep(B, 1, weightvec, FUN = "*") for (i in 1:length_vector_old) BM[i,] = apply(BW[which(boxvec == i), ], 2, sum) / sum(weightvec[ which(boxvec == i)]) DB = apply(B, 2, diff) md = nrow(DB) cvec = c(rep(1, md), rep(0, 2 * p)) Amat = cbind( matrix(0, nrow = length_vector_old, ncol = md), BM, -BM) bvec = values_old const.dir = rep("==", length_vector_old) Amat = rbind( Amat, cbind(-diag(md) , DB, -DB) , cbind(-diag(md), -DB, DB)) bvec = c(bvec, rep(0, 2 * md)) const.dir = c(const.dir, rep("<=", 2*md)) sol = solveLP(cvec, bvec, Amat, const.dir = const.dir, lpSolve = TRUE)$solution # if sol is NULL, interpolation could not be carried out (because function is not sufficiently continuous) if (!is.null(sol)) { sol = sol[-(1:md)] sol = sol[1:p] - sol[-(1:p)] values_interpolated = as.numeric(B %*% sol) return(values_interpolated) } else if (is.null(sol) && auto_increase_by > 0) { alpha_new = alpha + auto_increase_by if (alpha_new > alpha_max) { stop("Eek! Alpha is greater than alpha_max. Could not solve spline interpolation.") } else { print(sprintf("Could not solve spline interpolation. Auto increasing alpha parameter to %s.", alpha_new)) return(spline_interpolation(latitudes_old, values_old, latitudes_new, alpha_new, auto_increase_by)) } } else if (is.null(sol) && auto_increase_by == 0) { stop("Could not solve spline interpolation. Consider changing alpha or setting auto_increase_by to greater than 0.") } } ################################################################################ # start of the script # read in file_name from command line args <- commandArgs(trailingOnly = TRUE) # print(args) # print(length(args)) if (length(args) == 1) { file_name = args[1] grid_size = 0.5 alpha = 1.75 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 2) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = 1.75 auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 3) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = as.numeric(args[3]) auto_increase_by = 0 alpha_max = alpha } else if (length(args) == 4) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = as.numeric(args[3]) auto_increase_by = as.numeric(args[4]) alpha_max = alpha + (100 * auto_increase_by) } else if (length(args) == 5) { file_name = args[1] grid_size = as.numeric(args[2]) alpha = as.numeric(args[3]) auto_increase_by = as.numeric(args[4]) alpha_max = as.numeric(args[5]) } else { stop("Please provide the input arguments as: 1) filename, 2) grid size (optional), 3) alpha (optional), 4) auto_increase_alpha_by (optional), 5) alpha_max (optional).") } # print(sprintf("gridsize: %s, alpha: %s, auto_increase_by: %s, alpha_max: %s", grid_size, alpha, auto_increase_by, alpha_max)) # define new latitudes latitudes_new = seq(-90+grid_size/2, 90-grid_size/2, grid_size) # file_name = "/Volumes/Seagate/Work/GHG Interpolations/HISTGHGPACKAGE/code/interpolation_R/test.csv" temp = strsplit(file_name, split = ".", fixed = TRUE) file_name1 = temp[[1]][1] file_type = temp[[1]][2] if (file_type != "csv") { print(sprintf("Error: File is not a csv file: %s", file_name)) return(NULL) } # read in file data = read.csv(file_name, row.names = NULL) # for each year/month, carry out the interpolation data_new = ddply(.data = data, .variables = c("year", "month"), .fun = function(x) { # comment this out, if it's too annoying: print(sprintf("Interpolating year: %s, month: %s", unique(x$year), unique(x$month))) latitudes_old = x$lat values_old = x$data # check if all values are the same if (length(unique(values_old)) == 1) { data_interpolated = rep(unique(values_old), times = length(latitudes_new)) } else { data_interpolated = spline_interpolation(latitudes_old = latitudes_old, values_old = values_old, latitudes_new = latitudes_new, alpha = alpha, auto_increase_by = auto_increase_by, alpha_max = alpha_max) } return(data.frame(lat = latitudes_new, data = data_interpolated)) }) # }, .progress = "text") # use this progress text, if year/month print out is too annoying # write out results #write.csv(data_new, sprintf("%s_%sdeg.%s", file_name1, grid_size, file_type)) write.csv(data_new, sprintf("%s_regridded.%s", file_name1, file_type)) ```
Closed by #27
Meinshausen 2017 had their own custom interpolation. Consider re-using that here.
Full package: https://drive.google.com/drive/u/1/folders/19I8VMDAwG1JJ_K_Wzw05qrv9ENpFhRD8
Previous R time interpolation code
```R ################################################################################ # This script interpolates the annual means to monthly values while being mean-preserving. # The input is a txt file with with two columns: # * column 1: years # * column 2: annual means # The output is a csv file with year, month, lat, data colums, where data are the values at the latitudes lat. # # This script can be called from the terminal as: # Rscript getinter_annualmeans2.R
# e.g.:
# Rscript getinter_annualmeans.R
# Rscript getinter_annualmeans.R AnnualMeans.txt
# Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001
# Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 0.0001
# Rscript getinter_annualmeans.R AnnualMeans.txt 1.0001 0.0001 5.0
# spline interpolation function
spline_interpolation_annual_means = function(y, alpha, auto_increase_by, alpha_max) {
n = length(y)
m = n*12
B = cbind(rep(1, n), bs((1:m)/12, df = ceiling(alpha*n), intercept = FALSE))
yearvec = rep(1:n, each = 12)
p = ncol(B)
BM = matrix(nrow = n, ncol = p)
for (i in 1:n) BM[ i, ] = apply(B[ which(yearvec == i), ], 2, mean)
DB = apply(B, 2, diff)
md = nrow(DB)
cvec = c(rep(1, md), rep(0, 2*p))
Amat = cbind( matrix(0, nrow = n, ncol = md), BM, -BM)
bvec = y
const.dir = rep("==", n)
Amat = rbind( Amat, cbind(-diag(md) , DB, -DB) , cbind(-diag(md), -DB, DB))
bvec = c(bvec, rep(0, 2*md))
const.dir = c(const.dir, rep("<=", 2*md))
sol = solveLP(cvec, bvec, Amat, const.dir = const.dir, lpSolve = TRUE)$solution
# if sol is NULL, interpolation could not be carried out (because function is not sufficiently continuous)
if (!is.null(sol)) {
sol = sol[-(1:md)]
sol = sol[1:p] - sol[-(1:p)]
values_interpolated = as.numeric(B %*% sol)
return(values_interpolated)
} else if (is.null(sol) && auto_increase_by > 0) {
alpha_new = alpha + auto_increase_by
if (alpha_new > alpha_max) {
stop("Eek! Alpha is greater than alpha_max. Could not solve spline interpolation.")
} else {
print(sprintf("Could not solve spline interpolation. Auto increasing alpha parameter to %s.", alpha_new))
return(spline_interpolation_annual_means(y, alpha_new, auto_increase_by))
}
} else if (is.null(sol) && auto_increase_by == 0) {
stop("Could not solve spline interpolation. Consider changing alpha or setting auto_increase_by to greater than 0.")
}
}
################################################################################
# start of the script
# read in file_name from command line
args = commandArgs(trailingOnly = TRUE)
# print(args)
# print(length(args))
if (length(args) == 0) {
file_name = "AnnualMeans.txt"
alpha = 1.00001
auto_increase_by = 0
alpha_max = alpha
} else if (length(args) == 1) {
file_name = args[1]
alpha = 1.00001
auto_increase_by = 0
alpha_max = alpha
} else if (length(args) == 2) {
file_name = args[1]
alpha = as.numeric(args[2])
auto_increase_by = 0
alpha_max = alpha
} else if (length(args) == 3) {
file_name = args[1]
alpha = as.numeric(args[2])
auto_increase_by = as.numeric(args[3])
alpha_max = alpha + (100*auto_increase_by)
} else if (length(args) == 4) {
file_name = args[1]
alpha = as.numeric(args[2])
auto_increase_by = as.numeric(args[3])
alpha_max = as.numeric(args[4])
} else {
stop("Please provide the input arguments as: 1) filename (optional, default: AnnualMeans.txt), 2) alpha (optional), 3) auto_increase_alpha_by (optional), 4) alpha_max (optional).")
}
# print(sprintf("filename: %s, alpha: %s, auto_increase_by: %s, alpha_max: %s", file_name, alpha, auto_increase_by, alpha_max))
# temp = strsplit(file_name, split = ".", fixed = TRUE)
# file_name1 = temp[[1]][1]
# file_type = temp[[1]][2]
# read in file
dat = read.table(file_name)
x_original = dat[, 1]
y_original = dat[, 2]
start_index = min(which(y_original > 0))
x = x_original[start_index : length(x_original)]
y = y_original[start_index : length((y_original))]
yearvec = rep(x_original, each = 12)
monthvec = rep(1:12, length(x_original))
x_new = yearvec + (monthvec-0.5)/12
y_new = spline_interpolation_annual_means(y, alpha, auto_increase_by, alpha_max)
y_new = c(rep(0, 12*(start_index - 1)), y_new)
output_data = cbind(x_new, y_new)
colnames(output_data) = c("year", "smoothTrend")
write.table(output_data, file = "SmoothTrend.txt", row.names = FALSE)
```
Previous R spatial interpolation code
```R ################################################################################ # This script interpolates the latitude values from a coarse to a fine grid while being mean-preserving. # The input is a csv file with year, month, lat, data columns, with: # * lat: mid-points of the original grid # * data: mean values for the region: lat +/- gridsize/2 # The output is a csv file with year, month, lat, data colums, where data are the values at the latitudes lat. # # This script can be called from the terminal as: # Rscript getinter_lat2.R <
# e.g.:
# Rscript getinter_lat2.R test.csv
# Rscript getinter_lat2.R test.csv 0.1
# Rscript getinter_lat2.R test.csv 0.1 1.75
# load libraries
library(splines)
library(linprog)
library(plyr); library(dplyr)
# define the magic box spline interpolation function
spline_interpolation = function(latitudes_old, values_old, latitudes_new,
alpha = 1.75, auto_increase_by = 0,
alpha_max = alpha + (50 * auto_increase_by)) {
if (auto_increase_by < 0)
stop("Please set auto_increase_by to a value >= 0.")
length_vector_old = length(latitudes_old)
length_vector_new = length(latitudes_new)
B = cbind(rep(1, length_vector_new),
bs(latitudes_new, df = ceiling(alpha * length_vector_old), intercept = FALSE))
boxvec = rep(0, length_vector_new)
weightvec = cos(latitudes_new / 180 * pi)
for (yc in 1: length_vector_new)
boxvec[yc] = which.min(abs(latitudes_old - latitudes_new[yc]))
p = ncol(B)
BM = matrix(nrow = length_vector_old, ncol = p)
BW = sweep(B, 1, weightvec, FUN = "*")
for (i in 1:length_vector_old)
BM[i,] = apply(BW[which(boxvec == i), ], 2, sum) / sum(weightvec[ which(boxvec == i)])
DB = apply(B, 2, diff)
md = nrow(DB)
cvec = c(rep(1, md), rep(0, 2 * p))
Amat = cbind( matrix(0, nrow = length_vector_old, ncol = md), BM, -BM)
bvec = values_old
const.dir = rep("==", length_vector_old)
Amat = rbind( Amat, cbind(-diag(md) , DB, -DB) , cbind(-diag(md), -DB, DB))
bvec = c(bvec, rep(0, 2 * md))
const.dir = c(const.dir, rep("<=", 2*md))
sol = solveLP(cvec, bvec, Amat, const.dir = const.dir, lpSolve = TRUE)$solution
# if sol is NULL, interpolation could not be carried out (because function is not sufficiently continuous)
if (!is.null(sol)) {
sol = sol[-(1:md)]
sol = sol[1:p] - sol[-(1:p)]
values_interpolated = as.numeric(B %*% sol)
return(values_interpolated)
} else if (is.null(sol) && auto_increase_by > 0) {
alpha_new = alpha + auto_increase_by
if (alpha_new > alpha_max) {
stop("Eek! Alpha is greater than alpha_max. Could not solve spline interpolation.")
} else {
print(sprintf("Could not solve spline interpolation. Auto increasing alpha parameter to %s.", alpha_new))
return(spline_interpolation(latitudes_old, values_old, latitudes_new, alpha_new, auto_increase_by))
}
} else if (is.null(sol) && auto_increase_by == 0) {
stop("Could not solve spline interpolation. Consider changing alpha or setting auto_increase_by to greater than 0.")
}
}
################################################################################
# start of the script
# read in file_name from command line
args <- commandArgs(trailingOnly = TRUE)
# print(args)
# print(length(args))
if (length(args) == 1) {
file_name = args[1]
grid_size = 0.5
alpha = 1.75
auto_increase_by = 0
alpha_max = alpha
} else if (length(args) == 2) {
file_name = args[1]
grid_size = as.numeric(args[2])
alpha = 1.75
auto_increase_by = 0
alpha_max = alpha
} else if (length(args) == 3) {
file_name = args[1]
grid_size = as.numeric(args[2])
alpha = as.numeric(args[3])
auto_increase_by = 0
alpha_max = alpha
} else if (length(args) == 4) {
file_name = args[1]
grid_size = as.numeric(args[2])
alpha = as.numeric(args[3])
auto_increase_by = as.numeric(args[4])
alpha_max = alpha + (100 * auto_increase_by)
} else if (length(args) == 5) {
file_name = args[1]
grid_size = as.numeric(args[2])
alpha = as.numeric(args[3])
auto_increase_by = as.numeric(args[4])
alpha_max = as.numeric(args[5])
} else {
stop("Please provide the input arguments as: 1) filename, 2) grid size (optional), 3) alpha (optional), 4) auto_increase_alpha_by (optional), 5) alpha_max (optional).")
}
# print(sprintf("gridsize: %s, alpha: %s, auto_increase_by: %s, alpha_max: %s", grid_size, alpha, auto_increase_by, alpha_max))
# define new latitudes
latitudes_new = seq(-90+grid_size/2, 90-grid_size/2, grid_size)
# file_name = "/Volumes/Seagate/Work/GHG Interpolations/HISTGHGPACKAGE/code/interpolation_R/test.csv"
temp = strsplit(file_name, split = ".", fixed = TRUE)
file_name1 = temp[[1]][1]
file_type = temp[[1]][2]
if (file_type != "csv") {
print(sprintf("Error: File is not a csv file: %s", file_name))
return(NULL)
}
# read in file
data = read.csv(file_name, row.names = NULL)
# for each year/month, carry out the interpolation
data_new = ddply(.data = data,
.variables = c("year", "month"),
.fun = function(x) {
# comment this out, if it's too annoying:
print(sprintf("Interpolating year: %s, month: %s", unique(x$year), unique(x$month)))
latitudes_old = x$lat
values_old = x$data
# check if all values are the same
if (length(unique(values_old)) == 1) {
data_interpolated = rep(unique(values_old), times = length(latitudes_new))
} else {
data_interpolated = spline_interpolation(latitudes_old = latitudes_old,
values_old = values_old,
latitudes_new = latitudes_new,
alpha = alpha,
auto_increase_by = auto_increase_by,
alpha_max = alpha_max)
}
return(data.frame(lat = latitudes_new, data = data_interpolated))
})
# }, .progress = "text") # use this progress text, if year/month print out is too annoying
# write out results
#write.csv(data_new, sprintf("%s_%sdeg.%s", file_name1, grid_size, file_type))
write.csv(data_new, sprintf("%s_regridded.%s", file_name1, file_type))
```
Required to close