WPringle commented 3 years ago

@zacharyburnettNOAA @ksargsyan Please check out this example script and the plots below and let me know if have any questions or see something wrong

#!/usr/bin/env python

import os
from matplotlib import pyplot
import numpy
from scipy.special import ndtri

from ensembleperturbation.uncertainty_quantification.ensemble_array import ensemble_array, read_combined_hdf, sample_points_with_equal_spacing
from ensembleperturbation.uncertainty_quantification.karhunen_loeve_expansion import karhunen_loeve_expansion, karhunen_loeve_prediction
from ensembleperturbation.uncertainty_quantification.polynomial_chaos import build_pc_expansion, evaluate_pc_expansion, evaluate_pc_sensitivity
from ensembleperturbation.plotting import plot_points, plot_coastline

##############################################################################
# MAIN SCRIPT ################################################################
##############################################################################

h5name = '../data/florence_40member.h5'
dataframes = read_combined_hdf(h5name)
keys = list(dataframes.keys())

# Load the input/outputs
np_input, np_output = ensemble_array(
        input_dataframe=dataframes[keys[0]],
        output_dataframe=dataframes[keys[1]],
    )
numpy.savetxt('xdata.dat', np_input) #because pce_eval expects xdata.dat as input

# adjusting the output to have less nodes for now (every 100 points)
points = dataframes[keys[1]][["x","y"]].to_numpy()
np_output_subset = np_output[:, ::25]
points_subset = points[::25, :]

# remove areas where any of the ensembles was a NaN
mask = numpy.isnan(np_output_subset)
mask = mask.any(axis=0)
ymodel = np_output_subset[:, ~mask].T
points_subset = points_subset[~mask,:]

print('shape of ymodel')
print(ymodel.shape)

# choose neig decimal percent of variance explained to keep
neig = 0.95  # gives  us 6 modes

## Evaluating the KL modes
# ymean is the average field                                                 : size (ngrid,)
# kl_modes is the KL modes ('principal directions')                          : size (ngrid,neig)
# eigval is the eigenvalue vector                                            : size (neig,)
# xi are the samples for the KL coefficients                                 : size (nens, neig)
ymean, kl_modes, eigval, xi = karhunen_loeve_expansion(ymodel,neig=neig,plot=False)

# evaluate the fit of the KL prediction
# ypred is the predicted value of ymodel -> equal in the limit neig = ngrid  : size (ngrid,nens)
ypred = karhunen_loeve_prediction(ymean, kl_modes, eigval, xi, ymodel)

# plot scatter points to compare ymodel and ypred spatially
for example in range(0,ymodel.shape[1],5):
    plot_points(numpy.hstack((points_subset,ymodel[:,[example]])),
        save_filename='modeled_zmax' + str(example),vmax=3.0,
    )
    pyplot.close()

    plot_points(numpy.hstack((points_subset,ypred[:,[example]])),
        save_filename='predicted_zmax' + str(example),vmax=3.0,
    )
    pyplot.close()

# Build PC for each mode in xi (each mode has nens values)
pc_type = 'HG' # Hermite-Gauss chaos
lambda_reg = 0 # regularization lambda
neig = xi.shape[1] # number of eigenvalues
pc_dim = np_input.shape[1] # dimension of the PC expansion
sens_all = numpy.empty((neig,pc_dim,2))
for k, qoi in enumerate(xi.transpose()):
    numpy.savetxt('qoi.dat', qoi)

    # compare accuracy of 2nd or 3rd order polynomials
    for poly_order in [2,3]:
        # Builds second order PC expansion for the each mode
        build_pc_expansion(x_filename='xdata.dat',y_filename='qoi.dat',
                           output_filename='coeff' + str(k+1) + '.dat',
                           pc_type=pc_type,poly_order=poly_order,
                           lambda_regularization=lambda_reg)

        # Evaluates the constructed PC at the input for comparison
        evaluate_pc_expansion(x_filename='xdata.dat',output_filename='ydata.dat',
                              parameter_filename='coeff' + str(k+1) + '.dat',
                              pc_type=pc_type,poly_order=poly_order)
        qoi_pc = numpy.loadtxt('ydata.dat')

        # shows comparison of predicted against "real" result
        pyplot.plot(qoi, qoi_pc, 'o',label='poly order = ' +  str(poly_order))
    pyplot.plot([-2,3],[-2,3], 'k--', lw=1)
    pyplot.gca().set_xlabel('predicted')
    pyplot.gca().set_ylabel('actual')
    pyplot.title('mode-' + str(k+1))
    pyplot.legend()
    pyplot.savefig('mode-' + str(k+1))

    # Evaluates the constructed PC at the input for comparison
    evaluate_pc_sensitivity(parameter_filename='coeff' + str(k+1) + '.dat',
                            pc_type=pc_type,pc_dimension=pc_dim,poly_order=poly_order)
    sens_all[k,:,0] = numpy.loadtxt('mainsens.dat')
    sens_all[k,:,1] = numpy.loadtxt('totsens.dat')

sens_labels = ["main", "total"]
for idx in [0,1]:
    lineObjects = pyplot.plot(sens_all[:,:,idx].squeeze())
    pyplot.gca().set_xlabel('mode number')
    pyplot.gca().set_ylabel('Sobol sensitivty')
    pyplot.title(sens_labels[idx] + '_sensitivity')
    pyplot.legend(lineObjects, dataframes[keys[0]].columns)
    pyplot.savefig(sens_labels[idx] + '_sensitivity')
    pyplot.close()

WPringle commented 3 years ago