Closed serbinsh closed 8 years ago
Sorry just getting to this, @serbinsh . We were definitely having this problem before, but we've done lots of testing since with SIPNET running through web interface. Does your code have the latest update to the filename stuff (#576)?
If so, can you send me the pecan.xml and workflow.R that are responsible?
Hi @ryankelly-uiuc I updated my code base yesterday morning. I was using the default workflow.R and pecan.xml that is generated through the interface:
Workflow.R
#!/usr/bin/env Rscript
args <- commandArgs(trailingOnly = TRUE)
#-------------------------------------------------------------------------------
# Copyright (c) 2012 University of Illinois, NCSA.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the
# University of Illinois/NCSA Open Source License
# which accompanies this distribution, and is available at
# http://opensource.ncsa.illinois.edu/license.html
#-------------------------------------------------------------------------------
# ----------------------------------------------------------------------
# Load required libraries
# ----------------------------------------------------------------------
library(PEcAn.all)
library(RCurl)
#--------------------------------------------------------------------------------#
# Functions used to write STATUS used by history
#--------------------------------------------------------------------------------#
# Append the opening half of a STATUS record: the stage name and the
# current time, tab-separated, with no trailing newline.
#
# name: label of the workflow stage that is starting.
# Writes to file.path(settings$outdir, "STATUS"); `settings` is read from
# the enclosing (global) scope.
status.start <- function(name) {
  started_at <- format(Sys.time(), "%F %T")
  status_path <- file.path(settings$outdir, "STATUS")
  record_head <- paste0(name, "\t", started_at)
  cat(record_head, file = status_path, append = TRUE)
}
# Append the closing half of a STATUS record: a leading tab, the current
# time, the outcome label, then a trailing tab and newline.
#
# status: outcome label written into the record (default "DONE").
# Writes to file.path(settings$outdir, "STATUS"); `settings` is read from
# the enclosing (global) scope.
status.end <- function(status="DONE") {
  finished_at <- format(Sys.time(), "%F %T")
  status_path <- file.path(settings$outdir, "STATUS")
  record_tail <- paste0("\t", finished_at, "\t", status, "\t", "\n")
  cat(record_tail, file = status_path, append = TRUE)
}
# Append one complete STATUS record for a stage that was not run: stage
# name, a timestamp, an empty field, a second timestamp, the literal
# "SKIPPED", then a trailing tab and newline (all tab-separated).
#
# name: label of the workflow stage being skipped.
# Writes to file.path(settings$outdir, "STATUS"); `settings` is read from
# the enclosing (global) scope.
status.skip <- function(name) {
  # The clock is sampled twice, once per timestamp field.
  started_at <- format(Sys.time(), "%F %T")
  ended_at <- format(Sys.time(), "%F %T")
  record <- paste(name, started_at, "", ended_at, "SKIPPED", "\n", sep = "\t")
  status_path <- file.path(settings$outdir, "STATUS")
  cat(record, file = status_path, append = TRUE)
}
# warn=1: print warnings as they occur instead of deferring them.
options(warn=1)
# Error handler: on an uncaught error, close the open STATUS record with
# the outcome "ERROR"; when the session is not interactive, also quit R.
# NOTE(review): q() is called with its defaults here — confirm the exit
# status that the caller (e.g. the web interface) expects on failure.
options(error=quote({
status.end("ERROR")
if (!interactive()) {
q()
}
}))
#options(warning.expression=status.end("ERROR"))
# ----------------------------------------------------------------------
# PEcAn Workflow
# ----------------------------------------------------------------------
# Open and read in settings file for PEcAn run.
# Use the settings file named by the first command-line argument when one
# was given; otherwise fall back to "pecan.xml" in the working directory.
if (!is.na(args[1])) {
  settings.file <- args[1]
  settings <- read.settings(settings.file)
} else {
  settings <- read.settings("pecan.xml")
}
if (length(which(commandArgs() == "--continue")) == 0) {
# Remove existing STATUS file
file.remove(file.path(settings$outdir, "STATUS"))
#unlink(file.path(settings$outdir, "STATUS"))
# Do conversions
for(i in 1:length(settings$run$inputs)) {
input <- settings$run$inputs[[i]]
if (is.null(input)) next
input.tag <- names(settings$run$input)[i]
# fia database
if (input['input'] == 'fia') {
status.start("FIA2ED")
fia.to.psscss(settings)
status.end()
}
# met conversion
if(input.tag == 'met') {
if (is.null(input$path)) {
if (is.null(settings$browndog)) {
status.start("MET Process")
} else {
status.start("BrownDog")
}
result <- PEcAn.data.atmosphere::met.process(
site = settings$run$site,
input_met = settings$run$inputs$met,
start_date = settings$run$start.date,
end_date = settings$run$end.date,
model = settings$model$type,
host = settings$run$host,
dbparms = settings$database$bety,
dir = settings$run$dbfiles,
browndog = settings$browndog)
settings$run$inputs[[i]][['path']] <- result
status.end()
}
52,0-1 30%
status.end()
}
}
}
saveXML(listToXml(settings, "pecan"), file=file.path(settings$outdir, 'pecan.METProcess.xml'))
# Check status to avoid repeating work
# Look up a workflow stage in the STATUS file and report its outcome.
#
# check.name: stage label to look for (first column of the STATUS file).
#
# Returns  1 if the first matching record ended with "DONE",
#         -1 if it ended with "ERROR",
#          0 otherwise (no STATUS file, no matching record, or any other
#            outcome such as "SKIPPED").
#
# The file is read from file.path(settings$outdir, "STATUS"), the same path
# the status.* writers append to; `settings` comes from the enclosing
# (global) scope. Records are whitespace-split by read.table(), so each
# "date time" stamp occupies two columns and the outcome lands in column 6.
check.status <- function(check.name) {
  # Must match the file name used by the status.* writers ("STATUS").
  status.file <- file.path(settings$outdir, "STATUS")
  if (!file.exists(status.file)) {
    return(0)
  }
  table <- read.table(status.file, header = FALSE)
  # seq_len() keeps the loop from running over c(1, 0) on a zero-row table.
  for (i in seq_len(nrow(table))) {
    if (table[i, 1] == check.name) {
      if (table[i, 6] == "DONE") {
        return(1)
      } else if (table[i, 6] == "ERROR") {
        return(-1)
      } else {
        return(0)
      }
    }
  }
  return(0)
}
# Query the trait database for data and priors
if (check.status("TRAIT") == 0){
status.start("TRAIT")
settings$pfts <- get.trait.data(settings$pfts, settings$model$type, settings$run$dbfiles, settings$database$bety, settings$meta.analysis$update)
saveXML(listToXml(settings, "pecan"), file=file.path(settings$outdir, 'pecan.TRAIT.xml'))
status.end()
}
# Run the PEcAn meta.analysis
if (check.status("META") == 0){
status.start("META")
if('meta.analysis' %in% names(settings)) {
run.meta.analysis(settings$pfts, settings$meta.analysis$iter, settings$meta.analysis$random.effects, settings$meta.analysis$threshold, settings$run$dbfiles, settings$database$bety)
}
status.end()
}
# Write model specific configs
if (check.status("CONFIG") == 0){
status.start("CONFIG")
settings <- run.write.configs(settings, write=settings$database$bety$write, ens.sample.method="halton")
saveXML(listToXml(settings, "pecan"), file=file.path(settings$outdir, 'pecan.CONFIGS.xml'))
status.end()
}
if (length(which(commandArgs() == "--advanced")) != 0) {
status.start("ADVANCED")
q();
}
}
# Remaining workflow stages. Each guarded stage runs when check.status()
# returns 0 for its name and brackets its work with status.start() /
# status.end(), which append the start and end of a record to STATUS.
# Start ecosystem model runs
if (check.status("MODEL") == 0){
status.start("MODEL")
start.model.runs(settings, settings$database$bety$write)
status.end()
}
# Get results of model runs
if (check.status("OUTPUT") == 0){
status.start("OUTPUT")
get.results(settings)
status.end()
}
# Run ensemble analysis on model output.
if (check.status("ENSEMBLE") == 0){
status.start("ENSEMBLE")
run.ensemble.analysis(TRUE)
status.end()
}
# Run sensitivity analysis and variance decomposition on model output
if (check.status("SENSITIVITY") == 0){
status.start("SENSITIVITY")
run.sensitivity.analysis()
status.end()
}
# Run parameter data assimilation
# NOTE(review): unlike the stages above, this one is gated only on the
# presence of an 'assim.batch' settings entry, not on check.status("PDA").
if(('assim.batch' %in% names(settings))) {
status.start("PDA")
settings$assim.batch <- pda.mcmc(settings)
status.end()
}
# Pecan workflow complete
# Stamps finished_at on this workflow's row, only if it is still NULL.
if (check.status("FINISHED") == 0){
status.start("FINISHED")
db.query(paste("UPDATE workflows SET finished_at=NOW() WHERE id=", settings$workflow$id, "AND finished_at IS NULL"), params=settings$database$bety)
status.end()
}
# Send email if configured
# Sent only when settings$email and settings$email$to are present and
# settings$email$to is not the empty string.
if (!is.null(settings$email) && !is.null(settings$email$to) && (settings$email$to != "")) {
sendmail(settings$email$from, settings$email$to,
paste0("Workflow has finished executing at ", date()),
paste0("You can find the results on ", settings$email$url))
}
# Write end time in database
# NOTE(review): the FINISHED stage above already issues the same UPDATE,
# without this id check — confirm whether both are needed.
if (settings$workflow$id != 'NA') {
db.query(paste0("UPDATE workflows SET finished_at=NOW() WHERE id=", settings$workflow$id, " AND finished_at IS NULL"), params=settings$database$bety)
}
# NOTE(review): no status.start() precedes this call in this section (the
# FINISHED record is already closed above), so it appends an end-only
# record to STATUS — confirm whether it is intentional.
status.end()
db.print.connections()
print("---------- PEcAn Workflow Complete ----------")
and the generated pecan.xml:
<?xml version="1.0"?>
<pecan>
<outdir>/data/Model_Output/pecan.output/PEcAn_2000000002</outdir>
<database>
<bety>
<user>bety</user>
<password>bety</password>
<host>localhost</host>
<dbname>bety</dbname>
<driver>PostgreSQL</driver>
<write>TRUE</write>
</bety>
</database>
<pfts>
<pft>
<name>temperate.deciduous</name>
<constants>
<num>1</num>
</constants>
<outdir>/data/Model_Output/pecan.output/PEcAn_2000000002/pft/temperate.deciduous</outdir>
</pft>
</pfts>
<meta.analysis>
<iter>3000</iter>
<random.effects>FALSE</random.effects>
<threshold>1.2</threshold>
<update>AUTO</update>
</meta.analysis>
<ensemble>
<size>5</size>
<notes></notes>
<variable>NPP</variable>
<start.year>2003</start.year>
<end.year>2003</end.year>
</ensemble>
<model>
<id>10</id>
<type>SIPNET</type>
<revision>unk</revision>
<binary>/data/software/sipnet/sipnet.runk</binary>
</model>
<workflow>
<id>2000000002</id>
</workflow>
<run>
<site>
<id>676</id>
<met.start>1998-01-01 00:00:00</met.start>
<met.end>2006-12-31 00:00:00</met.end>
<name>Willow Creek (US-WCr)</name>
<lat>45.92</lat>
<lon>-90.45</lon>
</site>
<inputs>
<met>
<id>2000000001</id>
<path>/data/Model_Data/sites/US-WCr/US-WCr.clim</path>
</met>
</inputs>
<start.date>2003/01/01</start.date>
<end.date>2003/12/31</end.date>
<dbfiles>/data/Model_Output/pecan.output/dbfiles</dbfiles>
<host>
<name>localhost</name>
<rundir>/data/Model_Output/pecan.output/PEcAn_2000000002/run</rundir>
<outdir>/data/Model_Output/pecan.output/PEcAn_2000000002/out</outdir>
</host>
</run>
<rundir>/data/Model_Output/pecan.output/PEcAn_2000000002/run</rundir>
<modeloutdir>/data/Model_Output/pecan.output/PEcAn_2000000002/out</modeloutdir>
</pecan>
Plus there is a new set of xmls (trait, METProcess and configs)
@ryankelly-uiuc Note. I just ran it again (same site, etc) but added more than 1 ensemble member and it appears to be avoiding this bug.
I take it back, still errors:
TRAIT 2015-07-29 09:42:36 2015-07-29 09:42:37 DONE
META 2015-07-29 09:42:37 2015-07-29 09:43:21 DONE
CONFIG 2015-07-29 09:43:21 2015-07-29 09:43:30 DONE
ADVANCED 2015-07-29 09:43:30 2015-07-29 09:44:19 DONE
MODEL 2015-07-29 09:44:21 2015-07-29 09:49:46 DONE
OUTPUT 2015-07-29 09:49:46 2015-07-29 09:49:48 DONE
ENSEMBLE 2015-07-29 09:49:48 2015-07-29 09:49:48 ERROR
Happens after compilation of model output into PEcAn netCDF format during the ensemble run. The MCMC sampling, etc works fine.
[1] "Years: 2003 - 2003"
[1] "----- Mean NPP : -1566.03846104431"
[1] "----- Median NPP : -876.581279533208"
2015-07-29 09:49:48 INFO [read.ensemble.output] :
reading ensemble output from run id: 2000000057
[1] "Years: 2003 - 2003"
[1] "----- Mean NPP : -959.436223561038"
[1] "----- Median NPP : -876.581279533208"
2015-07-29 09:49:48 INFO [read.ensemble.output] :
reading ensemble output from run id: 2000000058
[1] "Years: 2003 - 2003"
[1] "----- Mean NPP : -380.152091057486"
[1] "----- Median NPP : -876.581279533208"
2015-07-29 09:49:48 INFO [read.ensemble.output] :
reading ensemble output from run id: 2000000059
[1] "Years: 2003 - 2003"
[1] "----- Mean NPP : 87.0577422949539"
[1] "----- Median NPP : -876.581279533208"
2015-07-29 09:49:48 INFO [read.ensemble.output] :
reading ensemble output from run id: 2000000060
[1] "Years: 2003 - 2003"
[1] "----- Mean NPP : 433.687611494209"
[1] "----- Median NPP : -876.581279533208"
>
> # Run ensemble analysis on model output.
> if (check.status("ENSEMBLE") == 0){
+ status.start("ENSEMBLE")
+ run.ensemble.analysis(TRUE)
+ status.end()
+ }
[1] "----- Variable: NPP"
Warning in readChar(con, 5L, useBytes = TRUE) :
cannot open compressed file '/data/Model_Output/pecan.output/PEcAn_2000000003/ensemble.output.2000000003.NPP.2003.2003.Rdata', probable reason 'No such file or directory'
Error in readChar(con, 5L, useBytes = TRUE) : cannot open the connection
Calls: run.ensemble.analysis -> load -> readChar
> proc.time()
user system elapsed
315.590 12.614 328.640
Again, the file isn't found as it has an incorrect filename
-rw-rw-r--. 1 apache test 236 Jul 29 09:49 ensemble.output.NOENSEMBLEID.NPP.2003.2003.Rdata
-rw-rw-r--. 1 apache test 2341 Jul 29 09:43 ensemble.samples.2000000003.Rdata
drwxrwsr-x. 57 apache test 4096 Jul 29 09:43 out
-rw-rw-r--. 1 apache test 2140 Jul 29 09:43 pecan.CONFIGS.xml
-rw-rw-r--. 1 apache test 2019 Jul 29 09:42 pecan.METProcess.xml
-rw-rw-r--. 1 apache test 2060 Jul 29 09:42 pecan.TRAIT.xml
-rw-rw-r--. 1 apache test 2019 Jul 29 09:44 pecan.xml
drwxrwsr-x. 3 apache test 40 Jul 29 09:42 pft
drwxrwsr-x. 57 apache test 4096 Jul 29 09:43 run
-rw-rw-r--. 1 apache test 711466 Jul 29 09:43 samples.Rdata
-rw-rw-r--. 1 apache test 819 Jul 29 09:49 sensitivity.output.NOENSEMBLEID.NPP.2003.2003.Rdata
-rw-rw-r--. 1 apache test 1241 Jul 29 09:43 sensitivity.samples.2000000002.Rdata
-rw-rw-r--. 1 apache test 372 Jul 29 09:49 STATUS
-rw-rw-r--. 1 apache test 30953 Jul 29 09:49 workflow2.Rout
-rw-rw-r--. 1 apache test 6877 Jul 29 09:42 workflow.R
-rw-rw-r--. 1 apache test 27035 Jul 29 09:43 workflow.Rout
Note ensemble.samples DOES have the run ID in the filename. Any ideas on a fix? I could try and sift through code, but a suggested starting point would be helpful.
OK, looking into this. Weird that some of the files have the ID but some don't. I'll keep you posted.
OK. To double-check, I just ran everything locally (i.e. through my login at the command line) and it worked OK. Strange.
Also, do you know why we removed the workflow.R from /scripts? I ask because I found the latest revision of this in /web but wondered why we would have users look in there for an example workflow file. Is there a new suggested/default way to get folks started running from the command line? I just figured we would leave that there as an example to get people up and running.
Others can chime in, but my understanding is that it is just asking for errors if we maintain multiple workflow.R files that are supposed to do the same thing. E.g. the web/ and scripts/ versions were quite out of sync when I compared them recently and removed the latter (at @mdietze's request). We could symlink between the dirs if you just think it's weird to send someone into web/ to find the script.
That said, the whole point was to avoid stuff working at command prompt but not via web (or vice versa), which is apparently exactly what's happening for you anyway. Great :)
@ryankelly-uiuc No, it's fine; I was just trying to make sure I wasn't missing something. I think a symlink would also be fine. [I like that idea, but I am happy to defer to leaving it in the web dir only if that is preferred.] Now that I know they were out of sync, it clarifies a few issues I was having previously!
I agree with the symlink, but let me do a good merge between the two of them (#483). I want to add some magic as well so it will be able to skip some steps in the workflow if you ask it to.
@robkooper sounds good, but note that I already removed scripts/workflow.R (#576). I diffed with the web/ version first and didn't see anything we'd miss.
@ryankelly-uiuc I just updated and ran the web interface with Sipnet. Happy to report it finished successfully. Here is the output folder:
-rw-rw-r--. 1 apache test 4694 Aug 6 10:57 ensemble.analysis.2000000004.NPP.2003.2003.pdf
-rw-rw-r--. 1 apache test 116 Aug 6 10:57 ensemble.output.2000000004.NPP.2003.2003.Rdata
-rw-rw-r--. 1 apache test 706 Aug 6 10:57 ensemble.samples.2000000004.Rdata
-rw-rw-r--. 1 apache test 93925 Aug 6 10:57 ensemble.ts.2000000004.NPP.2003.2003.pdf
-rw-rw-r--. 1 apache test 5283 Aug 6 10:57 ensemble.ts.2000000004.NPP.2003.2003.Rdata
-rw-rw-r--. 1 apache test 12305 Aug 6 10:57 ensemble.ts.analysis.2000000004.NPP.2003.2003.Rdata
drwxrwsr-x. 3 apache test 31 Aug 6 10:57 out
-rw-rw-r--. 1 apache test 1875 Aug 6 10:57 pecan.CONFIGS.xml
-rw-rw-r--. 1 apache test 1794 Aug 6 10:56 pecan.METProcess.xml
-rw-rw-r--. 1 apache test 1835 Aug 6 10:56 pecan.TRAIT.xml
-rw-rw-r--. 1 apache test 1794 Aug 6 10:56 pecan.xml
drwxrwsr-x. 3 apache test 40 Aug 6 10:56 pft
drwxrwsr-x. 3 apache test 50 Aug 6 10:57 run
-rw-rw-r--. 1 apache test 708917 Aug 6 10:57 samples.Rdata
-rw-rw-r--. 1 apache test 456 Aug 6 10:57 STATUS
-rw-rw-r--. 1 apache test 6868 Aug 6 10:56 workflow.R
-rw-rw-r--. 1 apache test 29861 Aug 6 10:57 workflow.Rout
Great news, @serbinsh. So does that mean this issue is resolved, aside from the unrelated discussion about symlinking workflow.R? (Regarding that, @robkooper wanted to edit workflow.R first, though I don't think that precludes creating the symlink, since web/workflow.R is deleted already from the mainline.)
I guess not as I am still having ensemble errors, I think only when I use advanced options.
> # Run ensemble analysis on model output.
> if (check.status("ENSEMBLE") == 0){
+ status.start("ENSEMBLE")
+ run.ensemble.analysis(TRUE)
+ status.end()
+ }
[1] "----- Variable: GPP"
Warning in readChar(con, 5L, useBytes = TRUE) :
cannot open compressed file '/data/Model_Output/pecan.output/PEcAn_2000000006/ensemble.output.2000000007.GPP.2004.2004.Rdata', probable reason 'No such file or directory'
Error in readChar(con, 5L, useBytes = TRUE) : cannot open the connection
Calls: run.ensemble.analysis -> load -> readChar
> proc.time()
user system elapsed
4148.683 37.985 4188.238
Can you verify all the .nc model output files were generated but the RData was not?
Just to chime in... @serbinsh since you said on the PEcAn call yesterday that you're working through some other issues too, I'm not going to attempt to reproduce this error for now. Keep me posted though.
Yup. I will let you know. I expect to make some minor updates today and push them up. At that time I should know if it is working well or if I am still getting errors.
Shawn
[iPhone]
On Aug 18, 2015, at 8:44 AM, ryankelly-uiuc <notifications@github.com> wrote:
Just to chime in... @serbinsh (https://github.com/serbinsh) since you said on the PEcAn call yesterday that you're working through some other issues too, I'm not going to attempt to reproduce this error for now. Keep me posted though.
— Reply to this email directly or view it on GitHub: https://github.com/PecanProject/pecan/issues/589#issuecomment-132195045
I think this has been fixed
After an update I ran an ensemble run to test ability to run SIPNET from the web interface. The model runs completed successfully but the ensemble analysis failed. I think this is related to ensemble ID. Here is the output folder on the server:
Here is the log:
Note the file is labeled: ensemble.output.NOENSEMBLEID.NPP.2003.2003.Rdata but PEcAn is looking for: ensemble.output.2000000001.NPP.2003.2003.Rdata
Is there something new I need to do so that the ensemble ID is properly added to the file name? @ryankelly-uiuc was this something you were working on?