Skip to content

Commit

Permalink
Merge pull request #579 from ryankelly-uiuc/lastminutetweaks
Browse files Browse the repository at this point in the history
Last minute tweaks
  • Loading branch information
mdietze committed Jul 22, 2015
2 parents 5e0c818 + 3bf3b59 commit 675c8e8
Show file tree
Hide file tree
Showing 8 changed files with 208 additions and 4 deletions.
12 changes: 11 additions & 1 deletion modules/assim.batch/R/pda.mcmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ pda.mcmc <- function(settings, params.id=NULL, param.names=NULL, prior.id=NULL,
}

## Load priors
prior <- pda.load.priors(settings, con)
temp <- pda.load.priors(settings, con)
prior <- temp$prior
settings <- temp$settings
pname <- rownames(prior)
n.param.all <- nrow(prior)

Expand Down Expand Up @@ -119,6 +121,10 @@ pda.mcmc <- function(settings, params.id=NULL, param.names=NULL, prior.id=NULL,
showWarnings=F, recursive=T)
}

## save updated settings XML. Will be overwritten at end, but useful in case of crash
saveXML(listToXml(settings, "pecan"), file=file.path(settings$outdir,
paste0('pecan.pda', settings$assim.batch$ensemble.id, '.xml')))

## --------------------------------- Main MCMC loop --------------------------------- ##
for(i in start:finish){
logger.info(paste("Data assimilation MCMC iteration",i,"of",finish))
Expand All @@ -127,6 +133,10 @@ pda.mcmc <- function(settings, params.id=NULL, param.names=NULL, prior.id=NULL,
if(i %% settings$assim.batch$jump$adapt < 1){
settings <- pda.adjust.jumps(settings, accept.rate, pnames=pname[prior.ind])
accept.rate <- numeric(n.param)

# Save updated settings XML. Will be overwritten at end, but useful in case of crash
saveXML(listToXml(settings, "pecan"), file=file.path(settings$outdir,
paste0('pecan.pda', settings$assim.batch$ensemble.id, '.xml')))
}

for(j in 1:n.param){
Expand Down
84 changes: 84 additions & 0 deletions modules/assim.batch/R/pda.mcmc.recover.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
##' Clean up a failed PDA run
##'
##' @title Clean up a failed PDA run
##' @param all params are the identically named variables in pda.mcmc / pda.emulator
##'
##' @return An updated settings list
##'
##' @author Ryan Kelly
##' @export

# This is just a quick kludgey version, that relies on temporary files to recover a failed pda.mcmc() call. It writes all outputs based on whatever runs were done, and returns the same updated settings list that would have been returned if the run completed. So, recover like this:
#
# read.settings(path/to/original/settings/file)
# settings$assim.batch <- pda.mcmc.recover(settings) # wrap up unfinished run
# settings$assim.batch <- pda.mcmc(settings) # start new pda
pda.mcmc.recover <- function(settings, params.id=NULL, param.names=NULL, prior.id=NULL, chain=NULL,
iter=NULL, adapt=NULL, adj.min=NULL, ar.target=NULL, jvar=NULL, n.knot=NULL) {

if(FALSE){
params.id <- param.names <- prior.id <- chain <- iter <- NULL
n.knot <- adapt <- adj.min <- ar.target <- jvar <- NULL
}

require(coda)

## Handle settings
settings <- pda.settings(
settings=settings, params.id=params.id, param.names=param.names,
prior.id=prior.id, chain=chain, iter=iter, adapt=adapt,
adj.min=adj.min, ar.target=ar.target, jvar=jvar, n.knot=n.knot)

## Open database connection
if(settings$database$bety$write){
con <- try(db.open(settings$database$bety), silent=TRUE)
if(is.character(con)){
con <- NULL
}
} else {
con <- NULL
}

## Load priors
prior <- pda.load.priors(settings, con)$prior
pname <- rownames(prior)
n.param.all <- nrow(prior)

# Get start and finish
params.dummy <- pda.init.params(settings, con, pname, n.param.all)
start <- params.dummy$start
finish <- params.dummy$finish

## Select parameters to constrain
prior.ind <- which(rownames(prior) %in% settings$assim.batch$param.names)
n.param <- length(prior.ind)

## Get the workflow id
if ("workflow" %in% names(settings)) {
workflow.id <- settings$workflow$id
} else {
workflow.id <- -1
}

## Get ensemble id from diagnostic plot dir
ens.ids <- as.numeric(sub("diag.pda", "", dir(settings$outdir, "diag.pda")))
settings$assim.batch$ensemble.id <- as.character(max(ens.ids))


## Load up temp file to recreate params
params = as.matrix(read.table(file.path(settings$outdir, "pda.mcmc.txt")))
colnames(params) <- pname

## Update iters
settings$assim.batch$iter <- finish - nrow(params)

## Save outputs to plots, files, and db
settings <- pda.postprocess(settings, con, params, pname, prior, prior.ind)

## close database connection
if(!is.null(con)) db.close(con)

## Output an updated settings list
return(settings$assim.batch)

} ## end pda.mcmc
2 changes: 1 addition & 1 deletion modules/assim.batch/R/pda.utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ pda.load.priors <- function(settings, con) {
"] but no prior found!"))
}

return(prior.out)
return(list(prior=prior.out, settings=settings))
}


Expand Down
93 changes: 93 additions & 0 deletions modules/assim.batch/vignettes/AssimBatchVignette.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
---
title: "PEcAn.assim.batch Vignette"
author: "Ryan Kelly"
date: "July 12, 2015"
output: html_document
---

## install package from Github

Only needs to be done the first time
```{r}
library(devtools)
install_github("PecanProject/pecan",subdir="all")
```


## Add <assim.batch> tags to pecan.xml

The easiest way to run PEcAn's parameter data assimilation is to add an `<assim.batch>` block to pecan.xml, load the file with `read.settings`, and pass the resulting settings object to `pda.mcmc()`. Here is an example `<assim.batch>` block:

```
<assim.batch>
<iter>100</iter>
<prior>
<path>/path/to/(prior/post).distns.Rdata</path>
</prior>
<param.names>
<param>Amax</param>
</param.names>
<inputs>
<file>
<id>1000000358</id>
<format>Ameriflux.L4</format>
<data.model>
<likelihood>Laplace</likelihood>
<variable.id>297</variable.id>
</data.model>
</file>
</inputs>
<jump>
<ar.target>0.5</ar.target>
<adapt>25</adapt>
<jvar>
<jvar>0.1</jvar>
</jvar>
<adj.min>0.1</adj.min>
</jump>
<diag.plot.iter>25</diag.plot.iter>
</assim.batch>
```


Here are details about the settings:


* `<iter>` Specifies the number of MCMC iterations to run. If continuing a previous MCMC, this is the number of additional iterations, which will be added to the previous total. Defaults to 100 if missing. Ignored by pda.emulator().
* `<prior>` Identifies the prior to be used for PDA. Can be one of either:
+ `<posterior.id>` A posterior ID in BETY specifying the posterior from a previous PEcAn analysis (e.g., meta-analysis or previous PDA) to be used as the prior for PDA. Defaults to the most recent relevant posterior in the database if omitted (and no `<path>` specified instead; see below).
+ `<path>` As an alternative to using a posterior ID, can specify a file path to either a `prior.distns.Rdata` or `post.distns.Rdata` file generated from an earlier analysis. Conceptually, using a posterior distribution as the prior for PDA is preferred, as this allows the multiple analyses to work together to iteratively constrain parameters. In practice, previous analyses may have over-constrained parameters to ranges that do not actually optimize model outputs, so using a less informative prior for PDA might yield better results.
* `<param.names>` The names of parameters to be constrained by assimilation, listed in individual `<param>` tags. These must be the standard names given by the trait dictionary, i.e.:

```
data(trait.dictionary, package = "PEcAn.utils")
trait.dictionary[,c("id", "figid")]
```

* `<inputs>` Observation data to be compared to the model. In principle, can be one or more datasets, specified in a variety of ways. In practice, the code is only tested for a single dataset currently, and assumes the input is Ameriflux NEE.
+ `<file>` Denotes a set of tags for a single input. Would be repeated for multiple datasets once that functionality is supported.
+ `<id>` BETY input ID for looking up the input. Will be used preferentially over `<path>` or `<source>` (below).
+ `<path>` File path to the input. Only used if no `<id>` is given.
+ `<source>` A standardized source of input data (e.g., Ameriflux). Not implemented yet, but the idea would be similar to the met workflow, PEcAn would be able to use standard data sources automatically where available. Only used if no `<id>` or `<path>` is given.
+ `<format>` An identifier to tell PEcAn how to handle the input. Currently allows either `Ameriflux.L2` or `Ameriflux.L4`.
+ `<data.model>` Block for specifying the data model to be used with this dataset. Currently ignored since all data are assumed to be Ameriflux NEE and treated identically.
+ `<likelihood>` Identifier for the likelihood to use. E.g., the Ameriflux data use a Laplacian likelihood.
+ `<variable.id>` The BETY variable ID associated with this dataset. The idea is that specific preprocessing steps (e.g., estimating heteroskedastic error for tower NEE) would be associated with particular IDs. Could automate further by assigning default `<likelihood>` to variable.id values (allowing `<likelihood>` to be omitted from pecan.xml). And/or could add a separate tag `<preprocess>` that could specify a function to override any default associated with the variable ID.
* `<jump>`
+ `<ar.target>` Target acceptance rate for the adaptive jump algorithm. Defaults to 0.5 if missing.
+ `<adapt>` Number of iterations between jump variance adaptations. Defaults to `floor(iter/10)` if missing.
+ `<jvar>` Initial jump variances for proposing parameter values, listed in individual `<jvar>` tags (one for each `<param>` specified in `<param.names>`). Will be adjusted adaptively. Defaults to 1/10 the prior variance if missing.
+ `<adj.min>` Minimum factor by which to reduce jump variance when adapting. Prevents jump variances from degenerating to 0. Defaults to 0.1 if missing.
* `<diag.plot.iter>` Interval between saving diagnostic plots. Omit or set to NULL to skip them.
* `<params.id>` (Not shown.) A BETY dbfile ID for an MCMC output from previous PDA. If specified, that file is loaded, the new MCMC starts from the last parameter values of the previous, and when finished the extended chain is saved as a new output. If missing, then MCMC starts fresh from prior median parameter values. Regardless, the MCMC parameter values of the PDA are saved to file and inserted in BETY, and the new dbfile ID is inserted into `<params.id>`. The `pda.mcmc()` funtion returns the `<assim.batch>` settings, which can then be saved. Then, calling a new round of PDA using these returned settings will automatically continue the previous MCMC.



## Run PDA







2 changes: 1 addition & 1 deletion modules/uncertainty/R/run.ensemble.analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ read.ensemble.ts <- function(model, ensemble.id=NULL, variable=NULL, start.year=
# Can specify ensemble ids manually. If not, look in settings. If none there, just look in samples.Rdata, which for backwards compatibility still contains the sample info for (the most recent) sensitivity and ensemble analysis combined.
if(!is.null(ensemble.id)) {
fname <- ensemble.filename(settings, "ensemble.samples", "Rdata",
ensemble.id=ens.ensemble.id, all.var.yr=TRUE)
ensemble.id=ensemble.id, all.var.yr=TRUE)
} else if(!is.null(settings$ensemble$ensemble.id)) {
ensemble.id <- settings$ensemble$ensemble.id
fname <- ensemble.filename(settings, "ensemble.samples", "Rdata",
Expand Down
15 changes: 15 additions & 0 deletions scripts/workflow.pda.recover.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
library(PEcAn.all)

# Insert settings file path here. Use the same one supplied to the original failed workflow
settings.file = ""

# Read in settings
settings <- read.settings(settings.file)

# Clean and wrap up failed PDA MCMC
settings$assim.batch <- pda.mcmc.recover(settings)

# If desired, now proceed to complete the run
status.start("PDA")
settings$assim.batch <- pda.mcmc(settings)
status.end()
3 changes: 3 additions & 0 deletions utils/R/run.write.configs.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ run.write.configs <- function(settings, write = TRUE, ens.sample.method="halton"
if(!is.na(posterior.files[i])) {
# Load specified file
load(file.path(outdirs[i], posterior.files[i]))
if(!exists('prior.distns') & exists('post.distns')) {
prior.distns <- post.distns
}
} else {
# Default to most recent posterior in the workflow, or the prior if there is none
fname = file.path(outdirs[i], 'post.distns.Rdata')
Expand Down
1 change: 0 additions & 1 deletion web/workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ if (check.status("SENSITIVITY") == 0){
if(('assim.batch' %in% names(settings))) {
status.start("PDA")
settings$assim.batch <- pda.mcmc(settings)
saveXML(listToXml(settings, "pecan"), file=file.path(settings$outdir, 'pecan.pda.xml'))
status.end()
}

Expand Down

0 comments on commit 675c8e8

Please sign in to comment.