From 4dd4e7e3bd681e58b900ad337778bdfab72036ed Mon Sep 17 00:00:00 2001
From: abigailkeller <abigail.g.keller1@gmail.com>
Date: Fri, 13 Sep 2024 06:21:11 +0000
Subject: [PATCH] fixing inits update

---
 .Rhistory      | 964 ++++++++++++++++++++++++-------------------------
 R/jointModel.R |   2 +-
 2 files changed, 483 insertions(+), 483 deletions(-)

diff --git a/.Rhistory b/.Rhistory
index 6e6a41b..675406f 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,512 +1,512 @@
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase1.html#initialvalues'),
-sep='\n'))
-#41. initial values check: beta numeric
-n.chain <- 4
+data <- readRDS('../neha_data.rds')
+# initial values should be a list of named lists
 inits <- list()
 for(i in 1:n.chain){
 inits[[i]] <- list(
-mu <- stats::runif(2,0,1),
-p10 <- stats::runif(1,log(0.0001),log(0.08)),
-beta <- "1"
+mu_trad = rowMeans(data$count,na.rm=TRUE)
 )
-names(inits[[i]]) <- c('mu','p10','beta')
 }
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA))),
-initial_values = inits,
-multicore = FALSE),
-paste("Initial values for 'beta' should be numeric.",
-paste0('See the eDNAjoint guide for help formatting ',
-'initial values: '),
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase1.html#initialvalues'),
-sep='\n'))
-#42. initial values check: beta length
-n.chain <- 4
-inits <- list()
-for(i in 1:n.chain){
-inits[[i]] <- list(
-mu <- stats::runif(2,0,1),
-p10 <- stats::runif(1,log(0.0001),log(0.08)),
-beta <- c(1,0)
-)
-names(inits[[i]]) <- c('mu','p10','beta')
+cov = NULL
+family = 'poisson'
+p10priors = c(1,200)
+q = FALSE
+phipriors = NULL
+multicore = TRUE
+initial_values = NULL
+n.chain = 4
+n.iter.burn = 10000
+n.iter.sample = 2500
+thin = 1
+adapt_delta = 0.99
+verbose = TRUE
+seed = NULL
+###
+#convert data to long format
+'%>%' <- magrittr::`%>%`
+# convert qPCR data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+qPCR_all <- as.data.frame(data$qPCR.N) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'N') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR.K_df <- as.data.frame(data$qPCR.K) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'K') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR_all$K <- qPCR.K_df$K
+# convert count data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+count_all <- as.data.frame(data$count) %>%
+dplyr::mutate(L = 1:dim(data$count)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'count') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#' expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+# subset count data to remove sites without traditional samples
+count_df <- as.data.frame(data$count)
+sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ]
+# add site index to count data
+index_match <- as.data.frame(cbind(unique(count_all$L),
+1:dim(sub_count)[1]))
+colnames(index_match) <- c('L','R')
+count_all <- dplyr::left_join(count_all,index_match,by='L')
+# get site indices with paired and unpaired dna samples
+trad_ind <- index_match$L
+dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind]
+# subset qPCR data -- paired
+qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,]
+L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L),
+1:length(unique(qPCR_all_trad$L))))
+colnames(L_match_trad) <- c('L','L_unique')
+qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L')
+# subset qPCR data -- unpaired
+if(length(dna_ind)>0){
+qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,]
+L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L),
+1:length(unique(qPCR_all_dna$L))))
+colnames(L_match_dna) <- c('L','L_unique')
+qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L')
+} else {
+qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4))
+colnames(qPCR_all_dna) <- c('L','N','K','L_unique')
 }
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA))),
-initial_values = inits,
-multicore = FALSE),
-paste("The length of initial values for 'beta' should equal 1.",
-paste0('See the eDNAjoint guide for help formatting ',
-'initial values: '),
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase1.html#initialvalues'),
-sep='\n'))
-#43. initial values check: alpha numeric
-n.chain <- 4
-inits <- list()
-for(i in 1:n.chain){
-inits[[i]] <- list(
-mu <- stats::runif(2,0,1),
-p10 <- stats::runif(1,log(0.0001),log(0.08)),
-alpha <- c("1","2")
+# convert p10 prior
+# p10 prior: convert beta(1,20) to lognormal distribution
+# moment match from beta(alpha,beta) to normal(mu, sigma^2)
+alpha <- p10priors[1]
+beta <- p10priors[2]
+mu <- alpha/(alpha+beta)
+sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1))
+# convert normal(mu, sigma^2) to lognormal(mu, sigma^2)
+mu_ln <- log(mu^2/sqrt(mu^2+sigma_2))
+sigma_2_ln <- log(1+sigma_2/mu^2)
+sigma_ln <- sqrt(sigma_2_ln)
+# create data that will be present in all model variations
+data <- list(
+S = nrow(qPCR_all_trad),
+S_dna = nrow(qPCR_all_dna),
+C = nrow(count_all),
+L = qPCR_all_trad$L_unique,
+L_dna = qPCR_all_dna$L_unique,
+R = count_all$R,
+Nloc_dna = length(unique(qPCR_all_dna$L)),
+Nloc_trad = length(unique(qPCR_all_trad$L)),
+trad_ind = trad_ind,
+dna_ind = as.array(dna_ind),
+E = count_all$count,
+N = qPCR_all_trad$N,
+K = qPCR_all_trad$K,
+N_dna = qPCR_all_dna$N,
+K_dna = qPCR_all_dna$K,
+p10priors = c(mu_ln,sigma_ln),
+control = list(adapt_delta = adapt_delta)
 )
-names(inits[[i]]) <- c('mu','p10','alpha')
-}
-site.cov <- rbind(c(4,1),c(1,1))
-colnames(site.cov) <- c('var_a','var_b')
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-site.cov = site.cov),
-cov = c('var_a','var_b'), initial_values = inits,
-multicore = FALSE),
-paste("Initial values for 'alpha' should be numeric.",
-paste0('See the eDNAjoint guide for help formatting ',
-'initial values: '),
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase2.html#initialvalues'),
-sep='\n'))
-#44. initial values check: alpha length
-n.chain <- 4
+# run model
+out <- rstan::sampling(model,
+data = data,
+cores = 4,
+#' @srrstats {G2.4,G2.4a} explicit conversion to
+#'   integers for sampling arguments
+chains = as.integer(n.chain),
+thin = as.integer(thin),
+warmup = as.integer(n.iter.burn),
+iter = (
+as.integer(n.iter.burn) + as.integer(n.iter.sample)
+),
+init = inits,
+refresh = ifelse(verbose == TRUE,500,0)
+)
+inits
+# initial values should be a list of named lists
 inits <- list()
 for(i in 1:n.chain){
 inits[[i]] <- list(
-mu <- stats::runif(2,0,1),
-p10 <- stats::runif(1,log(0.0001),log(0.08)),
-alpha <- rep(0.1,2)
+mu_trad = rowMeans(data$count,na.rm=TRUE),
+log_p10 = -5,
+beta = 1.5
 )
-names(inits[[i]]) <- c('mu','p10','alpha')
 }
-site.cov <- rbind(c(4,1),c(1,1))
-colnames(site.cov) <- c('var_a','var_b')
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-site.cov = site.cov),
-cov = c('var_a','var_b'), initial_values = inits,
-multicore = FALSE),
-paste(paste0("The length of initial values for 'alpha' should ",
-"equal\\: \\# covariates \\+ 1 \\(i.e., ",
-"including intercept\\)."),
-paste0('See the eDNAjoint guide for help formatting ',
-'initial values: '),
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase2.html#initialvalues'),
-sep='\n'))
-#45. initial values check: q numeric
-n.chain <- 4
+data <- readRDS('../neha_data.rds')
+# initial values should be a list of named lists
 inits <- list()
 for(i in 1:n.chain){
 inits[[i]] <- list(
-q <- "0.1"
+mu_trad = rowMeans(data$count,na.rm=TRUE),
+log_p10 = -5,
+beta = 1.5
 )
-names(inits[[i]]) <- c('q')
 }
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-initial_values = inits,
-multicore = FALSE),
-paste("Initial values for 'q' should be numeric.",
-paste0('See the eDNAjoint guide for help formatting ',
-'initial values: '),
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase1.html#initialvalues'),
-sep='\n'))
-#46. initial values check: q length
-n.chain <- 4
+cov = NULL
+family = 'poisson'
+p10priors = c(1,200)
+q = FALSE
+phipriors = NULL
+multicore = TRUE
+initial_values = NULL
+n.chain = 4
+n.iter.burn = 10000
+n.iter.sample = 2500
+thin = 1
+adapt_delta = 0.99
+verbose = TRUE
+seed = NULL
+###
+#convert data to long format
+'%>%' <- magrittr::`%>%`
+# convert qPCR data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+qPCR_all <- as.data.frame(data$qPCR.N) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'N') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR.K_df <- as.data.frame(data$qPCR.K) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'K') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR_all$K <- qPCR.K_df$K
+# convert count data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+count_all <- as.data.frame(data$count) %>%
+dplyr::mutate(L = 1:dim(data$count)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'count') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#' expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+# subset count data to remove sites without traditional samples
+count_df <- as.data.frame(data$count)
+sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ]
+# add site index to count data
+index_match <- as.data.frame(cbind(unique(count_all$L),
+1:dim(sub_count)[1]))
+colnames(index_match) <- c('L','R')
+count_all <- dplyr::left_join(count_all,index_match,by='L')
+# get site indices with paired and unpaired dna samples
+trad_ind <- index_match$L
+dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind]
+# subset qPCR data -- paired
+qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,]
+L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L),
+1:length(unique(qPCR_all_trad$L))))
+colnames(L_match_trad) <- c('L','L_unique')
+qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L')
+# subset qPCR data -- unpaired
+if(length(dna_ind)>0){
+qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,]
+L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L),
+1:length(unique(qPCR_all_dna$L))))
+colnames(L_match_dna) <- c('L','L_unique')
+qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L')
+} else {
+qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4))
+colnames(qPCR_all_dna) <- c('L','N','K','L_unique')
+}
+# convert p10 prior
+# p10 prior: convert beta(1,20) to lognormal distribution
+# moment match from beta(alpha,beta) to normal(mu, sigma^2)
+alpha <- p10priors[1]
+beta <- p10priors[2]
+mu <- alpha/(alpha+beta)
+sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1))
+# convert normal(mu, sigma^2) to lognormal(mu, sigma^2)
+mu_ln <- log(mu^2/sqrt(mu^2+sigma_2))
+sigma_2_ln <- log(1+sigma_2/mu^2)
+sigma_ln <- sqrt(sigma_2_ln)
+# create data that will be present in all model variations
+data <- list(
+S = nrow(qPCR_all_trad),
+S_dna = nrow(qPCR_all_dna),
+C = nrow(count_all),
+L = qPCR_all_trad$L_unique,
+L_dna = qPCR_all_dna$L_unique,
+R = count_all$R,
+Nloc_dna = length(unique(qPCR_all_dna$L)),
+Nloc_trad = length(unique(qPCR_all_trad$L)),
+trad_ind = trad_ind,
+dna_ind = as.array(dna_ind),
+E = count_all$count,
+N = qPCR_all_trad$N,
+K = qPCR_all_trad$K,
+N_dna = qPCR_all_dna$N,
+K_dna = qPCR_all_dna$K,
+p10priors = c(mu_ln,sigma_ln),
+control = list(adapt_delta = adapt_delta)
+)
+# run model
+out <- rstan::sampling(model,
+data = data,
+cores = 4,
+#' @srrstats {G2.4,G2.4a} explicit conversion to
+#'   integers for sampling arguments
+chains = as.integer(n.chain),
+thin = as.integer(thin),
+warmup = as.integer(n.iter.burn),
+iter = (
+as.integer(n.iter.burn) + as.integer(n.iter.sample)
+),
+init = inits,
+refresh = ifelse(verbose == TRUE,500,0)
+)
+data <- readRDS('../neha_data.rds')
+# initial values should be a list of named lists
 inits <- list()
 for(i in 1:n.chain){
 inits[[i]] <- list(
-q <- c(0.1,0.1)
+mu_trad = rowMeans(data$count,na.rm=TRUE),
+log_p10 = -5,
+beta = 1.5
 )
-names(inits[[i]]) <- c('q')
 }
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-initial_values = inits,
-multicore = FALSE),
-paste(paste0("The length of initial values for 'q' should ",
-"equal: \\# unique gear types \\- 1 \\(i.e., q ",
-"for reference type = 1\\)."),
-paste0('See the eDNAjoint guide for help formatting ',
-'initial values: '),
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase1.html#initialvalues'),
-sep='\n'))
-#47. check length and range of n.chain
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-n.chain  =  c(1,1),multicore = FALSE),
-paste0("n.chain should be an integer > 0 and of length 1."))
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-n.chain = 0,multicore = FALSE),
-paste0("n.chain should be an integer > 0 and of length 1."))
-#48. check length and range of n.iter.sample
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-n.iter.sample = c(1,1),multicore = FALSE),
-paste0("n.iter.sample should be an integer > 0 and of length 1.")
-)
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-n.iter.sample = 0,multicore = FALSE),
-paste0("n.iter.sample should be an integer > 0 and of length 1.")
-)
-#49. check length and range of n.iter.burn
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-n.iter.burn = c(1,1),multicore = FALSE),
-paste0("n.iter.burn should be an integer > 0 and of length 1.")
-)
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-n.iter.burn = 0,multicore = FALSE),
-paste0("n.iter.burn should be an integer > 0 and of length 1.")
-)
-#50. check length and range of thin
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-thin = c(1,1),multicore = FALSE),
-paste0("thin should be an integer > 0 and of length 1.")
-)
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-thin = 0,multicore = FALSE),
-paste0("thin should be an integer > 0 and of length 1.")
-)
-#51. check length and range of adapt_delta
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-adapt_delta = c(0.9,0.9),multicore = FALSE),
-paste0("adapt_delta should be a numeric value > 0 and < 1 and ",
-"of length 1.")
-)
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-adapt_delta = 1.2,multicore = FALSE),
-paste0("adapt_delta should be a numeric value > 0 and < 1 and ",
-"of length 1.")
+cov = NULL
+family = 'poisson'
+p10priors = c(1,200)
+q = FALSE
+phipriors = NULL
+multicore = TRUE
+initial_values = NULL
+n.chain = 4
+n.iter.burn = 10000
+n.iter.sample = 2500
+thin = 1
+adapt_delta = 0.99
+verbose = TRUE
+seed = NULL
+###
+#convert data to long format
+'%>%' <- magrittr::`%>%`
+# convert qPCR data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+qPCR_all <- as.data.frame(data$qPCR.N) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'N') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR.K_df <- as.data.frame(data$qPCR.K) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'K') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR_all$K <- qPCR.K_df$K
+# convert count data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+count_all <- as.data.frame(data$count) %>%
+dplyr::mutate(L = 1:dim(data$count)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'count') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#' expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+# subset count data to remove sites without traditional samples
+count_df <- as.data.frame(data$count)
+sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ]
+# add site index to count data
+index_match <- as.data.frame(cbind(unique(count_all$L),
+1:dim(sub_count)[1]))
+colnames(index_match) <- c('L','R')
+count_all <- dplyr::left_join(count_all,index_match,by='L')
+# get site indices with paired and unpaired dna samples
+trad_ind <- index_match$L
+dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind]
+# subset qPCR data -- paired
+qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,]
+L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L),
+1:length(unique(qPCR_all_trad$L))))
+colnames(L_match_trad) <- c('L','L_unique')
+qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L')
+# subset qPCR data -- unpaired
+if(length(dna_ind)>0){
+qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,]
+L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L),
+1:length(unique(qPCR_all_dna$L))))
+colnames(L_match_dna) <- c('L','L_unique')
+qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L')
+} else {
+qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4))
+colnames(qPCR_all_dna) <- c('L','N','K','L_unique')
+}
+# convert p10 prior
+# p10 prior: convert beta(1,20) to lognormal distribution
+# moment match from beta(alpha,beta) to normal(mu, sigma^2)
+alpha <- p10priors[1]
+beta <- p10priors[2]
+mu <- alpha/(alpha+beta)
+sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1))
+# convert normal(mu, sigma^2) to lognormal(mu, sigma^2)
+mu_ln <- log(mu^2/sqrt(mu^2+sigma_2))
+sigma_2_ln <- log(1+sigma_2/mu^2)
+sigma_ln <- sqrt(sigma_2_ln)
+# create data that will be present in all model variations
+data <- list(
+S = nrow(qPCR_all_trad),
+S_dna = nrow(qPCR_all_dna),
+C = nrow(count_all),
+L = qPCR_all_trad$L_unique,
+L_dna = qPCR_all_dna$L_unique,
+R = count_all$R,
+Nloc_dna = length(unique(qPCR_all_dna$L)),
+Nloc_trad = length(unique(qPCR_all_trad$L)),
+trad_ind = trad_ind,
+dna_ind = as.array(dna_ind),
+E = count_all$count,
+N = qPCR_all_trad$N,
+K = qPCR_all_trad$K,
+N_dna = qPCR_all_dna$N,
+K_dna = qPCR_all_dna$K,
+p10priors = c(mu_ln,sigma_ln),
+control = list(adapt_delta = adapt_delta)
 )
-#52. check length of seed
-expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-count.type = rbind(c(1,2,1),c(1,1,NA))),
-seed = c(1,2),multicore = FALSE),
-paste0("seed should be an integer of length 1.")
+# run model
+out <- rstan::sampling(model,
+data = data,
+cores = 4,
+#' @srrstats {G2.4,G2.4a} explicit conversion to
+#'   integers for sampling arguments
+chains = as.integer(n.chain),
+thin = as.integer(thin),
+warmup = as.integer(n.iter.burn),
+iter = (
+as.integer(n.iter.burn) + as.integer(n.iter.sample)
+),
+init = inits,
+refresh = ifelse(verbose == TRUE,500,0)
 )
-#53. check K <= N
-expect_error(jointModel(data = list(qPCR.K = rbind(c(4,1,1),c(1,1,NA)),
-qPCR.N = rbind(c(3,3,3),c(3,3,NA)),
-count = rbind(c(4,1,1),c(1,1,NA)),
-multicore = FALSE)),
-paste(paste0("N should be >= K in qPCR data. N is the number ",
-"of qPCR replicates per sample, and K is the ",
-"number of positive detections among replicates."),
-'See the eDNAjoint guide for data formatting help: ',
-paste0('https://bookdown.org/abigailkeller/eDNAjoint_',
-'vignette/usecase1.html#prepare-the-data'),
-sep='\n')
+data <- readRDS('../neha_data.rds')
+# initial values should be a list of named lists
+inits <- list()
+for(i in 1:n.chain){
+inits[[i]] <- list(
+mu_trad = rowMeans(data$count,na.rm=TRUE),
+log_p10 = -5,
+beta = 1.5
 )
-})
-devtools::install()
-library(devtools)
-install_github('abigailkeller/eDNAjoint')
-library(tidyverse)
-ggplot2::geom_point(ggplot2::aes(x = c(1,2), y = c(1,2))+
-ggplot2::ggplot()+
-ggplot2::geom_point(ggplot2::aes(x = c(1,2), y = c(1,2)))+
-ggplot2::ggtitle('# survey units necessary to detect
-species presence, given mu')+
-ggplot2::theme_minimal()
-' species presence, given mu')+
-ggplot2::ggplot()+
-ggplot2::geom_point(ggplot2::aes(x = c(1,2), y = c(1,2)))+
-ggplot2::ggtitle(paste0('# survey units necessary to detect',
-' species presence, given mu'))+
-ggplot2::theme_minimal()
-library(eDNAjoint)
-library(eDNAjoint)
-data(gobyData)
-goby.fit <- jointModel(data = gobyData)
-library(eDNAjoint)
-data(gobyData)
-# fit model
-goby.fit <- jointModel(data = gobyData)
-# summarize false positive probability (p10) probability
-jointSummarize(goby.fit$model, par = 'p10')
-library(eDNAjoint)
-data(gobyData)
-# run the joint model with two covariates
-goby.fit <- jointModel(data = gobyData,
-cov = c('Filter_time','Salinity'))
-# summarize false positive probability (p10) probability
-jointSummarize(goby.fit$model, par = 'p10')
-# summarize false positive probability (p10) probability
-jointSummarize(goby.fit$model, par = 'p10')
-library(eDNAjoint)
-data(gobyData)
-# run the joint model with two covariates
-goby.fit <- jointModel(data = gobyData,
-cov = c('Filter_time','Salinity'))
-# summarize false positive probability probability
-jointSummarize(goby.fit$model, par = 'p10')
-detectionPlot(goby.fit$model,
-mu.min = 0.1, mu.max = 1,
-cov.val = c(0,0))
-# plot effort necessary to detect presence
-detectionPlot(goby.fit$model,
-mu.min = 0.1, mu.max = 1,
-cov.val = c(0,0))
-# plot effort necessary to detect presence
-# at turbid sites
-detectionPlot(goby.fit$model,
-mu.min = 0.1, mu.max = 1,
-cov.val = c(1,0))
-A
-modelfit <- goby.fit$model
-mu.min = 0.01
-mu.max <- 1
-# create mu vector
-mu <- seq(from = mu.min, to = mu.max, by = (mu.max-mu.min)/1000)
-cov.val <- c(0,0)
-probability <- 0.9
-qPCR.N <- 3
-# get n samples df
-out <- detectionCalculate(modelfit, mu, cov.val, probability, qPCR.N)
-'%>%' <- magrittr::`%>%`
-out_long <- as.data.frame(out) %>%
-tidyr::pivot_longer(cols =! mu, names_to = 'survey_type')
-length(unique(out_long$survey_type))
-unique(out_long$survey_type)
-sort(unique(out_long$survey_type))
-# get full names
-if(length(unique(out_long$survey_type))==1){
-names <- 'traditional'
-} else if(length(unique(out_long$survey_type))==2){
-names <- c('eDNA','traditional')
-} else{
-names <- 'eDNA'
-for(i in 1:(length(unique(out_long$survey_type))-1)){
-names <- c(names, paste0('traditional (gear type ',i,')'))
-}
 }
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')+
-ggplot2::scale_color_manual(values = sort(unique(out_long$survey_type)),
-names = names)+
-ggplot2::theme_minimal()
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')+
-ggplot2::scale_color_manual(names = sort(unique(out_long$survey_type)),
-values = names)+
-ggplot2::theme_minimal()
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')+
-ggplot2::scale_color_manual(values = sort(unique(out_long$survey_type)),
-labels = names)+
-ggplot2::theme_minimal()
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')+
-ggplot2::scale_color_manual(labels = names)+
-ggplot2::theme_minimal()
-names
-scales::hue_pal()
-scales::hue_pal()[1:2]
-scales::hue_pal(2)
-scales::hue_pal()(2)
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')+
-ggplot2::scale_color_manual(values = scales::hue_pal()(2),
-labels = names)+
-ggplot2::theme_minimal()
-out
-mu.min
-mu.min <- 0.1
-# create mu vector
-mu <- seq(from = mu.min, to = mu.max, by = (mu.max-mu.min)/1000)
-# get n samples df
-out <- detectionCalculate(modelfit, mu, cov.val, probability, qPCR.N)
-# convert to long df
+cov = NULL
+family = 'poisson'
+p10priors = c(1,250)
+q = FALSE
+phipriors = NULL
+multicore = TRUE
+initial_values = NULL
+n.chain = 4
+n.iter.burn = 10000
+n.iter.sample = 2500
+thin = 1
+adapt_delta = 0.99
+verbose = TRUE
+seed = NULL
+###
+#convert data to long format
 '%>%' <- magrittr::`%>%`
-out_long <- as.data.frame(out) %>%
-tidyr::pivot_longer(cols =! mu, names_to = 'survey_type')
-# get full names
-if(length(unique(out_long$survey_type))==1){
-names <- 'traditional'
-} else if(length(unique(out_long$survey_type))==2){
-names <- c('eDNA','traditional')
-} else{
-names <- 'eDNA'
-for(i in 1:(length(unique(out_long$survey_type))-1)){
-names <- c(names, paste0('traditional (gear type ',i,')'))
+# convert qPCR data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+qPCR_all <- as.data.frame(data$qPCR.N) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'N') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR.K_df <- as.data.frame(data$qPCR.K) %>%
+dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'K') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#'   expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+qPCR_all$K <- qPCR.K_df$K
+# convert count data to long format
+#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
+#'   form (i.e., matrix, etc.)
+count_all <- as.data.frame(data$count) %>%
+dplyr::mutate(L = 1:dim(data$count)[1]) %>%
+tidyr::pivot_longer(cols =! L,values_to = 'count') %>%
+#' @srrstats {G2.15} Software does not assume non-missingness and actually
+#' expects it if the number of observations across sites is unequal
+tidyr::drop_na()
+# subset count data to remove sites without traditional samples
+count_df <- as.data.frame(data$count)
+sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ]
+# add site index to count data
+index_match <- as.data.frame(cbind(unique(count_all$L),
+1:dim(sub_count)[1]))
+colnames(index_match) <- c('L','R')
+count_all <- dplyr::left_join(count_all,index_match,by='L')
+# get site indices with paired and unpaired dna samples
+trad_ind <- index_match$L
+dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind]
+# subset qPCR data -- paired
+qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,]
+L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L),
+1:length(unique(qPCR_all_trad$L))))
+colnames(L_match_trad) <- c('L','L_unique')
+qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L')
+# subset qPCR data -- unpaired
+if(length(dna_ind)>0){
+qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,]
+L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L),
+1:length(unique(qPCR_all_dna$L))))
+colnames(L_match_dna) <- c('L','L_unique')
+qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L')
+} else {
+qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4))
+colnames(qPCR_all_dna) <- c('L','N','K','L_unique')
 }
-}
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')+
-ggplot2::scale_color_manual(values = scales::hue_pal()(length(names)),
-labels = names)+
-ggplot2::theme_minimal()
-fit.q <- jointModel(data = greencrabData,family = "negbin", q = TRUE,
-n.chain = 1, n.iter.burn = 500, n.iter.sample = 1000)
-cov.val = NULL
-modelfit <- fit.q$model
-# create mu vector
-mu <- seq(from = mu.min, to = mu.max, by = (mu.max-mu.min)/1000)
-# get n samples df
-out <- detectionCalculate(modelfit, mu, cov.val, probability, qPCR.N)
-# convert to long df
-'%>%' <- magrittr::`%>%`
-out_long <- as.data.frame(out) %>%
-tidyr::pivot_longer(cols =! mu, names_to = 'survey_type')
-# get full names
-if(length(unique(out_long$survey_type))==1){
-names <- 'traditional'
-} else if(length(unique(out_long$survey_type))==2){
-names <- c('eDNA','traditional')
-} else{
-names <- 'eDNA'
-for(i in 1:(length(unique(out_long$survey_type))-1)){
-names <- c(names, paste0('traditional (gear type ',i,')'))
-}
-}
-names
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')
-ggplot2::ggplot(data = out_long)+
-ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type),
-linewidth = 1)+
-ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units',
-color = 'survey type')+
-ggplot2::scale_color_manual(values = scales::hue_pal()(length(names)),
-labels = names)+
-ggplot2::theme_minimal()
-roxygen2::roxygenise()
-install.packages('srr')
-options(repos = c(
-ropenscireviewtools = "https://ropensci-review-tools.r-universe.dev",
-CRAN = "https://cloud.r-project.org"))
-install.packages("srr")
-roxygen2::roxygenise()
-GLNA_grab <- read.csv('../neha_data/GLNA_grab.csv')
-OPAC_grab <- read.csv('../neha_data/OPAC_grab.csv')
-PRLI_grab <- read.csv('../neha_data/PRLI_grab.csv')
-GLNA_dna <- read.csv('../neha_data/eDNAJoint_GLNA_B.csv')
-OPCA_dna <- read.csv('../neha_data/eDNAJoint_OPAC_BD.csv')
-PRLI_dna <- read.csv('../neha_data/eDNAJoint_PRLI_AW.csv')
-OPCA_grab <- read.csv('../neha_data/OPAC_grab.csv')
-GLNA_grab <- read.csv('../neha_data/GLNA_grab.csv')
-OPCA_grab <- read.csv('../neha_data/OPAC_grab.csv')
-PRLI_grab <- read.csv('../neha_data/PRLI_grab.csv')
-GLNA_dna <- read.csv('../neha_data/eDNAJoint_GLNA_B.csv')
-OPCA_dna <- read.csv('../neha_data/eDNAJoint_OPAC_BD.csv')
-PRLI_dna <- read.csv('../neha_data/eDNAJoint_PRLI_AW.csv')
-qPCR_N_wide <- OPCA_dna %>%
-pivot_wider(id_cols=site,
-names_from=biological_replicate,
-values_from=N) %>%
-select(-site)
-# K (number of positive qPCR detections per biological replicate (i.e., water sample))
-qPCR_K_wide <- OPCA_dna %>%
-pivot_wider(id_cols=site,
-names_from=biological_replicate,
-values_from=K) %>%
-select(-site)
-library(tidyverse)
-qPCR_N_wide <- OPCA_dna %>%
-pivot_wider(id_cols=site,
-names_from=biological_replicate,
-values_from=N) %>%
-select(-site)
-# K (number of positive qPCR detections per biological replicate (i.e., water sample))
-qPCR_K_wide <- OPCA_dna %>%
-pivot_wider(id_cols=site,
-names_from=biological_replicate,
-values_from=K) %>%
-select(-site)
-# counts (number of individuals in traditional survey sample)
-count_wide <- OPCA_grab %>%
-pivot_wider(id_cols=site,
-names_from=grab_replicate,
-values_from=count) %>%
-select(-site)
-plotdata <- data.frame(
-x = rowMeans(count_wide,na.rm=TRUE),
-y = rowMeans(qPCR_K_wide,na.rm=TRUE),
-label = 1:dim(count_wide)[1]
-)
-ggplot(data=plotdata,aes(x=x,y=y))+
-geom_point()+
-geom_text(aes(label=factor(label), vjust = -1))+
-#scale_y_continuous(limits=c(3.5,6.8))+
-labs(x='mean grab count',y='mean qPCR detections')+
-ggtitle('GLNA')
-ggplot(data=plotdata,aes(x=x,y=y))+
-geom_point()+
-geom_text(aes(label=factor(label), vjust = -1))+
-#scale_y_continuous(limits=c(3.5,6.8))+
-labs(x='mean grab count',y='mean qPCR detections')+
-ggtitle('OPCA')
-# combine all data into one named list
-# note: make sure to convert the wide dataframes into matrices
+# convert p10 prior
+# p10 prior: convert beta(1,20) to lognormal distribution
+# moment match from beta(alpha,beta) to normal(mu, sigma^2)
+alpha <- p10priors[1]
+beta <- p10priors[2]
+mu <- alpha/(alpha+beta)
+sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1))
+# convert normal(mu, sigma^2) to lognormal(mu, sigma^2)
+mu_ln <- log(mu^2/sqrt(mu^2+sigma_2))
+sigma_2_ln <- log(1+sigma_2/mu^2)
+sigma_ln <- sqrt(sigma_2_ln)
+# create data that will be present in all model variations
 data <- list(
-qPCR.N = as.matrix(qPCR_N_wide),
-qPCR.K = as.matrix(qPCR_K_wide),
-count = as.matrix(count_wide)
+S = nrow(qPCR_all_trad),
+S_dna = nrow(qPCR_all_dna),
+C = nrow(count_all),
+L = qPCR_all_trad$L_unique,
+L_dna = qPCR_all_dna$L_unique,
+R = count_all$R,
+Nloc_dna = length(unique(qPCR_all_dna$L)),
+Nloc_trad = length(unique(qPCR_all_trad$L)),
+trad_ind = trad_ind,
+dna_ind = as.array(dna_ind),
+E = count_all$count,
+N = qPCR_all_trad$N,
+K = qPCR_all_trad$K,
+N_dna = qPCR_all_dna$N,
+K_dna = qPCR_all_dna$K,
+p10priors = c(mu_ln,sigma_ln),
+control = list(adapt_delta = adapt_delta)
+)
+# run model
+out <- rstan::sampling(model,
+data = data,
+cores = 4,
+#' @srrstats {G2.4,G2.4a} explicit conversion to
+#'   integers for sampling arguments
+chains = as.integer(n.chain),
+thin = as.integer(thin),
+warmup = as.integer(n.iter.burn),
+iter = (
+as.integer(n.iter.burn) + as.integer(n.iter.sample)
+),
+init = inits,
+refresh = ifelse(verbose == TRUE,500,0)
 )
-# OPCA grab
-fit <- jointModel(data=data, family='negbin', p10priors = c(1,10))
-library(eDNAjoint)
-# OPCA grab
-fit <- jointModel(data=data, family='negbin', p10priors = c(1,10))
-jointSummarize(fit$model)
diff --git a/R/jointModel.R b/R/jointModel.R
index 39eb8eb..07722ae 100644
--- a/R/jointModel.R
+++ b/R/jointModel.R
@@ -399,7 +399,7 @@ jointModel <- function(data, cov = NULL, family = 'poisson',
 
   # get initial values
   if(isCatch_type(q)){
-    inits <- get_inits(n.chain,qPCR_all,initial_values,cov,q_names,data)
+    inits <- get_inits(n.chain,qPCR_all,initial_values,cov,data,q_names)
   } else {
     inits <- get_inits(n.chain,qPCR_all,initial_values,cov,data)
   }