From 4dd4e7e3bd681e58b900ad337778bdfab72036ed Mon Sep 17 00:00:00 2001 From: abigailkeller Date: Fri, 13 Sep 2024 06:21:11 +0000 Subject: [PATCH] fixing inits update --- .Rhistory | 964 ++++++++++++++++++++++++------------------------- R/jointModel.R | 2 +- 2 files changed, 483 insertions(+), 483 deletions(-) diff --git a/.Rhistory b/.Rhistory index 6e6a41b..675406f 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,512 +1,512 @@ -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase1.html#initialvalues'), -sep='\n')) -#41. initial values check: beta numeric -n.chain <- 4 +data <- readRDS('../neha_data.rds') +# initial values should be a list of named lists inits <- list() for(i in 1:n.chain){ inits[[i]] <- list( -mu <- stats::runif(2,0,1), -p10 <- stats::runif(1,log(0.0001),log(0.08)), -beta <- "1" +mu_trad = rowMeans(data$count,na.rm=TRUE) ) -names(inits[[i]]) <- c('mu','p10','beta') } -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA))), -initial_values = inits, -multicore = FALSE), -paste("Initial values for 'beta' should be numeric.", -paste0('See the eDNAjoint guide for help formatting ', -'initial values: '), -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase1.html#initialvalues'), -sep='\n')) -#42. initial values check: beta length -n.chain <- 4 -inits <- list() -for(i in 1:n.chain){ -inits[[i]] <- list( -mu <- stats::runif(2,0,1), -p10 <- stats::runif(1,log(0.0001),log(0.08)), -beta <- c(1,0) -) -names(inits[[i]]) <- c('mu','p10','beta') +cov = NULL +family = 'poisson' +p10priors = c(1,200) +q = FALSE +phipriors = NULL +multicore = TRUE +initial_values = NULL +n.chain = 4 +n.iter.burn = 10000 +n.iter.sample = 2500 +thin = 1 +adapt_delta = 0.99 +verbose = TRUE +seed = NULL +### +#convert data to long format +'%>%' <- magrittr::`%>%` +# convert qPCR data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +qPCR_all <- as.data.frame(data$qPCR.N) %>% +dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'N') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR.K_df <- as.data.frame(data$qPCR.K) %>% +dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'K') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR_all$K <- qPCR.K_df$K +# convert count data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +count_all <- as.data.frame(data$count) %>% +dplyr::mutate(L = 1:dim(data$count)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'count') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +# subset count data to remove sites without traditional samples +count_df <- as.data.frame(data$count) +sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ] +# add site index to count data +index_match <- as.data.frame(cbind(unique(count_all$L), +1:dim(sub_count)[1])) +colnames(index_match) <- c('L','R') +count_all <- dplyr::left_join(count_all,index_match,by='L') +# get site indices with paired and unpaired dna samples +trad_ind <- index_match$L +dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind] +# subset qPCR data -- paired +qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,] +L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L), +1:length(unique(qPCR_all_trad$L)))) +colnames(L_match_trad) <- c('L','L_unique') +qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L') +# subset qPCR data -- unpaired +if(length(dna_ind)>0){ +qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,] +L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L), +1:length(unique(qPCR_all_dna$L)))) +colnames(L_match_dna) <- c('L','L_unique') +qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L') +} else { +qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4)) +colnames(qPCR_all_dna) <- c('L','N','K','L_unique') } -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA))), -initial_values = inits, -multicore = FALSE), -paste("The length of initial values for 'beta' should equal 1.", -paste0('See the eDNAjoint guide for help formatting ', -'initial values: '), -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase1.html#initialvalues'), -sep='\n')) -#43. initial values check: alpha numeric -n.chain <- 4 -inits <- list() -for(i in 1:n.chain){ -inits[[i]] <- list( -mu <- stats::runif(2,0,1), -p10 <- stats::runif(1,log(0.0001),log(0.08)), -alpha <- c("1","2") +# convert p10 prior +# p10 prior: convert beta(1,20) to lognormal distribution +# moment match from beta(alpha,beta) to normal(mu, sigma^2) +alpha <- p10priors[1] +beta <- p10priors[2] +mu <- alpha/(alpha+beta) +sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1)) +# convert normal(mu, sigma^2) to lognormal(mu, sigma^2) +mu_ln <- log(mu^2/sqrt(mu^2+sigma_2)) +sigma_2_ln <- log(1+sigma_2/mu^2) +sigma_ln <- sqrt(sigma_2_ln) +# create data that will be present in all model variations +data <- list( +S = nrow(qPCR_all_trad), +S_dna = nrow(qPCR_all_dna), +C = nrow(count_all), +L = qPCR_all_trad$L_unique, +L_dna = qPCR_all_dna$L_unique, +R = count_all$R, +Nloc_dna = length(unique(qPCR_all_dna$L)), +Nloc_trad = length(unique(qPCR_all_trad$L)), +trad_ind = trad_ind, +dna_ind = as.array(dna_ind), +E = count_all$count, +N = qPCR_all_trad$N, +K = qPCR_all_trad$K, +N_dna = qPCR_all_dna$N, +K_dna = qPCR_all_dna$K, +p10priors = c(mu_ln,sigma_ln), +control = list(adapt_delta = adapt_delta) ) -names(inits[[i]]) <- c('mu','p10','alpha') -} -site.cov <- rbind(c(4,1),c(1,1)) -colnames(site.cov) <- c('var_a','var_b') -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -site.cov = site.cov), -cov = c('var_a','var_b'), initial_values = inits, -multicore = FALSE), -paste("Initial values for 'alpha' should be numeric.", -paste0('See the eDNAjoint guide for help formatting ', -'initial values: '), -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase2.html#initialvalues'), -sep='\n')) -#44. initial values check: alpha length -n.chain <- 4 +# run model +out <- rstan::sampling(model, +data = data, +cores = 4, +#' @srrstats {G2.4,G2.4a} explicit conversion to +#' integers for sampling arguments +chains = as.integer(n.chain), +thin = as.integer(thin), +warmup = as.integer(n.iter.burn), +iter = ( +as.integer(n.iter.burn) + as.integer(n.iter.sample) +), +init = inits, +refresh = ifelse(verbose == TRUE,500,0) +) +inits +# initial values should be a list of named lists inits <- list() for(i in 1:n.chain){ inits[[i]] <- list( -mu <- stats::runif(2,0,1), -p10 <- stats::runif(1,log(0.0001),log(0.08)), -alpha <- rep(0.1,2) +mu_trad = rowMeans(data$count,na.rm=TRUE), +log_p10 = -5, +beta = 1.5 ) -names(inits[[i]]) <- c('mu','p10','alpha') } -site.cov <- rbind(c(4,1),c(1,1)) -colnames(site.cov) <- c('var_a','var_b') -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -site.cov = site.cov), -cov = c('var_a','var_b'), initial_values = inits, -multicore = FALSE), -paste(paste0("The length of initial values for 'alpha' should ", -"equal\\: \\# covariates \\+ 1 \\(i.e., ", -"including intercept\\)."), -paste0('See the eDNAjoint guide for help formatting ', -'initial values: '), -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase2.html#initialvalues'), -sep='\n')) -#45. initial values check: q numeric -n.chain <- 4 +data <- readRDS('../neha_data.rds') +# initial values should be a list of named lists inits <- list() for(i in 1:n.chain){ inits[[i]] <- list( -q <- "0.1" +mu_trad = rowMeans(data$count,na.rm=TRUE), +log_p10 = -5, +beta = 1.5 ) -names(inits[[i]]) <- c('q') } -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -initial_values = inits, -multicore = FALSE), -paste("Initial values for 'q' should be numeric.", -paste0('See the eDNAjoint guide for help formatting ', -'initial values: '), -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase1.html#initialvalues'), -sep='\n')) -#46. initial values check: q length -n.chain <- 4 +cov = NULL +family = 'poisson' +p10priors = c(1,200) +q = FALSE +phipriors = NULL +multicore = TRUE +initial_values = NULL +n.chain = 4 +n.iter.burn = 10000 +n.iter.sample = 2500 +thin = 1 +adapt_delta = 0.99 +verbose = TRUE +seed = NULL +### +#convert data to long format +'%>%' <- magrittr::`%>%` +# convert qPCR data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +qPCR_all <- as.data.frame(data$qPCR.N) %>% +dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'N') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR.K_df <- as.data.frame(data$qPCR.K) %>% +dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'K') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR_all$K <- qPCR.K_df$K +# convert count data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +count_all <- as.data.frame(data$count) %>% +dplyr::mutate(L = 1:dim(data$count)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'count') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +# subset count data to remove sites without traditional samples +count_df <- as.data.frame(data$count) +sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ] +# add site index to count data +index_match <- as.data.frame(cbind(unique(count_all$L), +1:dim(sub_count)[1])) +colnames(index_match) <- c('L','R') +count_all <- dplyr::left_join(count_all,index_match,by='L') +# get site indices with paired and unpaired dna samples +trad_ind <- index_match$L +dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind] +# subset qPCR data -- paired +qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,] +L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L), +1:length(unique(qPCR_all_trad$L)))) +colnames(L_match_trad) <- c('L','L_unique') +qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L') +# subset qPCR data -- unpaired +if(length(dna_ind)>0){ +qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,] +L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L), +1:length(unique(qPCR_all_dna$L)))) +colnames(L_match_dna) <- c('L','L_unique') +qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L') +} else { +qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4)) +colnames(qPCR_all_dna) <- c('L','N','K','L_unique') +} +# convert p10 prior +# p10 prior: convert beta(1,20) to lognormal distribution +# moment match from beta(alpha,beta) to normal(mu, sigma^2) +alpha <- p10priors[1] +beta <- p10priors[2] +mu <- alpha/(alpha+beta) +sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1)) +# convert normal(mu, sigma^2) to lognormal(mu, sigma^2) +mu_ln <- log(mu^2/sqrt(mu^2+sigma_2)) +sigma_2_ln <- log(1+sigma_2/mu^2) +sigma_ln <- sqrt(sigma_2_ln) +# create data that will be present in all model variations +data <- list( +S = nrow(qPCR_all_trad), +S_dna = nrow(qPCR_all_dna), +C = nrow(count_all), +L = qPCR_all_trad$L_unique, +L_dna = qPCR_all_dna$L_unique, +R = count_all$R, +Nloc_dna = length(unique(qPCR_all_dna$L)), +Nloc_trad = length(unique(qPCR_all_trad$L)), +trad_ind = trad_ind, +dna_ind = as.array(dna_ind), +E = count_all$count, +N = qPCR_all_trad$N, +K = qPCR_all_trad$K, +N_dna = qPCR_all_dna$N, +K_dna = qPCR_all_dna$K, +p10priors = c(mu_ln,sigma_ln), +control = list(adapt_delta = adapt_delta) +) +# run model +out <- rstan::sampling(model, +data = data, +cores = 4, +#' @srrstats {G2.4,G2.4a} explicit conversion to +#' integers for sampling arguments +chains = as.integer(n.chain), +thin = as.integer(thin), +warmup = as.integer(n.iter.burn), +iter = ( +as.integer(n.iter.burn) + as.integer(n.iter.sample) +), +init = inits, +refresh = ifelse(verbose == TRUE,500,0) +) +data <- readRDS('../neha_data.rds') +# initial values should be a list of named lists inits <- list() for(i in 1:n.chain){ inits[[i]] <- list( -q <- c(0.1,0.1) +mu_trad = rowMeans(data$count,na.rm=TRUE), +log_p10 = -5, +beta = 1.5 ) -names(inits[[i]]) <- c('q') } -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -initial_values = inits, -multicore = FALSE), -paste(paste0("The length of initial values for 'q' should ", -"equal: \\# unique gear types \\- 1 \\(i.e., q ", -"for reference type = 1\\)."), -paste0('See the eDNAjoint guide for help formatting ', -'initial values: '), -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase1.html#initialvalues'), -sep='\n')) -#47. check length and range of n.chain -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -n.chain = c(1,1),multicore = FALSE), -paste0("n.chain should be an integer > 0 and of length 1.")) -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -n.chain = 0,multicore = FALSE), -paste0("n.chain should be an integer > 0 and of length 1.")) -#48. check length and range of n.iter.sample -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -n.iter.sample = c(1,1),multicore = FALSE), -paste0("n.iter.sample should be an integer > 0 and of length 1.") -) -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -n.iter.sample = 0,multicore = FALSE), -paste0("n.iter.sample should be an integer > 0 and of length 1.") -) -#49. check length and range of n.iter.burn -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -n.iter.burn = c(1,1),multicore = FALSE), -paste0("n.iter.burn should be an integer > 0 and of length 1.") -) -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -n.iter.burn = 0,multicore = FALSE), -paste0("n.iter.burn should be an integer > 0 and of length 1.") -) -#50. check length and range of thin -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -thin = c(1,1),multicore = FALSE), -paste0("thin should be an integer > 0 and of length 1.") -) -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -thin = 0,multicore = FALSE), -paste0("thin should be an integer > 0 and of length 1.") -) -#51. check length and range of adapt_delta -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -adapt_delta = c(0.9,0.9),multicore = FALSE), -paste0("adapt_delta should be a numeric value > 0 and < 1 and ", -"of length 1.") -) -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -adapt_delta = 1.2,multicore = FALSE), -paste0("adapt_delta should be a numeric value > 0 and < 1 and ", -"of length 1.") +cov = NULL +family = 'poisson' +p10priors = c(1,200) +q = FALSE +phipriors = NULL +multicore = TRUE +initial_values = NULL +n.chain = 4 +n.iter.burn = 10000 +n.iter.sample = 2500 +thin = 1 +adapt_delta = 0.99 +verbose = TRUE +seed = NULL +### +#convert data to long format +'%>%' <- magrittr::`%>%` +# convert qPCR data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +qPCR_all <- as.data.frame(data$qPCR.N) %>% +dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'N') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR.K_df <- as.data.frame(data$qPCR.K) %>% +dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'K') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR_all$K <- qPCR.K_df$K +# convert count data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +count_all <- as.data.frame(data$count) %>% +dplyr::mutate(L = 1:dim(data$count)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'count') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +# subset count data to remove sites without traditional samples +count_df <- as.data.frame(data$count) +sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ] +# add site index to count data +index_match <- as.data.frame(cbind(unique(count_all$L), +1:dim(sub_count)[1])) +colnames(index_match) <- c('L','R') +count_all <- dplyr::left_join(count_all,index_match,by='L') +# get site indices with paired and unpaired dna samples +trad_ind <- index_match$L +dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind] +# subset qPCR data -- paired +qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,] +L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L), +1:length(unique(qPCR_all_trad$L)))) +colnames(L_match_trad) <- c('L','L_unique') +qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L') +# subset qPCR data -- unpaired +if(length(dna_ind)>0){ +qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,] +L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L), +1:length(unique(qPCR_all_dna$L)))) +colnames(L_match_dna) <- c('L','L_unique') +qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L') +} else { +qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4)) +colnames(qPCR_all_dna) <- c('L','N','K','L_unique') +} +# convert p10 prior +# p10 prior: convert beta(1,20) to lognormal distribution +# moment match from beta(alpha,beta) to normal(mu, sigma^2) +alpha <- p10priors[1] +beta <- p10priors[2] +mu <- alpha/(alpha+beta) +sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1)) +# convert normal(mu, sigma^2) to lognormal(mu, sigma^2) +mu_ln <- log(mu^2/sqrt(mu^2+sigma_2)) +sigma_2_ln <- log(1+sigma_2/mu^2) +sigma_ln <- sqrt(sigma_2_ln) +# create data that will be present in all model variations +data <- list( +S = nrow(qPCR_all_trad), +S_dna = nrow(qPCR_all_dna), +C = nrow(count_all), +L = qPCR_all_trad$L_unique, +L_dna = qPCR_all_dna$L_unique, +R = count_all$R, +Nloc_dna = length(unique(qPCR_all_dna$L)), +Nloc_trad = length(unique(qPCR_all_trad$L)), +trad_ind = trad_ind, +dna_ind = as.array(dna_ind), +E = count_all$count, +N = qPCR_all_trad$N, +K = qPCR_all_trad$K, +N_dna = qPCR_all_dna$N, +K_dna = qPCR_all_dna$K, +p10priors = c(mu_ln,sigma_ln), +control = list(adapt_delta = adapt_delta) ) -#52. check length of seed -expect_error(jointModel(data = list(qPCR.K = rbind(c(1,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -count.type = rbind(c(1,2,1),c(1,1,NA))), -seed = c(1,2),multicore = FALSE), -paste0("seed should be an integer of length 1.") +# run model +out <- rstan::sampling(model, +data = data, +cores = 4, +#' @srrstats {G2.4,G2.4a} explicit conversion to +#' integers for sampling arguments +chains = as.integer(n.chain), +thin = as.integer(thin), +warmup = as.integer(n.iter.burn), +iter = ( +as.integer(n.iter.burn) + as.integer(n.iter.sample) +), +init = inits, +refresh = ifelse(verbose == TRUE,500,0) ) -#53. check K <= N -expect_error(jointModel(data = list(qPCR.K = rbind(c(4,1,1),c(1,1,NA)), -qPCR.N = rbind(c(3,3,3),c(3,3,NA)), -count = rbind(c(4,1,1),c(1,1,NA)), -multicore = FALSE)), -paste(paste0("N should be >= K in qPCR data. N is the number ", -"of qPCR replicates per sample, and K is the ", -"number of positive detections among replicates."), -'See the eDNAjoint guide for data formatting help: ', -paste0('https://bookdown.org/abigailkeller/eDNAjoint_', -'vignette/usecase1.html#prepare-the-data'), -sep='\n') +data <- readRDS('../neha_data.rds') +# initial values should be a list of named lists +inits <- list() +for(i in 1:n.chain){ +inits[[i]] <- list( +mu_trad = rowMeans(data$count,na.rm=TRUE), +log_p10 = -5, +beta = 1.5 ) -}) -devtools::install() -library(devtools) -install_github('abigailkeller/eDNAjoint') -library(tidyverse) -ggplot2::geom_point(ggplot2::aes(x = c(1,2), y = c(1,2))+ -ggplot2::ggplot()+ -ggplot2::geom_point(ggplot2::aes(x = c(1,2), y = c(1,2)))+ -ggplot2::ggtitle('# survey units necessary to detect -species presence, given mu')+ -ggplot2::theme_minimal() -' species presence, given mu')+ -ggplot2::ggplot()+ -ggplot2::geom_point(ggplot2::aes(x = c(1,2), y = c(1,2)))+ -ggplot2::ggtitle(paste0('# survey units necessary to detect', -' species presence, given mu'))+ -ggplot2::theme_minimal() -library(eDNAjoint) -library(eDNAjoint) -data(gobyData) -goby.fit <- jointModel(data = gobyData) -library(eDNAjoint) -data(gobyData) -# fit model -goby.fit <- jointModel(data = gobyData) -# summarize false positive probability (p10) probability -jointSummarize(goby.fit$model, par = 'p10') -library(eDNAjoint) -data(gobyData) -# run the joint model with two covariates -goby.fit <- jointModel(data = gobyData, -cov = c('Filter_time','Salinity')) -# summarize false positive probability (p10) probability -jointSummarize(goby.fit$model, par = 'p10') -# summarize false positive probability (p10) probability -jointSummarize(goby.fit$model, par = 'p10') -library(eDNAjoint) -data(gobyData) -# run the joint model with two covariates -goby.fit <- jointModel(data = gobyData, -cov = c('Filter_time','Salinity')) -# summarize false positive probability probability -jointSummarize(goby.fit$model, par = 'p10') -detectionPlot(goby.fit$model, -mu.min = 0.1, mu.max = 1, -cov.val = c(0,0)) -# plot effort necessary to detect presence -detectionPlot(goby.fit$model, -mu.min = 0.1, mu.max = 1, -cov.val = c(0,0)) -# plot effort necessary to detect presence -# at turbid sites -detectionPlot(goby.fit$model, -mu.min = 0.1, mu.max = 1, -cov.val = c(1,0)) -A -modelfit <- goby.fit$model -mu.min = 0.01 -mu.max <- 1 -# create mu vector -mu <- seq(from = mu.min, to = mu.max, by = (mu.max-mu.min)/1000) -cov.val <- c(0,0) -probability <- 0.9 -qPCR.N <- 3 -# get n samples df -out <- detectionCalculate(modelfit, mu, cov.val, probability, qPCR.N) -'%>%' <- magrittr::`%>%` -out_long <- as.data.frame(out) %>% -tidyr::pivot_longer(cols =! mu, names_to = 'survey_type') -length(unique(out_long$survey_type)) -unique(out_long$survey_type) -sort(unique(out_long$survey_type)) -# get full names -if(length(unique(out_long$survey_type))==1){ -names <- 'traditional' -} else if(length(unique(out_long$survey_type))==2){ -names <- c('eDNA','traditional') -} else{ -names <- 'eDNA' -for(i in 1:(length(unique(out_long$survey_type))-1)){ -names <- c(names, paste0('traditional (gear type ',i,')')) -} } -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type')+ -ggplot2::scale_color_manual(values = sort(unique(out_long$survey_type)), -names = names)+ -ggplot2::theme_minimal() -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type')+ -ggplot2::scale_color_manual(names = sort(unique(out_long$survey_type)), -values = names)+ -ggplot2::theme_minimal() -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type')+ -ggplot2::scale_color_manual(values = sort(unique(out_long$survey_type)), -labels = names)+ -ggplot2::theme_minimal() -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type')+ -ggplot2::scale_color_manual(labels = names)+ -ggplot2::theme_minimal() -names -scales::hue_pal() -scales::hue_pal()[1:2] -scales::hue_pal(2) -scales::hue_pal()(2) -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type')+ -ggplot2::scale_color_manual(values = scales::hue_pal()(2), -labels = names)+ -ggplot2::theme_minimal() -out -mu.min -mu.min <- 0.1 -# create mu vector -mu <- seq(from = mu.min, to = mu.max, by = (mu.max-mu.min)/1000) -# get n samples df -out <- detectionCalculate(modelfit, mu, cov.val, probability, qPCR.N) -# convert to long df +cov = NULL +family = 'poisson' +p10priors = c(1,250) +q = FALSE +phipriors = NULL +multicore = TRUE +initial_values = NULL +n.chain = 4 +n.iter.burn = 10000 +n.iter.sample = 2500 +thin = 1 +adapt_delta = 0.99 +verbose = TRUE +seed = NULL +### +#convert data to long format '%>%' <- magrittr::`%>%` -out_long <- as.data.frame(out) %>% -tidyr::pivot_longer(cols =! mu, names_to = 'survey_type') -# get full names -if(length(unique(out_long$survey_type))==1){ -names <- 'traditional' -} else if(length(unique(out_long$survey_type))==2){ -names <- c('eDNA','traditional') -} else{ -names <- 'eDNA' -for(i in 1:(length(unique(out_long$survey_type))-1)){ -names <- c(names, paste0('traditional (gear type ',i,')')) +# convert qPCR data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +qPCR_all <- as.data.frame(data$qPCR.N) %>% +dplyr::mutate(L = 1:dim(data$qPCR.N)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'N') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR.K_df <- as.data.frame(data$qPCR.K) %>% +dplyr::mutate(L = 1:dim(data$qPCR.K)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'K') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +qPCR_all$K <- qPCR.K_df$K +# convert count data to long format +#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular +#' form (i.e., matrix, etc.) +count_all <- as.data.frame(data$count) %>% +dplyr::mutate(L = 1:dim(data$count)[1]) %>% +tidyr::pivot_longer(cols =! L,values_to = 'count') %>% +#' @srrstats {G2.15} Software does not assume non-missingness and actually +#' expects it if the number of observations across sites is unequal +tidyr::drop_na() +# subset count data to remove sites without traditional samples +count_df <- as.data.frame(data$count) +sub_count <- count_df[rowSums(is.na(count_df)) != ncol(count_df), ] +# add site index to count data +index_match <- as.data.frame(cbind(unique(count_all$L), +1:dim(sub_count)[1])) +colnames(index_match) <- c('L','R') +count_all <- dplyr::left_join(count_all,index_match,by='L') +# get site indices with paired and unpaired dna samples +trad_ind <- index_match$L +dna_ind <- unique(qPCR_all$L)[!unique(qPCR_all$L)%in% trad_ind] +# subset qPCR data -- paired +qPCR_all_trad <- qPCR_all[qPCR_all$L %in% trad_ind,] +L_match_trad <- as.data.frame(cbind(unique(qPCR_all_trad$L), +1:length(unique(qPCR_all_trad$L)))) +colnames(L_match_trad) <- c('L','L_unique') +qPCR_all_trad <- dplyr::left_join(qPCR_all_trad,L_match_trad,by='L') +# subset qPCR data -- unpaired +if(length(dna_ind)>0){ +qPCR_all_dna <- qPCR_all[qPCR_all$L %in% dna_ind,] +L_match_dna <- as.data.frame(cbind(unique(qPCR_all_dna$L), +1:length(unique(qPCR_all_dna$L)))) +colnames(L_match_dna) <- c('L','L_unique') +qPCR_all_dna <- dplyr::left_join(qPCR_all_dna,L_match_dna,by='L') +} else { +qPCR_all_dna <- as.data.frame(matrix(NA,nrow=0,ncol=4)) +colnames(qPCR_all_dna) <- c('L','N','K','L_unique') } -} -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type')+ -ggplot2::scale_color_manual(values = scales::hue_pal()(length(names)), -labels = names)+ -ggplot2::theme_minimal() -fit.q <- jointModel(data = greencrabData,family = "negbin", q = TRUE, -n.chain = 1, n.iter.burn = 500, n.iter.sample = 1000) -cov.val = NULL -modelfit <- fit.q$model -# create mu vector -mu <- seq(from = mu.min, to = mu.max, by = (mu.max-mu.min)/1000) -# get n samples df -out <- detectionCalculate(modelfit, mu, cov.val, probability, qPCR.N) -# convert to long df -'%>%' <- magrittr::`%>%` -out_long <- as.data.frame(out) %>% -tidyr::pivot_longer(cols =! mu, names_to = 'survey_type') -# get full names -if(length(unique(out_long$survey_type))==1){ -names <- 'traditional' -} else if(length(unique(out_long$survey_type))==2){ -names <- c('eDNA','traditional') -} else{ -names <- 'eDNA' -for(i in 1:(length(unique(out_long$survey_type))-1)){ -names <- c(names, paste0('traditional (gear type ',i,')')) -} -} -names -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type') -ggplot2::ggplot(data = out_long)+ -ggplot2::geom_line(ggplot2::aes(x = mu, y = value, color = survey_type), -linewidth = 1)+ -ggplot2::labs(x = 'mu (expected catch rate)',y = '# survey units', -color = 'survey type')+ -ggplot2::scale_color_manual(values = scales::hue_pal()(length(names)), -labels = names)+ -ggplot2::theme_minimal() -roxygen2::roxygenise() -install.packages('srr') -options(repos = c( -ropenscireviewtools = "https://ropensci-review-tools.r-universe.dev", -CRAN = "https://cloud.r-project.org")) -install.packages("srr") -roxygen2::roxygenise() -GLNA_grab <- read.csv('../neha_data/GLNA_grab.csv') -OPAC_grab <- read.csv('../neha_data/OPAC_grab.csv') -PRLI_grab <- read.csv('../neha_data/PRLI_grab.csv') -GLNA_dna <- read.csv('../neha_data/eDNAJoint_GLNA_B.csv') -OPCA_dna <- read.csv('../neha_data/eDNAJoint_OPAC_BD.csv') -PRLI_dna <- read.csv('../neha_data/eDNAJoint_PRLI_AW.csv') -OPCA_grab <- read.csv('../neha_data/OPAC_grab.csv') -GLNA_grab <- read.csv('../neha_data/GLNA_grab.csv') -OPCA_grab <- read.csv('../neha_data/OPAC_grab.csv') -PRLI_grab <- read.csv('../neha_data/PRLI_grab.csv') -GLNA_dna <- read.csv('../neha_data/eDNAJoint_GLNA_B.csv') -OPCA_dna <- read.csv('../neha_data/eDNAJoint_OPAC_BD.csv') -PRLI_dna <- read.csv('../neha_data/eDNAJoint_PRLI_AW.csv') -qPCR_N_wide <- OPCA_dna %>% -pivot_wider(id_cols=site, -names_from=biological_replicate, -values_from=N) %>% -select(-site) -# K (number of positive qPCR detections per biological replicate (i.e., water sample)) -qPCR_K_wide <- OPCA_dna %>% -pivot_wider(id_cols=site, -names_from=biological_replicate, -values_from=K) %>% -select(-site) -library(tidyverse) -qPCR_N_wide <- OPCA_dna %>% -pivot_wider(id_cols=site, -names_from=biological_replicate, -values_from=N) %>% -select(-site) -# K (number of positive qPCR detections per biological replicate (i.e., water sample)) -qPCR_K_wide <- OPCA_dna %>% -pivot_wider(id_cols=site, -names_from=biological_replicate, -values_from=K) %>% -select(-site) -# counts (number of individuals in traditional survey sample) -count_wide <- OPCA_grab %>% -pivot_wider(id_cols=site, -names_from=grab_replicate, -values_from=count) %>% -select(-site) -plotdata <- data.frame( -x = rowMeans(count_wide,na.rm=TRUE), -y = rowMeans(qPCR_K_wide,na.rm=TRUE), -label = 1:dim(count_wide)[1] -) -ggplot(data=plotdata,aes(x=x,y=y))+ -geom_point()+ -geom_text(aes(label=factor(label), vjust = -1))+ -#scale_y_continuous(limits=c(3.5,6.8))+ -labs(x='mean grab count',y='mean qPCR detections')+ -ggtitle('GLNA') -ggplot(data=plotdata,aes(x=x,y=y))+ -geom_point()+ -geom_text(aes(label=factor(label), vjust = -1))+ -#scale_y_continuous(limits=c(3.5,6.8))+ -labs(x='mean grab count',y='mean qPCR detections')+ -ggtitle('OPCA') -# combine all data into one named list -# note: make sure to convert the wide dataframes into matrices +# convert p10 prior +# p10 prior: convert beta(1,20) to lognormal distribution +# moment match from beta(alpha,beta) to normal(mu, sigma^2) +alpha <- p10priors[1] +beta <- p10priors[2] +mu <- alpha/(alpha+beta) +sigma_2 <- (alpha*beta)/((alpha+beta)^2*(alpha+beta+1)) +# convert normal(mu, sigma^2) to lognormal(mu, sigma^2) +mu_ln <- log(mu^2/sqrt(mu^2+sigma_2)) +sigma_2_ln <- log(1+sigma_2/mu^2) +sigma_ln <- sqrt(sigma_2_ln) +# create data that will be present in all model variations data <- list( -qPCR.N = as.matrix(qPCR_N_wide), -qPCR.K = as.matrix(qPCR_K_wide), -count = as.matrix(count_wide) +S = nrow(qPCR_all_trad), +S_dna = nrow(qPCR_all_dna), +C = nrow(count_all), +L = qPCR_all_trad$L_unique, +L_dna = qPCR_all_dna$L_unique, +R = count_all$R, +Nloc_dna = length(unique(qPCR_all_dna$L)), +Nloc_trad = length(unique(qPCR_all_trad$L)), +trad_ind = trad_ind, +dna_ind = as.array(dna_ind), +E = count_all$count, +N = qPCR_all_trad$N, +K = qPCR_all_trad$K, +N_dna = qPCR_all_dna$N, +K_dna = qPCR_all_dna$K, +p10priors = c(mu_ln,sigma_ln), +control = list(adapt_delta = adapt_delta) +) +# run model +out <- rstan::sampling(model, +data = data, +cores = 4, +#' @srrstats {G2.4,G2.4a} explicit conversion to +#' integers for sampling arguments +chains = as.integer(n.chain), +thin = as.integer(thin), +warmup = as.integer(n.iter.burn), +iter = ( +as.integer(n.iter.burn) + as.integer(n.iter.sample) +), +init = inits, +refresh = ifelse(verbose == TRUE,500,0) ) -# OPCA grab -fit <- jointModel(data=data, family='negbin', p10priors = c(1,10)) -library(eDNAjoint) -# OPCA grab -fit <- jointModel(data=data, family='negbin', p10priors = c(1,10)) -jointSummarize(fit$model) diff --git a/R/jointModel.R b/R/jointModel.R index 39eb8eb..07722ae 100644 --- a/R/jointModel.R +++ b/R/jointModel.R @@ -399,7 +399,7 @@ jointModel <- function(data, cov = NULL, family = 'poisson', # get initial values if(isCatch_type(q)){ - inits <- get_inits(n.chain,qPCR_all,initial_values,cov,q_names,data) + inits <- get_inits(n.chain,qPCR_all,initial_values,cov,data,q_names) } else { inits <- get_inits(n.chain,qPCR_all,initial_values,cov,data) }