Skip to content

Commit

Permalink
Merge pull request #282 from USEPA/198-update-github-actions-to-inclu…
Browse files Browse the repository at this point in the history
…de-dev-branch

Large update to unit test suite including mocking invitrodb and CTX API data
  • Loading branch information
cthunes authored Aug 20, 2024
2 parents d6f5645 + f489c04 commit 558d8f6
Show file tree
Hide file tree
Showing 40 changed files with 3,914 additions and 1,197 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-on-PR.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
pull_request:
branches: [main]
branches: [main, dev]

name: test-coverage

Expand Down
256 changes: 255 additions & 1 deletion R/data.R

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions R/tcplConf.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ tcplConf <- function (drvr = NULL, user = NULL, pass = NULL, host = NULL,

if (drvr == "API") {
options("TCPL_DRVR" = "API")
if (is.null(pass)) stop("'API' driver requires an API-key, supply it to
the 'pass' parameter. To request a key, send an
email to [email protected].")
if (is.null(pass)) stop("'API' driver requires an API-key, supply it to ",
"the 'pass' parameter. To request a key, send an ",
"email to [email protected].")
if (is.null(host)) options("TCPL_HOST" = "https://api-ccte.epa.gov/bioactivity")
register_ctx_api_key(key = pass)
}
Expand Down
3 changes: 3 additions & 0 deletions R/tcplLoadChem.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ tcplLoadChem <- function(field = NULL, val = NULL, exact = TRUE,
if (tolower(field) != "spid") stop("When drvr option is set to 'API', only 'spid' is a valid 'field' value.")
if (!exact) exact <- TRUE
dat <- tcplQueryAPI(resource = "data", fld = "spid", val = val, return_flds = c("spid", "chid", "casn", "chnm", "dsstox_substance_id"))
if (!length(colnames(dat))) {
return(dat)
}
setorder(dat, "spid")
} else {
tbl <- c("chemical", "sample")
Expand Down
6 changes: 4 additions & 2 deletions R/tcplLoadConcUnit.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ tcplLoadConcUnit <- function(spid) {

if (getOption("TCPL_DRVR") == "API") {
dat <- tcplQueryAPI(resource = "data", fld = "spid", val = spid, return_flds = c("spid", "tested_conc_unit"))
setnames(dat, "tested_conc_unit", "conc_unit")
setorder(dat, "spid")
if (length(colnames(dat))) {
setnames(dat, "tested_conc_unit", "conc_unit")
setorder(dat, "spid")
}
return(unique(dat, by = c("spid", "conc_unit")))
}

Expand Down
48 changes: 30 additions & 18 deletions R/tcplLoadData.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
}
else if (lvl == 1L) {
sc1 <- sc_vignette[["sc1"]]
sc1 <- sc1[,c("s0id","s1id","spid","acid","aeid","apid","rowi","coli","wllt","logc","resp")]
sc1 <- sc1[,c("s0id","s1id","spid","acid","aeid","apid","rowi","coli","wllt","conc","resp")]
return(sc1)
}

Expand All @@ -121,7 +121,7 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
sc1 <- sc_vignette[["sc1"]]
sc2 <- sc_vignette[["sc2"]]
agg <- sc1[sc2, on = c("spid","aeid")]
agg <- agg[,c("aeid","s2id","s1id","s0id","logc","resp")]
agg <- agg[,c("aeid","s2id","s1id","s0id","conc","resp")]
return(agg)
}
else stop("example tables for sc0, sc1, sc2, agg available.")
Expand All @@ -146,14 +146,17 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
}
else if (lvl == 3L) {
mc3 <- mc_vignette[["mc3"]]
mc3 <- mc3[,c("m0id","m1id","m2id","m3id","spid","aeid","logc","resp","cndx","wllt","apid","rowi","coli","repi")]
mc3 <- mc3[,c("m0id","m1id","m2id","m3id","spid","aeid","conc","resp","cndx","wllt","apid","rowi","coli","repi")]
return(mc3)
}
else if (lvl == 4L) {
mc4 <- mc_vignette[["mc4"]]
if (!add.fld) {
mc4 <- mc4[,c("m4id","aeid","spid","bmad","resp_max","resp_min","max_mean","max_mean_conc","max_med","max_med_conc",
"logc_max","logc_min","nconc","npts","nrep","nmed_gtbl")]
mc4 <- mc4[,c("m4id", "aeid", "spid", "bmad", "resp_max", "resp_min",
"max_mean", "max_mean_conc", "min_mean", "min_mean_conc",
"max_med", "max_med_conc", "min_med", "min_med_conc",
"max_med_diff", "max_med_diff_conc", "conc_max", "conc_min",
"nconc", "npts", "nrep", "nmed_gtbl_pos", "nmed_gtbl_neg")]
} else {
mc4 <- mc4[,!c("chid","casn","chnm","dsstox_substance_id","code","aenm","resp_unit","conc_unit")]
setcolorder(mc4, c("m4id", "aeid", "spid"))
Expand All @@ -163,10 +166,15 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
else if (lvl == 5L) {
mc5 <- mc_vignette[["mc5"]]
if (!add.fld){
mc5 <- mc5[,c("m5id","m4id","aeid","spid","bmad","resp_max","resp_min","max_mean","max_mean_conc","max_med",
"max_med_conc","logc_max","logc_min","nconc","npts","nrep","nmed_gtbl","hitc","modl","fitc","coff")]
mc5 <- mc5[,c("m5id","m4id", "aeid", "spid", "bmad", "resp_max", "resp_min",
"max_mean", "max_mean_conc", "min_mean", "min_mean_conc",
"max_med", "max_med_conc", "min_med", "min_med_conc",
"max_med_diff", "max_med_diff_conc", "conc_max", "conc_min",
"nconc", "npts", "nrep", "nmed_gtbl_pos", "nmed_gtbl_neg",
"hitc", "modl", "fitc", "coff")]
} else {
mc5 <- mc5[,!c("chid","casn","chnm","dsstox_substance_id","code","aenm","resp_unit","conc_unit","tp","ga","q","la","ac50_loss")]
mc5 <- mc5[,!c("chid","casn","chnm","dsstox_substance_id","code","aenm",
"resp_unit","conc_unit","tp","ga","q","la","ac50_loss")]
setcolorder(mc5, c("m5id", "m4id","aeid", "spid"))
}
return(mc5)
Expand All @@ -175,7 +183,8 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
mc3 <- mc_vignette[["mc3"]]
mc4 <- mc_vignette[["mc4"]]
agg <- mc3[mc4, on = c("spid","aeid")]
agg <- agg[, c("aeid", "m4id", "m3id", "m2id", "m1id", "m0id", "spid", "logc", "resp")]
agg <- agg[, c("aeid", "m4id", "m3id", "m2id", "m1id", "m0id", "spid",
"conc", "resp")]
return(agg)

}
Expand Down Expand Up @@ -204,15 +213,18 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
# query the API
dat <- tcplQueryAPI(fld = fld, val = val, return_flds = cols)

if (lvl == 3) {
dat$resp <- lapply(dat$resp, unlist)
dat$logc <- lapply(dat$logc, unlist)
dat <- unnest_longer(dat, c(conc, logc, resp)) %>% as.data.table()
}

if (lvl == 6) {
dat$flag <- lapply(dat$flag, unlist)
dat <- unnest_longer(dat, flag) %>% filter(flag != "NULL") %>% as.data.table()
if (length(colnames(dat))) {
if (lvl == 3 | lvl == "agg") {
dat$resp <- lapply(dat$resp, unlist)
dat$logc <- lapply(dat$logc, unlist)
if (lvl == 3) dat <- unnest_longer(dat, c(conc, logc, resp)) %>% as.data.table()
else dat <- unnest_longer(dat, c(logc, resp)) %>% as.data.table()
}

if (lvl == 6) {
dat$flag <- lapply(dat$flag, unlist)
dat <- unnest_longer(dat, flag) %>% filter(flag != "NULL") %>% as.data.table()
}
}

return(dat)
Expand Down
14 changes: 8 additions & 6 deletions R/tcplPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ tcplPlotlyPlot <- function(dat, lvl = 5){
# extract range from level 3 data for creating plotting all the functions
# increase resolution to get smoother curves
resolution <- 100
x_min_max <- range(l3_dat_both$conc)
x_min_max <- range(l3_dat_both$conc, na.rm=TRUE)
#if the overall minimum conc is greater than 0 (test wells)
if (x_min_max[1] > 0) {
hline_range <- 10^(seq(from = log10(x_min_max[1]/100), to = log10(x_min_max[2]*100), length.out = resolution))
Expand Down Expand Up @@ -413,11 +413,13 @@ tcplPlotlyPlot <- function(dat, lvl = 5){
}

# compare data
if (!is.null(compare.dat$coff) && compare.dat$max_med < 0) {
compare.dat$coff <- compare.dat$coff * -1
}
if (!is.null(compare.dat$coff) && !is.null(compare.dat$hitc) && compare.dat$hitc < 0) {
compare.dat$coff <- compare.dat$coff * -1
if (nrow(compare.dat) > 0) {
if (!is.null(compare.dat$coff) && compare.dat$max_med < 0) {
compare.dat$coff <- compare.dat$coff * -1
}
if (!is.null(compare.dat$coff) && !is.null(compare.dat$hitc) && compare.dat$hitc < 0) {
compare.dat$coff <- compare.dat$coff * -1
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion R/tcplPlotUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,15 @@ tcplPlotSetYRange <- function(dat,yuniform,yrange,type){
}


tcplPlotValidate <- function(type = "mc",flags = NULL,output = "none",multi = FALSE,verbose = FALSE){
tcplPlotValidate <- function(type = "mc",flags = NULL,output = "none",multi = NULL,verbose = FALSE){

# set lvl based on type
lvl <- 5
if (type == "sc") {
lvl <- 2
if (flags == TRUE) {
warning("'flags' was set to TRUE - no flags exist for plotting single concentration")
flags = FALSE
}
}

Expand Down
161 changes: 161 additions & 0 deletions data-raw/mc_test.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#==============================================================================#
# NOTE: This script is written such that it is run from 'top' to 'bottom'
# or programmatically via the Terminal.
# ('R CMD BATCH --vanilla <script.R>'.)
# Please do not jump around when running this script.
#==============================================================================#
# NOTE: You MUST temporarily update tcplQuery() by adding a line at the top of the
# function: print(query).
# This is because the queries will be captured from output and saved as
# part of the mocking data.
#==============================================================================#
## r packages
devtools::load_all()

library(here)
library(dplyr)
library(stringr)
#---------------------------#
## code to prepare `mc_test` dataset goes here
# source the user ID, password, host, and database information for connection
# - NOTE: To replicate one will need to save their own 'db_cred.R', including
# the 'userid', 'userpwd', 'host', and DB collection via 'ivtdb'.
source(file = here::here("data-raw/db_cred.R"),verbose = FALSE)
# connect to the DB
tcplConf(user = userid,
pass = userpwd,
host = host,
db = ivtdb,
drvr = "MySQL")

# pick endpoints and ids
# load the number of rows and max hitc per aeid
mc5_counts <- tcplQuery("SELECT DISTINCT aeid,
COUNT( aeid ) as n,
max(hitc) as max_hitc
FROM invitrodb.mc5 GROUP BY aeid")
# filter to only include where at least one sample is active and n < 10
mc5_counts <- mc5_counts %>% filter(max_hitc > 0.9 & n == 2)
# pick one aeid
aeid <- selected <- mc5_counts[sample(1:nrow(mc5_counts),size = 1,replace = FALSE),aeid]
# obtain the acid for the example dataset
acid <- tcplLoadAcid(fld = 'aeid',val = aeid)$acid
# pick one sample/row from each level (lvl 3 contains ids back to lvl 0 and lvl 6 does back to lvl 4)
l3 <- tcplLoadData(lvl = 3, fld = "acid", val = acid)
l3_sample1 <- l3[sample(1:nrow(l3),size = 1,replace = FALSE)]
l3_sample2 <- l3[sample(1:nrow(l3),size = 2,replace = FALSE)]
l5 <- tcplLoadData(lvl = 5, fld = "aeid", val = aeid, add.fld = FALSE)
l5_sample1 <- l5[sample(1:nrow(l5),size = 1,replace = FALSE)]
l5_sample2 <- l5[sample(1:nrow(l5),size = 2,replace = FALSE)]
l6 <- tcplLoadData(lvl = 6, fld = "aeid", val = aeid, add.fld = FALSE)
l6_sample1 <- l6[sample(1:nrow(l6),size = 1,replace = FALSE)]
l6_sample2 <- l6[sample(1:nrow(l6),size = 2,replace = FALSE)]
l7 <- tcplLoadData(lvl = 7, fld = "aeid", val = aeid, add.fld = FALSE)
l7_sample1 <- l7[sample(1:nrow(l7),size = 1,replace = FALSE)]
l7_sample2 <- l7[sample(1:nrow(l7),size = 2,replace = FALSE)]
# pick compare.val endpoints and ids
# be sure to only allow to choose from endpoints with the same number of samples
mc5_counts <- filter(mc5_counts, n == mc5_counts[aeid == selected]$n & aeid != selected)
compare.aeid <- mc5_counts[sample(1:nrow(mc5_counts),size = 1,replace = FALSE),aeid]
compare.l5 <- tcplLoadData(lvl = 5, fld = "aeid", val = compare.aeid)
compare.l5_sample1 <- compare.l5[sample(1:nrow(compare.l5),size = 1,replace = FALSE)]
compare.l5_sample2 <- compare.l5[sample(1:nrow(compare.l5),size = 2,replace = FALSE)]


get_query_data <- function(lvl, fld, val, compare.val = NULL, add.fld = TRUE, func = "tcplLoadData") {
message(compare.val)
if (func == "tcplLoadData") {
# IMPORTANT || MUST ADD TEMPORARY LINE TO TCPLQUERY --------------------------
# add temporary line to top of tcplQuery to get the query string: print(query)
query_strings <- capture.output(result<-tcplLoadData(lvl = lvl, fld = fld, val = val, add.fld = add.fld))
} else if (func == "tcplPlot") {
query_strings <- capture.output(result<-tcplPlot(type = "mc", fld = fld,
val = val, compare.val = compare.val,
output = "pdf", multi = TRUE, flags = TRUE,
fileprefix = "temp_tcplPlot"))
file.remove(stringr::str_subset(list.files(), "^temp_tcplPlot")) # clean up
}

query_strings <- unique(gsub("\\\\", "\\\"", gsub("\"", "", gsub("\\\\n", "\\\n", gsub("\\[1\\] ", "", query_strings)))))

# use queries to save data
dat <- lapply(query_strings, function(query_string) {
return(tcplQuery(query_string))
})
names(dat) <- query_strings

# also store fld and val in list object for use in test case
dat[fld] <- val
if (!is.null(compare.val)) dat[sprintf("compare.%s", fld)] <- compare.val
return(dat)

}


# to add more tests with new/different data to test-tcplLoadData.R, add lines below and run script
mc_test <- list(
tcplConfQuery = tcplQuery("SHOW VARIABLES LIKE 'max_allowed_packet'"),
mc0_by_m0id = get_query_data(lvl = 0, fld = "m0id", val = l3_sample1$m0id),
mc0_by_acid = get_query_data(lvl = 0, fld = "acid", val = acid),
mc1_by_m1id = get_query_data(lvl = 1, fld = "m1id", val = l3_sample1$m1id),
mc1_by_acid = get_query_data(lvl = 1, fld = "acid", val = acid),
mc2_by_m2id = get_query_data(lvl = 2, fld = "m2id", val = l3_sample1$m2id),
mc2_by_acid = get_query_data(lvl = 2, fld = "acid", val = acid),
mc3_by_m3id = get_query_data(lvl = 3, fld = "m3id", val = l3_sample1$m3id),
mc3_by_aeid = get_query_data(lvl = 3, fld = "aeid", val = aeid),
mc4_by_m4id = get_query_data(lvl = 4, fld = "m4id", val = l5_sample1$m4id),
mc4_by_aeid = get_query_data(lvl = 4, fld = "aeid", val = aeid, add.fld = FALSE),
mc5_by_m5id = get_query_data(lvl = 5, fld = "m5id", val = l5_sample1$m5id),
mc5_by_aeid = get_query_data(lvl = 5, fld = "aeid", val = aeid, add.fld = FALSE),
mc6_by_m6id = get_query_data(lvl = 6, fld = "m6id", val = l6_sample1$m6id),
mc6_by_aeid = get_query_data(lvl = 6, fld = "aeid", val = aeid),
mc7_by_m7id = get_query_data(lvl = 7, fld = "m7id", val = l7_sample1$m7id),
mc7_by_aeid = get_query_data(lvl = 7, fld = "aeid", val = aeid),
mcagg_by_aeid = get_query_data(lvl = "agg", fld = "aeid", val = aeid),
plot_single_m4id = get_query_data(fld = "m4id",
val = l5_sample1$m4id,
func = "tcplPlot"),
plot_multiple_m4id = get_query_data(fld = "m4id",
val = list(l5_sample2$m4id),
func = "tcplPlot"),
plot_single_aeid = get_query_data(fld = "aeid",
val = aeid,
func = "tcplPlot"),
plot_multiple_aeid = get_query_data(fld = "aeid",
val = list(c(aeid, compare.aeid)),
func = "tcplPlot"),
plot_single_spid = get_query_data(fld = c("spid", "aeid"),
val = list(l5_sample1$spid, aeid),
func = "tcplPlot"),
plot_multiple_spid = get_query_data(fld = c("spid", "aeid"),
val = list(l5_sample2$spid, aeid),
func = "tcplPlot"),
plot_single_m4id_compare = get_query_data(fld = "m4id",
val = l5_sample1$m4id,
compare.val = compare.l5_sample1$m4id,
func = "tcplPlot"),
plot_multiple_m4id_compare = get_query_data(fld = "m4id",
val = list(l5_sample2$m4id),
compare.val = list(compare.l5_sample2$m4id),
func = "tcplPlot"),
plot_single_aeid_compare = get_query_data(fld = "aeid",
val = aeid,
compare.val = compare.aeid,
func = "tcplPlot"),
plot_multiple_aeid_compare = get_query_data(fld = "aeid",
val = list(c(aeid, compare.aeid)),
compare.val = list(c(compare.aeid, aeid)),
func = "tcplPlot"),
plot_single_spid_compare = get_query_data(fld = c("spid", "aeid"),
val = list(l5_sample1$spid, aeid),
compare.val = list(compare.l5_sample1$spid, compare.aeid),
func = "tcplPlot"),
plot_multiple_spid_compare = get_query_data(fld = c("spid", "aeid"),
val = list(l5_sample2$spid, aeid),
compare.val = list(compare.l5_sample2$spid, compare.aeid),
func = "tcplPlot")
)
#---------------------------#
## save the data
usethis::use_data(mc_test, overwrite = TRUE)
#---------------------------#
Loading

0 comments on commit 558d8f6

Please sign in to comment.