From 43a92cec201cdba7c83c6b5d6aaaff5bf5122ea8 Mon Sep 17 00:00:00 2001 From: Kirtana Veeraraghavan Date: Thu, 1 Aug 2024 09:30:08 -0400 Subject: [PATCH 1/2] amend make_evidence_qc_table.py to add scramble metrics to the qc table and update wdl inputs to script accordingly --- .../scripts/make_evidence_qc_table.py | 31 +++++++++++++++---- wdl/EvidenceQC.wdl | 2 ++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/sv-pipeline/scripts/make_evidence_qc_table.py b/src/sv-pipeline/scripts/make_evidence_qc_table.py index 9b1c3b633..566a0f334 100644 --- a/src/sv-pipeline/scripts/make_evidence_qc_table.py +++ b/src/sv-pipeline/scripts/make_evidence_qc_table.py @@ -117,12 +117,13 @@ def read_outlier(filename: str, outlier_col_label: str) -> pd.DataFrame: return outlier_df -def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFrame, outlier_wham_df: pd.DataFrame, outlier_type: str) -> pd.DataFrame: +def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFrame, outlier_wham_df: pd.DataFrame, outlier_scramble_df: pd.DataFrame,outlier_type: str) -> pd.DataFrame: """ Args: outlier_manta_df: Outliers determined in EvidenceQC for Manta. outlier_melt_df: Outliers determined in EvidenceQC for MELT. outlier_wham_df: Outliers determined in EvidenceQC for Wham. + outlier_scramble_df: Outliers determined in EvidenceQC for Scramble outlier_type: high or low. Determined in EvidenceQC for each of the three callers. 
Returns: The total number of times that a sample appears as an outlier @@ -140,8 +141,12 @@ def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFra col_name = get_col_name("wham", outlier_type) dict_wham = dict(zip(outlier_wham_df[ID_COL], outlier_wham_df[col_name])) + # Scramble: + col_name = get_col_name("scramble", outlier_type) + dict_scramble = dict(zip(outlier_scramble_df[ID_COL], outlier_scramble_df[col_name])) + # merging all the dictionaries - outlier_dicts = [dict_manta, dict_melt, dict_wham] + outlier_dicts = [dict_manta, dict_melt, dict_wham, dict_scramble] merged_dicts = Counter() for counted in outlier_dicts: merged_dicts.update(counted) @@ -162,9 +167,11 @@ def merge_evidence_qc_table( filename_high_manta: str, filename_high_melt: str, filename_high_wham: str, + filename_high_scramble: str, filename_low_manta: str, filename_low_melt: str, filename_low_wham: str, + filename_low_scramble: str, filename_melt_insert_size: str, output_prefix: str) -> None: """ @@ -178,17 +185,19 @@ def merge_evidence_qc_table( df_manta_high_outlier = read_outlier(filename_high_manta, get_col_name("manta", "high")) df_melt_high_outlier = read_outlier(filename_high_melt, get_col_name("melt", "high")) df_wham_high_outlier = read_outlier(filename_high_wham, get_col_name("wham", "high")) - df_total_high_outliers = read_all_outlier(df_manta_high_outlier, df_melt_high_outlier, df_wham_high_outlier, "high") + df_scramble_high_outlier = read_outlier(filename_high_scramble, get_col_name("scramble", "high")) + df_total_high_outliers = read_all_outlier(df_manta_high_outlier, df_melt_high_outlier, df_wham_high_outlier, df_scramble_high_outlier, "high") df_manta_low_outlier = read_outlier(filename_low_manta, get_col_name("manta", "low")) df_melt_low_outlier = read_outlier(filename_low_melt, get_col_name("melt", "low")) df_wham_low_outlier = read_outlier(filename_low_wham, get_col_name("wham", "low")) - df_total_low_outliers = 
read_all_outlier(df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, "low") + df_scramble_low_outlier = read_outlier(filename_low_scramble, get_col_name("scramble", "low")) + df_total_low_outliers = read_all_outlier(df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, df_scramble_low_outlier, "low") df_melt_insert_size = read_melt_insert_size(filename_melt_insert_size) # all data frames dfs = [df_ploidy, df_bincov_median, df_wgd_scores, df_non_diploid, df_manta_high_outlier, - df_melt_high_outlier, df_wham_high_outlier, df_total_high_outliers, - df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, df_total_low_outliers, + df_melt_high_outlier, df_wham_high_outlier, df_scramble_high_outlier, df_total_high_outliers, + df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, df_scramble_low_outlier, df_total_low_outliers, df_melt_insert_size] for df in dfs: df[ID_COL] = df[ID_COL].astype(object) @@ -241,6 +250,14 @@ def main(): "-r", "--wham-qc-outlier-low-filename", help="Sets the filename containing Wham QC outlier low.") + parser.add_argument( + "-x", "--scramble-qc-outlier-low-filename", + help="Sets the filename containing Scramble QC outlier low.") + + parser.add_argument( + "-t", "--scramble-qc-outlier-high-filename", + help="Sets the filename containing Scramble QC outlier high.") + parser.add_argument( "-m", "--melt-insert-size-filename", help="Sets the filename containing Melt insert size. 
" @@ -263,9 +280,11 @@ def main(): args.manta_qc_outlier_high_filename, args.melt_qc_outlier_high_filename, args.wham_qc_outlier_high_filename, + args.scramble_qc_outlier_high_filename, args.manta_qc_outlier_low_filename, args.melt_qc_outlier_low_filename, args.wham_qc_outlier_low_filename, + args.scramble_qc_outlier_low_filename, args.melt_insert_size_filename, args.output_prefix) diff --git a/wdl/EvidenceQC.wdl b/wdl/EvidenceQC.wdl index 91daa1ab4..16928cd50 100644 --- a/wdl/EvidenceQC.wdl +++ b/wdl/EvidenceQC.wdl @@ -246,9 +246,11 @@ task MakeQcTable { ~{"--manta-qc-outlier-high-filename " + manta_qc_high} \ ~{"--melt-qc-outlier-high-filename " + melt_qc_high} \ ~{"--wham-qc-outlier-high-filename " + wham_qc_high} \ + ~{"--scramble-qc-outlier-high-filename " + scramble_qc_high} \ ~{"--manta-qc-outlier-low-filename " + manta_qc_low} \ ~{"--melt-qc-outlier-low-filename " + melt_qc_low} \ ~{"--wham-qc-outlier-low-filename " + wham_qc_low} \ + ~{"--scramble-qc-outlier-low-filename " + scramble_qc_low} \ ~{if (length(melt_insert_size) > 0) then "--melt-insert-size mean_insert_size.tsv" else ""} \ ~{"--output-prefix " + output_prefix} >>> From d2ece9f64d7ae504a27682597ed7550e791937f0 Mon Sep 17 00:00:00 2001 From: Kirtana Veeraraghavan Date: Thu, 1 Aug 2024 09:35:33 -0400 Subject: [PATCH 2/2] address linting issue --- src/sv-pipeline/scripts/make_evidence_qc_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sv-pipeline/scripts/make_evidence_qc_table.py b/src/sv-pipeline/scripts/make_evidence_qc_table.py index 566a0f334..128af0e21 100644 --- a/src/sv-pipeline/scripts/make_evidence_qc_table.py +++ b/src/sv-pipeline/scripts/make_evidence_qc_table.py @@ -117,7 +117,7 @@ def read_outlier(filename: str, outlier_col_label: str) -> pd.DataFrame: return outlier_df -def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFrame, outlier_wham_df: pd.DataFrame, outlier_scramble_df: pd.DataFrame,outlier_type: str) -> pd.DataFrame: 
+def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFrame, outlier_wham_df: pd.DataFrame, outlier_scramble_df: pd.DataFrame, outlier_type: str) -> pd.DataFrame: """ Args: outlier_manta_df: Outliers determined in EvidenceQC for Manta.