Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Scramble metrics to the qc table generated in EvidenceQC #702

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions src/sv-pipeline/scripts/make_evidence_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,13 @@ def read_outlier(filename: str, outlier_col_label: str) -> pd.DataFrame:
return outlier_df


def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFrame, outlier_wham_df: pd.DataFrame, outlier_type: str) -> pd.DataFrame:
def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFrame, outlier_wham_df: pd.DataFrame, outlier_scramble_df: pd.DataFrame, outlier_type: str) -> pd.DataFrame:
"""
Args:
outlier_manta_df: Outliers determined in EvidenceQC for Manta.
outlier_melt_df: Outliers determined in EvidenceQC for MELT.
outlier_wham_df: Outliers determined in EvidenceQC for Wham.
outlier_scramble_df: Outliers determined in EvidenceQC for Scramble.
outlier_type: high or low. Determined in EvidenceQC for each of the four callers.
Returns:
The total number of times that a sample appears as an outlier
Expand All @@ -140,8 +141,12 @@ def read_all_outlier(outlier_manta_df: pd.DataFrame, outlier_melt_df: pd.DataFra
col_name = get_col_name("wham", outlier_type)
dict_wham = dict(zip(outlier_wham_df[ID_COL], outlier_wham_df[col_name]))

# Scramble:
col_name = get_col_name("scramble", outlier_type)
dict_scramble = dict(zip(outlier_scramble_df[ID_COL], outlier_scramble_df[col_name]))

# merging all the dictionaries
outlier_dicts = [dict_manta, dict_melt, dict_wham]
outlier_dicts = [dict_manta, dict_melt, dict_wham, dict_scramble]
merged_dicts = Counter()
for counted in outlier_dicts:
merged_dicts.update(counted)
Expand All @@ -162,9 +167,11 @@ def merge_evidence_qc_table(
filename_high_manta: str,
filename_high_melt: str,
filename_high_wham: str,
filename_high_scramble: str,
filename_low_manta: str,
filename_low_melt: str,
filename_low_wham: str,
filename_low_scramble: str,
filename_melt_insert_size: str,
output_prefix: str) -> None:
"""
Expand All @@ -178,17 +185,19 @@ def merge_evidence_qc_table(
df_manta_high_outlier = read_outlier(filename_high_manta, get_col_name("manta", "high"))
df_melt_high_outlier = read_outlier(filename_high_melt, get_col_name("melt", "high"))
df_wham_high_outlier = read_outlier(filename_high_wham, get_col_name("wham", "high"))
df_total_high_outliers = read_all_outlier(df_manta_high_outlier, df_melt_high_outlier, df_wham_high_outlier, "high")
df_scramble_high_outlier = read_outlier(filename_high_scramble, get_col_name("scramble", "high"))
df_total_high_outliers = read_all_outlier(df_manta_high_outlier, df_melt_high_outlier, df_wham_high_outlier, df_scramble_high_outlier, "high")
df_manta_low_outlier = read_outlier(filename_low_manta, get_col_name("manta", "low"))
df_melt_low_outlier = read_outlier(filename_low_melt, get_col_name("melt", "low"))
df_wham_low_outlier = read_outlier(filename_low_wham, get_col_name("wham", "low"))
df_total_low_outliers = read_all_outlier(df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, "low")
df_scramble_low_outlier = read_outlier(filename_low_scramble, get_col_name("scramble", "low"))
df_total_low_outliers = read_all_outlier(df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, df_scramble_low_outlier, "low")
df_melt_insert_size = read_melt_insert_size(filename_melt_insert_size)

# all data frames
dfs = [df_ploidy, df_bincov_median, df_wgd_scores, df_non_diploid, df_manta_high_outlier,
df_melt_high_outlier, df_wham_high_outlier, df_total_high_outliers,
df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, df_total_low_outliers,
df_melt_high_outlier, df_wham_high_outlier, df_scramble_high_outlier, df_total_high_outliers,
df_manta_low_outlier, df_melt_low_outlier, df_wham_low_outlier, df_scramble_low_outlier, df_total_low_outliers,
df_melt_insert_size]
for df in dfs:
df[ID_COL] = df[ID_COL].astype(object)
Expand Down Expand Up @@ -241,6 +250,14 @@ def main():
"-r", "--wham-qc-outlier-low-filename",
help="Sets the filename containing Wham QC outlier low.")

parser.add_argument(
"-x", "--scramble-qc-outlier-low-filename",
help="Sets the filename containing Scramble QC outlier low.")

parser.add_argument(
"-t", "--scramble-qc-outlier-high-filename",
help="Sets the filename containing Scramble QC outlier high.")

parser.add_argument(
"-m", "--melt-insert-size-filename",
help="Sets the filename containing Melt insert size. "
Expand All @@ -263,9 +280,11 @@ def main():
args.manta_qc_outlier_high_filename,
args.melt_qc_outlier_high_filename,
args.wham_qc_outlier_high_filename,
args.scramble_qc_outlier_high_filename,
args.manta_qc_outlier_low_filename,
args.melt_qc_outlier_low_filename,
args.wham_qc_outlier_low_filename,
args.scramble_qc_outlier_low_filename,
args.melt_insert_size_filename,
args.output_prefix)

Expand Down
2 changes: 2 additions & 0 deletions wdl/EvidenceQC.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,11 @@ task MakeQcTable {
~{"--manta-qc-outlier-high-filename " + manta_qc_high} \
~{"--melt-qc-outlier-high-filename " + melt_qc_high} \
~{"--wham-qc-outlier-high-filename " + wham_qc_high} \
~{"--scramble-qc-outlier-high-filename " + scramble_qc_high} \
~{"--manta-qc-outlier-low-filename " + manta_qc_low} \
~{"--melt-qc-outlier-low-filename " + melt_qc_low} \
~{"--wham-qc-outlier-low-filename " + wham_qc_low} \
~{"--scramble-qc-outlier-low-filename " + scramble_qc_low} \
~{if (length(melt_insert_size) > 0) then "--melt-insert-size mean_insert_size.tsv" else ""} \
~{"--output-prefix " + output_prefix}
>>>
Expand Down
Loading