diff --git a/build.gradle.kts b/build.gradle.kts index 3600d40ed..b7554fd79 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -134,13 +134,13 @@ val toObfuscate: Configuration by configurations.creating { val obfuscationLibs: Configuration by configurations.creating -val mixcrAlgoVersion = "4.7.0-2-fix-freeze" +val mixcrAlgoVersion = "4.7.0-18-mitool-tag-types" // may be blank (will be inherited from mixcr-algo) -val milibVersion = "3.5.0-1-add-progress-to-fasta" +val milibVersion = "" // may be blank (will be inherited from mixcr-algo or milib) val miuVersion = "" // may be blank (will be inherited from mixcr-algo) -val mitoolVersion = "" +val mitoolVersion = "2.3.0-17-mitool-tag-types" // may be blank (will be inherited from mixcr-algo) val repseqioVersion = "" diff --git a/changelogs/v4.7.1.md b/changelogs/v4.7.1.md index 98a4e1b29..10cf9cbd3 100644 --- a/changelogs/v4.7.1.md +++ b/changelogs/v4.7.1.md @@ -1,3 +1,8 @@ ## 🛠️ Other improvements & fixes - Tracking the progres of fasta[.gz] inputs on `align`. + + +## 📚 New Presets + +- Added preset `bruker-human-sc-xcr-vdj-beacon` for TCR/BCR analyses of Bruker Beacon data \ No newline at end of file diff --git a/regression/presets/analyze/10x-sc-xcr-vdj.yaml b/regression/presets/analyze/10x-sc-xcr-vdj.yaml index b682481ad..5ab31db0a 100644 --- a/regression/presets/analyze/10x-sc-xcr-vdj.yaml +++ b/regression/presets/analyze/10x-sc-xcr-vdj.yaml @@ -20,10 +20,11 @@ mitool: trimmingQualityThreshold: null trimmingWindowSize: null replaceWildcards: true + splitBySample: true refineTags: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 parameters: @@ -65,11 +66,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -102,11 +103,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -139,10 +140,10 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -175,10 +176,10 @@ mitool: minRecordWeightForUse: 2 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true diff --git a/regression/presets/analyze/10x-vdj-bcr-full-length.yaml b/regression/presets/analyze/10x-vdj-bcr-full-length.yaml index b682481ad..5ab31db0a 100644 --- a/regression/presets/analyze/10x-vdj-bcr-full-length.yaml +++ b/regression/presets/analyze/10x-vdj-bcr-full-length.yaml @@ -20,10 +20,11 @@ mitool: trimmingQualityThreshold: null trimmingWindowSize: null replaceWildcards: true + splitBySample: true refineTags: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 parameters: @@ -65,11 +66,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -102,11 +103,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -139,10 +140,10 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -175,10 +176,10 @@ mitool: minRecordWeightForUse: 2 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true diff --git a/regression/presets/analyze/10x-vdj-bcr.yaml b/regression/presets/analyze/10x-vdj-bcr.yaml index b682481ad..5ab31db0a 100644 --- a/regression/presets/analyze/10x-vdj-bcr.yaml +++ b/regression/presets/analyze/10x-vdj-bcr.yaml @@ -20,10 +20,11 @@ mitool: trimmingQualityThreshold: null trimmingWindowSize: null replaceWildcards: true + splitBySample: true refineTags: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 parameters: @@ -65,11 +66,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -102,11 +103,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -139,10 +140,10 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -175,10 +176,10 @@ mitool: minRecordWeightForUse: 2 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true diff --git a/regression/presets/analyze/10x-vdj-tcr-alias-multi-barcode-test.yaml b/regression/presets/analyze/10x-vdj-tcr-alias-multi-barcode-test.yaml index 243638b9d..d5fecb92a 100644 --- a/regression/presets/analyze/10x-vdj-tcr-alias-multi-barcode-test.yaml +++ b/regression/presets/analyze/10x-vdj-tcr-alias-multi-barcode-test.yaml @@ -20,10 +20,11 @@ mitool: trimmingQualityThreshold: null trimmingWindowSize: null replaceWildcards: true + splitBySample: true refineTags: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 parameters: @@ -65,11 +66,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -102,11 +103,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -139,10 +140,10 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -175,10 +176,10 @@ mitool: minRecordWeightForUse: 2 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true diff --git a/regression/presets/analyze/10x-vdj-tcr-full-length.yaml b/regression/presets/analyze/10x-vdj-tcr-full-length.yaml index b682481ad..5ab31db0a 100644 --- a/regression/presets/analyze/10x-vdj-tcr-full-length.yaml +++ b/regression/presets/analyze/10x-vdj-tcr-full-length.yaml @@ -20,10 +20,11 @@ mitool: trimmingQualityThreshold: null trimmingWindowSize: null replaceWildcards: true + splitBySample: true refineTags: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 parameters: @@ -65,11 +66,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -102,11 +103,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -139,10 +140,10 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -175,10 +176,10 @@ mitool: minRecordWeightForUse: 2 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true diff --git a/regression/presets/analyze/10x-vdj-tcr-qc-test.yaml b/regression/presets/analyze/10x-vdj-tcr-qc-test.yaml index 633b422c1..1bef54ae9 100644 --- a/regression/presets/analyze/10x-vdj-tcr-qc-test.yaml +++ b/regression/presets/analyze/10x-vdj-tcr-qc-test.yaml @@ -20,10 +20,11 @@ mitool: trimmingQualityThreshold: null trimmingWindowSize: null replaceWildcards: true + splitBySample: true refineTags: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 parameters: @@ -65,11 +66,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -102,11 +103,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -139,10 +140,10 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -175,10 +176,10 @@ mitool: minRecordWeightForUse: 2 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true diff --git a/regression/presets/analyze/10x-vdj-tcr.yaml b/regression/presets/analyze/10x-vdj-tcr.yaml index b682481ad..5ab31db0a 100644 --- a/regression/presets/analyze/10x-vdj-tcr.yaml +++ b/regression/presets/analyze/10x-vdj-tcr.yaml @@ -20,10 +20,11 @@ mitool: trimmingQualityThreshold: null trimmingWindowSize: null replaceWildcards: true + splitBySample: true refineTags: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 parameters: @@ -65,11 +66,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -102,11 +103,11 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -139,10 +140,10 @@ mitool: minRecordWeightForUse: 0 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true @@ -175,10 +176,10 @@ mitool: minRecordWeightForUse: 2 maxIterationsPerConsensus: 8 maxConsensuses: 0 - groupingTags: - - CELL - payloadTags: - - R2 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets safeUnusedReads: true safeDiscardedReads: true inferMinRecordsPerConsensus: true diff --git a/regression/presets/analyze/bruker-human-sc-xcr-vdj-beacon.yaml b/regression/presets/analyze/bruker-human-sc-xcr-vdj-beacon.yaml new file mode 100644 index 000000000..0661e54aa --- /dev/null +++ b/regression/presets/analyze/bruker-human-sc-xcr-vdj-beacon.yaml @@ -0,0 +1,578 @@ +flags: [] +pipeline: + - mitool-parse + - mitool-refine-tags + - mitool-consensus + - align + - refineTagsAndSort + - assemble + - assembleCells + - exportClones + - exportCloneGroups +mitool: + parse: + pattern: "^(CELL:N{10})ggg\\^(R2:*)" + unstranded: false + maxErrorBudget: 10.0 + trimmingQualityThreshold: null + trimmingWindowSize: null + replaceWildcards: true + splitBySample: true + refineTags: + tagTypes: + - Cell + whitelists: + CELL: + - TGGTAGGCTG + - GTTAGCTGCT + - TACATAAAGA + - AGCCCTATCA + - ACCTACCGCC + - TCTCCAAGAC + - GTATACATTA + - AGACTCGATT + - CCAGGATTAA + - CTCCTTCAAG + - ACTACTTCTG + - GCCTTGTTGT + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: null + consensus: + - parameters: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -19 + minAlignmentScore: 80 + maxNormalizedAlignmentPenalty: 0.13 + trimMinimalSumQuality: 0 + trimReferenceRegion: true + maxQuality: 45 + kMapParameters: + k: 9 + useQ1Score: true + coreSequenceKMers: 20 + coreSequenceRecordScore: 20 + neighbourhoodSize: 2 + positiveMinQualityShare: 0.13 + positiveMinIntersectionQuality: 150 + minAlignmentScoreForDiscardRecord: 0 + maxNormalizedAlignmentPenaltyForDiscardRecord: 0.0 + minRecordSharePerConsensus: 0.001 + minRecordsPerConsensus: 3 + useWeightsForRecordsPerConsensus: true + minRecordWeightForUse: 0 + maxIterationsPerConsensus: 8 + maxConsensuses: 0 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets + safeUnusedReads: true + safeDiscardedReads: true + inferMinRecordsPerConsensus: true + - parameters: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -19 + minAlignmentScore: 71 + maxNormalizedAlignmentPenalty: 0.1 + trimMinimalSumQuality: 0 + trimReferenceRegion: true + maxQuality: 45 + kMapParameters: + k: 9 + useQ1Score: true + coreSequenceKMers: 20 + coreSequenceRecordScore: 20 + neighbourhoodSize: 2 + positiveMinQualityShare: 0.14 + positiveMinIntersectionQuality: 239 + minAlignmentScoreForDiscardRecord: 0 + maxNormalizedAlignmentPenaltyForDiscardRecord: 0.0 + minRecordSharePerConsensus: 1.0 + minRecordsPerConsensus: 3 + useWeightsForRecordsPerConsensus: true + minRecordWeightForUse: 0 + maxIterationsPerConsensus: 8 + maxConsensuses: 0 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets + safeUnusedReads: true + safeDiscardedReads: true + inferMinRecordsPerConsensus: true +align: + species: hsa + libraryName: default + trimmingQualityThreshold: 0 + trimmingWindowSize: 6 + chains: ALL + replaceWildcards: true + overlapPairedReads: true + bamDropNonVDJ: false + writeFailedAlignments: false + tagPattern: "^(CELL:N{10})ggg\\^(R2:*)" + tagUnstranded: false + tagMaxBudget: 10.0 + headerExtractors: [] + readIdAsCellTag: false + sampleTable: null + tagsValidations: + - type: MustContainTagType + tagType: Cell + - type: MustNotContainTagType + tagType: Molecule + - type: MustContainTagType + tagType: Cell + splitBySample: true + limit: null + parameters: + vParameters: + geneFeatureToAlign: VTranscriptWithP + minSumScore: 60 + relativeMinScore: 0.7 + maxHits: 5 + edgeRealignmentMinScoreOverride: 35 + parameters: + type: kaligner2 + mapperNValue: 9 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: true + mapperAbsoluteMinClusterScore: 250 + mapperExtraClusterScore: -38 + mapperMatchScore: 127 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 15 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 250 + mapperRelativeMinScore: 0.7 + mapperMinSeedsDistance: 6 + mapperMaxSeedsDistance: 6 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.7 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + rightForceRealignmentTrigger: CDR3Begin(-6) + dParameters: + geneFeatureToAlign: DRegionWithP + relativeMinScore: 0.85 + absoluteMinScore: 25.0 + maxHits: 3 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + jParameters: + geneFeatureToAlign: JRegionWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: false + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 95 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 4 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 100 + mapperRelativeMinScore: 0.8 + mapperMinSeedsDistance: 5 + mapperMaxSeedsDistance: 5 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + cParameters: + geneFeatureToAlign: CExon1 + minSumScore: 40 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner + mapperKValue: 5 + floatingLeftBound: false + floatingRightBound: true + mapperAbsoluteMinScore: 95.0 + mapperRelativeMinScore: 0.63 + mapperMatchScore: 128.0 + mapperMismatchPenalty: -0.1 + mapperOffsetShiftPenalty: -0.3 + mapperMinSeedsDistance: 8 + mapperMaxSeedsDistance: 18 + minAlignmentLength: 15 + maxAdjacentIndels: 2 + alignmentStopPenalty: -1000 + absoluteMinScore: 40.0 + relativeMinScore: 0.87 + maxHits: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + vjAlignmentOrder: VThenJ + libraryStructure: Unknown + includeDScore: false + includeCScore: false + minSumScore: 120.0 + relativeMinVFR3CDR3Score: 0.7 + allowPartialAlignments: true + allowNoCDR3PartAlignments: true + allowChimeras: false + readsLayout: ReverseOnly + mergerParameters: + qualityMergingAlgorithm: MaxSubtraction + partsLayout: null + minimalOverlap: 13 + minimalMatchQualitySum: 364 + maxQuality: 50 + minimalIdentity: 0.7 + identityType: MinimalQualityWeighted + fixSeed: true + alignmentBoundaryTolerance: 5 + minChimeraDetectionScore: 120 + vjOverlapWindow: 3 + saveOriginalSequence: false + saveOriginalReads: false + smartForceEdgeAlignments: true + retriesForPartialAlignments: 3 +refineTagsAndSort: + whitelists: {} + runCorrection: false + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: null +assemblePartial: + overlappedOnly: false + dropPartial: false + cellLevel: false + parameters: + kValue: 12 + kOffset: -7 + minimalAssembleOverlap: 12 + minimalNOverlap: 7 + minimalNOverlapShare: 0.65 + minimalAlignmentMergeIdentity: 0.85 + mergerParameters: + qualityMergingAlgorithm: SumSubtraction + partsLayout: CollinearDirect + minimalOverlap: 20 + minimalMatchQualitySum: 0 + maxQuality: 45 + minimalIdentity: 0.95 + identityType: Unweighted + maxLeftParts: 256000 + maxLeftMatches: 6144 +extend: + vAnchor: CDR3Begin + jAnchor: FR4Begin + minimalVScore: 50 + minimalJScore: 50 +assemble: + sortBySequence: false + clnaOutput: false + cellLevel: true + consensusAssemblerParameters: + assembler: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -14 + minAlignmentScore: 40 + maxNormalizedAlignmentPenalty: 0.2 + trimMinimalSumQuality: 0 + trimReferenceRegion: false + maxQuality: 45 + maxIterations: 6 + minAltSeedQualityScore: 11 + minAltSeedNormalizedPenalty: 0.35 + altSeedPenaltyTolerance: 0.5 + minRecordSharePerConsensus: 0.02 + minRecordsPerConsensus: 0 + minRecursiveRecordShare: 0.1 + minQualityScore: 0 + maxConsensuses: 2 + minTagSuffixShare: 0.0 + isolateChains: true + empiricalAlignmentAssignment: + uniqGeneOccurrenceWithinTag: + - V + - J + uniqGeneOccurrenceWithinCluster: + - V + - J + matchAlignmentsWithoutVJHitsByTags: true + cloneAssemblerParameters: + assemblingFeatures: + - VDJRegion + minimalClonalSequenceLength: 12 + qualityAggregationType: BetaScore + cloneClusteringParameters: + searchDepth: 2 + allowedMutationsInNRegions: 1 + searchParameters: twoMismatchesOrIndels + clusteringFilter: + type: advanced + correctionPower: 0.001 + backgroundSubstitutionRate: 5.0E-4 + backgroundIndelRate: 2.0E-4 + cloneFactoryParameters: + vParameters: + relativeMinScore: 0.97 + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null + separateByV: false + separateByJ: false + separateByC: false + maximalPreClusteringRatio: 1.0 + preClusteringScoreFilteringRatio: 2.0 + preClusteringCountFilteringRatio: 2.0 + addReadsCountOnClustering: false + badQualityThreshold: 0 + maxBadPointsPercent: 0.7 + mappingThreshold: 2of5 + minimalQuality: 0 + postFilters: null + inferMinRecordsPerConsensus: false +assembleContigs: + ignoreTags: false + parameters: + branchingMinimalQualityShare: 0.1 + branchingMinimalSumQuality: 60 + decisiveBranchingSumQualityThreshold: 120 + alignedSequenceEdgeDelta: 3 + alignmentEdgeRegionSize: 7 + minimalNonEdgePointsFraction: 0.25 + minimalMeanNormalizedQuality: 5.0 + outputMinimalQualityShare: 0.75 + outputMinimalSumQuality: 0 + subCloningRegions: null + assemblingRegions: null + postFiltering: + type: NoFiltering + trimmingParameters: + averageQualityThreshold: 10.0 + windowSize: 8 + minimalContigLength: 20 + alignedRegionsOnly: false + discardAmbiguousNucleotideCalls: false + cloneFactoryParameters: + vParameters: + relativeMinScore: 0.97 + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null +assembleCells: + algorithm: + type: SingleCellGroupingByOverlappingCellIds + minOverlapForSmaller: + percentage: 0.8 + roundingMode: UP + minOverlapForBigger: + percentage: 0.2 + roundingMode: UP + countCellBarcodesWithOneClone: true + thresholdForAssigningLeftoverCells: + percentage: 0.6 + roundingMode: DOWN + nonFunctional: + type: OverlapWithFoundGroups + minOverlapForClone: + percentage: 0.9 + roundingMode: UP + minOverlapForGroup: + percentage: 0.5 + roundingMode: UP +exportAlignments: + chains: ALL + noHeader: false + fields: + - field: -readIds + - field: -tags + args: + - Cell + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + - field: -topChains +exportClones: + splitByTagType: null + filterOutOfFrames: false + filterStops: false + chains: ALL + noHeader: false + fields: + - field: -cloneId + - field: -cellGroup + - field: -tags + args: + - Cell + - field: -readCount + - field: -readFraction + - field: -targetSequences + - field: -targetQualities + - field: -vHit + - field: -dHit + - field: -jHit + - field: -cHit + - field: -vAlignment + - field: -dAlignment + - field: -jAlignment + - field: -cAlignment + - field: -allNFeaturesWithMinQuality + - field: -nFeature + args: + - VDJRegion + - field: -aaFeature + args: + - VDJRegion + - field: -defaultAnchorPoints + - field: -topChains + splitFilesBy: [] + groupClonesBy: [] +exportCloneGroups: + filterOutOfFrames: false + filterStops: false + filterOutGroupsWithOneClone: false + showSecondaryChains: true + types: [] + noHeader: false + fields: + - field: -cellGroup + - field: -groupReadCount + - field: -groupUniqueTagCount + args: + - Cell + - field: -uniqClonesPerChainCount + - field: -readCount + - field: -readFraction + - field: -vHit + - field: -dHit + - field: -jHit + - field: -cHit + - field: -nFeature + args: + - CDR3 + - field: -aaFeature + args: + - CDR3 + sortChainsBy: Auto +qc: + checks: + - type: SuccessfullyAlignedReads + upper: 0.85 + middle: 0.7 + label: Successfully aligned reads + - type: OffTargetReads + upper: 0.2 + middle: 0.1 + label: Off target (non TCR/IG) reads + - type: ReadsWithNoVOrJHits + upper: 0.2 + middle: 0.1 + label: Reads with no V or J hits + - type: ReadsUsedInClonotypes + upper: 0.9 + middle: 0.7 + label: Reads used in clonotypes + - type: AlignmentsWithNoAssemblingFeature + upper: 0.15 + middle: 0.05 + label: Alignments without assembling feature + - type: AlignmentsDroppedLowQuality + upper: 0.05 + middle: 0.01 + label: Alignments dropped due to low sequence quality + - type: ClonesDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Clones dropped in post-filtering + - type: AlignmentsDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Alignments dropped in clones post-filtering + - type: CellBarcodesWithFoundGroups + upper: 0.9 + middle: 0.8 + label: Cell barcodes used in result groups diff --git a/regression/presets/analyze/bruker-sc-xcr-vdj-beacon.yaml b/regression/presets/analyze/bruker-sc-xcr-vdj-beacon.yaml new file mode 100644 index 000000000..0661e54aa --- /dev/null +++ b/regression/presets/analyze/bruker-sc-xcr-vdj-beacon.yaml @@ -0,0 +1,578 @@ +flags: [] +pipeline: + - mitool-parse + - mitool-refine-tags + - mitool-consensus + - align + - refineTagsAndSort + - assemble + - assembleCells + - exportClones + - exportCloneGroups +mitool: + parse: + pattern: "^(CELL:N{10})ggg\\^(R2:*)" + unstranded: false + maxErrorBudget: 10.0 + trimmingQualityThreshold: null + trimmingWindowSize: null + replaceWildcards: true + splitBySample: true + refineTags: + tagTypes: + - Cell + whitelists: + CELL: + - TGGTAGGCTG + - GTTAGCTGCT + - TACATAAAGA + - AGCCCTATCA + - ACCTACCGCC + - TCTCCAAGAC + - GTATACATTA + - AGACTCGATT + - CCAGGATTAA + - CTCCTTCAAG + - ACTACTTCTG + - GCCTTGTTGT + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: null + consensus: + - parameters: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -19 + minAlignmentScore: 80 + maxNormalizedAlignmentPenalty: 0.13 + trimMinimalSumQuality: 0 + trimReferenceRegion: true + maxQuality: 45 + kMapParameters: + k: 9 + useQ1Score: true + coreSequenceKMers: 20 + coreSequenceRecordScore: 20 + neighbourhoodSize: 2 + positiveMinQualityShare: 0.13 + positiveMinIntersectionQuality: 150 + minAlignmentScoreForDiscardRecord: 0 + maxNormalizedAlignmentPenaltyForDiscardRecord: 0.0 + minRecordSharePerConsensus: 0.001 + minRecordsPerConsensus: 3 + useWeightsForRecordsPerConsensus: true + minRecordWeightForUse: 0 + maxIterationsPerConsensus: 8 + maxConsensuses: 0 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets + safeUnusedReads: true + safeDiscardedReads: true + inferMinRecordsPerConsensus: true + - parameters: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -19 + minAlignmentScore: 71 + maxNormalizedAlignmentPenalty: 0.1 + trimMinimalSumQuality: 0 + trimReferenceRegion: true + maxQuality: 45 + kMapParameters: + k: 9 + useQ1Score: true + coreSequenceKMers: 20 + coreSequenceRecordScore: 20 + neighbourhoodSize: 2 + positiveMinQualityShare: 0.14 + positiveMinIntersectionQuality: 239 + minAlignmentScoreForDiscardRecord: 0 + maxNormalizedAlignmentPenaltyForDiscardRecord: 0.0 + minRecordSharePerConsensus: 1.0 + minRecordsPerConsensus: 3 + useWeightsForRecordsPerConsensus: true + minRecordWeightForUse: 0 + maxIterationsPerConsensus: 8 + maxConsensuses: 0 + groupingTagTypes: + - Cell + payloadTagTypes: + - Targets + safeUnusedReads: true + safeDiscardedReads: true + inferMinRecordsPerConsensus: true +align: + species: hsa + libraryName: default + trimmingQualityThreshold: 0 + trimmingWindowSize: 6 + chains: ALL + replaceWildcards: true + overlapPairedReads: true + bamDropNonVDJ: false + writeFailedAlignments: false + tagPattern: "^(CELL:N{10})ggg\\^(R2:*)" + tagUnstranded: false + tagMaxBudget: 10.0 + headerExtractors: [] + readIdAsCellTag: false + sampleTable: null + tagsValidations: + - type: MustContainTagType + tagType: Cell + - type: MustNotContainTagType + tagType: Molecule + - type: MustContainTagType + tagType: Cell + splitBySample: true + limit: null + parameters: + vParameters: + geneFeatureToAlign: VTranscriptWithP + minSumScore: 60 + relativeMinScore: 0.7 + maxHits: 5 + edgeRealignmentMinScoreOverride: 35 + parameters: + type: kaligner2 + mapperNValue: 9 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: true + mapperAbsoluteMinClusterScore: 250 + mapperExtraClusterScore: -38 + mapperMatchScore: 127 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 15 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 250 + mapperRelativeMinScore: 0.7 + mapperMinSeedsDistance: 6 + mapperMaxSeedsDistance: 6 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.7 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + rightForceRealignmentTrigger: CDR3Begin(-6) + dParameters: + geneFeatureToAlign: DRegionWithP + relativeMinScore: 0.85 + absoluteMinScore: 25.0 + maxHits: 3 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + jParameters: + geneFeatureToAlign: JRegionWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: false + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 95 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 4 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 100 + mapperRelativeMinScore: 0.8 + mapperMinSeedsDistance: 5 + mapperMaxSeedsDistance: 5 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + cParameters: + geneFeatureToAlign: CExon1 + minSumScore: 40 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner + mapperKValue: 5 + floatingLeftBound: false + floatingRightBound: true + mapperAbsoluteMinScore: 95.0 + mapperRelativeMinScore: 0.63 + mapperMatchScore: 128.0 + mapperMismatchPenalty: -0.1 + mapperOffsetShiftPenalty: -0.3 + mapperMinSeedsDistance: 8 + mapperMaxSeedsDistance: 18 + minAlignmentLength: 15 + maxAdjacentIndels: 2 + alignmentStopPenalty: -1000 + absoluteMinScore: 40.0 + relativeMinScore: 0.87 + maxHits: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + vjAlignmentOrder: VThenJ + libraryStructure: Unknown + includeDScore: false + includeCScore: false + minSumScore: 120.0 + relativeMinVFR3CDR3Score: 0.7 + allowPartialAlignments: true + allowNoCDR3PartAlignments: true + allowChimeras: false + readsLayout: ReverseOnly + mergerParameters: + qualityMergingAlgorithm: MaxSubtraction + partsLayout: null + minimalOverlap: 13 + minimalMatchQualitySum: 364 + maxQuality: 50 + minimalIdentity: 0.7 + identityType: MinimalQualityWeighted + fixSeed: true + alignmentBoundaryTolerance: 5 + minChimeraDetectionScore: 120 + vjOverlapWindow: 3 + saveOriginalSequence: false + saveOriginalReads: false + smartForceEdgeAlignments: true + retriesForPartialAlignments: 3 +refineTagsAndSort: + whitelists: {} + runCorrection: false + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: null +assemblePartial: + overlappedOnly: false + dropPartial: false + cellLevel: false + parameters: + kValue: 12 + kOffset: -7 + minimalAssembleOverlap: 12 + minimalNOverlap: 7 + minimalNOverlapShare: 0.65 + minimalAlignmentMergeIdentity: 0.85 + mergerParameters: + qualityMergingAlgorithm: SumSubtraction + partsLayout: CollinearDirect + minimalOverlap: 20 + minimalMatchQualitySum: 0 + maxQuality: 45 + minimalIdentity: 0.95 + identityType: Unweighted + maxLeftParts: 256000 + maxLeftMatches: 6144 +extend: + vAnchor: CDR3Begin + jAnchor: FR4Begin + minimalVScore: 50 + minimalJScore: 50 +assemble: + sortBySequence: false + clnaOutput: false + cellLevel: true + consensusAssemblerParameters: + assembler: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -14 + minAlignmentScore: 40 + maxNormalizedAlignmentPenalty: 0.2 + trimMinimalSumQuality: 0 + trimReferenceRegion: false + maxQuality: 45 + maxIterations: 6 + minAltSeedQualityScore: 11 + minAltSeedNormalizedPenalty: 0.35 + altSeedPenaltyTolerance: 0.5 + minRecordSharePerConsensus: 0.02 + minRecordsPerConsensus: 0 + minRecursiveRecordShare: 0.1 + minQualityScore: 0 + maxConsensuses: 2 + minTagSuffixShare: 0.0 + isolateChains: true + empiricalAlignmentAssignment: + uniqGeneOccurrenceWithinTag: + - V + - J + uniqGeneOccurrenceWithinCluster: + - V + - J + matchAlignmentsWithoutVJHitsByTags: true + cloneAssemblerParameters: + assemblingFeatures: + - VDJRegion + minimalClonalSequenceLength: 12 + qualityAggregationType: BetaScore + cloneClusteringParameters: + searchDepth: 2 + allowedMutationsInNRegions: 1 + searchParameters: twoMismatchesOrIndels + clusteringFilter: + type: advanced + correctionPower: 0.001 + backgroundSubstitutionRate: 5.0E-4 + backgroundIndelRate: 2.0E-4 + cloneFactoryParameters: + vParameters: + relativeMinScore: 0.97 + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null + separateByV: false + separateByJ: false + separateByC: false + maximalPreClusteringRatio: 1.0 + preClusteringScoreFilteringRatio: 2.0 + preClusteringCountFilteringRatio: 2.0 + addReadsCountOnClustering: false + badQualityThreshold: 0 + maxBadPointsPercent: 0.7 + mappingThreshold: 2of5 + minimalQuality: 0 + postFilters: null + inferMinRecordsPerConsensus: false +assembleContigs: + ignoreTags: false + parameters: + branchingMinimalQualityShare: 0.1 + branchingMinimalSumQuality: 60 + decisiveBranchingSumQualityThreshold: 120 + alignedSequenceEdgeDelta: 3 + alignmentEdgeRegionSize: 7 + minimalNonEdgePointsFraction: 0.25 + minimalMeanNormalizedQuality: 5.0 + outputMinimalQualityShare: 0.75 + outputMinimalSumQuality: 0 + subCloningRegions: null + assemblingRegions: null + postFiltering: + type: NoFiltering + trimmingParameters: + averageQualityThreshold: 10.0 + windowSize: 8 + minimalContigLength: 20 + alignedRegionsOnly: false + discardAmbiguousNucleotideCalls: false + cloneFactoryParameters: + vParameters: + relativeMinScore: 0.97 + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null +assembleCells: + algorithm: + type: SingleCellGroupingByOverlappingCellIds + minOverlapForSmaller: + percentage: 0.8 + roundingMode: UP + minOverlapForBigger: + percentage: 0.2 + roundingMode: UP + countCellBarcodesWithOneClone: true + thresholdForAssigningLeftoverCells: + percentage: 0.6 + roundingMode: DOWN + nonFunctional: + type: OverlapWithFoundGroups + minOverlapForClone: + percentage: 0.9 + roundingMode: UP + minOverlapForGroup: + percentage: 0.5 + roundingMode: UP +exportAlignments: + chains: ALL + noHeader: false + fields: + - field: -readIds + - field: -tags + args: + - Cell + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + - field: -topChains +exportClones: + splitByTagType: null + filterOutOfFrames: false + filterStops: false + chains: ALL + noHeader: false + fields: + - field: -cloneId + - field: -cellGroup + - field: -tags + args: + - Cell + - field: -readCount + - field: -readFraction + - field: -targetSequences + - field: -targetQualities + - field: -vHit + - field: -dHit + - field: -jHit + - field: -cHit + - field: -vAlignment + - field: -dAlignment + - field: -jAlignment + - field: -cAlignment + - field: -allNFeaturesWithMinQuality + - field: -nFeature + args: + - VDJRegion + - field: -aaFeature + args: + - VDJRegion + - field: -defaultAnchorPoints + - field: -topChains + splitFilesBy: [] + groupClonesBy: [] +exportCloneGroups: + filterOutOfFrames: false + filterStops: false + filterOutGroupsWithOneClone: false + showSecondaryChains: true + types: [] + noHeader: false + fields: + - field: -cellGroup + - field: -groupReadCount + - field: -groupUniqueTagCount + args: + - Cell + - field: -uniqClonesPerChainCount + - field: -readCount + - field: -readFraction + - field: -vHit + - field: -dHit + - field: -jHit + - field: -cHit + - field: -nFeature + args: + - CDR3 + - field: -aaFeature + args: + - CDR3 + sortChainsBy: Auto +qc: + checks: + - type: SuccessfullyAlignedReads + upper: 0.85 + middle: 0.7 + label: Successfully aligned reads + - type: OffTargetReads + upper: 0.2 + middle: 0.1 + label: Off target (non TCR/IG) reads + - type: ReadsWithNoVOrJHits + upper: 0.2 + middle: 0.1 + label: Reads with no V or J hits + - type: ReadsUsedInClonotypes + upper: 0.9 + middle: 0.7 + label: Reads used in clonotypes + - type: AlignmentsWithNoAssemblingFeature + upper: 0.15 + middle: 0.05 + label: Alignments without assembling feature + - type: AlignmentsDroppedLowQuality + upper: 0.05 + middle: 0.01 + label: Alignments dropped due to low sequence quality + - type: ClonesDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Clones dropped in post-filtering + - type: AlignmentsDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Alignments dropped in clones post-filtering + - type: CellBarcodesWithFoundGroups + upper: 0.9 + middle: 0.8 + label: Cell barcodes used in result groups diff --git a/regression/presets/list.txt b/regression/presets/list.txt index 03d7bc7f4..33982fee3 100644 --- a/regression/presets/list.txt +++ b/regression/presets/list.txt @@ -318,6 +318,9 @@ neb-mouse-rna-xcr-umi-nebnext (NEBNext® Immune Sequencing Kit (Mouse) BCR & TCR Required args: --species +-----Bruker----- +bruker-human-sc-xcr-vdj-beacon (Single cell VDJ) + -----AbHelix----- abhelix-human-rna-xcr (Human immunoglobulin repertoire) diff --git a/regression/schemas/analyzeBundle/mixins/AddTagsValidation.schema.yaml b/regression/schemas/analyzeBundle/mixins/AddTagsValidation.schema.yaml index 8752c579c..934ffe573 100644 --- a/regression/schemas/analyzeBundle/mixins/AddTagsValidation.schema.yaml +++ b/regression/schemas/analyzeBundle/mixins/AddTagsValidation.schema.yaml @@ -6,6 +6,7 @@ $defs: - Sample - Cell - Molecule + - Targets - Technical allOf: - type: object diff --git a/regression/schemas/analyzeBundle/mixins/DontCorrectTagType.schema.yaml b/regression/schemas/analyzeBundle/mixins/DontCorrectTagType.schema.yaml index d7943478c..a68d4d9d2 100644 --- a/regression/schemas/analyzeBundle/mixins/DontCorrectTagType.schema.yaml +++ b/regression/schemas/analyzeBundle/mixins/DontCorrectTagType.schema.yaml @@ -12,6 +12,7 @@ allOf: - Sample - Cell - Molecule + - Targets - Technical - type: object properties: diff --git a/regression/schemas/analyzeBundle/mixins/RemoveTagsValidation.schema.yaml b/regression/schemas/analyzeBundle/mixins/RemoveTagsValidation.schema.yaml index 926e23b44..f2a5eefd8 100644 --- a/regression/schemas/analyzeBundle/mixins/RemoveTagsValidation.schema.yaml +++ b/regression/schemas/analyzeBundle/mixins/RemoveTagsValidation.schema.yaml @@ -6,6 +6,7 @@ $defs: - Sample - Cell - Molecule + - Targets - Technical allOf: - type: object diff --git a/regression/schemas/analyzeBundle/parameters/align.schema.yaml b/regression/schemas/analyzeBundle/parameters/align.schema.yaml index 9c08bf75d..a3f8531aa 100644 --- a/regression/schemas/analyzeBundle/parameters/align.schema.yaml +++ b/regression/schemas/analyzeBundle/parameters/align.schema.yaml @@ -160,6 +160,7 @@ $defs: - Sample - Cell - Molecule + - Targets - Technical type: object properties: diff --git a/regression/schemas/analyzeBundle/parameters/consensus.schema.yaml b/regression/schemas/analyzeBundle/parameters/consensus.schema.yaml index f9f9af300..0727cac4c 100644 --- a/regression/schemas/analyzeBundle/parameters/consensus.schema.yaml +++ b/regression/schemas/analyzeBundle/parameters/consensus.schema.yaml @@ -1,9 +1,22 @@ $schema: https://json-schema.org/draft/2020-12/schema +$defs: + TagType: + type: string + enum: + - Sample + - Cell + - Molecule + - Targets + - Technical type: object properties: params: type: object properties: + groupingTagTypes: + type: array + items: + $ref: '#/$defs/TagType' groupingTags: type: array items: @@ -82,6 +95,10 @@ properties: type: number useWeightsForRecordsPerConsensus: type: boolean + payloadTagTypes: + type: array + items: + $ref: '#/$defs/TagType' payloadTags: type: array items: diff --git a/regression/schemas/analyzeBundle/parameters/exportClones.schema.yaml b/regression/schemas/analyzeBundle/parameters/exportClones.schema.yaml index 25f8b1bd9..61dee03a3 100644 --- a/regression/schemas/analyzeBundle/parameters/exportClones.schema.yaml +++ b/regression/schemas/analyzeBundle/parameters/exportClones.schema.yaml @@ -38,6 +38,7 @@ properties: - Sample - Cell - Molecule + - Targets - Technical splitFilesBy: type: array diff --git a/regression/schemas/analyzeBundle/parameters/parse.schema.yaml b/regression/schemas/analyzeBundle/parameters/parse.schema.yaml index 92b7557c4..4087ff5b9 100644 --- a/regression/schemas/analyzeBundle/parameters/parse.schema.yaml +++ b/regression/schemas/analyzeBundle/parameters/parse.schema.yaml @@ -1,17 +1,280 @@ $schema: https://json-schema.org/draft/2020-12/schema +$defs: + GTNConcatenate-1: + type: object + properties: + separator: + type: string + sources: + type: array + items: + anyOf: + - $ref: '#/$defs/GTNMultiply-2' + - $ref: '#/$defs/GTNPlus-2' + - $ref: '#/$defs/GTNConcatenate-2' + - $ref: '#/$defs/GTNConstantNumber' + - $ref: '#/$defs/GTNConstantSequence' + - $ref: '#/$defs/GTNConstantSequenceAndQuality' + - $ref: '#/$defs/GTNConstantString' + - $ref: '#/$defs/GTNTag' + GTNConcatenate-2: + $ref: '#/$defs/GTNConcatenate-1' + type: object + properties: + type: + const: concatenate + required: + - type + GTNConstantNumber: + allOf: + - type: object + properties: + value: + type: integer + - type: object + properties: + type: + const: constantNumber + required: + - type + GTNConstantSequence: + allOf: + - type: object + properties: + sequence: + $ref: '#/$defs/NucleotideSequence' + - type: object + properties: + type: + const: constantSequence + required: + - type + GTNConstantSequenceAndQuality: + allOf: + - type: object + properties: + quality: + type: object + properties: + data: + type: array + items: + type: string + sequence: + $ref: '#/$defs/NucleotideSequence' + - type: object + properties: + type: + const: constantSequenceAndQuality + required: + - type + GTNConstantString: + allOf: + - type: object + properties: + value: + type: string + - type: object + properties: + type: + const: constantString + required: + - type + GTNMultiply-1: + type: object + properties: + sources: + type: array + items: + anyOf: + - $ref: '#/$defs/GTNMultiply-2' + - $ref: '#/$defs/GTNPlus-2' + - $ref: '#/$defs/GTNConcatenate-2' + - $ref: '#/$defs/GTNConstantNumber' + - $ref: '#/$defs/GTNConstantSequence' + - $ref: '#/$defs/GTNConstantSequenceAndQuality' + - $ref: '#/$defs/GTNConstantString' + - $ref: '#/$defs/GTNTag' + GTNMultiply-2: + $ref: '#/$defs/GTNMultiply-1' + type: object + properties: + type: + const: multiply + required: + - type + GTNPlus-1: + type: object + properties: + sources: + type: array + items: + anyOf: + - $ref: '#/$defs/GTNMultiply-2' + - $ref: '#/$defs/GTNPlus-2' + - $ref: '#/$defs/GTNConcatenate-2' + - $ref: '#/$defs/GTNConstantNumber' + - $ref: '#/$defs/GTNConstantSequence' + - $ref: '#/$defs/GTNConstantSequenceAndQuality' + - $ref: '#/$defs/GTNConstantString' + - $ref: '#/$defs/GTNTag' + GTNPlus-2: + $ref: '#/$defs/GTNPlus-1' + type: object + properties: + type: + const: plus + required: + - type + GTNTag: + allOf: + - type: object + properties: + name: + type: string + - type: object + properties: + type: + const: tag + required: + - type + NucleotideSequence: + type: object + properties: + data: + type: array + items: + type: string type: object properties: params: type: object properties: + headerExtractors: + type: array + items: + type: object + properties: + mappings: + type: array + items: + type: object + properties: + groupIndex: + type: integer + groupName: + type: string + tagName: + type: string + patter: + type: string + readIndices: + type: array + items: + type: integer limit: type: integer maxErrorBudget: type: number pattern: type: string + readIdAsCellTag: + type: boolean replaceWildcards: type: boolean + splitBySample: + type: boolean + tagTransformationSteps: + type: array + items: + anyOf: + - type: object + properties: + type: + const: cutTechnicalTags + required: + - type + - allOf: + - type: object + properties: + destinationTag: + type: string + keepTags: + type: array + items: + type: string + name: + type: string + transformation: + anyOf: + - $ref: '#/$defs/GTNMultiply-2' + - $ref: '#/$defs/GTNPlus-2' + - $ref: '#/$defs/GTNConcatenate-2' + - $ref: '#/$defs/GTNConstantNumber' + - $ref: '#/$defs/GTNConstantSequence' + - $ref: '#/$defs/GTNConstantSequenceAndQuality' + - $ref: '#/$defs/GTNConstantString' + - $ref: '#/$defs/GTNTag' + - type: object + properties: + type: + const: generic + required: + - type + - allOf: + - type: object + properties: + allowOneMismatch: + type: boolean + matchTags: + type: array + items: + type: string + name: + type: string + newTags: + type: array + items: + type: string + preserveMatchedTags: + type: array + items: + type: string + reportingLevel: + type: string + enum: + - NoReport + - SimpleReport + - VerboseReport + rules: + type: array + items: + type: object + properties: + from: + type: array + items: + type: string + to: + type: array + items: + type: string + - type: object + properties: + type: + const: mapTags + required: + - type + trimTagTypes: + type: array + items: + type: string + enum: + - Sample + - Cell + - Molecule + - Targets + - Technical trimTags: type: array items: diff --git a/regression/schemas/analyzeBundle/parameters/refineTags.schema.yaml b/regression/schemas/analyzeBundle/parameters/refineTags.schema.yaml index bfde2af56..c4cd63a4b 100644 --- a/regression/schemas/analyzeBundle/parameters/refineTags.schema.yaml +++ b/regression/schemas/analyzeBundle/parameters/refineTags.schema.yaml @@ -445,6 +445,14 @@ $defs: const: group_metric_sum_weight required: - type + TagType: + type: string + enum: + - Sample + - Cell + - Molecule + - Targets + - Technical TopNOp: allOf: - type: object @@ -474,7 +482,8 @@ properties: type: string dontCorrectTagsTypes: type: array - items: {} + items: + $ref: '#/$defs/TagType' parameters: type: object properties: @@ -497,6 +506,10 @@ properties: - $ref: '#/$defs/AndKeyedFilter-2' - $ref: '#/$defs/GroupFilter' - $ref: '#/$defs/InGroupsFilter-2' + tagTypes: + type: array + items: + $ref: '#/$defs/TagType' tags: type: array items: diff --git a/regression/schemas/analyzeBundle/parameters/refineTagsAndSort.schema.yaml b/regression/schemas/analyzeBundle/parameters/refineTagsAndSort.schema.yaml index 59cc6b473..97ed0a2c2 100644 --- a/regression/schemas/analyzeBundle/parameters/refineTagsAndSort.schema.yaml +++ b/regression/schemas/analyzeBundle/parameters/refineTagsAndSort.schema.yaml @@ -477,6 +477,7 @@ properties: - Sample - Cell - Molecule + - Targets - Technical parameters: type: object diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAlign.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAlign.kt index 4ed2c69ee..8effeda74 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAlign.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAlign.kt @@ -56,7 +56,23 @@ import com.milaboratory.mitool.pattern.search.ReadSearchMode import com.milaboratory.mitool.pattern.search.ReadSearchPlan import com.milaboratory.mitool.pattern.search.ReadSearchSettings import com.milaboratory.mitool.pattern.search.SearchSettings +import com.milaboratory.mitool.pattern.search.toTagsInfo import com.milaboratory.mitool.report.ReadTrimmerReportBuilder +import com.milaboratory.mitool.tag.SequenceAndQualityTagValue +import com.milaboratory.mitool.tag.TagInfo +import com.milaboratory.mitool.tag.TagParsePipeline +import com.milaboratory.mitool.tag.TagParsePipeline.CELL_SPLIT_GROUP_LABEL +import com.milaboratory.mitool.tag.TagParsePipeline.SampleStat +import com.milaboratory.mitool.tag.TagParsePipeline.Status +import com.milaboratory.mitool.tag.TagParsePipeline.Status.Good +import com.milaboratory.mitool.tag.TagParsePipeline.Status.NotAligned +import com.milaboratory.mitool.tag.TagParsePipeline.Status.NotMatched +import com.milaboratory.mitool.tag.TagParsePipeline.Status.NotParsed +import com.milaboratory.mitool.tag.TagParsePipeline.TagsParser +import com.milaboratory.mitool.tag.TagTuple +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagsInfo +import com.milaboratory.mitool.tag.TechnicalTag.TAG_INPUT_IDX import com.milaboratory.mixcr.bam.BAMReader import com.milaboratory.mixcr.basictypes.MiXCRFooter import com.milaboratory.mixcr.basictypes.MiXCRHeader @@ -64,13 +80,7 @@ import com.milaboratory.mixcr.basictypes.SequenceHistory import com.milaboratory.mixcr.basictypes.VDJCAlignments import com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter import com.milaboratory.mixcr.basictypes.VDJCHit -import com.milaboratory.mixcr.basictypes.tag.SequenceAndQualityTagValue import com.milaboratory.mixcr.basictypes.tag.TagCount -import com.milaboratory.mixcr.basictypes.tag.TagInfo -import com.milaboratory.mixcr.basictypes.tag.TagTuple -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagValueType -import com.milaboratory.mixcr.basictypes.tag.TechnicalTag.TAG_INPUT_IDX import com.milaboratory.mixcr.cli.CommandAlign.Cmd.InputType.BAM import com.milaboratory.mixcr.cli.CommandAlign.Cmd.InputType.Fasta import com.milaboratory.mixcr.cli.CommandAlign.Cmd.InputType.MIC @@ -78,13 +88,8 @@ import com.milaboratory.mixcr.cli.CommandAlign.Cmd.InputType.PairedEndFastq import com.milaboratory.mixcr.cli.CommandAlign.Cmd.InputType.QuadEndFastq import com.milaboratory.mixcr.cli.CommandAlign.Cmd.InputType.SingleEndFastq import com.milaboratory.mixcr.cli.CommandAlign.Cmd.InputType.TripleEndFastq -import com.milaboratory.mixcr.cli.CommandAlignPipeline.ProcessingBundle -import com.milaboratory.mixcr.cli.CommandAlignPipeline.ProcessingBundleStatus.Good -import com.milaboratory.mixcr.cli.CommandAlignPipeline.ProcessingBundleStatus.NotAligned -import com.milaboratory.mixcr.cli.CommandAlignPipeline.ProcessingBundleStatus.NotMatched -import com.milaboratory.mixcr.cli.CommandAlignPipeline.ProcessingBundleStatus.NotParsed -import com.milaboratory.mixcr.cli.CommandAlignPipeline.cellSplitGroupLabel -import com.milaboratory.mixcr.cli.CommandAlignPipeline.getTagsExtractor +import com.milaboratory.mixcr.cli.CommandAlignParams.Companion.allTagTransformationSteps +import com.milaboratory.mixcr.cli.CommandAlignParams.Companion.readSearchPlan import com.milaboratory.mixcr.cli.CommonDescriptions.DEFAULT_VALUE_FROM_PRESET import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.cli.MiXCRCommand.OptionsOrder @@ -132,6 +137,7 @@ import picocli.CommandLine.Parameters import java.nio.file.Path import java.nio.file.Paths import java.util.* +import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicLong import java.util.regex.Pattern import kotlin.collections.component1 @@ -171,7 +177,7 @@ object CommandAlign { f0.matches(InputFileType.FASTQ) -> SingleEndFastq f0.matches(InputFileType.FASTA) || f0.matches(InputFileType.FASTA_GZ) -> Fasta f0.matches(InputFileType.BAM_SAM_CRAM) -> BAM - f0.matches(InputFileType.MIC) -> MIC(allTags = MicReader(f0).header.allTags) + f0.matches(InputFileType.MIC) -> MIC(MicReader(f0).header.tagsInfo) else -> throw ValidationException("Unknown file type: $f0") } @@ -654,8 +660,8 @@ object CommandAlign { "as reads coming from the same cells. " + "Main use-case is protocols with overlapped alpha-beta, gamma-delta or heavy-light cDNA molecules, " + "where each side was sequenced by separate mate pairs in a paired-end sequencer. " + - "Use special expansion group $cellSplitGroupLabel instead of R index " + - "(i.e. \"my_file_R{{$cellSplitGroupLabel:n}}.fastq.gz\").", + "Use special expansion group $CELL_SPLIT_GROUP_LABEL instead of R index " + + "(i.e. \"my_file_R{{$CELL_SPLIT_GROUP_LABEL:n}}.fastq.gz\").", DEFAULT_VALUE_FROM_PRESET ], names = ["--read-id-as-cell-tag"], @@ -929,15 +935,16 @@ object CommandAlign { // tagPattern is set via mixin (see above) // Prepending tag transformation step - val matchingTags = params.tagPattern?.let { - ReadSearchPlan.create(it, ReadSearchSettings(SearchSettings.Default, ReadSearchMode.Direct)).allTags + val matchingTags = params.tagPattern?.let { tagPattern -> + val plan = ReadSearchPlan.create( + tagPattern, + ReadSearchSettings(SearchSettings.Default, ReadSearchMode.Direct) + ) + plan.toTagsInfo().map { it.name }.toSet() } ?: emptySet() params = params.copy( tagTransformationSteps = listOf( - sampleSheet.tagTransformation( - matchingTags, - !strictMatching - ) + sampleSheet.tagTransformation(matchingTags, !strictMatching) ) + params.tagTransformationSteps ) } @@ -1005,20 +1012,24 @@ object CommandAlign { object Fasta : InputType(1, false) object BAM : InputType(-1 /* 1 or 2*/, false) data class MIC( - val readTags: List, - val barcodes: List, + val allTags: TagsInfo, + val readTags: List, + val barcodes: List ) : InputType(readTags.size, false) { companion object { - operator fun invoke(allTags: Collection): MIC { - val (barcodes, readTags) = allTags - .partition { tag -> TagType.isRecognisable(tag) } - return MIC(readTags, barcodes) + operator fun invoke(tagsInfo: TagsInfo): MIC { + val readTags = tagsInfo.allTagsOfType(TagType.Targets) + return MIC( + allTags = tagsInfo, + readTags = readTags, + barcodes = tagsInfo - readTags + ) } } } } - private fun createReader(pairedPatternPayload: Boolean?): OutputPortWithProgress { + private fun createReader(pairedPatternPayload: Boolean?): OutputPortWithProgress { MiXCRMain.lm.reportApplicationInputs( true, false, allInputFiles, @@ -1038,7 +1049,7 @@ object CommandAlign { cmdParams.replaceWildcards, tempDest, referenceForCram - ).map { ProcessingBundle.fromRead(it, it.weight()) } + ).map { TagParsePipeline.Input.fromRead(it, it.weight()) } when (pairedPatternPayload) { null -> reader true -> reader.onEach { record -> @@ -1062,7 +1073,7 @@ object CommandAlign { FastaSequenceReaderWrapper( FastaReader(inputFile.toFile(), NucleotideSequence.ALPHABET), cmdParams.replaceWildcards - ).map { ProcessingBundle.fromRead(it, it.weight()) } + ).map { TagParsePipeline.Input.fromRead(it, it.weight()) } } is MIC -> { @@ -1071,25 +1082,20 @@ object CommandAlign { val inputFile = inputFileGroups.fileGroups.first().files.first() val idGenerator = AtomicLong() val reader = MicReader(inputFile) - val readTagsShortcuts = inputType.readTags.sorted().map { reader.tagShortcut(it) } - val barcodeShortcuts = inputType.barcodes.map { reader.tagShortcut(it) } reader .map { record -> val readId = idGenerator.getAndIncrement() - ProcessingBundle.fromRead( - MultiRead( - readTagsShortcuts - .map { record.getTagValue(it) } - .map { SingleReadImpl(readId, it.value, "$readId") } + TagParsePipeline.Input( + read = MultiRead( + inputType.readTags + .map { tagInfo -> record.tags[tagInfo.index] as SequenceAndQualityTagValue } + .map { SingleReadImpl(readId, it.data, "$readId") } .toTypedArray() ), - record.weight.toDouble(), - tags = TagTuple( - *barcodeShortcuts.map { - SequenceAndQualityTagValue(record.getTagValue(it).value) - }.toTypedArray() - ), - originalReadId = readId + fileTags = emptyList(), + originalReadId = readId, + existedMicRecord = record, + weight = record.weight.toDouble() ) } .synchronized() @@ -1101,7 +1107,7 @@ object CommandAlign { assert(inputFileGroups.fileGroups[0].files.size == inputFileGroups.inputType.numberOfReads) FastqGroupReader(inputFileGroups.fileGroups, cmdParams.replaceWildcards, readBufferSize) .map { - ProcessingBundle.fromRead( + TagParsePipeline.Input.fromRead( it.read, it.read.weight(), fileTags = it.fileTags, @@ -1182,36 +1188,35 @@ object CommandAlign { logger.warnUnfomatted("Please cite:") for (l in vdjcLibrary.citations) logger.warnUnfomatted(l) } + val inputType = inputFileGroups.inputType - // Tags - val tagsExtractor = when (val inputType = inputFileGroups.inputType) { - is MIC -> { - val barcodeTags = inputType.barcodes.mapIndexed { index, tag -> - TagInfo( - TagType.detectByTagName(tag)!!, - TagValueType.SequenceAndQuality, - tag, - index - ) - } - - getTagsExtractor( - cmdParams.copy(tagPattern = null), - inputFileGroups.tags, - barcodeTags - ) - } + val sampleStats = ConcurrentHashMap, SampleStat>() - else -> getTagsExtractor(cmdParams, inputFileGroups.tags) + val tagsParser = when { + inputType is MIC -> TagsParser.AlreadyParsed(inputType.allTags) + cmdParams.tagPattern != null -> TagsParser.ByPattern(cmdParams.readSearchPlan) + else -> null } + val tagsExtractor = TagParsePipeline.getTagsExtractor( + TagParsePipeline.Params( + allTagTransformationSteps = cmdParams.allTagTransformationSteps, + readIdAsCellTag = cmdParams.readIdAsCellTag, + headerExtractors = cmdParams.headerExtractors, + splitBySample = cmdParams.splitBySample + ), + tagsParser, + includeTargets = false, + inputFileGroups.tags + ) + // Validating output tags if required for (tagsValidation in cmdParams.tagsValidations) tagsValidation.validate(tagsExtractor.tagsInfo) // Validating count of inputs with tag pattern - tagsExtractor.usedReadsCount?.let { requiredInputs -> - when (val inputType = inputFileGroups.inputType) { + (tagsParser as? TagsParser.ByPattern)?.usedReadsCount?.let { requiredInputs -> + when (inputType) { BAM -> ValidationException.require(requiredInputs <= 2) { "Can't use pattern with more than 2 reads with BAM input" } @@ -1223,7 +1228,7 @@ object CommandAlign { } // structure of final NSQTuple - val readsCountInTuple = when (val inputType = inputFileGroups.inputType) { + val readsCountInTuple = when (inputType) { is MIC -> when (inputType.readTags.size) { 0 -> throw ValidationException("No read tags in pattern") 1 -> VDJCAligner.ReadsCount.ONE @@ -1231,8 +1236,13 @@ object CommandAlign { else -> throw ValidationException("More then 2 read tags in pattern") } - else -> when (tagsExtractor.pairedPatternPayload) { - null -> when (inputFileGroups.inputType.numberOfReads) { + else -> when (tagsParser) { + is TagsParser.ByPattern -> when { + tagsParser.pairedPatternPayload -> VDJCAligner.ReadsCount.TWO + else -> VDJCAligner.ReadsCount.ONE + } + + else -> when (inputFileGroups.inputType.numberOfReads) { -1 -> { check(inputFileGroups.inputType == BAM) VDJCAligner.ReadsCount.ONE_OR_TWO @@ -1242,9 +1252,6 @@ object CommandAlign { 2 -> VDJCAligner.ReadsCount.TWO else -> throw ValidationException("Triple and quad fastq inputs require tag pattern for parsing.") } - - true -> VDJCAligner.ReadsCount.TWO - false -> VDJCAligner.ReadsCount.ONE } } @@ -1300,7 +1307,7 @@ object CommandAlign { aligner.setEventsListener(reportBuilder) use( - createReader(tagsExtractor.pairedPatternPayload), + createReader((tagsParser as? TagsParser.ByPattern)?.pairedPatternPayload), alignedWriter(outputFile, tagsExtractor.sampleTags), failedReadsWriter( pathsForNotAligned.notAlignedI1, @@ -1370,16 +1377,37 @@ object CommandAlign { .chunked(64) .buffered(max(16, threads.value)) - val step0 = - mainInputReads.mapUnchunked { bundle -> - val parsed = tagsExtractor.parse(bundle) - if (parsed.status == NotParsed) - reportBuilder.onFailedAlignment(VDJCAlignmentFailCause.NoBarcode, bundle.weight) - if (parsed.status == NotMatched) - reportBuilder.onFailedAlignment(VDJCAlignmentFailCause.SampleNotMatched, bundle.weight) - parsed + val step0 = mainInputReads.mapUnchunked { input -> + val result = tagsExtractor.extract(input) + + val newSeq = if (tagsParser != null && result.ok) { + val reads = tagsParser.readTags.map { tagInfo -> + (result.record!!.tags[tagInfo.index] as SequenceAndQualityTagValue).data + } + input.sequence.withElements(*reads.toTypedArray()) + } else { + input.sequence } + if (result.status == NotParsed) + reportBuilder.onFailedAlignment(VDJCAlignmentFailCause.NoBarcode, input.weight) + if (result.status == NotMatched) + reportBuilder.onFailedAlignment(VDJCAlignmentFailCause.SampleNotMatched, input.weight) + if (result.ok) { + val sampleStat = sampleStats.computeIfAbsent(result.sample) { SampleStat() } + sampleStat.reads.addAndGet(input.weight.toLong()) + sampleStat.hash.addAndGet(newSeq.hashCode()) + } + + ParseResult( + read = input.read, + sequenceForAlign = newSeq, + tags = result.tags, + sample = result.sample, + status = result.status + ) + } + val step1 = if (cmdParams.trimmingQualityThreshold > 0) { val rep = ReadTrimmerReportBuilder() val trimmerProcessor = ReadTrimmerProcessor( @@ -1401,45 +1429,44 @@ object CommandAlign { threads = threads.value ) { bundle -> if (bundle.ok) { - var alignment = aligner.process(bundle.sequence, bundle.read) - ?: return@mapChunksInParallel bundle.copy(status = NotAligned) + var alignment = aligner.process(bundle.sequenceForAlign, bundle.read) + ?: return@mapChunksInParallel AlignmentResult.failed(bundle, NotAligned) alignment = alignment .withTagCount(TagCount(bundle.tags)) .shiftIndelsAtHomopolymers(gtRequiringIndelShifts) if (cmdParams.parameters.isSaveOriginalReads) alignment = alignment.withOriginalReads(arrayOf(bundle.read)) - bundle.copy(alignment = alignment, status = Good) + AlignmentResult.aligned(bundle, alignment) } else - bundle + AlignmentResult.failed(bundle, bundle.status) } step2 .unchunked() .ordered { it.read.id } - .forEach { bundle -> - if (bundle.status == NotParsed || bundle.status == NotMatched) - notParsedWriter?.write(bundle.read) - if (bundle.status == NotAligned) - notAlignedWriter?.write(bundle.read) + .forEach { result -> + if (result.status == NotParsed || result.status == NotMatched) + notParsedWriter?.write(result.read) + if (result.status == NotAligned) + notAlignedWriter?.write(result.read) val alignment = when { - bundle.alignment != null -> bundle.alignment!! + result.alignment != null -> result.alignment - cmdParams.writeFailedAlignments && bundle.status == NotAligned -> { + cmdParams.writeFailedAlignments && result.status == NotAligned -> { // Creating an empty alignment object if alignment for current read failed - val target = readsLayout.createTargets(bundle.sequence)[0] + val target = readsLayout.createTargets(result.sequence)[0] VDJCAlignments( hits = emptyHits, - tagCount = if (bundle.tags == TagTuple.NO_TAGS) - TagCount.NO_TAGS else TagCount(bundle.tags), + tagCount = result.tags, targets = target.targets, history = SequenceHistory.RawSequence.of( - bundle.read.id, + result.read.id, target, - bundle.sequence.weight + result.sequence.weight ), - originalSequences = if (alignerParameters.isSaveOriginalSequence) arrayOf(bundle.sequence) else null, - originalReads = if (alignerParameters.isSaveOriginalSequence) arrayOf(bundle.read) else null + originalSequences = if (alignerParameters.isSaveOriginalSequence) arrayOf(result.sequence) else null, + originalReads = if (alignerParameters.isSaveOriginalSequence) arrayOf(result.read) else null ) } @@ -1449,11 +1476,11 @@ object CommandAlign { if (alignment.isChimera) reportBuilder.onChimera(alignment.weight) - writers?.get(if (cmdParams.splitBySample) bundle.sample else emptyList())?.write(alignment) + writers?.get(if (cmdParams.splitBySample) result.sample else emptyList())?.write(alignment) } // Stats - val stats = tagsExtractor.sampleStats.values.sortedBy { -it.reads.get() } + val stats = sampleStats.values.sortedBy { -it.reads.get() } val cumsum = stats.runningFold(0L) { acc, sampleStat -> acc + sampleStat.reads.get() } val cutOff = cumsum.indexOfFirst { it >= cumsum.last() * 95 / 100 }.let { if (it < 0) stats.size else it } @@ -1475,13 +1502,14 @@ object CommandAlign { if (sample.isEmpty()) tagsExtractor.inputReads.get() else - tagsExtractor.sampleStats[sample]!!.reads.get() + sampleStats[sample]!!.reads.get() ) } reportBuilder.setFinishMillis(System.currentTimeMillis()) - if (tagsExtractor.reportAgg != null) reportBuilder.setTagReport(tagsExtractor.reportAgg!!.report) + if (tagsParser is TagsParser.ByPattern) + reportBuilder.setTagReport(tagsParser.reportAgg.report) reportBuilder.setNotMatchedByHeader(tagsExtractor.notMatchedByHeader.get()) reportBuilder.setTransformerReports(tagsExtractor.transformerReports) @@ -1559,9 +1587,7 @@ object CommandAlign { "." -> null else -> MultiSampleRun.writer( outputFile, - outputFileList?.let { - MultiSampleRun.SampleNameWriter(it, sampleTags.map { tagInfo -> tagInfo.name }) - } + outputFileList?.let { MultiSampleRun.SampleNameWriter(it, sampleTags) } ) { path -> VDJCAlignmentsWriter( path, @@ -1572,6 +1598,49 @@ object CommandAlign { } } + private data class ParseResult( + val read: SequenceRead, + val sequenceForAlign: NSQTuple, + val tags: TagTuple, + val sample: List, + val status: Status + ) { + val ok get() = status == Good + fun mapSequence(mapping: (NSQTuple) -> NSQTuple) = copy(sequenceForAlign = mapping(sequenceForAlign)) + } + + private data class AlignmentResult( + val read: SequenceRead, + val sequence: NSQTuple, + val sample: List, + val alignment: VDJCAlignments?, + val status: Status, + val tags: TagCount + ) { + companion object { + fun aligned(bundle: ParseResult, alignment: VDJCAlignments) = AlignmentResult( + read = bundle.read, + sequence = bundle.sequenceForAlign, + sample = bundle.sample, + alignment = alignment, + status = Good, + tags = alignment.tagCount + ) + + fun failed(bundle: ParseResult, status: Status) = AlignmentResult( + read = bundle.read, + sequence = bundle.sequenceForAlign, + sample = bundle.sample, + alignment = null, + status = status, + tags = when (bundle.tags) { + TagTuple.NO_TAGS -> TagCount.NO_TAGS + else -> TagCount(bundle.tags) + } + ) + } + } + companion object { private val libraryNameEnding: Pattern = Pattern.compile("\\.json(?:\\.gz|)$") } diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAnalyze.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAnalyze.kt index c1c4e04c0..323861a51 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAnalyze.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAnalyze.kt @@ -18,6 +18,7 @@ import com.milaboratory.cli.MultiSampleRun.SAVE_OUTPUT_FILE_NAMES_OPTION import com.milaboratory.cli.MultiSampleRun.listToSampleName import com.milaboratory.cli.POverridesBuilderOps import com.milaboratory.mitool.cli.Parse.readSearchPlan +import com.milaboratory.mitool.pattern.search.toTagsInfo import com.milaboratory.mixcr.bam.BAMReader import com.milaboratory.mixcr.cli.CommandAlign.STRICT_SAMPLE_NAME_MATCHING_OPTION import com.milaboratory.mixcr.cli.CommandAlign.inputFileGroups @@ -357,7 +358,7 @@ object CommandAnalyze { if (pipeline.first() == parse) { val mitoolPreset = bundle.mitool ?: throw ValidationException("No mitool params") val parseParams = mitoolPreset.parse ?: throw ValidationException("No mitool parse params") - val plan = parseParams.readSearchPlan() + val plan = parseParams.readSearchPlan if (outputNoUsedReads) { // fill up args of not parsed reads in symmetry of input files pathsForNotAligned.fillWithDefaults( @@ -369,7 +370,7 @@ object CommandAnalyze { ) // fill up args for not aligned reads according to payload tags count that will be in mitool results pathsForNotAligned.fillWithDefaults( - CommandAlign.Cmd.InputType.MIC(allTags = plan.allTags), + CommandAlign.Cmd.InputType.MIC(plan.toTagsInfo()), outputFolder, outputNamePrefix, addNotAligned = true, diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemble.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemble.kt index 63911077c..0ddade5f4 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemble.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemble.kt @@ -21,6 +21,9 @@ import com.milaboratory.cli.POverridesBuilderOps import com.milaboratory.core.Range import com.milaboratory.mitool.consensus.ConsensusResult import com.milaboratory.mitool.data.MinGroupsPerGroup +import com.milaboratory.mitool.tag.TagTuple +import com.milaboratory.mitool.tag.TagType.Cell +import com.milaboratory.mitool.tag.TagType.Molecule import com.milaboratory.mixcr.assembler.AlignmentsMappingMerger import com.milaboratory.mixcr.assembler.CloneAssembler import com.milaboratory.mixcr.assembler.CloneAssemblerRunner @@ -37,9 +40,6 @@ import com.milaboratory.mixcr.basictypes.VDJCAlignments import com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader import com.milaboratory.mixcr.basictypes.VDJCSProperties import com.milaboratory.mixcr.basictypes.tag.TagCount -import com.milaboratory.mixcr.basictypes.tag.TagTuple -import com.milaboratory.mixcr.basictypes.tag.TagType.Cell -import com.milaboratory.mixcr.basictypes.tag.TagType.Molecule import com.milaboratory.mixcr.basictypes.validateCompositeFeatures import com.milaboratory.mixcr.cli.CommonDescriptions.DEFAULT_VALUE_FROM_PRESET import com.milaboratory.mixcr.cli.CommonDescriptions.Labels @@ -424,7 +424,7 @@ object CommandAssemble { if (it.cloneIndex == -1L) return@map it val cloneTagCount = cloneTagCounts.get(it.cloneIndex.toInt())!! - val prefixes = it.tagCount.reduceToLevel(cloneTagCount.depth()) + val prefixes = it.tagCount.reduceToLevel(cloneTagCount.depth) if (!cloneTagCount.containsAll(prefixes.tuples())) { reportBuilder.onAlignmentFilteredByPrefix(it) // Dropped with clone semantically fits the case the most diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleCells.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleCells.kt index 7595e09bc..19943a6c8 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleCells.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleCells.kt @@ -17,6 +17,7 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger import com.milaboratory.cli.POverridesBuilderOps +import com.milaboratory.mitool.tag.TagType import com.milaboratory.mixcr.basictypes.ClnAReader import com.milaboratory.mixcr.basictypes.ClnAWriter import com.milaboratory.mixcr.basictypes.ClnsReader @@ -29,7 +30,6 @@ import com.milaboratory.mixcr.basictypes.IOUtil.MiXCRFileType.CLNA import com.milaboratory.mixcr.basictypes.IOUtil.MiXCRFileType.CLNS import com.milaboratory.mixcr.basictypes.MiXCRHeader import com.milaboratory.mixcr.basictypes.VDJCAlignments -import com.milaboratory.mixcr.basictypes.tag.TagType import com.milaboratory.mixcr.clonegrouping.CloneGroupingParams.Companion.mkGrouper import com.milaboratory.mixcr.presets.AnalyzeCommandDescriptor import com.milaboratory.mixcr.presets.AssembleContigsMixins diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleContigs.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleContigs.kt index 7e5b4399d..74945b5ed 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleContigs.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssembleContigs.kt @@ -24,6 +24,7 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.cli.POverridesBuilderOps import com.milaboratory.core.sequence.NucleotideSequence +import com.milaboratory.mitool.tag.TagType import com.milaboratory.mixcr.assembler.CloneFactory import com.milaboratory.mixcr.assembler.CloneFactoryParameters import com.milaboratory.mixcr.assembler.fullseq.CoverageAccumulator @@ -43,7 +44,6 @@ import com.milaboratory.mixcr.basictypes.MiXCRHeader import com.milaboratory.mixcr.basictypes.VDJCAlignments import com.milaboratory.mixcr.basictypes.VDJCSProperties.CloneOrdering import com.milaboratory.mixcr.basictypes.tag.TagCountAggregator -import com.milaboratory.mixcr.basictypes.tag.TagType import com.milaboratory.mixcr.basictypes.validateCompositeFeatures import com.milaboratory.mixcr.cli.CommonDescriptions.DEFAULT_VALUE_FROM_PRESET import com.milaboratory.mixcr.cli.CommonDescriptions.Labels @@ -348,7 +348,7 @@ private class Assembler( // process every cell barcode separately originalClone.tagCount.splitBy(cellTageLevel) .flatMap { tagsFromCell -> - val cellBarcode = tagsFromCell.reduceToLevel(cellTageLevel).singletonTuple + val cellBarcode = tagsFromCell.reduceToLevel(cellTageLevel).getSingletonTuple() assembleContigs0( originalClone.withTagCount(tagsFromCell), OutputPortFactory { diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemblePartial.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemblePartial.kt index c748f8d78..005f5dc96 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemblePartial.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandAssemblePartial.kt @@ -17,11 +17,11 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger import com.milaboratory.cli.POverridesBuilderOps +import com.milaboratory.mitool.tag.TagTuple +import com.milaboratory.mitool.tag.TagType import com.milaboratory.mixcr.basictypes.VDJCAlignments import com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader import com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter -import com.milaboratory.mixcr.basictypes.tag.TagTuple -import com.milaboratory.mixcr.basictypes.tag.TagType import com.milaboratory.mixcr.cli.CommonDescriptions.DEFAULT_VALUE_FROM_PRESET import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.partialassembler.PartialAlignmentsAssembler diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAirr.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAirr.kt index 31424986b..a54889313 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAirr.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAirr.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2023, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -16,6 +16,8 @@ import cc.redberry.pipe.util.forEach import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.basictypes.ClnAReader import com.milaboratory.mixcr.basictypes.ClnsReader import com.milaboratory.mixcr.basictypes.CloneSet.Companion.divideClonesByTags @@ -28,8 +30,6 @@ import com.milaboratory.mixcr.basictypes.IOUtil.MiXCRFileType.VDJCA import com.milaboratory.mixcr.basictypes.MiXCRFileInfo import com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader import com.milaboratory.mixcr.basictypes.VDJCObject -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.export.AirrColumns import com.milaboratory.mixcr.export.AirrColumns.AirrAlignmentBoundary import com.milaboratory.mixcr.export.AirrColumns.AlignmentCigar diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.kt index ec9b2be71..ebcaecaab 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2023, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -18,10 +18,10 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger import com.milaboratory.core.sequence.NucleotideSequence +import com.milaboratory.mitool.tag.TagsInfo +import com.milaboratory.mitool.tag.extractSequence import com.milaboratory.mixcr.basictypes.MultiAlignmentHelper import com.milaboratory.mixcr.basictypes.VDJCAlignments -import com.milaboratory.mixcr.basictypes.tag.TagsInfo -import com.milaboratory.mixcr.basictypes.tag.extractSequence import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.cli.afiltering.AFilter import com.milaboratory.util.NSequenceWithQualityPrintHelper diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportCloneGroups.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportCloneGroups.kt index aa6e544c6..78895de66 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportCloneGroups.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportCloneGroups.kt @@ -16,14 +16,14 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger import com.milaboratory.cli.POverridesBuilderOps +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.basictypes.Clone import com.milaboratory.mixcr.basictypes.CloneSet import com.milaboratory.mixcr.basictypes.CloneSetIO import com.milaboratory.mixcr.basictypes.IOUtil import com.milaboratory.mixcr.basictypes.MiXCRFileInfo import com.milaboratory.mixcr.basictypes.tag.TagCountAggregator -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.cli.CommandExportCloneGroupsParams.SortChainsBy import com.milaboratory.mixcr.cli.CommonDescriptions.DEFAULT_VALUE_FROM_PRESET import com.milaboratory.mixcr.clonegrouping.CellType diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClones.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClones.kt index 2569ad4b7..d4f35c35a 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClones.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClones.kt @@ -19,6 +19,7 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger import com.milaboratory.cli.POverridesBuilderOps +import com.milaboratory.mitool.tag.TagType import com.milaboratory.mixcr.basictypes.Clone import com.milaboratory.mixcr.basictypes.CloneSet import com.milaboratory.mixcr.basictypes.CloneSet.Companion.divideClonesByTags @@ -28,7 +29,6 @@ import com.milaboratory.mixcr.basictypes.CloneSet.Companion.split import com.milaboratory.mixcr.basictypes.CloneSetIO import com.milaboratory.mixcr.basictypes.IOUtil import com.milaboratory.mixcr.basictypes.MiXCRHeader -import com.milaboratory.mixcr.basictypes.tag.TagType import com.milaboratory.mixcr.cli.CommonDescriptions.DEFAULT_VALUE_FROM_PRESET import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.clonegrouping.CloneGrouper @@ -410,9 +410,6 @@ object CommandExportClones { key.startsWith("tag:", ignoreCase = true) -> { val tagName = key.substring(4) val tag = header.tagsInfo[tagName] - ValidationException.requireNotNull(tag) { - "No tag `$tagName` in a file" - } val tagType = when (tag.index) { // If splitting by this tag means the same as splitting by tag type header.tagsInfo.getDepthFor(tag.type) - 1 -> tag.type diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClonesPretty.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClonesPretty.kt index b3821910d..1c51ad002 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClonesPretty.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportClonesPretty.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2023, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -17,11 +17,11 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger import com.milaboratory.core.sequence.NucleotideSequence +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.basictypes.Clone import com.milaboratory.mixcr.basictypes.CloneSetIO import com.milaboratory.mixcr.basictypes.MultiAlignmentFormatter import com.milaboratory.mixcr.basictypes.MultiAlignmentHelper -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import gnu.trove.set.hash.TIntHashSet import io.repseq.core.Chains diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportOverlap.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportOverlap.kt index c894b0c6c..7675fd72d 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportOverlap.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportOverlap.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2023, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -14,10 +14,10 @@ package com.milaboratory.mixcr.cli import cc.redberry.pipe.util.forEach import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException +import com.milaboratory.mitool.tag.TagType import com.milaboratory.mixcr.basictypes.Clone import com.milaboratory.mixcr.basictypes.IOUtil import com.milaboratory.mixcr.basictypes.IOUtil.extractFileInfo -import com.milaboratory.mixcr.basictypes.tag.TagType import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.export.CloneFieldsExtractorsFactory import com.milaboratory.mixcr.export.ExportFieldDescription diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportReportsAsTable.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportReportsAsTable.kt index b0d4fbb60..26b0b48a4 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportReportsAsTable.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportReportsAsTable.kt @@ -17,8 +17,8 @@ import com.milaboratory.app.logger import com.milaboratory.app.matches import com.milaboratory.cli.StepDataCollection import com.milaboratory.cli.getReportSafe +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.basictypes.IOUtil -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.export.ExportFieldDescription import com.milaboratory.mixcr.export.InfoWriter import com.milaboratory.mixcr.export.MetaForExport diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmSingleCellTreesTable.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmSingleCellTreesTable.kt index 1093956a4..5fe46d01c 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmSingleCellTreesTable.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmSingleCellTreesTable.kt @@ -15,8 +15,8 @@ import cc.redberry.pipe.util.asSequence import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagsInfo +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.clonegrouping.CellType import com.milaboratory.mixcr.export.ExportFieldDescription import com.milaboratory.mixcr.export.InfoWriter diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTable.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTable.kt index b5b233689..392d514bc 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTable.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTable.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2023, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -15,7 +15,7 @@ import cc.redberry.pipe.util.asSequence import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger -import com.milaboratory.mixcr.basictypes.tag.TagsInfo +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.export.ExportFieldDescription import com.milaboratory.mixcr.export.InfoWriter import com.milaboratory.mixcr.export.MetaForExport diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTableWithNodes.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTableWithNodes.kt index 3dd135e74..3b2fc5892 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTableWithNodes.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandExportShmTreesTableWithNodes.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2023, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -20,10 +20,10 @@ import cc.redberry.pipe.util.toList import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.basictypes.CloneSet.Companion.divideClonesByTags import com.milaboratory.mixcr.basictypes.VDJCSProperties -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.export.ExportFieldDescription import com.milaboratory.mixcr.export.InfoWriter diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindAlleles.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindAlleles.kt index b2c0585ad..06182368d 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindAlleles.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindAlleles.kt @@ -25,6 +25,8 @@ import com.milaboratory.cli.resolve import com.milaboratory.core.io.sequence.fasta.FastaRecord import com.milaboratory.core.io.sequence.fasta.FastaWriter import com.milaboratory.core.sequence.NucleotideSequence +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.alleles.AlleleSearchResult import com.milaboratory.mixcr.alleles.AlleleSearchResult.Status.DE_NOVO import com.milaboratory.mixcr.alleles.AllelesBuilder @@ -38,8 +40,6 @@ import com.milaboratory.mixcr.basictypes.CloneReader import com.milaboratory.mixcr.basictypes.CloneSet import com.milaboratory.mixcr.basictypes.CloneSetIO import com.milaboratory.mixcr.basictypes.MiXCRHeader -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.presets.AnalyzeCommandDescriptor import com.milaboratory.mixcr.presets.AssembleContigsMixins.SetContigAssemblingFeatures diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindShmTrees.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindShmTrees.kt index cf034d831..aea7cb0d0 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindShmTrees.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandFindShmTrees.kt @@ -24,14 +24,14 @@ import cc.redberry.pipe.util.toList import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.assembler.CloneFactoryParameters import com.milaboratory.mixcr.basictypes.ClnsReader import com.milaboratory.mixcr.basictypes.CloneRanks import com.milaboratory.mixcr.basictypes.HasFeatureToAlign import com.milaboratory.mixcr.basictypes.MiXCRFooterMerger import com.milaboratory.mixcr.basictypes.MiXCRHeader -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.basictypes.validateCompositeFeatures import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.presets.AnalyzeCommandDescriptor diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandRefineTagsAndSort.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandRefineTagsAndSort.kt index 8b3e745f5..1a36a1a04 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/CommandRefineTagsAndSort.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/CommandRefineTagsAndSort.kt @@ -19,8 +19,6 @@ import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.logger import com.milaboratory.cli.POverridesBuilderOps -import com.milaboratory.core.sequence.NSequenceWithQuality -import com.milaboratory.core.sequence.NucleotideSequence import com.milaboratory.core.sequence.ShortSequenceSet import com.milaboratory.mitool.data.CriticalThresholdKey import com.milaboratory.mitool.refinement.TagCorrectionPlan @@ -29,17 +27,13 @@ import com.milaboratory.mitool.refinement.TagCorrector import com.milaboratory.mitool.refinement.TagCorrectorParameters import com.milaboratory.mitool.refinement.gfilter.SequenceExtractor import com.milaboratory.mitool.refinement.gfilter.SequenceExtractorsFactory +import com.milaboratory.mitool.tag.TagValueType +import com.milaboratory.mitool.tag.tagAliases import com.milaboratory.mixcr.basictypes.IOUtil import com.milaboratory.mixcr.basictypes.VDJCAlignments import com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader import com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter -import com.milaboratory.mixcr.basictypes.tag.SequenceAndQualityTagValue -import com.milaboratory.mixcr.basictypes.tag.SequenceTagValue import com.milaboratory.mixcr.basictypes.tag.TagCount -import com.milaboratory.mixcr.basictypes.tag.TagTuple -import com.milaboratory.mixcr.basictypes.tag.TagValue -import com.milaboratory.mixcr.basictypes.tag.TagValueType -import com.milaboratory.mixcr.basictypes.tag.tagAliases import com.milaboratory.mixcr.cli.CommonDescriptions.DEFAULT_VALUE_FROM_PRESET import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.cli.MiXCRMixinCollection.Companion.mixins @@ -294,8 +288,17 @@ object CommandRefineTagsAndSort { } logger.log { "Sorting will be applied to the following tags: ${tagNames.joinToString(", ")}" } + val requestedWhitelists = cmdParams.whitelists.toMutableMap() + val alreadyFilteredWhitelists = + mainReader.header.stepParams[AnalyzeCommandDescriptor.MiToolCommandDelegationDescriptor.refineTags].firstOrNull()?.params?.whitelists + if (alreadyFilteredWhitelists != null) { + val repeatedRequest = requestedWhitelists.keys.filter { tagName -> + requestedWhitelists[tagName]?.load() == alreadyFilteredWhitelists[tagName]?.load() + } + requestedWhitelists -= repeatedRequest.toSet() + } val corrected = when { - correctionEnabled.none { true } && cmdParams.whitelists.isEmpty() && cmdParams.parameters?.postFilter == null -> { + correctionEnabled.none { true } && requestedWhitelists.isEmpty() && cmdParams.parameters?.postFilter == null -> { mitoolReport = null mainReader.reportProgress("Sorting alignments by ${tagNames.last()}") } @@ -304,7 +307,7 @@ object CommandRefineTagsAndSort { // Running correction val whitelists = mutableMapOf() tagNames.forEachIndexed { i, tn -> - val t = cmdParams.whitelists[tn] + val t = requestedWhitelists[tn] if (t != null) { logger.log { "The following whitelist will be used for $tn: $t" } whitelists[i] = t.load() @@ -316,14 +319,6 @@ object CommandRefineTagsAndSort { val correctionPlan = TagCorrectionPlan( tagNames, - tagNames.indices.map { i -> - when { - correctionEnabled[i] -> NSequenceWithQuality::class.java// Sequence&quality tags will be unwrapped for correction - tagsInfo[i].valueType == TagValueType.NonSequence -> TagValue::class.java // Other tags will be left unchanged to be used as grouping keys - // for usage of a whitelist nucleotide sequence is needed - else -> NucleotideSequence::class.java - } - }, whitelists, // For now all sequence&quality tags are corrected, // more flexibility will be added in the future @@ -354,17 +349,7 @@ object CommandRefineTagsAndSort { "This procedure don't support aggregated tags. " + "Please run tag correction for *.vdjca files produced by 'align'." ) - val tagTuple = als.tagCount.singletonTuple - Array(tagNames.size) { tIdx -> // <- local index for the procedure - val tagValue = tagTuple[tIdx] - when { - correctionEnabled[tIdx] -> (tagValue as SequenceAndQualityTagValue).data - else -> when (val key = tagValue.extractKey()) { - is SequenceTagValue -> key.value // actual sequence - else -> key// converting any tag type to a key tag - } - } - } + als.tagCount.getSingletonTuple().asArray() } // Running correction, results are temporarily persisted in temp file, so the object can be used @@ -387,15 +372,14 @@ object CommandRefineTagsAndSort { mainReader.readAlignments(), { al -> al.alignmentsIndex } ) { al, newTagValues -> - // starting off the copy of original alignment tags array - val updatedTags = al.tagCount.singletonTuple.asArray() - tagNames.indices.forEach { tIdx -> - if (correctionEnabled[tIdx]) - updatedTags[tIdx] = - SequenceAndQualityTagValue(newTagValues[tIdx] as NSequenceWithQuality) + val updatedTags = al.tagCount.getSingletonTuple().mapIndexed { i, tagValue -> + if (correctionEnabled[i]) + newTagValues[i] + else + tagValue } // Applying updated tags values and returning updated alignments object - al.withTagCount(TagCount(TagTuple(*updatedTags), al.tagCount.singletonCount)) + al.withTagCount(TagCount(updatedTags, al.tagCount.getSingletonCount())) } .reportProgress("Applying correction & sorting alignments by ${tagNames.last()}") } @@ -414,7 +398,7 @@ object CommandRefineTagsAndSort { { tIdx -> // <- index inside the alignment object sortByHashOnDisk( ComparatorWithHash.compareBy { al -> - val tagTuple = al.tagCount.singletonTuple + val tagTuple = al.tagCount.getSingletonTuple() tagTuple[tIdx].extractKey() }, tempDest, diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/ITestCommandAssemblePreClones.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/ITestCommandAssemblePreClones.kt index d4818df58..bcdfadc8e 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/ITestCommandAssemblePreClones.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/ITestCommandAssemblePreClones.kt @@ -14,12 +14,12 @@ package com.milaboratory.mixcr.cli import cc.redberry.pipe.CUtils import com.milaboratory.app.ApplicationException import com.milaboratory.app.ValidationException +import com.milaboratory.mitool.tag.TagTuple +import com.milaboratory.mitool.tag.TagType import com.milaboratory.mixcr.assembler.preclone.FilePreCloneReader import com.milaboratory.mixcr.assembler.preclone.PreCloneAssemblerParameters import com.milaboratory.mixcr.assembler.preclone.PreCloneAssemblerRunner import com.milaboratory.mixcr.basictypes.VDJCAlignmentsReader -import com.milaboratory.mixcr.basictypes.tag.TagTuple -import com.milaboratory.mixcr.basictypes.tag.TagType import com.milaboratory.util.JsonOverrider import com.milaboratory.util.ReportHelper import com.milaboratory.util.SmartProgressReporter diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/MiXCRParamsResolver.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/MiXCRParamsResolver.kt index 7537cf772..32252d651 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/MiXCRParamsResolver.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/MiXCRParamsResolver.kt @@ -18,6 +18,7 @@ import com.milaboratory.cli.PresetAware import com.milaboratory.mixcr.basictypes.HasFeatureToAlign import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.presets.AlignMixins +import com.milaboratory.mixcr.presets.AnalyzeCommandDescriptor import com.milaboratory.mixcr.presets.AnalyzeCommandDescriptor.MiToolCommandDelegationDescriptor.parse import com.milaboratory.mixcr.presets.AnalyzeCommandDescriptor.assemble import com.milaboratory.mixcr.presets.AnalyzeCommandDescriptor.assembleCells @@ -29,7 +30,6 @@ import com.milaboratory.mixcr.presets.MiXCRMixin import com.milaboratory.mixcr.presets.MiXCRParamsBundle import com.milaboratory.mixcr.presets.PipelineMixins import com.milaboratory.mixcr.presets.Presets -import com.milaboratory.mixcr.presets.RefineTagsAndSortMixins import io.repseq.core.GeneFeature import io.repseq.core.GeneFeature.CDR3 import io.repseq.core.GeneFeature.VDJRegion @@ -75,7 +75,9 @@ abstract class MiXCRParamsResolver

( } if (parse in steps) { - val parseParams = bundle.mitool!!.parse!! + val parseParams = ValidationException.requireNotNull(bundle.mitool?.parse) { + "No params for MiTool parse" + } val mitoolPattern = ValidationException.requireNotNull(parseParams.pattern) { "Tag pattern should be set in `mitool.parse.pattern`" } @@ -88,12 +90,6 @@ abstract class MiXCRParamsResolver

( ValidationException.require(mitoolPattern == alignPattern) { "Tag patterns are different in `mitool.parse.pattern` and `align.tagPattern`: $mitoolPattern and $alignPattern" } - ValidationException.require(!alignParams.readIdAsCellTag) { - "`readIdAsCellTag` is not supported with mitool commands in pipeline" - } - ValidationException.require(alignParams.headerExtractors.isEmpty()) { - "`headerExtractors` are not supported with mitool commands in pipeline" - } if (alignParams.parameters.isSaveOriginalReads) { logger.warn { "Saving original reads with mitool commands in pipeline will lead to saving reads after mitool processing, not original ones" } } @@ -101,10 +97,15 @@ abstract class MiXCRParamsResolver

( logger.warn { "Saving original sequences with mitool commands in pipeline will lead to saving sequences after mitool processing, not original ones" } } - bundle.mitool!!.refineTags?.let { refineTags -> - ValidationException.requireEmpty(refineTags.dontCorrectTagsTypes) { - "With mitool refineTags command in pipeline, `${RefineTagsAndSortMixins.DontCorrectTagType.CMD_OPTION}` is not applicable, " + - "please use `${RefineTagsAndSortMixins.DontCorrectTagName.CMD_OPTION}` instead" + if (AnalyzeCommandDescriptor.MiToolCommandDelegationDescriptor.refineTags in steps) { + val refineTags = ValidationException.requireNotNull(bundle.mitool?.refineTags) { + "No params for MiTool refine-tags" + } + ValidationException.require(refineTags.tags.isNotEmpty() || refineTags.tagTypes.isNotEmpty()) { + "Either mitool.refineTags.tags or mitool.refineTags.tagTypes should be set" + } + ValidationException.require(refineTags.tags.isEmpty() || refineTags.tagTypes.isEmpty()) { + "Both mitool.refineTags.tags or mitool.refineTags.tagTypes are specified, specify only one" } } } diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/Mixins.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/Mixins.kt index 9d5890654..e22ec0f02 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/Mixins.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/Mixins.kt @@ -12,7 +12,7 @@ package com.milaboratory.mixcr.cli import com.milaboratory.app.ValidationException -import com.milaboratory.mixcr.basictypes.tag.TagType +import com.milaboratory.mitool.tag.TagType import com.milaboratory.mixcr.cli.CommonDescriptions.Labels import com.milaboratory.mixcr.cli.MiXCRCommand.OptionsOrder import com.milaboratory.mixcr.clonegrouping.CellType diff --git a/src/main/kotlin/com/milaboratory/mixcr/cli/postanalysis/CommandPa.kt b/src/main/kotlin/com/milaboratory/mixcr/cli/postanalysis/CommandPa.kt index 7a34e9324..de797c600 100644 --- a/src/main/kotlin/com/milaboratory/mixcr/cli/postanalysis/CommandPa.kt +++ b/src/main/kotlin/com/milaboratory/mixcr/cli/postanalysis/CommandPa.kt @@ -14,8 +14,8 @@ package com.milaboratory.mixcr.cli.postanalysis import com.milaboratory.app.InputFileType import com.milaboratory.app.ValidationException import com.milaboratory.app.matches +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.basictypes.CloneSetIO -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.cli.ChainsCandidates import com.milaboratory.mixcr.cli.ChainsUtil import com.milaboratory.mixcr.cli.CommonDescriptions diff --git a/src/main/resources/presets/protocols/10x.yaml b/src/main/resources/presets/protocols/10x.yaml index a08a4bcbb..d73cfc085 100644 --- a/src/main/resources/presets/protocols/10x.yaml +++ b/src/main/resources/presets/protocols/10x.yaml @@ -22,19 +22,19 @@ refineTags: inheritFrom: mitool-refineTags-default overrides: - tags: - - CELL - - UMI + tagTypes: + - Cell + - Molecule whitelists: CELL: builtin:737K-august-2016 consensus: inheritFrom: mitool-consensus-4-steps overrides: - - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + - groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets parameters: aAssemblerParameters: minAlignmentScore: 499 @@ -42,11 +42,11 @@ positiveMinQualityShare: 0.29 positiveMinIntersectionQuality: 379 minRecordSharePerConsensus: 0.001 - - groupingTags: - - CELL - - UMI - payloadTags: - - R2 + - groupingTagTypes: + - Cell + - Molecule + payloadTagTypes: + - Targets parameters: aAssemblerParameters: minAlignmentScore: 51 @@ -54,10 +54,10 @@ positiveMinQualityShare: 0.11 positiveMinIntersectionQuality: 101 minRecordSharePerConsensus: 0.3 - - groupingTags: - - CELL - payloadTags: - - R2 + - groupingTagTypes: + - Cell + payloadTagTypes: + - Targets parameters: aAssemblerParameters: minAlignmentScore: 41 @@ -66,10 +66,10 @@ positiveMinIntersectionQuality: 130 minRecordSharePerConsensus: 0.001 useWeightsForRecordsPerConsensus: false - - groupingTags: - - CELL - payloadTags: - - R2 + - groupingTagTypes: + - Cell + payloadTagTypes: + - Targets parameters: aAssemblerParameters: minAlignmentScore: 181 diff --git a/src/main/resources/presets/protocols/bruker.yaml b/src/main/resources/presets/protocols/bruker.yaml new file mode 100644 index 000000000..1bb25e6b3 --- /dev/null +++ b/src/main/resources/presets/protocols/bruker.yaml @@ -0,0 +1,150 @@ +"bruker-human-sc-xcr-vdj-beacon": + vendor: "Bruker" + label: "Single cell VDJ" + category: non-generic + inheritFrom: generic-lt-single-cell-amplicon + mixins: + - type: SetTagPattern + tagPattern: ^(CELL:N{10})ggg\^(R2:*) + - type: AddPipelineStep + step: mitool-parse + - type: AddPipelineStep + step: mitool-refine-tags + - type: AddPipelineStep + step: mitool-consensus + - type: SetSpecies + species: hsa + - type: SetClonotypeAssemblingFeatures + features: [ "VDJRegion" ] + - type: LeftAlignmentBoundaryNoPoint + floating: true + - type: RightAlignmentBoundaryNoPoint + floating: true + geneType: C + mitool: + parse: + inheritFrom: mitool-parse-default + refineTags: + inheritFrom: mitool-refineTags-default + overrides: + tagTypes: + - Cell + whitelists: + CELL: + - TGGTAGGCTG + - GTTAGCTGCT + - TACATAAAGA + - AGCCCTATCA + - ACCTACCGCC + - TCTCCAAGAC + - GTATACATTA + - AGACTCGATT + - CCAGGATTAA + - CTCCTTCAAG + - ACTACTTCTG + - GCCTTGTTGT + consensus: + inheritFrom: mitool-consensus-2-steps + overrides: + - groupingTagTypes: + - Cell + payloadTagTypes: + - Targets + parameters: + aAssemblerParameters: + minAlignmentScore: 80 + maxNormalizedAlignmentPenalty: 0.13 + positiveMinQualityShare: 0.13 + positiveMinIntersectionQuality: 150 + minRecordSharePerConsensus: 0.001 + minRecordsPerConsensus: 3 + - groupingTagTypes: + - Cell + payloadTagTypes: + - Targets + parameters: + aAssemblerParameters: + minAlignmentScore: 71 + maxNormalizedAlignmentPenalty: 0.1 + positiveMinQualityShare: 0.14 + positiveMinIntersectionQuality: 239 + minRecordSharePerConsensus: 1.0 + minRecordsPerConsensus: 3 + align: + inheritFrom: align-single-cell + overrides: + trimmingQualityThreshold: 0 + parameters: + readsLayout: ReverseOnly + vParameters: + relativeMinScore: 0.7 + parameters: + mapperRelativeMinScore: 0.7 + relativeMinScore: 0.7 + refineTagsAndSort: + runCorrection: false + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0e-05 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: null + assemble: + inheritFrom: assemble-with-consensus-sc-cell-level-fr + overrides: + clnaOutput: false + cloneAssemblerParameters: + badQualityThreshold: 0 + minimalQuality: 0 + cloneFactoryParameters: + vParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + relativeMinScore: 0.97 + scoring: null + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + relativeMinScore: + scoring: null + dParameters: + absoluteMinScore: + relativeMinScore: + maxHits: + scoring: null + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + relativeMinScore: + scoring: null + postFilters: null + exportClones: + inheritFrom: exportClones-single-cell-no-umi-base + overrides: + fields: + - field: "-cloneId" + - field: "-cellGroup" + - field: "-tags" + args: [ "Cell" ] + - field: "-readCount" + - field: "-readFraction" + - field: "-targetSequences" + - field: "-targetQualities" + - field: "-vHit" + - field: "-dHit" + - field: "-jHit" + - field: "-cHit" + - field: "-vAlignment" + - field: "-dAlignment" + - field: "-jAlignment" + - field: "-cAlignment" + - field: "-allNFeaturesWithMinQuality" + - field: "-nFeature" + args: [ "VDJRegion" ] + - field: "-aaFeature" + args: [ "VDJRegion" ] + - field: "-defaultAnchorPoints" + - field: "-topChains" \ No newline at end of file diff --git a/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java b/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java index db0bb1d2c..c75129cbb 100644 --- a/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java +++ b/src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java @@ -21,9 +21,9 @@ import com.milaboratory.core.sequence.NucleotideSequence; import com.milaboratory.core.sequence.quality.QualityAggregationType; import com.milaboratory.core.tree.TreeSearchParameters; +import com.milaboratory.mitool.tag.TagsInfo; import com.milaboratory.mixcr.assembler.preclone.PreCloneReader; import com.milaboratory.mixcr.basictypes.*; -import com.milaboratory.mixcr.basictypes.tag.TagsInfo; import com.milaboratory.mixcr.tests.MiXCRTestUtils; import com.milaboratory.mixcr.vdjaligners.*; import com.milaboratory.util.GlobalObjectMappers; diff --git a/src/test/java/com/milaboratory/mixcr/basictypes/ClnAReaderTest.java b/src/test/java/com/milaboratory/mixcr/basictypes/ClnAReaderTest.java index ceb996db3..61f988708 100644 --- a/src/test/java/com/milaboratory/mixcr/basictypes/ClnAReaderTest.java +++ b/src/test/java/com/milaboratory/mixcr/basictypes/ClnAReaderTest.java @@ -16,11 +16,11 @@ import cc.redberry.pipe.blocks.FilteringPort; import cc.redberry.pipe.util.CountingOutputPort; import com.milaboratory.cli.AppVersionInfo; +import com.milaboratory.mitool.tag.TagsInfo; import com.milaboratory.mixcr.assembler.AlignmentsMappingMerger; import com.milaboratory.mixcr.assembler.CloneAssemblerParametersPresets; import com.milaboratory.mixcr.assembler.ReadToCloneMapping; import com.milaboratory.mixcr.assembler.preclone.PreCloneReader; -import com.milaboratory.mixcr.basictypes.tag.TagsInfo; import com.milaboratory.mixcr.presets.MiXCRParamsSpec; import com.milaboratory.mixcr.presets.MiXCRStepParams; import com.milaboratory.mixcr.util.MiXCRVersionInfo; diff --git a/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java b/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java index 6e7470966..a08114620 100644 --- a/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java +++ b/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java @@ -13,7 +13,7 @@ import com.milaboratory.core.sequence.NSequenceWithQuality; import com.milaboratory.core.sequence.NucleotideSequence; -import com.milaboratory.mixcr.basictypes.tag.TagsInfo; +import com.milaboratory.mitool.tag.TagsInfo; import com.milaboratory.mixcr.cli.CommandExportAlignmentsPretty; import com.milaboratory.mixcr.util.RunMiXCR; import io.repseq.core.GeneFeature; diff --git a/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagCountTest.java b/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagCountTest.java index e100bab57..73afc56b8 100644 --- a/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagCountTest.java +++ b/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagCountTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2022, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -12,6 +12,9 @@ package com.milaboratory.mixcr.basictypes.tag; import com.milaboratory.core.sequence.NucleotideSequence; +import com.milaboratory.mitool.tag.SequenceTagValue; +import com.milaboratory.mitool.tag.TagTuple; +import com.milaboratory.mitool.tag.TagValue; import org.junit.Assert; import org.junit.Test; @@ -42,4 +45,4 @@ public void test1() { TagCount tc2 = tca2.createAndDestroy(); Assert.assertEquals(tc2, tc1.reduceToLevel(1)); } -} \ No newline at end of file +} diff --git a/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagInfoTest.java b/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagInfoTest.java index 0bcc9949a..23534cbf8 100644 --- a/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagInfoTest.java +++ b/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagInfoTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2022, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -11,13 +11,19 @@ */ package com.milaboratory.mixcr.basictypes.tag; +import com.milaboratory.mitool.tag.TagInfo; +import com.milaboratory.mitool.tag.TagType; +import com.milaboratory.mitool.tag.TagValueType; +import com.milaboratory.mitool.tag.TagsInfo; import com.milaboratory.test.TestUtil; import org.junit.Test; +import java.util.Arrays; + public class TagInfoTest { @Test public void test1() { TestUtil.assertJson(new TagInfo(TagType.Sample, TagValueType.SequenceAndQuality, "TEST", 0)); - TestUtil.assertJson(new TagsInfo(0, new TagInfo(TagType.Sample, TagValueType.SequenceAndQuality, "TEST", 0))); + TestUtil.assertJson(new TagsInfo(0, Arrays.asList(new TagInfo(TagType.Sample, TagValueType.SequenceAndQuality, "TEST", 0)), false)); } -} \ No newline at end of file +} diff --git a/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagsInfoTest.java b/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagsInfoTest.java index b303a400e..72e2e3cf4 100644 --- a/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagsInfoTest.java +++ b/src/test/java/com/milaboratory/mixcr/basictypes/tag/TagsInfoTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2022, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -11,19 +11,26 @@ */ package com.milaboratory.mixcr.basictypes.tag; +import com.milaboratory.mitool.tag.TagInfo; +import com.milaboratory.mitool.tag.TagType; +import com.milaboratory.mitool.tag.TagValueType; +import com.milaboratory.mitool.tag.TagsInfo; import com.milaboratory.test.TestUtil; import org.junit.Test; -import static org.junit.Assert.*; +import java.util.Arrays; public class TagsInfoTest { @Test public void testJson() { TagsInfo ti = new TagsInfo(1, - new TagInfo(TagType.Sample, TagValueType.SequenceAndQuality,"SPL1", 0), - new TagInfo(TagType.Cell, TagValueType.SequenceAndQuality,"CELL", 1), - new TagInfo(TagType.Molecule, TagValueType.SequenceAndQuality,"UMI", 2) + Arrays.asList( + new TagInfo(TagType.Sample, TagValueType.SequenceAndQuality, "SPL1", 0), + new TagInfo(TagType.Cell, TagValueType.SequenceAndQuality, "CELL", 1), + new TagInfo(TagType.Molecule, TagValueType.SequenceAndQuality, "UMI", 2) + ), + false ); TestUtil.assertJson(ti); } -} \ No newline at end of file +} diff --git a/src/test/java/com/milaboratory/mixcr/postanalysis/ui/DownsamplingParametersTest.java b/src/test/java/com/milaboratory/mixcr/postanalysis/ui/DownsamplingParametersTest.java index b6f385b64..a7c38a243 100644 --- a/src/test/java/com/milaboratory/mixcr/postanalysis/ui/DownsamplingParametersTest.java +++ b/src/test/java/com/milaboratory/mixcr/postanalysis/ui/DownsamplingParametersTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2022, MiLaboratories Inc. All Rights Reserved + * Copyright (c) 2014-2024, MiLaboratories Inc. All Rights Reserved * * Before downloading or accessing the software, please read carefully the * License Agreement available at: @@ -12,18 +12,20 @@ package com.milaboratory.mixcr.postanalysis.ui; -import com.milaboratory.mixcr.basictypes.tag.TagInfo; -import com.milaboratory.mixcr.basictypes.tag.TagType; -import com.milaboratory.mixcr.basictypes.tag.TagValueType; -import com.milaboratory.mixcr.basictypes.tag.TagsInfo; +import com.milaboratory.mitool.tag.TagInfo; +import com.milaboratory.mitool.tag.TagType; +import com.milaboratory.mitool.tag.TagValueType; +import com.milaboratory.mitool.tag.TagsInfo; import org.junit.Test; +import java.util.Arrays; + public class DownsamplingParametersTest { @Test public void test1() { DownsamplingParameters.parse( "count-umi-auto", - new TagsInfo(2, new TagInfo(TagType.Molecule, TagValueType.NonSequence, "UMI", 0)), + new TagsInfo(2, Arrays.asList(new TagInfo(TagType.Molecule, TagValueType.NonSequence, "UMI", 0)), false), false, true ); @@ -48,4 +50,4 @@ public void test3() { true ); } -} \ No newline at end of file +} diff --git a/src/test/java/com/milaboratory/mixcr/tests/MiXCRTestUtils.java b/src/test/java/com/milaboratory/mixcr/tests/MiXCRTestUtils.java index 070b7feea..9580bf519 100644 --- a/src/test/java/com/milaboratory/mixcr/tests/MiXCRTestUtils.java +++ b/src/test/java/com/milaboratory/mixcr/tests/MiXCRTestUtils.java @@ -16,9 +16,9 @@ import com.milaboratory.core.sequence.NSequenceWithQuality; import com.milaboratory.core.sequence.NucleotideSequence; import com.milaboratory.mitool.data.CriticalThresholdCollection; +import com.milaboratory.mitool.tag.TagsInfo; import com.milaboratory.mixcr.basictypes.*; import com.milaboratory.mixcr.basictypes.tag.TagCount; -import com.milaboratory.mixcr.basictypes.tag.TagsInfo; import com.milaboratory.mixcr.partialassembler.AlignedTarget; import com.milaboratory.mixcr.partialassembler.VDJCMultiRead; import com.milaboratory.mixcr.presets.MiXCRParamsSpec; diff --git a/src/test/kotlin/com/milaboratory/mixcr/MixinsTest.kt b/src/test/kotlin/com/milaboratory/mixcr/MixinsTest.kt index 0fe8da163..554002181 100644 --- a/src/test/kotlin/com/milaboratory/mixcr/MixinsTest.kt +++ b/src/test/kotlin/com/milaboratory/mixcr/MixinsTest.kt @@ -1,16 +1,16 @@ package com.milaboratory.mixcr import com.milaboratory.core.sequence.NSequenceWithQuality -import com.milaboratory.mixcr.basictypes.tag.SequenceAndQualityTagValue -import com.milaboratory.mixcr.basictypes.tag.StringTagValue -import com.milaboratory.mixcr.basictypes.tag.TagInfo -import com.milaboratory.mixcr.basictypes.tag.TagType.Cell -import com.milaboratory.mixcr.basictypes.tag.TagType.Sample -import com.milaboratory.mixcr.basictypes.tag.TagType.Technical -import com.milaboratory.mixcr.basictypes.tag.TagValueType.NonSequence -import com.milaboratory.mixcr.basictypes.tag.TagValueType.SequenceAndQuality -import com.milaboratory.mixcr.basictypes.tag.TagsInfo -import com.milaboratory.mixcr.basictypes.tag.TechnicalTag +import com.milaboratory.mitool.tag.SequenceAndQualityTagValue +import com.milaboratory.mitool.tag.StringTagValue +import com.milaboratory.mitool.tag.TagType.Cell +import com.milaboratory.mitool.tag.TagType.Sample +import com.milaboratory.mitool.tag.TagType.Technical +import com.milaboratory.mitool.tag.TagValueType.NonSequence +import com.milaboratory.mitool.tag.TagValueType.SequenceAndQuality +import com.milaboratory.mitool.tag.TechnicalTag +import com.milaboratory.mitool.tag.TagInfo +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.presets.AlignMixins import com.milaboratory.test.TestUtil import com.milaboratory.util.K_YAML_OM diff --git a/src/test/kotlin/com/milaboratory/mixcr/PresetsTest.kt b/src/test/kotlin/com/milaboratory/mixcr/PresetsTest.kt index d6e419129..1e3ecbbe4 100644 --- a/src/test/kotlin/com/milaboratory/mixcr/PresetsTest.kt +++ b/src/test/kotlin/com/milaboratory/mixcr/PresetsTest.kt @@ -3,10 +3,10 @@ package com.milaboratory.mixcr import com.fasterxml.jackson.module.kotlin.readValue import com.milaboratory.cli.ParamsBundleSpecBaseAddress import com.milaboratory.cli.ParamsBundleSpecBaseEmbedded -import com.milaboratory.mixcr.basictypes.tag.TagInfo -import com.milaboratory.mixcr.basictypes.tag.TagType -import com.milaboratory.mixcr.basictypes.tag.TagValueType -import com.milaboratory.mixcr.basictypes.tag.TagsInfo +import com.milaboratory.mitool.tag.TagType +import com.milaboratory.mitool.tag.TagValueType +import com.milaboratory.mitool.tag.TagInfo +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.cli.CommandAlignParams import com.milaboratory.mixcr.cli.allClonesWillBeCoveredByFeature import com.milaboratory.mixcr.cli.presetFlagsMessages diff --git a/src/test/kotlin/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.kt b/src/test/kotlin/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.kt index 7c36b65ef..2432b625f 100644 --- a/src/test/kotlin/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.kt +++ b/src/test/kotlin/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.kt @@ -21,11 +21,11 @@ import com.milaboratory.core.sequence.NSequenceWithQuality import com.milaboratory.core.sequence.NucleotideSequence import com.milaboratory.core.sequence.SequenceQuality import com.milaboratory.core.sequence.quality.QualityTrimmerParameters +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.assembler.CloneFactory import com.milaboratory.mixcr.basictypes.Clone import com.milaboratory.mixcr.basictypes.MultiAlignmentHelper import com.milaboratory.mixcr.basictypes.VDJCAlignments -import com.milaboratory.mixcr.basictypes.tag.TagsInfo import com.milaboratory.mixcr.cli.CommandExportClonesPretty.Companion.outputCompact import com.milaboratory.mixcr.util.RunMiXCR import com.milaboratory.mixcr.util.RunMiXCR.RunMiXCRAnalysis diff --git a/src/test/kotlin/com/milaboratory/mixcr/export/FieldExtractorsTest.kt b/src/test/kotlin/com/milaboratory/mixcr/export/FieldExtractorsTest.kt index b63cb68d3..7800b317c 100644 --- a/src/test/kotlin/com/milaboratory/mixcr/export/FieldExtractorsTest.kt +++ b/src/test/kotlin/com/milaboratory/mixcr/export/FieldExtractorsTest.kt @@ -11,7 +11,7 @@ */ package com.milaboratory.mixcr.export -import com.milaboratory.mixcr.basictypes.tag.TagsInfo +import com.milaboratory.mitool.tag.TagsInfo import com.milaboratory.mixcr.partialassembler.PartialAlignmentsAssemblerAligner import com.milaboratory.mixcr.tests.MiXCRTestUtils import com.milaboratory.mixcr.tests.TargetBuilder