diff --git a/packages/apollo-cli/package.json b/packages/apollo-cli/package.json index 54d9e1078..e1af85fb9 100644 --- a/packages/apollo-cli/package.json +++ b/packages/apollo-cli/package.json @@ -85,7 +85,7 @@ "@istanbuljs/esm-loader-hook": "^0.2.0", "@istanbuljs/nyc-config-typescript": "^1.0.2", "@oclif/test": "^3.1.3", - "@types/chai": "^4", + "@types/chai": "^4.3.19", "@types/cli-progress": "^3", "@types/inquirer": "^9.0.7", "@types/mocha": "^10", diff --git a/packages/apollo-shared/package.json b/packages/apollo-shared/package.json index 22934e8f8..1b91a530f 100644 --- a/packages/apollo-shared/package.json +++ b/packages/apollo-shared/package.json @@ -26,8 +26,11 @@ "devDependencies": { "@nestjs/common": "^10.1.0", "@nestjs/core": "^10.1.0", + "@types/chai": "^4.3.19", "@types/node": "^18.14.2", "@types/rimraf": "^3", + "chai": "^5.1.1", + "chai-exclude": "^3.0.0", "glob": "^11.0.0", "mobx": "^6.6.1", "mobx-state-tree": "^5.1.7", diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts index 6a412af5d..b6abc8f0a 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts @@ -1,11 +1,16 @@ /* eslint-disable @typescript-eslint/no-floating-promises */ -import { strict as assert } from 'node:assert' import { describe, it } from 'node:test' -import gff from '@gmod/gff' +import { readFileSync } from 'node:fs' + +import gff, { GFF3Feature } from '@gmod/gff' +import { assert, use } from 'chai' +import chaiExclude from 'chai-exclude' import { gff3ToAnnotationFeature } from './gff3ToAnnotationFeature' import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' +use(chaiExclude) + const testCases: [string, string, AnnotationFeatureSnapshot][] = [ [ 'a feature with no children', @@ -23,18 +28,173 @@ const testCases: [string, string, AnnotationFeatureSnapshot][] = [ }, }, ], + [ + 'a feature with 
two children', + `ctgA est EST_match 1050 3202 . + . ID=Match1;Name=agt830.5;Target=agt830.5 1 654 +ctgA est match_part 1050 1500 . + . Parent=Match1;Name=agt830.5;Target=agt830.5 1 451 +ctgA est match_part 3000 3202 . + . Parent=Match1;Name=agt830.5;Target=agt830.5 452 654 +`, + { + _id: '66cf9fbb4e947fa2c27d3d6a', + refSeq: 'ctgA', + type: 'EST_match', + min: 1049, + max: 3202, + strand: 1, + children: { + '66cf9fbb4e947fa2c27d3d68': { + _id: '66cf9fbb4e947fa2c27d3d68', + refSeq: 'ctgA', + type: 'match_part', + min: 1049, + max: 1500, + strand: 1, + attributes: { + gff_source: ['est'], + gff_name: ['agt830.5'], + gff_target: ['agt830.5 1 451'], + }, + }, + '66cf9fbb4e947fa2c27d3d69': { + _id: '66cf9fbb4e947fa2c27d3d69', + refSeq: 'ctgA', + type: 'match_part', + min: 2999, + max: 3202, + strand: 1, + attributes: { + gff_source: ['est'], + gff_name: ['agt830.5'], + gff_target: ['agt830.5 452 654'], + }, + }, + }, + attributes: { + gff_source: ['est'], + gff_id: ['Match1'], + gff_name: ['agt830.5'], + gff_target: ['agt830.5 1 654'], + }, + }, + ], ] +interface AnnotationFeatureSnapshotWithChildrenArray + extends Omit<AnnotationFeatureSnapshot, 'children'> { + children?: AnnotationFeatureSnapshotWithChildrenArray[] +} + +function childrenToArray( + feature: AnnotationFeatureSnapshot, +): AnnotationFeatureSnapshotWithChildrenArray { + const { children } = feature + if (!children) { + return feature as AnnotationFeatureSnapshotWithChildrenArray + } + const childrenArray = Object.values(children).map((child) => + childrenToArray(child), + ) + return { ...feature, children: childrenArray } +} + function compareFeatures( feature1: AnnotationFeatureSnapshot, feature2: AnnotationFeatureSnapshot, ) { - assert.deepEqual( - { ...feature1, _id: undefined }, - { ...feature2, _id: undefined }, + assert.deepEqualExcludingEvery( + childrenToArray(feature1), + childrenToArray(feature2), + '_id', ) } +function readFeatureFile(fn: string): GFF3Feature[] { + const lines = readFileSync(fn).toString().split('\n') + const 
feature: string[] = [] + for (const line of lines) { + if (!line.startsWith('#')) { + feature.push(line) + } + } + const inGff = gff.parseStringSync(feature.join('\n')) as GFF3Feature[] + return inGff +} + +function readAnnotationFeatureSnapshot(fn: string): AnnotationFeatureSnapshot { + const lines = readFileSync(fn).toString() + return JSON.parse(lines) as AnnotationFeatureSnapshot +} + +const [ex1, ex2, ex3, ex4] = readFeatureFile( + 'test_data/gene_representations.gff3', +) + +describe('gff3ToAnnotationFeature examples', () => { + it('Convert one CDS', () => { + const actual = gff3ToAnnotationFeature( + readFeatureFile('test_data/one_cds.gff3')[0], + ) + const expected = readAnnotationFeatureSnapshot('test_data/one_cds.json') + compareFeatures(actual, expected) + }) + it('Convert two CDSs', () => { + const actual = gff3ToAnnotationFeature( + readFeatureFile('test_data/two_cds.gff3')[0], + ) + const expected = readAnnotationFeatureSnapshot('test_data/two_cds.json') + compareFeatures(actual, expected) + }) + it('Convert example 1', () => { + const actual = gff3ToAnnotationFeature(ex1) + const txt = JSON.stringify(actual, null, 2) + + assert.equal(txt.match(/"type": "CDS"/g)?.length, 4) + assert.equal(txt.match(/"type": "TF_binding_site"/g)?.length, 1) + + const expected = readAnnotationFeatureSnapshot('test_data/example01.json') + compareFeatures(actual, expected) + }) + it('Convert example 2', () => { + const actual = gff3ToAnnotationFeature(ex2) + const txt = JSON.stringify(actual, null, 2) + assert.equal(txt.match(/"type": "CDS"/g)?.length, 4) + const expected = readAnnotationFeatureSnapshot('test_data/example02.json') + compareFeatures(actual, expected) + }) + it('Convert example 3', () => { + // NB: In example 3 (and in the other examples) mRNA10003 produces two proteins. + // In the other examples the two proteins are identified by sharing the same cds id. 
+ // In example 3 instead each cds has a unique id so the two proteins are identified by the order they + // appear in the gff. + const actual = gff3ToAnnotationFeature(ex3) + const txt = JSON.stringify(actual, null, 2) + assert.equal(txt.match(/"type": "CDS"/g)?.length, 4) + + // const expected = readAnnotationFeatureSnapshot('test_data/example03.json') + // compareFeatures(actual, expected) + }) + it('Convert example 4', () => { + const ft = JSON.stringify(ex4, null, 2) + assert.equal(ft.match(/"type": "five_prime_UTR"/g)?.length, 6) + assert.equal(ft.match(/"type": "three_prime_UTR"/g)?.length, 3) + + const actual = gff3ToAnnotationFeature(ex4) + const txt = JSON.stringify(actual, null, 2) + assert.equal(txt.match(/"type": "CDS"/g)?.length, 4) + assert.equal(txt.match(/prime_UTR/g), null) + + const expected = readAnnotationFeatureSnapshot('test_data/example04.json') + compareFeatures(actual, expected) + }) + it('Convert braker gff', () => { + const [gffFeature] = readFeatureFile('test_data/braker.gff') + const actual = gff3ToAnnotationFeature(gffFeature) + const txt = JSON.stringify(actual, null, 2) + assert.equal(txt.match(/intron/g), null) + assert.equal(txt.match(/_codon/g), null) + }) +}) + describe('gff3ToAnnotationFeature', () => { for (const testCase of testCases) { const [description, featureLine, convertedFeature] = testCase diff --git a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts index c07c1d31b..cd4c368d9 100644 --- a/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts +++ b/packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts @@ -158,7 +158,10 @@ function convertChildren( const [firstChildFeatureLocation] = childFeature if ( firstChildFeatureLocation.type === 'three_prime_UTR' || - firstChildFeatureLocation.type === 'five_prime_UTR' + firstChildFeatureLocation.type === 'five_prime_UTR' || + firstChildFeatureLocation.type === 'intron' || + 
firstChildFeatureLocation.type === 'start_codon' || + firstChildFeatureLocation.type === 'stop_codon' ) { continue } @@ -232,20 +235,17 @@ function processCDS( groupedLocations.push([location]) continue } - const lastGroupLastLocation = lastGroup.at(-1) - if (!lastGroupLastLocation) { - throw new Error('Got group with no locations') - } - if ( + const overlaps = lastGroup.some((lastGroupLoc) => doesIntersect2( /* eslint-disable @typescript-eslint/no-non-null-assertion */ - lastGroupLastLocation.start!, - lastGroupLastLocation.end!, + lastGroupLoc.start!, + lastGroupLoc.end!, location.start!, location.end!, /* eslint-enable @typescript-eslint/no-non-null-assertion */ - ) - ) { + ), + ) + if (overlaps) { groupedLocations.push([location]) } else { lastGroup.push(location) diff --git a/packages/apollo-shared/test_data/braker.gff b/packages/apollo-shared/test_data/braker.gff new file mode 100644 index 000000000..27feccfcd --- /dev/null +++ b/packages/apollo-shared/test_data/braker.gff @@ -0,0 +1,13 @@ +##gff-version 3 +CM033580.1 AUGUSTUS gene 15529 16566 0.92 - . ID=g1; +CM033580.1 AUGUSTUS mRNA 15529 16566 0.92 - . ID=g1.t1;Parent=g1; +CM033580.1 AUGUSTUS stop_codon 15529 15531 . - 0 ID=g1.t1.stop1;Parent=g1.t1; +CM033580.1 AUGUSTUS CDS 15529 15659 0.92 - 2 ID=g1.t1.CDS1;Parent=g1.t1; +CM033580.1 AUGUSTUS exon 15529 15659 . - . ID=g1.t1.exon1;Parent=g1.t1; +CM033580.1 AUGUSTUS intron 15660 16112 0.96 - . ID=g1.t1.intron1;Parent=g1.t1; +CM033580.1 AUGUSTUS CDS 16113 16314 0.96 - 0 ID=g1.t1.CDS2;Parent=g1.t1; +CM033580.1 AUGUSTUS exon 16113 16314 . - . ID=g1.t1.exon2;Parent=g1.t1; +CM033580.1 AUGUSTUS intron 16315 16536 0.96 - . ID=g1.t1.intron2;Parent=g1.t1; +CM033580.1 AUGUSTUS CDS 16537 16566 0.99 - 0 ID=g1.t1.CDS3;Parent=g1.t1; +CM033580.1 AUGUSTUS exon 16537 16566 . - . ID=g1.t1.exon3;Parent=g1.t1; +CM033580.1 AUGUSTUS start_codon 16564 16566 . 
- 0 ID=g1.t1.start1;Parent=g1.t1; diff --git a/packages/apollo-shared/test_data/example01.gff3 b/packages/apollo-shared/test_data/example01.gff3 new file mode 100644 index 000000000..bdedf2aa0 --- /dev/null +++ b/packages/apollo-shared/test_data/example01.gff3 @@ -0,0 +1,26 @@ +##gff-version 3 +##sequence-region chr1 1000 9000 +#example01 +chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN +chr1 . TF_binding_site 1000 1012 . + . ID=tfbs10001;Parent=gene10001 +chr1 . mRNA 1050 9000 . + . ID=mRNA10001;Parent=gene10001;Name=EDEN.1 +chr1 . mRNA 1050 9000 . + . ID=mRNA10002;Parent=gene10001;Name=EDEN.2 +chr1 . mRNA 1300 9000 . + . ID=mRNA10003;Parent=gene10001;Name=EDEN.3 +chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001,mRNA10002 +chr1 . exon 1300 1500 . + . ID=exon10002;Parent=mRNA10003 +chr1 . exon 3000 3902 . + . ID=exon10003;Parent=mRNA10001,mRNA10003 +chr1 . exon 5000 5500 . + . ID=exon10004;Parent=mRNA10001,mRNA10002,mRNA10003 +chr1 . exon 7000 9000 . + . ID=exon10005;Parent=mRNA10001,mRNA10002,mRNA10003 +chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1 +chr1 . CDS 3000 3902 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1 +chr1 . CDS 5000 5500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1 +chr1 . CDS 7000 7600 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1 +chr1 . CDS 1201 1500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2 +chr1 . CDS 5000 5500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2 +chr1 . CDS 7000 7600 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2 +chr1 . CDS 3301 3902 . + 0 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3 +chr1 . CDS 5000 5500 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3 +chr1 . CDS 7000 7600 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3 +chr1 . CDS 3391 3902 . + 0 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4 +chr1 . CDS 5000 5500 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4 +chr1 . CDS 7000 7600 . 
+ 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4 diff --git a/packages/apollo-shared/test_data/example01.json b/packages/apollo-shared/test_data/example01.json new file mode 100644 index 000000000..161b7a135 --- /dev/null +++ b/packages/apollo-shared/test_data/example01.json @@ -0,0 +1,236 @@ +{ + "_id": "66e049f17b9cedae9ad89108", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "children": { + "66e049f17b9cedae9ad890f5": { + "_id": "66e049f17b9cedae9ad890f5", + "refSeq": "chr1", + "type": "TF_binding_site", + "min": 999, + "max": 1012, + "strand": 1, + "attributes": { + "gff_id": ["tfbs10001"] + } + }, + "66e049f17b9cedae9ad890fb": { + "_id": "66e049f17b9cedae9ad890fb", + "refSeq": "chr1", + "type": "mRNA", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "66e049f17b9cedae9ad890f6": { + "_id": "66e049f17b9cedae9ad890f6", + "refSeq": "chr1", + "type": "exon", + "min": 1049, + "max": 1500, + "strand": 1, + "attributes": { + "gff_id": ["exon10002"] + } + }, + "66e049f17b9cedae9ad890f7": { + "_id": "66e049f17b9cedae9ad890f7", + "refSeq": "chr1", + "type": "exon", + "min": 2999, + "max": 3902, + "strand": 1, + "attributes": { + "gff_id": ["exon10003"] + } + }, + "66e049f17b9cedae9ad890f8": { + "_id": "66e049f17b9cedae9ad890f8", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { + "gff_id": ["exon10004"] + } + }, + "66e049f17b9cedae9ad890f9": { + "_id": "66e049f17b9cedae9ad890f9", + "refSeq": "chr1", + "type": "exon", + "min": 6999, + "max": 9000, + "strand": 1, + "attributes": { + "gff_id": ["exon10005"] + } + }, + "66e049f17b9cedae9ad890fa": { + "_id": "66e049f17b9cedae9ad890fa", + "refSeq": "chr1", + "type": "CDS", + "min": 1200, + "max": 7600, + "strand": 1, + "attributes": { + "gff_id": ["cds10001"], + "gff_name": ["edenprotein.1"] + } + } + }, + "attributes": { + "gff_id": ["mRNA10001"], + "gff_name": ["EDEN.1"] + } + }, + "66e049f17b9cedae9ad89100": { + "_id": 
"66e049f17b9cedae9ad89100", + "refSeq": "chr1", + "type": "mRNA", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "66e049f17b9cedae9ad890fc": { + "_id": "66e049f17b9cedae9ad890fc", + "refSeq": "chr1", + "type": "exon", + "min": 1049, + "max": 1500, + "strand": 1, + "attributes": { + "gff_id": ["exon10002"] + } + }, + "66e049f17b9cedae9ad890fd": { + "_id": "66e049f17b9cedae9ad890fd", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { + "gff_id": ["exon10004"] + } + }, + "66e049f17b9cedae9ad890fe": { + "_id": "66e049f17b9cedae9ad890fe", + "refSeq": "chr1", + "type": "exon", + "min": 6999, + "max": 9000, + "strand": 1, + "attributes": { + "gff_id": ["exon10005"] + } + }, + "66e049f17b9cedae9ad890ff": { + "_id": "66e049f17b9cedae9ad890ff", + "refSeq": "chr1", + "type": "CDS", + "min": 1200, + "max": 7600, + "strand": 1, + "attributes": { + "gff_id": ["cds10002"], + "gff_name": ["edenprotein.2"] + } + } + }, + "attributes": { + "gff_id": ["mRNA10002"], + "gff_name": ["EDEN.2"] + } + }, + "66e049f17b9cedae9ad89107": { + "_id": "66e049f17b9cedae9ad89107", + "refSeq": "chr1", + "type": "mRNA", + "min": 1299, + "max": 9000, + "strand": 1, + "children": { + "66e049f17b9cedae9ad89101": { + "_id": "66e049f17b9cedae9ad89101", + "refSeq": "chr1", + "type": "exon", + "min": 1299, + "max": 1500, + "strand": 1, + "attributes": { + "gff_id": ["exon10001"] + } + }, + "66e049f17b9cedae9ad89102": { + "_id": "66e049f17b9cedae9ad89102", + "refSeq": "chr1", + "type": "exon", + "min": 2999, + "max": 3902, + "strand": 1, + "attributes": { + "gff_id": ["exon10003"] + } + }, + "66e049f17b9cedae9ad89103": { + "_id": "66e049f17b9cedae9ad89103", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { + "gff_id": ["exon10004"] + } + }, + "66e049f17b9cedae9ad89104": { + "_id": "66e049f17b9cedae9ad89104", + "refSeq": "chr1", + "type": "exon", + "min": 6999, + "max": 9000, + "strand": 1, + 
"attributes": { + "gff_id": ["exon10005"] + } + }, + "66e049f17b9cedae9ad89105": { + "_id": "66e049f17b9cedae9ad89105", + "refSeq": "chr1", + "type": "CDS", + "min": 3300, + "max": 7600, + "strand": 1, + "attributes": { + "gff_id": ["cds10003"], + "gff_name": ["edenprotein.3"] + } + }, + "66e049f17b9cedae9ad89106": { + "_id": "66e049f17b9cedae9ad89106", + "refSeq": "chr1", + "type": "CDS", + "min": 3390, + "max": 7600, + "strand": 1, + "attributes": { + "gff_id": ["cds10004"], + "gff_name": ["edenprotein.4"] + } + } + }, + "attributes": { + "gff_id": ["mRNA10003"], + "gff_name": ["EDEN.3"] + } + } + }, + "attributes": { + "gff_id": ["gene10001"], + "gff_name": ["EDEN"] + } +} diff --git a/packages/apollo-shared/test_data/example02.json b/packages/apollo-shared/test_data/example02.json new file mode 100644 index 000000000..089705a3e --- /dev/null +++ b/packages/apollo-shared/test_data/example02.json @@ -0,0 +1,225 @@ +{ + "_id": "66e049609048deab4117a33e", + "refSeq": "chr1", + "type": "gene", + "min": 10999, + "max": 19000, + "strand": 1, + "children": { + "66e049609048deab4117a331": { + "_id": "66e049609048deab4117a331", + "refSeq": "chr1", + "type": "mRNA", + "min": 11049, + "max": 19000, + "strand": 1, + "children": { + "66e049609048deab4117a32c": { + "_id": "66e049609048deab4117a32c", + "refSeq": "chr1", + "type": "exon", + "min": 11049, + "max": 11500, + "strand": 1, + "attributes": { + "gff_id": ["exon20001"] + } + }, + "66e049609048deab4117a32d": { + "_id": "66e049609048deab4117a32d", + "refSeq": "chr1", + "type": "exon", + "min": 12999, + "max": 13902, + "strand": 1, + "attributes": { + "gff_id": ["exon20004"] + } + }, + "66e049609048deab4117a32e": { + "_id": "66e049609048deab4117a32e", + "refSeq": "chr1", + "type": "exon", + "min": 14999, + "max": 15500, + "strand": 1, + "attributes": { + "gff_id": ["exon20006"] + } + }, + "66e049609048deab4117a32f": { + "_id": "66e049609048deab4117a32f", + "refSeq": "chr1", + "type": "exon", + "min": 16999, + "max": 
19000, + "strand": 1, + "attributes": { + "gff_id": ["exon20009"] + } + }, + "66e049609048deab4117a330": { + "_id": "66e049609048deab4117a330", + "refSeq": "chr1", + "type": "CDS", + "min": 11200, + "max": 17600, + "strand": 1, + "attributes": { + "gff_id": ["cds20001"], + "gff_name": ["edenprotein.1"] + } + } + }, + "attributes": { + "gff_id": ["mRNA20001"], + "gff_name": ["EDEN.1"] + } + }, + "66e049609048deab4117a336": { + "_id": "66e049609048deab4117a336", + "refSeq": "chr1", + "type": "mRNA", + "min": 11049, + "max": 19000, + "strand": 1, + "children": { + "66e049609048deab4117a332": { + "_id": "66e049609048deab4117a332", + "refSeq": "chr1", + "type": "exon", + "min": 11049, + "max": 11500, + "strand": 1, + "attributes": { + "gff_id": ["exon20002"] + } + }, + "66e049609048deab4117a333": { + "_id": "66e049609048deab4117a333", + "refSeq": "chr1", + "type": "exon", + "min": 14999, + "max": 15500, + "strand": 1, + "attributes": { + "gff_id": ["exon20007"] + } + }, + "66e049609048deab4117a334": { + "_id": "66e049609048deab4117a334", + "refSeq": "chr1", + "type": "exon", + "min": 16999, + "max": 19000, + "strand": 1, + "attributes": { + "gff_id": ["exon20010"] + } + }, + "66e049609048deab4117a335": { + "_id": "66e049609048deab4117a335", + "refSeq": "chr1", + "type": "CDS", + "min": 11200, + "max": 17600, + "strand": 1, + "attributes": { + "gff_id": ["cds20002"], + "gff_name": ["edenprotein.2"] + } + } + }, + "attributes": { + "gff_id": ["mRNA20002"], + "gff_name": ["EDEN.2"] + } + }, + "66e049609048deab4117a33d": { + "_id": "66e049609048deab4117a33d", + "refSeq": "chr1", + "type": "mRNA", + "min": 11299, + "max": 19000, + "strand": 1, + "children": { + "66e049609048deab4117a337": { + "_id": "66e049609048deab4117a337", + "refSeq": "chr1", + "type": "exon", + "min": 11299, + "max": 11500, + "strand": 1, + "attributes": { + "gff_id": ["exon20003"] + } + }, + "66e049609048deab4117a338": { + "_id": "66e049609048deab4117a338", + "refSeq": "chr1", + "type": "exon", + 
"min": 12999, + "max": 13902, + "strand": 1, + "attributes": { + "gff_id": ["exon20005"] + } + }, + "66e049609048deab4117a339": { + "_id": "66e049609048deab4117a339", + "refSeq": "chr1", + "type": "exon", + "min": 14999, + "max": 15500, + "strand": 1, + "attributes": { + "gff_id": ["exon20008"] + } + }, + "66e049609048deab4117a33a": { + "_id": "66e049609048deab4117a33a", + "refSeq": "chr1", + "type": "exon", + "min": 16999, + "max": 19000, + "strand": 1, + "attributes": { + "gff_id": ["exon20011"] + } + }, + "66e049609048deab4117a33b": { + "_id": "66e049609048deab4117a33b", + "refSeq": "chr1", + "type": "CDS", + "min": 13300, + "max": 17600, + "strand": 1, + "attributes": { + "gff_id": ["cds20003"], + "gff_name": ["edenprotein.3"] + } + }, + "66e049609048deab4117a33c": { + "_id": "66e049609048deab4117a33c", + "refSeq": "chr1", + "type": "CDS", + "min": 13390, + "max": 17600, + "strand": 1, + "attributes": { + "gff_id": ["cds20004"], + "gff_name": ["edenprotein.4"] + } + } + }, + "attributes": { + "gff_id": ["mRNA20003"], + "gff_name": ["EDEN.3"] + } + } + }, + "attributes": { + "gff_id": ["gene20001"], + "gff_name": ["EDEN"] + } +} diff --git a/packages/apollo-shared/test_data/example04.json b/packages/apollo-shared/test_data/example04.json new file mode 100644 index 000000000..83d8928d6 --- /dev/null +++ b/packages/apollo-shared/test_data/example04.json @@ -0,0 +1,225 @@ +{ + "_id": "66e0555fae0bd7cfcd69912d", + "refSeq": "chr1", + "type": "gene", + "min": 30999, + "max": 39000, + "strand": 1, + "children": { + "66e0555fae0bd7cfcd699120": { + "_id": "66e0555fae0bd7cfcd699120", + "refSeq": "chr1", + "type": "mRNA", + "min": 31049, + "max": 39000, + "strand": 1, + "children": { + "66e0555fae0bd7cfcd69911b": { + "_id": "66e0555fae0bd7cfcd69911b", + "refSeq": "chr1", + "type": "exon", + "min": 31049, + "max": 31500, + "strand": 1, + "attributes": { + "gff_id": ["exon40001"] + } + }, + "66e0555fae0bd7cfcd69911c": { + "_id": "66e0555fae0bd7cfcd69911c", + "refSeq": 
"chr1", + "type": "exon", + "min": 32999, + "max": 33902, + "strand": 1, + "attributes": { + "gff_id": ["exon40003"] + } + }, + "66e0555fae0bd7cfcd69911d": { + "_id": "66e0555fae0bd7cfcd69911d", + "refSeq": "chr1", + "type": "exon", + "min": 34999, + "max": 35500, + "strand": 1, + "attributes": { + "gff_id": ["exon40004"] + } + }, + "66e0555fae0bd7cfcd69911e": { + "_id": "66e0555fae0bd7cfcd69911e", + "refSeq": "chr1", + "type": "exon", + "min": 36999, + "max": 39000, + "strand": 1, + "attributes": { + "gff_id": ["exon40005"] + } + }, + "66e0555fae0bd7cfcd69911f": { + "_id": "66e0555fae0bd7cfcd69911f", + "refSeq": "chr1", + "type": "CDS", + "min": 31200, + "max": 37600, + "strand": 1, + "attributes": { + "gff_id": ["cds40001"], + "gff_name": ["edenprotein.1"] + } + } + }, + "attributes": { + "gff_id": ["mRNA40001"], + "gff_name": ["EDEN.1"] + } + }, + "66e0555fae0bd7cfcd699125": { + "_id": "66e0555fae0bd7cfcd699125", + "refSeq": "chr1", + "type": "mRNA", + "min": 31049, + "max": 39000, + "strand": 1, + "children": { + "66e0555fae0bd7cfcd699121": { + "_id": "66e0555fae0bd7cfcd699121", + "refSeq": "chr1", + "type": "exon", + "min": 31049, + "max": 31500, + "strand": 1, + "attributes": { + "gff_id": ["exon40001"] + } + }, + "66e0555fae0bd7cfcd699122": { + "_id": "66e0555fae0bd7cfcd699122", + "refSeq": "chr1", + "type": "exon", + "min": 34999, + "max": 35500, + "strand": 1, + "attributes": { + "gff_id": ["exon40004"] + } + }, + "66e0555fae0bd7cfcd699123": { + "_id": "66e0555fae0bd7cfcd699123", + "refSeq": "chr1", + "type": "exon", + "min": 36999, + "max": 39000, + "strand": 1, + "attributes": { + "gff_id": ["exon40005"] + } + }, + "66e0555fae0bd7cfcd699124": { + "_id": "66e0555fae0bd7cfcd699124", + "refSeq": "chr1", + "type": "CDS", + "min": 31200, + "max": 37600, + "strand": 1, + "attributes": { + "gff_id": ["cds40002"], + "gff_name": ["edenprotein.2"] + } + } + }, + "attributes": { + "gff_id": ["mRNA40002"], + "gff_name": ["EDEN.2"] + } + }, + 
"66e0555fae0bd7cfcd69912c": { + "_id": "66e0555fae0bd7cfcd69912c", + "refSeq": "chr1", + "type": "mRNA", + "min": 31299, + "max": 39000, + "strand": 1, + "children": { + "66e0555fae0bd7cfcd699126": { + "_id": "66e0555fae0bd7cfcd699126", + "refSeq": "chr1", + "type": "exon", + "min": 31299, + "max": 31500, + "strand": 1, + "attributes": { + "gff_id": ["exon40002"] + } + }, + "66e0555fae0bd7cfcd699127": { + "_id": "66e0555fae0bd7cfcd699127", + "refSeq": "chr1", + "type": "exon", + "min": 32999, + "max": 33902, + "strand": 1, + "attributes": { + "gff_id": ["exon40003"] + } + }, + "66e0555fae0bd7cfcd699128": { + "_id": "66e0555fae0bd7cfcd699128", + "refSeq": "chr1", + "type": "exon", + "min": 34999, + "max": 35500, + "strand": 1, + "attributes": { + "gff_id": ["exon40004"] + } + }, + "66e0555fae0bd7cfcd699129": { + "_id": "66e0555fae0bd7cfcd699129", + "refSeq": "chr1", + "type": "exon", + "min": 36999, + "max": 39000, + "strand": 1, + "attributes": { + "gff_id": ["exon40005"] + } + }, + "66e0555fae0bd7cfcd69912a": { + "_id": "66e0555fae0bd7cfcd69912a", + "refSeq": "chr1", + "type": "CDS", + "min": 33300, + "max": 37600, + "strand": 1, + "attributes": { + "gff_id": ["cds40003"], + "gff_name": ["edenprotein.3"] + } + }, + "66e0555fae0bd7cfcd69912b": { + "_id": "66e0555fae0bd7cfcd69912b", + "refSeq": "chr1", + "type": "CDS", + "min": 33390, + "max": 37600, + "strand": 1, + "attributes": { + "gff_id": ["cds40004"], + "gff_name": ["edenprotein.4"] + } + } + }, + "attributes": { + "gff_id": ["mRNA40003"], + "gff_name": ["EDEN.3"] + } + } + }, + "attributes": { + "gff_id": ["gene40001"], + "gff_name": ["EDEN"] + } +} diff --git a/packages/apollo-shared/test_data/example05.gff3 b/packages/apollo-shared/test_data/example05.gff3 new file mode 100644 index 000000000..70031fe06 --- /dev/null +++ b/packages/apollo-shared/test_data/example05.gff3 @@ -0,0 +1,30 @@ +##gff-version 3 +# example 5 +chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN;testid=t003 +chr1 . mRNA 1050 9000 . 
+ . ID=mRNA10001;Parent=gene10001;Name=EDEN.1;testid=t004 +chr1 . mRNA 1050 9000 . + . ID=mRNA10002;Parent=gene10001;Name=EDEN.2;testid=t005 +chr1 . mRNA 1300 9000 . + . ID=mRNA10003;Parent=gene10001;Name=EDEN.3;testid=t006 +chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001,mRNA10002;testid=t007 +chr1 . exon 1300 1500 . + . ID=exon10002;Parent=mRNA10003;testid=t008 +chr1 . exon 3000 3902 . + . ID=exon10003;Parent=mRNA10001,mRNA10003;testid=t009 +chr1 . exon 5000 5500 . + . ID=exon10004;Parent=mRNA10001,mRNA10002,mRNA10003;testid=t010 +chr1 . exon 7000 9000 . + . ID=exon10005;Parent=mRNA10001,mRNA10002,mRNA10003;testid=t011 +chr1 . five_prime_UTR 1050 1200 . + . ID=five_prime_UTR10001;Parent=mRNA10001,mRNA10002;testid=t012 +chr1 . five_prime_UTR 1300 1500 . + . ID=five_prime_UTR10002;Parent=mRNA10003;testid=t013 +chr1 . five_prime_UTR 3000 3300 . + . ID=five_prime_UTR10003;Parent=mRNA10003;testid=t014 +chr1 . five_prime_UTR 3000 3390 . + . ID=five_prime_UTR10004;Parent=mRNA10003;testid=t015 +chr1 . five_prime_UTR 3000 3390 . + . ID=five_prime_UTR10004;Parent=mRNA10003;testid=t016 +chr1 . three_prime_UTR 7601 9000 . + . ID=three_prime_UTR10001;Parent=mRNA10001,mRNA10002,mRNA10003;testid=t017 +chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t018 +chr1 . CDS 3000 3902 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t019 +chr1 . CDS 5000 5500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t020 +chr1 . CDS 7000 7600 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t021 +chr1 . CDS 1201 1500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t022 +chr1 . CDS 5000 5500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t023 +chr1 . CDS 7000 7600 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t024 +chr1 . CDS 3301 3902 . + 0 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t025 +chr1 . CDS 5000 5500 . 
+ 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t026 +chr1 . CDS 7000 7600 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t027 +chr1 . CDS 3391 3902 . + 0 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t028 +chr1 . CDS 5000 5500 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t029 +chr1 . CDS 7000 7600 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t030 diff --git a/packages/apollo-shared/test_data/example06.gff3 b/packages/apollo-shared/test_data/example06.gff3 new file mode 100644 index 000000000..a762a0936 --- /dev/null +++ b/packages/apollo-shared/test_data/example06.gff3 @@ -0,0 +1,30 @@ +##gff-version 3 +# example 6 +chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN;testid=t003 +chr1 . mRNA 1050 9000 . + . ID=mRNA10001;Parent=gene10001;Name=EDEN.1;testid=t004 +chr1 . mRNA 1050 9000 . + . ID=mRNA10002;Parent=gene10001;Name=EDEN.2;testid=t005 +chr1 . mRNA 1300 9000 . + . ID=mRNA10003;Parent=gene10001;Name=EDEN.3;testid=t006 +chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001,mRNA10002;testid=t007 +chr1 . exon 1300 1500 . + . ID=exon10002;Parent=mRNA10003;testid=t008 +chr1 . exon 3000 3902 . + . ID=exon10003;Parent=mRNA10001,mRNA10003;testid=t009 +chr1 . exon 5000 5500 . + . ID=exon10004;Parent=mRNA10001,mRNA10002,mRNA10003;testid=t010 +chr1 . exon 7000 9000 . + . ID=exon10005;Parent=mRNA10001,mRNA10002,mRNA10003;testid=t011 +chr1 . five_prime_UTR 1050 1200 . + . ID=five_prime_UTR10001;Parent=mRNA10001,mRNA10002;testid=t012 +chr1 . five_prime_UTR 1300 1500 . + . ID=five_prime_UTR10002;Parent=mRNA10003;testid=t013 +chr1 . five_prime_UTR 3000 3300 . + . ID=five_prime_UTR10003;Parent=mRNA10003;testid=t014 +chr1 . five_prime_UTR 3000 3390 . + . ID=five_prime_UTR10004;Parent=mRNA10003;testid=t015 +chr1 . five_prime_UTR 3000 3390 . + . ID=five_prime_UTR10004;Parent=mRNA10003;testid=t016 +chr1 . three_prime_UTR 7601 9000 . + . 
ID=three_prime_UTR10001;Parent=mRNA10001,mRNA10002,mRNA10003;testid=t017 +chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t018 +chr1 . CDS 3000 3902 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t019 +chr1 . CDS 5000 5500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t020 +chr1 . CDS 7000 7600 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t021 +chr1 . CDS 1201 1500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t022 +chr1 . CDS 5000 5500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t023 +chr1 . CDS 7000 7600 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t024 +chr1 . CDS 3301 3902 . + 0 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t025 +chr1 . CDS 5000 5500 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t026 +chr1 . CDS 7000 7600 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t027 +chr1 . CDS 3391 3902 . + 0 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t028 +chr1 . CDS 5000 5500 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t029 +chr1 . CDS 7000 7600 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t030 diff --git a/packages/apollo-shared/test_data/example07.gff3 b/packages/apollo-shared/test_data/example07.gff3 new file mode 100644 index 000000000..c1bb5fcab --- /dev/null +++ b/packages/apollo-shared/test_data/example07.gff3 @@ -0,0 +1,30 @@ +##gff-version 3 +# example 2 +chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN;testid=t003 +chr1 . mRNA 1050 9000 . + . ID=mRNA10001;Parent=gene10001;Name=EDEN.1;testid=t004 +chr1 . mRNA 1050 9000 . + . ID=mRNA10002;Parent=gene10001;Name=EDEN.2;testid=t005 +chr1 . mRNA 1300 9000 . + . ID=mRNA10003;Parent=gene10001;Name=EDEN.3;testid=t006 +chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001;testid=t007 +chr1 . exon 1050 1500 . + . ID=exon10002;Parent=mRNA10002;testid=t008 +chr1 . exon 1300 1500 . + . 
ID=exon10003;Parent=mRNA10003;testid=t009 +chr1 . exon 3000 3902 . + . ID=exon10004;Parent=mRNA10001;testid=t010 +chr1 . exon 3000 3902 . + . ID=exon10005;Parent=mRNA10003;testid=t011 +chr1 . exon 5000 5500 . + . ID=exon10006;Parent=mRNA10001;testid=t012 +chr1 . exon 5000 5500 . + . ID=exon10007;Parent=mRNA10002;testid=t013 +chr1 . exon 5000 5500 . + . ID=exon10008;Parent=mRNA10003;testid=t014 +chr1 . exon 7000 9000 . + . ID=exon10009;Parent=mRNA10001;testid=t015 +chr1 . exon 7000 9000 . + . ID=exon20010;Parent=mRNA10002;testid=t016 +chr1 . exon 7000 9000 . + . ID=exon20011;Parent=mRNA10003;testid=t017 +chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t018 +chr1 . CDS 3000 3902 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t019 +chr1 . CDS 5000 5500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t020 +chr1 . CDS 7000 7600 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t021 +chr1 . CDS 1201 1500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t022 +chr1 . CDS 5000 5500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t023 +chr1 . CDS 7000 7600 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2;testid=t024 +chr1 . CDS 3301 3902 . + 0 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t025 +chr1 . CDS 5000 5500 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t026 +chr1 . CDS 7000 7600 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3;testid=t027 +chr1 . CDS 3391 3902 . + 0 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t028 +chr1 . CDS 5000 5500 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t029 +chr1 . CDS 7000 7600 . 
+ 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4;testid=t030 diff --git a/packages/apollo-shared/test_data/gene_mrna.gff3 b/packages/apollo-shared/test_data/gene_mrna.gff3 new file mode 100644 index 000000000..55efcbad7 --- /dev/null +++ b/packages/apollo-shared/test_data/gene_mrna.gff3 @@ -0,0 +1,4 @@ +##gff-version 3 +##sequence-region ctgA 1000 2000 +ctgA example gene 1000 2000 . . . ID=gene01;Name=geneA;Alias=hga +ctgA example mRNA 1000 2000 . . . Parent=gene01;Name=mrnaA;Alias=hga diff --git a/packages/apollo-cli/test_data/gene_representations.gff3 b/packages/apollo-shared/test_data/gene_representations.gff3 similarity index 96% rename from packages/apollo-cli/test_data/gene_representations.gff3 rename to packages/apollo-shared/test_data/gene_representations.gff3 index e3ebe11c0..b9aff4134 100644 --- a/packages/apollo-cli/test_data/gene_representations.gff3 +++ b/packages/apollo-shared/test_data/gene_representations.gff3 @@ -110,64 +110,6 @@ chr1 . CDS 37000 37600 . + 1 ID=cds40003;Parent=mRNA40003;Name=edenprotein.3 chr1 . CDS 33391 33902 . + 0 ID=cds40004;Parent=mRNA40003;Name=edenprotein.4 chr1 . CDS 35000 35500 . + 1 ID=cds40004;Parent=mRNA40003;Name=edenprotein.4 chr1 . CDS 37000 37600 . + 1 ID=cds40004;Parent=mRNA40003;Name=edenprotein.4 -# example 5 -chr1 . gene 41000 49000 . + . ID=gene50001;Name=EDEN -chr1 . mRNA 41050 49000 . + . ID=mRNA50001;Parent=gene50001;Name=EDEN.1 -chr1 . mRNA 41050 49000 . + . ID=mRNA50002;Parent=gene50001;Name=EDEN.2 -chr1 . mRNA 41300 49000 . + . ID=mRNA50003;Parent=gene50001;Name=EDEN.3 -chr1 . exon 41050 41500 . + . ID=exon50001;Parent=mRNA50001,mRNA50002 -chr1 . exon 41300 41500 . + . ID=exon50002;Parent=mRNA50003 -chr1 . exon 43000 43902 . + . ID=exon50003;Parent=mRNA50001,mRNA50003 -chr1 . exon 45000 45500 . + . ID=exon50004;Parent=mRNA50001,mRNA50002,mRNA50003 -chr1 . exon 47000 49000 . + . ID=exon50005;Parent=mRNA50001,mRNA50002,mRNA50003 -chr1 . five_prime_UTR 41050 41200 . + . 
ID=five_prime_UTR50001;Parent=mRNA50001,mRNA50002 -chr1 . five_prime_UTR 41300 41500 . + . ID=five_prime_UTR50002;Parent=mRNA50003 -chr1 . five_prime_UTR 43000 43300 . + . ID=five_prime_UTR50003;Parent=mRNA50003 -chr1 . five_prime_UTR 43000 43390 . + . ID=five_prime_UTR50004;Parent=mRNA50003 -chr1 . five_prime_UTR 43000 43390 . + . ID=five_prime_UTR50004;Parent=mRNA50003 -chr1 . three_prime_UTR 47601 49000 . + . ID=three_prime_UTR50001;Parent=mRNA50001,mRNA50002,mRNA50003 -chr1 . CDS 41201 41500 . + 0 ID=cds50001;Parent=mRNA50001;Name=edenprotein.1 -chr1 . CDS 43000 43902 . + 0 ID=cds50001;Parent=mRNA50001;Name=edenprotein.1 -chr1 . CDS 45000 45500 . + 0 ID=cds50001;Parent=mRNA50001;Name=edenprotein.1 -chr1 . CDS 47000 47600 . + 0 ID=cds50001;Parent=mRNA50001;Name=edenprotein.1 -chr1 . CDS 41201 41500 . + 0 ID=cds50002;Parent=mRNA50002;Name=edenprotein.2 -chr1 . CDS 45000 45500 . + 0 ID=cds50002;Parent=mRNA50002;Name=edenprotein.2 -chr1 . CDS 47000 47600 . + 0 ID=cds50002;Parent=mRNA50002;Name=edenprotein.2 -chr1 . CDS 43301 43902 . + 0 ID=cds50003;Parent=mRNA50003;Name=edenprotein.3 -chr1 . CDS 45000 45500 . + 1 ID=cds50003;Parent=mRNA50003;Name=edenprotein.3 -chr1 . CDS 47000 47600 . + 1 ID=cds50003;Parent=mRNA50003;Name=edenprotein.3 -chr1 . CDS 43391 43902 . + 0 ID=cds50004;Parent=mRNA50003;Name=edenprotein.4 -chr1 . CDS 45000 45500 . + 1 ID=cds50004;Parent=mRNA50003;Name=edenprotein.4 -chr1 . CDS 47000 47600 . + 1 ID=cds50004;Parent=mRNA50003;Name=edenprotein.4 -# example 6 -chr1 . gene 51000 59000 . + . ID=gene60001;Name=EDEN -chr1 . mRNA 51050 59000 . + . ID=mRNA60001;Parent=gene60001;Name=EDEN.1 -chr1 . mRNA 51050 59000 . + . ID=mRNA60002;Parent=gene60001;Name=EDEN.2 -chr1 . mRNA 51300 59000 . + . ID=mRNA60003;Parent=gene60001;Name=EDEN.3 -chr1 . exon 51050 51500 . + . ID=exon60001;Parent=mRNA60001,mRNA60002 -chr1 . exon 51300 51500 . + . ID=exon60002;Parent=mRNA60003 -chr1 . exon 53000 53902 . + . ID=exon60003;Parent=mRNA60001,mRNA60003 -chr1 . 
exon 55000 55500 . + . ID=exon60004;Parent=mRNA60001,mRNA60002,mRNA60003 -chr1 . exon 57000 59000 . + . ID=exon60005;Parent=mRNA60001,mRNA60002,mRNA60003 -chr1 . five_prime_UTR 51050 51200 . + . ID=five_prime_UTR60001;Parent=mRNA60001,mRNA60002 -chr1 . five_prime_UTR 51300 51500 . + . ID=five_prime_UTR60002;Parent=mRNA60003 -chr1 . five_prime_UTR 53000 53300 . + . ID=five_prime_UTR60003;Parent=mRNA60003 -chr1 . five_prime_UTR 53000 53390 . + . ID=five_prime_UTR60004;Parent=mRNA60003 -chr1 . five_prime_UTR 53000 53390 . + . ID=five_prime_UTR60004;Parent=mRNA60003 -chr1 . three_prime_UTR 57601 59000 . + . ID=three_prime_UTR60001;Parent=mRNA60001,mRNA60002,mRNA60003 -chr1 . CDS 51201 51500 . + 0 ID=cds60001;Parent=mRNA60001;Name=edenprotein.1 -chr1 . CDS 53000 53902 . + 0 ID=cds60001;Parent=mRNA60001;Name=edenprotein.1 -chr1 . CDS 55000 55500 . + 0 ID=cds60001;Parent=mRNA60001;Name=edenprotein.1 -chr1 . CDS 57000 57600 . + 0 ID=cds60001;Parent=mRNA60001;Name=edenprotein.1 -chr1 . CDS 51201 51500 . + 0 ID=cds60002;Parent=mRNA60002;Name=edenprotein.2 -chr1 . CDS 55000 55500 . + 0 ID=cds60002;Parent=mRNA60002;Name=edenprotein.2 -chr1 . CDS 57000 57600 . + 0 ID=cds60002;Parent=mRNA60002;Name=edenprotein.2 -chr1 . CDS 53301 53902 . + 0 ID=cds60003;Parent=mRNA60003;Name=edenprotein.3 -chr1 . CDS 55000 55500 . + 1 ID=cds60003;Parent=mRNA60003;Name=edenprotein.3 -chr1 . CDS 57000 57600 . + 1 ID=cds60003;Parent=mRNA60003;Name=edenprotein.3 -chr1 . CDS 53391 53902 . + 0 ID=cds60004;Parent=mRNA60003;Name=edenprotein.4 -chr1 . CDS 55000 55500 . + 1 ID=cds60004;Parent=mRNA60003;Name=edenprotein.4 -chr1 . CDS 57000 57600 . 
+ 1 ID=cds60004;Parent=mRNA60003;Name=edenprotein.4 ##FASTA >chr1 cattgttgcggagttgaacaACGGCATTAGGAACACTTCCGTCTCtcacttttatacgat diff --git a/packages/apollo-shared/test_data/one_cds.gff3 b/packages/apollo-shared/test_data/one_cds.gff3 new file mode 100644 index 000000000..81626f6e3 --- /dev/null +++ b/packages/apollo-shared/test_data/one_cds.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +##sequence-region chr1 1000 9000 +#example01 +chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN;testid=t003 +chr1 . mRNA 1050 9000 . + . ID=mRNA10001;Parent=gene10001;Name=EDEN.1;testid=t004,t001,t004 +chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001;testid=t007 +chr1 . exon 5000 5500 . + . ID=exon10004;Parent=mRNA10001;testid=t010 +chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t012,t013,t014 +chr1 . CDS 5000 5000 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t015,t014 diff --git a/packages/apollo-shared/test_data/one_cds.json b/packages/apollo-shared/test_data/one_cds.json new file mode 100644 index 000000000..185f5579c --- /dev/null +++ b/packages/apollo-shared/test_data/one_cds.json @@ -0,0 +1,67 @@ +{ + "_id": "66d70e4ccc30b55b65e5f619", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "children": { + "66d70e4ccc30b55b65e5f618": { + "_id": "66d70e4ccc30b55b65e5f618", + "refSeq": "chr1", + "type": "mRNA", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "66d70e4ccc30b55b65e5f615": { + "_id": "66d70e4ccc30b55b65e5f615", + "refSeq": "chr1", + "type": "exon", + "min": 1049, + "max": 1500, + "strand": 1, + "attributes": { + "gff_id": ["exon10001"], + "testid": ["t007"] + } + }, + "66d70e4ccc30b55b65e5f616": { + "_id": "66d70e4ccc30b55b65e5f616", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { + "gff_id": ["exon10004"], + "testid": ["t010"] + } + }, + "66d70e4ccc30b55b65e5f617": { + "_id": "66d70e4ccc30b55b65e5f617", + "refSeq": 
"chr1", + "type": "CDS", + "min": 1200, + "max": 5000, + "strand": 1, + "attributes": { + "gff_id": ["cds10001"], + "gff_name": ["edenprotein.1"], + "testid": ["t012", "t013", "t014", "t015"] + } + } + }, + "attributes": { + "gff_id": ["mRNA10001"], + "gff_name": ["EDEN.1"], + "testid": ["t004", "t001", "t004"] + } + } + }, + "attributes": { + "gff_id": ["gene10001"], + "gff_name": ["EDEN"], + "testid": ["t003"] + } +} diff --git a/packages/apollo-shared/test_data/two_cds.gff3 b/packages/apollo-shared/test_data/two_cds.gff3 new file mode 100644 index 000000000..fd58a8a33 --- /dev/null +++ b/packages/apollo-shared/test_data/two_cds.gff3 @@ -0,0 +1,9 @@ +##gff-version 3 +##sequence-region chr1 1000 9000 +#example01 +chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN;testid=t003 +chr1 . mRNA 1050 9000 . + . ID=mRNA10001;Parent=gene10001;Name=EDEN.1;testid=t004,t001,t004 +chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001;testid=t007 +chr1 . exon 5000 5500 . + . ID=exon10004;Parent=mRNA10001;testid=t010 +chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t012,t013,t014 +chr1 . CDS 5000 5000 . 
+ 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1;testid=t014 diff --git a/packages/apollo-shared/test_data/two_cds.json b/packages/apollo-shared/test_data/two_cds.json new file mode 100644 index 000000000..6500195a7 --- /dev/null +++ b/packages/apollo-shared/test_data/two_cds.json @@ -0,0 +1,67 @@ +{ + "_id": "66d70f3b9c7a7460925687a3", + "refSeq": "chr1", + "type": "gene", + "min": 999, + "max": 9000, + "strand": 1, + "children": { + "66d70f3b9c7a7460925687a2": { + "_id": "66d70f3b9c7a7460925687a2", + "refSeq": "chr1", + "type": "mRNA", + "min": 1049, + "max": 9000, + "strand": 1, + "children": { + "66d70f3b9c7a74609256879f": { + "_id": "66d70f3b9c7a74609256879f", + "refSeq": "chr1", + "type": "exon", + "min": 1049, + "max": 1500, + "strand": 1, + "attributes": { + "gff_id": ["exon10001"], + "testid": ["t007"] + } + }, + "66d70f3b9c7a7460925687a0": { + "_id": "66d70f3b9c7a7460925687a0", + "refSeq": "chr1", + "type": "exon", + "min": 4999, + "max": 5500, + "strand": 1, + "attributes": { + "gff_id": ["exon10004"], + "testid": ["t010"] + } + }, + "66d70f3b9c7a7460925687a1": { + "_id": "66d70f3b9c7a7460925687a1", + "refSeq": "chr1", + "type": "CDS", + "min": 1200, + "max": 5000, + "strand": 1, + "attributes": { + "gff_id": ["cds10001"], + "gff_name": ["edenprotein.1"], + "testid": ["t012", "t013", "t014"] + } + } + }, + "attributes": { + "gff_id": ["mRNA10001"], + "gff_name": ["EDEN.1"], + "testid": ["t004", "t001", "t004"] + } + } + }, + "attributes": { + "gff_id": ["gene10001"], + "gff_name": ["EDEN"], + "testid": ["t003"] + } +} diff --git a/yarn.lock b/yarn.lock index f51810c29..890e5c4df 100644 --- a/yarn.lock +++ b/yarn.lock @@ -330,7 +330,7 @@ __metadata: "@oclif/core": "npm:^3.18.2" "@oclif/plugin-help": "npm:^6.0.8" "@oclif/test": "npm:^3.1.3" - "@types/chai": "npm:^4" + "@types/chai": "npm:^4.3.19" "@types/cli-progress": "npm:^3" "@types/inquirer": "npm:^9.0.7" "@types/mocha": "npm:^10" @@ -591,9 +591,12 @@ __metadata: "@jbrowse/core": "npm:^2.13.1" 
"@nestjs/common": "npm:^10.1.0" "@nestjs/core": "npm:^10.1.0" + "@types/chai": "npm:^4.3.19" "@types/node": "npm:^18.14.2" "@types/rimraf": "npm:^3" bson-objectid: "npm:^2.0.4" + chai: "npm:^5.1.1" + chai-exclude: "npm:^3.0.0" generic-filehandle: "npm:^3.0.0" glob: "npm:^11.0.0" jwt-decode: "npm:^3.1.2" @@ -9323,13 +9326,20 @@ __metadata: languageName: node linkType: hard -"@types/chai@npm:*, @types/chai@npm:^4": +"@types/chai@npm:*": version: 4.3.11 resolution: "@types/chai@npm:4.3.11" checksum: 10c0/0c216ac4a19bfbf8318bb104d32e50704ee2ffc4b538b976c4326e6638fee121462402caa570662227a2a218810388aadb14bdbd3d3d474ec300b00695db448a languageName: node linkType: hard +"@types/chai@npm:^4.3.19": + version: 4.3.19 + resolution: "@types/chai@npm:4.3.19" + checksum: 10c0/8fd573192e486803c4d04185f2b0fab554660d9a1300dbed5bde9747ab8bef15f462a226f560ed5ca48827eecaf8d71eed64aa653ff9aec72fb2eae272e43a84 + languageName: node + linkType: hard + "@types/cli-progress@npm:^3, @types/cli-progress@npm:^3.11.5": version: 3.11.5 resolution: "@types/cli-progress@npm:3.11.5" @@ -11562,6 +11572,13 @@ __metadata: languageName: node linkType: hard +"assertion-error@npm:^2.0.1": + version: 2.0.1 + resolution: "assertion-error@npm:2.0.1" + checksum: 10c0/bbbcb117ac6480138f8c93cf7f535614282dea9dc828f540cdece85e3c665e8f78958b96afac52f29ff883c72638e6a87d469ecc9fe5bc902df03ed24a55dba8 + languageName: node + linkType: hard + "ast-types-flow@npm:^0.0.7": version: 0.0.7 resolution: "ast-types-flow@npm:0.0.7" @@ -12882,6 +12899,17 @@ __metadata: languageName: node linkType: hard +"chai-exclude@npm:^3.0.0": + version: 3.0.0 + resolution: "chai-exclude@npm:3.0.0" + dependencies: + fclone: "npm:^1.0.11" + peerDependencies: + chai: ">= 5" + checksum: 10c0/c0bbe2f29398e9a9338e0aa86835668b701bf220e49ec24cf80d58f3b322b4a9dd332e3052c757f97ea5ba2c0086c64c7014a3b07862fa370480f350ceac4066 + languageName: node + linkType: hard + "chai@npm:*, chai@npm:^4.3.10": version: 4.3.10 resolution: "chai@npm:4.3.10" @@ 
-12897,6 +12925,19 @@ __metadata: languageName: node linkType: hard +"chai@npm:^5.1.1": + version: 5.1.1 + resolution: "chai@npm:5.1.1" + dependencies: + assertion-error: "npm:^2.0.1" + check-error: "npm:^2.1.1" + deep-eql: "npm:^5.0.1" + loupe: "npm:^3.1.0" + pathval: "npm:^2.0.0" + checksum: 10c0/e7f00e5881e3d5224f08fe63966ed6566bd9fdde175863c7c16dd5240416de9b34c4a0dd925f4fd64ad56256ca6507d32cf6131c49e1db65c62578eb31d4566c + languageName: node + linkType: hard + "chalk-template@npm:0.4.0": version: 0.4.0 resolution: "chalk-template@npm:0.4.0" @@ -13029,6 +13070,13 @@ __metadata: languageName: node linkType: hard +"check-error@npm:^2.1.1": + version: 2.1.1 + resolution: "check-error@npm:2.1.1" + checksum: 10c0/979f13eccab306cf1785fa10941a590b4e7ea9916ea2a4f8c87f0316fc3eab07eabefb6e587424ef0f88cbcd3805791f172ea739863ca3d7ce2afc54641c7f0e + languageName: node + linkType: hard + "check-more-types@npm:2.24.0, check-more-types@npm:^2.24.0": version: 2.24.0 resolution: "check-more-types@npm:2.24.0" @@ -14677,6 +14725,13 @@ __metadata: languageName: node linkType: hard +"deep-eql@npm:^5.0.1": + version: 5.0.2 + resolution: "deep-eql@npm:5.0.2" + checksum: 10c0/7102cf3b7bb719c6b9c0db2e19bf0aa9318d141581befe8c7ce8ccd39af9eaa4346e5e05adef7f9bd7015da0f13a3a25dcfe306ef79dc8668aedbecb658dd247 + languageName: node + linkType: hard + "deep-extend@npm:^0.6.0": version: 0.6.0 resolution: "deep-extend@npm:0.6.0" @@ -16775,6 +16830,13 @@ __metadata: languageName: node linkType: hard +"fclone@npm:^1.0.11": + version: 1.0.11 + resolution: "fclone@npm:1.0.11" + checksum: 10c0/dbe3ebd0883edeec2998874bf951aa03198d727f1091351b22af250ff53e227ee94872487ae88ba7280b2469fb164a7d4dd4e5ece10afd4988ab4712f49bc43b + languageName: node + linkType: hard + "fd-slicer@npm:~1.1.0": version: 1.1.0 resolution: "fd-slicer@npm:1.1.0" @@ -21199,6 +21261,15 @@ __metadata: languageName: node linkType: hard +"loupe@npm:^3.1.0": + version: 3.1.1 + resolution: "loupe@npm:3.1.1" + dependencies: + get-func-name: 
"npm:^2.0.1" + checksum: 10c0/99f88badc47e894016df0c403de846fedfea61154aadabbf776c8428dd59e8d8378007135d385d737de32ae47980af07d22ba7bec5ef7beebd721de9baa0a0af + languageName: node + linkType: hard + "lower-case@npm:^2.0.2": version: 2.0.2 resolution: "lower-case@npm:2.0.2" @@ -24600,6 +24671,13 @@ __metadata: languageName: node linkType: hard +"pathval@npm:^2.0.0": + version: 2.0.0 + resolution: "pathval@npm:2.0.0" + checksum: 10c0/602e4ee347fba8a599115af2ccd8179836a63c925c23e04bd056d0674a64b39e3a081b643cc7bc0b84390517df2d800a46fcc5598d42c155fe4977095c2f77c5 + languageName: node + linkType: hard + "pause-stream@npm:0.0.11": version: 0.0.11 resolution: "pause-stream@npm:0.0.11"