Skip to content

Commit

Permalink
Some working tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dariober committed Sep 4, 2024
1 parent a438acf commit aa161b6
Show file tree
Hide file tree
Showing 17 changed files with 2,736 additions and 12 deletions.
72 changes: 62 additions & 10 deletions packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import gff from '@gmod/gff'
import { assert, use } from 'chai'
import chaiExclude from 'chai-exclude'

import { readFileSync } from 'node:fs'
import { gff3ToAnnotationFeature } from './gff3ToAnnotationFeature'
import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst'

Expand Down Expand Up @@ -106,15 +107,66 @@ function compareFeatures(
)
}

describe('gff3ToAnnotationFeature', () => {
for (const testCase of testCases) {
const [description, featureLine, convertedFeature] = testCase
it(`converts ${description}`, () => {
const gff3Feature = gff.parseStringSync(featureLine, {
parseSequences: false,
})
const feature = gff3ToAnnotationFeature(gff3Feature[0])
compareFeatures(convertedFeature, feature)
})
/**
 * Reads a GFF3 file from disk and parses it into a single feature.
 *
 * Every line that starts with `#` (comments and `##` directives alike) is
 * dropped before the remaining text is handed to `gff.parseStringSync`.
 *
 * @param fn - Path of the GFF3 file to read.
 * @returns The one item produced by parsing the file.
 * @throws Error if parsing yields anything other than exactly one item.
 */
function readSingleFeatureFile(fn: string): GFF3Feature {
  const featureLines = readFileSync(fn, 'utf8')
    .split('\n')
    .filter((line) => !line.startsWith('#'))
  const inGff = gff.parseStringSync(featureLines.join('\n')) as GFF3Feature[]
  // Strict comparison: the fixture must contain one and only one feature.
  if (inGff.length !== 1) {
    throw new Error(`Exactly 1 feature expected in file ${fn}`)
  }
  return inGff[0]
}

/**
 * Loads an expected feature snapshot from a JSON file.
 *
 * The parsed JSON is returned as-is; no structural validation is performed,
 * the result is only asserted to be an `AnnotationFeatureSnapshot`.
 *
 * @param fn - Path of the JSON file to read.
 * @returns The parsed file content.
 * @throws SyntaxError (from `JSON.parse`) if the file is not valid JSON.
 */
function readAnnotationFeatureSnapshot(fn: string): AnnotationFeatureSnapshot {
  const rawJson = readFileSync(fn, { encoding: 'utf8' })
  const snapshot: unknown = JSON.parse(rawJson)
  return snapshot as AnnotationFeatureSnapshot
}

// File-based conversion examples: each case parses a single-feature GFF3
// fixture, converts it, and compares the result with a stored JSON snapshot.
describe('gff3ToAnnotationFeature examples', () => {
  // [test title, GFF3 input fixture, expected snapshot fixture].
  // Paths are relative, so they are resolved against the working directory
  // of the test process.
  const exampleCases: [string, string, string][] = [
    ['Convert one CDS', 'test_data/one_cds.gff3', 'test_data/one_cds.json'],
    ['Convert two CDSs', 'test_data/two_cds.gff3', 'test_data/two_cds.json'],
    ['Convert example 1', 'test_data/example01.gff3', 'test_data/example01.json'],
    ['Convert example 2', 'test_data/example02.gff3', 'test_data/example02.json'],
  ]

  for (const [title, gff3Path, snapshotPath] of exampleCases) {
    it(title, () => {
      const actual = gff3ToAnnotationFeature(readSingleFeatureFile(gff3Path))
      const expected = readAnnotationFeatureSnapshot(snapshotPath)
      // NOTE(review): the commented-out legacy suite in this file calls
      // compareFeatures(convertedFeature, feature), i.e. expected first.
      // Confirm which argument order compareFeatures actually expects.
      compareFeatures(actual, expected)
    })
  }
})

// describe('gff3ToAnnotationFeature', () => {
// for (const testCase of testCases) {
// const [description, featureLine, convertedFeature] = testCase
// it(`converts ${description}`, () => {
// const gff3Feature = gff.parseStringSync(featureLine, {
// parseSequences: false,
// })
// const feature = gff3ToAnnotationFeature(gff3Feature[0])
// compareFeatures(convertedFeature, feature)
// })
// }
// })
3 changes: 1 addition & 2 deletions packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ function convertFeatureAttributes(
convertedAttributes.gff_source = [source]
}
if (attributesCollections.length > 0) {
// const newAttributes: Record<string, string[] | undefined> = {}
for (const attributesCollection of attributesCollections) {
for (const [key, val] of Object.entries(attributesCollection)) {
if (!val || key === 'Parent') {
Expand All @@ -121,7 +120,7 @@ function convertFeatureAttributes(
const newKey = isGFFReservedAttribute(key) ? gffToInternal[key] : key
const existingVal = convertedAttributes[newKey]
if (existingVal) {
const valSet = new Set(...existingVal, ...val)
const valSet = new Set([...existingVal, ...val])
convertedAttributes[newKey] = [...valSet]
} else {
convertedAttributes[newKey] = val
Expand Down
26 changes: 26 additions & 0 deletions packages/apollo-shared/test_data/example01.gff3
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
##gff-version 3
##sequence-region chr1 1000 9000
#example01
chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN
chr1 . TF_binding_site 1000 1012 . + . ID=tfbs10001;Parent=gene10001
chr1 . mRNA 1050 9000 . + . ID=mRNA10001;Parent=gene10001;Name=EDEN.1
chr1 . mRNA 1050 9000 . + . ID=mRNA10002;Parent=gene10001;Name=EDEN.2
chr1 . mRNA 1300 9000 . + . ID=mRNA10003;Parent=gene10001;Name=EDEN.3
chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001,mRNA10002
chr1 . exon 1300 1500 . + . ID=exon10002;Parent=mRNA10003
chr1 . exon 3000 3902 . + . ID=exon10003;Parent=mRNA10001,mRNA10003
chr1 . exon 5000 5500 . + . ID=exon10004;Parent=mRNA10001,mRNA10002,mRNA10003
chr1 . exon 7000 9000 . + . ID=exon10005;Parent=mRNA10001,mRNA10002,mRNA10003
chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 3000 3902 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 5000 5500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 7000 7600 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 1201 1500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2
chr1 . CDS 5000 5500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2
chr1 . CDS 7000 7600 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2
chr1 . CDS 3301 3902 . + 0 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3
chr1 . CDS 5000 5500 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3
chr1 . CDS 7000 7600 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3
chr1 . CDS 3391 3902 . + 0 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4
chr1 . CDS 5000 5500 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4
chr1 . CDS 7000 7600 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4
236 changes: 236 additions & 0 deletions packages/apollo-shared/test_data/example01.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
{
"_id": "66d7151100dcdf7dda49178d",
"refSeq": "chr1",
"type": "gene",
"min": 999,
"max": 9000,
"strand": 1,
"children": {
"66d7151100dcdf7dda49177a": {
"_id": "66d7151100dcdf7dda49177a",
"refSeq": "chr1",
"type": "TF_binding_site",
"min": 999,
"max": 1012,
"strand": 1,
"attributes": {
"gff_id": ["tfbs10001"]
}
},
"66d7151100dcdf7dda491780": {
"_id": "66d7151100dcdf7dda491780",
"refSeq": "chr1",
"type": "mRNA",
"min": 1049,
"max": 9000,
"strand": 1,
"children": {
"66d7151100dcdf7dda49177b": {
"_id": "66d7151100dcdf7dda49177b",
"refSeq": "chr1",
"type": "exon",
"min": 1049,
"max": 1500,
"strand": 1,
"attributes": {
"gff_id": ["exon10001"]
}
},
"66d7151100dcdf7dda49177c": {
"_id": "66d7151100dcdf7dda49177c",
"refSeq": "chr1",
"type": "exon",
"min": 2999,
"max": 3902,
"strand": 1,
"attributes": {
"gff_id": ["exon10003"]
}
},
"66d7151100dcdf7dda49177d": {
"_id": "66d7151100dcdf7dda49177d",
"refSeq": "chr1",
"type": "exon",
"min": 4999,
"max": 5500,
"strand": 1,
"attributes": {
"gff_id": ["exon10004"]
}
},
"66d7151100dcdf7dda49177e": {
"_id": "66d7151100dcdf7dda49177e",
"refSeq": "chr1",
"type": "exon",
"min": 6999,
"max": 9000,
"strand": 1,
"attributes": {
"gff_id": ["exon10005"]
}
},
"66d7151100dcdf7dda49177f": {
"_id": "66d7151100dcdf7dda49177f",
"refSeq": "chr1",
"type": "CDS",
"min": 1200,
"max": 7600,
"strand": 1,
"attributes": {
"gff_id": ["cds10001"],
"gff_name": ["edenprotein.1"]
}
}
},
"attributes": {
"gff_id": ["mRNA10001"],
"gff_name": ["EDEN.1"]
}
},
"66d7151100dcdf7dda491785": {
"_id": "66d7151100dcdf7dda491785",
"refSeq": "chr1",
"type": "mRNA",
"min": 1049,
"max": 9000,
"strand": 1,
"children": {
"66d7151100dcdf7dda491781": {
"_id": "66d7151100dcdf7dda491781",
"refSeq": "chr1",
"type": "exon",
"min": 1049,
"max": 1500,
"strand": 1,
"attributes": {
"gff_id": ["exon10001"]
}
},
"66d7151100dcdf7dda491782": {
"_id": "66d7151100dcdf7dda491782",
"refSeq": "chr1",
"type": "exon",
"min": 4999,
"max": 5500,
"strand": 1,
"attributes": {
"gff_id": ["exon10004"]
}
},
"66d7151100dcdf7dda491783": {
"_id": "66d7151100dcdf7dda491783",
"refSeq": "chr1",
"type": "exon",
"min": 6999,
"max": 9000,
"strand": 1,
"attributes": {
"gff_id": ["exon10005"]
}
},
"66d7151100dcdf7dda491784": {
"_id": "66d7151100dcdf7dda491784",
"refSeq": "chr1",
"type": "CDS",
"min": 1200,
"max": 7600,
"strand": 1,
"attributes": {
"gff_id": ["cds10002"],
"gff_name": ["edenprotein.2"]
}
}
},
"attributes": {
"gff_id": ["mRNA10002"],
"gff_name": ["EDEN.2"]
}
},
"66d7151100dcdf7dda49178c": {
"_id": "66d7151100dcdf7dda49178c",
"refSeq": "chr1",
"type": "mRNA",
"min": 1299,
"max": 9000,
"strand": 1,
"children": {
"66d7151100dcdf7dda491786": {
"_id": "66d7151100dcdf7dda491786",
"refSeq": "chr1",
"type": "exon",
"min": 1299,
"max": 1500,
"strand": 1,
"attributes": {
"gff_id": ["exon10002"]
}
},
"66d7151100dcdf7dda491787": {
"_id": "66d7151100dcdf7dda491787",
"refSeq": "chr1",
"type": "exon",
"min": 2999,
"max": 3902,
"strand": 1,
"attributes": {
"gff_id": ["exon10003"]
}
},
"66d7151100dcdf7dda491788": {
"_id": "66d7151100dcdf7dda491788",
"refSeq": "chr1",
"type": "exon",
"min": 4999,
"max": 5500,
"strand": 1,
"attributes": {
"gff_id": ["exon10004"]
}
},
"66d7151100dcdf7dda491789": {
"_id": "66d7151100dcdf7dda491789",
"refSeq": "chr1",
"type": "exon",
"min": 6999,
"max": 9000,
"strand": 1,
"attributes": {
"gff_id": ["exon10005"]
}
},
"66d7151100dcdf7dda49178a": {
"_id": "66d7151100dcdf7dda49178a",
"refSeq": "chr1",
"type": "CDS",
"min": 3300,
"max": 7600,
"strand": 1,
"attributes": {
"gff_id": ["cds10003"],
"gff_name": ["edenprotein.3"]
}
},
"66d7151100dcdf7dda49178b": {
"_id": "66d7151100dcdf7dda49178b",
"refSeq": "chr1",
"type": "CDS",
"min": 3390,
"max": 7600,
"strand": 1,
"attributes": {
"gff_id": ["cds10004"],
"gff_name": ["edenprotein.4"]
}
}
},
"attributes": {
"gff_id": ["mRNA10003"],
"gff_name": ["EDEN.3"]
}
}
},
"attributes": {
"gff_id": ["gene10001"],
"gff_name": ["EDEN"]
}
}
31 changes: 31 additions & 0 deletions packages/apollo-shared/test_data/example02.gff3
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
##gff-version 3
##sequence-region chr1 1000 9000
# example 2
chr1 . gene 1000 9000 . + . ID=gene10001;Name=EDEN
chr1 . mRNA 1050 9000 . + . ID=mRNA10001;Parent=gene10001;Name=EDEN.1
chr1 . mRNA 1050 9000 . + . ID=mRNA10002;Parent=gene10001;Name=EDEN.2
chr1 . mRNA 1300 9000 . + . ID=mRNA10003;Parent=gene10001;Name=EDEN.3
chr1 . exon 1050 1500 . + . ID=exon10001;Parent=mRNA10001
chr1 . exon 1050 1500 . + . ID=exon10002;Parent=mRNA10002
chr1 . exon 1300 1500 . + . ID=exon10003;Parent=mRNA10003
chr1 . exon 3000 3902 . + . ID=exon10004;Parent=mRNA10001
chr1 . exon 3000 3902 . + . ID=exon10005;Parent=mRNA10003
chr1 . exon 5000 5500 . + . ID=exon10006;Parent=mRNA10001
chr1 . exon 5000 5500 . + . ID=exon10007;Parent=mRNA10002
chr1 . exon 5000 5500 . + . ID=exon10008;Parent=mRNA10003
chr1 . exon 7000 9000 . + . ID=exon10009;Parent=mRNA10001
chr1 . exon 7000 9000 . + . ID=exon20010;Parent=mRNA10002
chr1 . exon 7000 9000 . + . ID=exon20011;Parent=mRNA10003
chr1 . CDS 1201 1500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 3000 3902 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 5000 5500 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 7000 7600 . + 0 ID=cds10001;Parent=mRNA10001;Name=edenprotein.1
chr1 . CDS 1201 1500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2
chr1 . CDS 5000 5500 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2
chr1 . CDS 7000 7600 . + 0 ID=cds10002;Parent=mRNA10002;Name=edenprotein.2
chr1 . CDS 3301 3902 . + 0 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3
chr1 . CDS 5000 5500 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3
chr1 . CDS 7000 7600 . + 1 ID=cds10003;Parent=mRNA10003;Name=edenprotein.3
chr1 . CDS 3391 3902 . + 0 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4
chr1 . CDS 5000 5500 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4
chr1 . CDS 7000 7600 . + 1 ID=cds10004;Parent=mRNA10003;Name=edenprotein.4
Loading

0 comments on commit aa161b6

Please sign in to comment.