Skip to content

Commit

Permalink
Temp commit
Browse files Browse the repository at this point in the history
  • Loading branch information
dariober committed Sep 2, 2024
1 parent 22db7bd commit bb62e79
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 9 deletions.
177 changes: 172 additions & 5 deletions packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
/* eslint-disable @typescript-eslint/no-unsafe-argument */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
/* eslint-disable @typescript-eslint/no-floating-promises */
import { strict as assert } from 'node:assert'
import { describe, it } from 'node:test'
import gff from '@gmod/gff'
import gff, { GFF3Feature } from '@gmod/gff'

import { readFileSync } from 'node:fs'

import { gff3ToAnnotationFeature } from './gff3ToAnnotationFeature'
import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst'
Expand All @@ -25,14 +30,92 @@ const testCases: [string, string, AnnotationFeatureSnapshot][] = [
],
]

function readSingleFeatureFile(fn: string): GFF3Feature {
const lines = readFileSync(fn).toString().split('\n')
const feature: string[] = []
for (const line of lines) {
if (!line.startsWith('#')) {
feature.push(line)
}
}
const inGff = gff.parseStringSync(feature.join('\n')) as GFF3Feature[]
if (inGff.length != 1) {
throw new Error(`Exactly 1 feature expected in file ${fn}`)
}
return inGff[0]
}

function renameIds(x: AnnotationFeatureSnapshot): AnnotationFeatureSnapshot {
const gene = JSON.parse(JSON.stringify(x))
if (gene.children) {
for (const mrnaId of Object.keys(gene.children)) {
const mrnaNewId = gene.children[mrnaId].attributes?.testid
if (!mrnaNewId) {
throw new Error('Expected to find testid')
}
const mrna = gene.children[mrnaId]
gene.children[mrnaNewId] = mrna
delete gene.children[mrnaId]
if (mrna.children) {
for (const exonId of Object.keys(mrna.children)) {
const exonNewId = mrna.children[exonId].attributes?.testid
if (!exonNewId) {
throw new Error('Expected to find testid')
}
const exon = gene.children[mrnaNewId].children[exonId]
gene.children[mrnaNewId].children[exonNewId] = exon
delete gene.children[mrnaNewId].children[exonId]
if (exon.children) {
for (const cdsId of Object.keys(exon.children)) {
const cdsNewId = exon.children[cdsId].attributes?.testid
if (!cdsNewId) {
throw new Error('Expected to find testid')
}
const cds =
gene.children[mrnaNewId].children[exonNewId].children[cdsId]
gene.children[mrnaNewId].children[exonNewId].children[cdsNewId] =
cds
delete gene.children[mrnaNewId].children[exonNewId].children[
cdsId
]
if (cds.children) {
for (const subId of Object.keys(cds.children)) {
const subNewId = cds.children[subId].attributes?.testid
if (!subNewId) {
throw new Error('Expected to find testid')
}
const sub =
gene.children[mrnaNewId].children[exonNewId].children[
cdsNewId
].children[subId]
gene.children[mrnaNewId].children[exonNewId].children[
cdsNewId
].children[subNewId] = sub
delete gene.children[mrnaNewId].children[exonNewId].children[
cdsNewId
].children[subId]
}
}
}
}
}
}
}
}
return gene as AnnotationFeatureSnapshot
}

function compareFeatures(
feature1: AnnotationFeatureSnapshot,
feature2: AnnotationFeatureSnapshot,
) {
assert.deepEqual(
{ ...feature1, _id: undefined },
{ ...feature2, _id: undefined },
)
const f1 = structuredClone(feature1)
const f2 = structuredClone(feature2)
assert.ok(f1.attributes)
assert.ok(f2.attributes)
f1.attributes.testid = undefined
f2.attributes.testid = undefined
assert.deepEqual({ ...f1, _id: undefined }, { ...f2, _id: undefined })
}

describe('gff3ToAnnotationFeature', () => {
Expand All @@ -46,4 +129,88 @@ describe('gff3ToAnnotationFeature', () => {
compareFeatures(convertedFeature, feature)
})
}

it.skip('converts gene with mRNA', () => {
const expected: AnnotationFeatureSnapshot = {
_id: '66cc92b3f580b24ed78564b5',
refSeq: 'ctgA',
type: 'gene',
min: 999,
max: 2000,
children: {
'66cc92b3f580b24ed78564b6': {
_id: '66cc92b3f580b24ed78564b6',
refSeq: 'ctgA',
type: 'mRNA',
min: 999,
max: 2000,
attributes: {
gff_source: ['example'],
gff_name: ['mrnaA'],
gff_alias: ['hga'],
},
},
},
attributes: {
gff_source: ['example'],
gff_id: ['gene01'],
gff_name: ['geneA'],
gff_alias: ['hga'],
},
}

const feature: GFF3Feature = readSingleFeatureFile(
'test_data/gene_mrna.gff3',
)
const actual = gff3ToAnnotationFeature(feature)
compareFeatures(actual, expected)
})

it.only('converts example', () => {
const _ex1 = gff3ToAnnotationFeature(
readSingleFeatureFile('test_data/example01.gff3'),
)
console.log(JSON.stringify(_ex1, null, 2))

const ex1 = renameIds(_ex1)

const _ex2 = gff3ToAnnotationFeature(
readSingleFeatureFile('test_data/example02.gff3'),
)
const ex2 = renameIds(_ex2)

const gene1 = structuredClone(ex1)
gene1.children = undefined
const gene2 = structuredClone(ex2)
gene2.children = undefined
compareFeatures(gene1, gene2)

assert.ok(ex1.children)
assert.ok(ex2.children)

assert.equal(
JSON.stringify(Object.keys(ex1.children).sort()),
JSON.stringify(['t004', 't005', 't006', 'x001']),
)
assert.equal(
JSON.stringify(Object.keys(ex2.children).sort()),
JSON.stringify(['t004', 't005', 't006']),
)

const mrna_1 = structuredClone(ex1).children?.t004
const mrna_2 = structuredClone(ex2).children?.t004

console.log(JSON.stringify(mrna_1, null, 2))

assert.ok(mrna_1?.children)
assert.ok(mrna_2?.children)

assert.equal(
Object.keys(mrna_1.children).length,
Object.keys(mrna_2.children).length,
)

compareFeatures(mrna_1.children.t007, mrna_2.children.t007)
compareFeatures(mrna_1.children.t015, mrna_2.children.t021)
})
})
8 changes: 4 additions & 4 deletions packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ function getFeatureMinMax(gff3Feature: GFF3Feature): [number, number] {

function convertFeatureAttributes(
gff3Feature: GFF3Feature,
): Record<string, string[]> | undefined {
const convertedAttributes: Record<string, string[]> = {}
): Record<string, string[] | undefined> | undefined {
const convertedAttributes: Record<string, string[] | undefined> = {}
const scores = gff3Feature
.map((f) => f.score)
.filter((score) => score !== null)
Expand Down Expand Up @@ -112,14 +112,14 @@ function convertFeatureAttributes(
convertedAttributes.gff_source = [source]
}
if (attributesCollections.length > 0) {
const newAttributes: Record<string, string[] | undefined> = {}
// const newAttributes: Record<string, string[] | undefined> = {}
for (const attributesCollection of attributesCollections) {
for (const [key, val] of Object.entries(attributesCollection)) {
if (!val || key === 'Parent') {
continue
}
const newKey = isGFFReservedAttribute(key) ? gffToInternal[key] : key
const existingVal = newAttributes[newKey]
const existingVal = convertedAttributes[newKey]
if (existingVal) {
const valSet = new Set(...existingVal, ...val)
convertedAttributes[newKey] = [...valSet]
Expand Down

0 comments on commit bb62e79

Please sign in to comment.