Skip to content

Commit

Permalink
responded to comments and removed the AnonomyousOptionalField string …
Browse files Browse the repository at this point in the history
…from GencodeGTFFeature (which had a bug with ordering before)
  • Loading branch information
jamesemery committed Jun 27, 2023
1 parent 09a2be8 commit 125aa5a
Show file tree
Hide file tree
Showing 9 changed files with 501 additions and 757 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,48 @@
* be instantiated if necessary to preserve the spec compliance of funcotator against future releases of Gencode.
*/
public class GencodeGTFFieldConstants {

Check warning on line 14 in src/main/java/org/broadinstitute/hellbender/utils/codecs/gtf/GencodeGTFFieldConstants.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/utils/codecs/gtf/GencodeGTFFieldConstants.java#L14

Added line #L14 was not covered by tests

/**
* A list of required fields from the GencodeGTF codec. These are the fields that are currently first-class members of the
* GencodeGTFFeature and are handled specially handled by the GencodeGTFCodec.
*/
public static final String GENE_ID = "gene_id";
public static final String TRANSCRIPT_ID = "transcript_id";
public static final String GENE_TYPE = "gene_type";
public static final String GENE_BIOTYPE = "gene_biotype";
public static final String GENE_STATUS = "gene_status";
public static final String GENE_NAME = "gene_name";
public static final String TRANSCRIPT_TYPE = "transcript_type";
public static final String TRANSCRIPT_BIOTYPE = "transcript_biotype";
public static final String TRANSCRIPT_STATUS = "transcript_status";
public static final String TRANSCRIPT_NAME = "transcript_name";
public static final String LEVEL = "level";

// Mandatory except in Gene and Transcript lines (not enforced by this codec)
public static final String EXON_NUMBER = "exon_number";
public static final String EXON_ID = "exon_id";

public class GencodeOptionalFields {

Check warning on line 36 in src/main/java/org/broadinstitute/hellbender/utils/codecs/gtf/GencodeGTFFieldConstants.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/utils/codecs/gtf/GencodeGTFFieldConstants.java#L36

Added line #L36 was not covered by tests
/**
* Spec optional gencode fields:
*/
public static final String TAG = "tag";
public static final String CCDSID = "ccdsid";
public static final String HAVANA_GENE = "havana_gene";
public static final String HAVANA_TRANSCRIPT = "havana_transcript";
public static final String PROTEIN_ID = "protein_id";
public static final String ONT = "ont";
public static final String TRANSCRIPT_SUPPORT_LEVEL = "transcript_support_level";
public static final String REMAP_STATUS = "remap_status";
public static final String REMAP_ORIGINAL_ID = "remap_original_id";
public static final String REMAP_ORIGINAL_LOCATION = "remap_original_location";
public static final String REMAP_NUM_MAPPINGS = "remap_num_mappings";
public static final String REMAP_TARGET_STATUS = "remap_target_status";
public static final String REMAP_SUBSTITUTED_MISSING_TARGET = "remap_substituted_missing_target";
public static final String HGNC_ID = "hgnc_id";
public static final String MGI_ID = "mgi_id";
}

/**
* Biotype / transcript type for the transcript or gene represented in a feature.
* This is a tag of some biological function associated with a feature.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ final public class GencodeGtfCodec extends AbstractGtfCodec {
/**
* Maximum version of gencode that will not generate a warning. This parser will still attempt to parse versions above this number, but a warning about potential errors will appear.
*/
private static final int GENCODE_GTF_MAX_VERSION_NUM_INCLUSIVE = 43;
private static final int GENCODE_GTF_MAX_TESTED_VERSION = 43;

private int currentLineNum = 1;
private final List<String> header = new ArrayList<>();
Expand Down Expand Up @@ -315,8 +315,8 @@ boolean validateHeader(final List<String> header, final boolean throwIfInvalid)
}
}

if (versionNumber > GENCODE_GTF_MAX_VERSION_NUM_INCLUSIVE) {
logger.warn("GENCODE GTF Header line 1 has a version number that is above maximum tested version (v " + GENCODE_GTF_MAX_VERSION_NUM_INCLUSIVE + ") (given: " +
if (versionNumber > GENCODE_GTF_MAX_TESTED_VERSION) {
logger.warn("GENCODE GTF Header line 1 has a version number that is above maximum tested version (v " + GENCODE_GTF_MAX_TESTED_VERSION + ") (given: " +

Check warning on line 319 in src/main/java/org/broadinstitute/hellbender/utils/codecs/gtf/GencodeGtfCodec.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/utils/codecs/gtf/GencodeGtfCodec.java#L319

Added line #L319 was not covered by tests
versionNumber + "): " + header.get(0) + " Continuing, but errors may occur.");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,100 +148,53 @@ protected GencodeGtfFeature(final String[] gtfFields, final String gtfFileType)

switch (fieldName) {
// Find the right field to set:
case "gene_id":
case GencodeGTFFieldConstants.GENE_ID:
baseData.geneId = fieldValue;
break;
case "transcript_id":
case GencodeGTFFieldConstants.TRANSCRIPT_ID:
baseData.transcriptId = fieldValue;
break;
case "gene_type":
case GencodeGTFFieldConstants.GENE_TYPE:
baseData.geneType = fieldValue;
break;
// For ENSEMBL GTF files:
case "gene_biotype":
case GencodeGTFFieldConstants.GENE_BIOTYPE:
baseData.geneType = fieldValue;
break;
case "gene_status":
case GencodeGTFFieldConstants.GENE_STATUS:
baseData.geneStatus = fieldValue;
break;
case "gene_name":
case GencodeGTFFieldConstants.GENE_NAME:
baseData.geneName = fieldValue;
break;
case "transcript_type":
case GencodeGTFFieldConstants.TRANSCRIPT_TYPE:
baseData.transcriptType = fieldValue;
break;
case "transcript_biotype":
case GencodeGTFFieldConstants.TRANSCRIPT_BIOTYPE:
baseData.transcriptType = fieldValue;
break;
case "transcript_status":
case GencodeGTFFieldConstants.TRANSCRIPT_STATUS:
baseData.transcriptStatus = fieldValue;
break;
case "transcript_name":
case GencodeGTFFieldConstants.TRANSCRIPT_NAME:
baseData.transcriptName = fieldValue;
break;
case "exon_number":
case GencodeGTFFieldConstants.EXON_NUMBER:
try {
baseData.exonNumber = Integer.valueOf(fieldValue);
}
catch (final NumberFormatException ex) {
throw new UserException.MalformedFile("Could not convert field value into integer: " + fieldValue);
}
break;
case "exon_id":
case GencodeGTFFieldConstants.EXON_ID:
baseData.exonId = fieldValue;
break;
case "level":
case GencodeGTFFieldConstants.LEVEL:
baseData.locusLevel = fieldValue;
break;
case "tag":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "ccdsid":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "havana_gene":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "havana_transcript":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "protein_id":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "ont":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "transcript_support_level":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "remap_status":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "remap_original_id":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "remap_original_location":
try {
optionalField = new OptionalField<>(fieldName, Long.valueOf(fieldValue));
}
catch (final NumberFormatException nfe) {
// We must have gotten a field that has a different format.
// For now, just copy it over:
optionalField = new OptionalField<>(fieldName, fieldValue);
}
break;
case "remap_num_mappings":
optionalField = new OptionalField<>(fieldName, Long.valueOf(fieldValue));
break;
case "remap_target_status":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
case "remap_substituted_missing_target":
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
default:
anonymousOptionalFieldBuilder.append(extraField);
anonymousOptionalFieldBuilder.append(EXTRA_FIELD_DELIMITER);
optionalField = new OptionalField<>(fieldName, fieldValue);
break;
}

Expand All @@ -250,11 +203,6 @@ protected GencodeGtfFeature(final String[] gtfFields, final String gtfFileType)
baseData.optionalFields.add(optionalField);
}
}

// Save our anonymous optional fields:
if ( anonymousOptionalFieldBuilder.length() != 0 ) {
baseData.anonymousOptionalFields = anonymousOptionalFieldBuilder.toString();
}
}

/**
Expand Down Expand Up @@ -458,10 +406,6 @@ private String serializeToStringHelper() {
baseData.optionalFields.stream().map(Object::toString).collect(Collectors.joining(" "))
);

if ( baseData.anonymousOptionalFields != null ) {
stringBuilder.append(baseData.anonymousOptionalFields);
}

return stringBuilder.toString().trim();
}

Expand Down Expand Up @@ -581,11 +525,7 @@ public List<OptionalField<?>> getOptionalFields() {
return baseData.optionalFields;
}

public String getAnonymousOptionalFields() {
return baseData.anonymousOptionalFields;
}

// Certian optional fields support multiple values, so we need to return a list of them.
// Certian optional fields support multiple values (for example "tag"), so we need to return a list of them.
public List<OptionalField<?>> getOptionalField(final String key) {
final List<OptionalField<?>> optionalFields = new ArrayList<>();
for (final OptionalField<?> optionalField : baseData.optionalFields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,13 @@ final public class GencodeGtfFeatureBaseData {

/**
* Optional GENCODE GTF Fields.
* This also includes unidentified GTF fields that may not be specified in the Gencode GTF specification.
* For details, see the following:
* https://www.gencodegenes.org/data_format.html
* https://www.gencodegenes.org/gencode_tags.html
*/
public List<GencodeGtfFeature.OptionalField<?>> optionalFields = new ArrayList<>();

/**
* Additional optional GTF fields.
*/
public String anonymousOptionalFields = null;

public GencodeGtfFeatureBaseData() {}

public GencodeGtfFeatureBaseData(
Expand All @@ -117,8 +113,7 @@ public GencodeGtfFeatureBaseData(
final int exonNumber,
final String exonId,
final String locusLevel,
final List<GencodeGtfFeature.OptionalField<?>> optionalFields,
final String anonymousOptionalFields
final List<GencodeGtfFeature.OptionalField<?>> optionalFields
) {
this.gtfSourceFileType = gtfSourceFileType;
this.featureOrderNumber = featureOrderNumber;
Expand All @@ -145,7 +140,6 @@ public GencodeGtfFeatureBaseData(
this.optionalFields = optionalFields;
}

this.anonymousOptionalFields = anonymousOptionalFields;
}

@Override
Expand Down Expand Up @@ -183,8 +177,7 @@ else if ( this == that ) {
Objects.equals(exonNumber, thatBaseData.exonNumber) &&
Objects.equals(exonId, thatBaseData.exonId) &&
Objects.equals(locusLevel, thatBaseData.locusLevel) &&
Objects.equals(anonymousOptionalFields, thatBaseData.anonymousOptionalFields) &&
Objects.equals(optionalFields, thatBaseData.optionalFields);
Objects.equals(optionalFields, thatBaseData.optionalFields); //TODO should equality enforce the order of the optional fields?
}

return isEqual;
Expand All @@ -211,7 +204,6 @@ public int hashCode() {
result = 31 * result + (exonId != null ? exonId.hashCode() : 0);
result = 31 * result + (locusLevel != null ? locusLevel.hashCode() : 0);
result = 31 * result + (optionalFields != null ? optionalFields.hashCode() : 0);
result = 31 * result + (anonymousOptionalFields != null ? anonymousOptionalFields.hashCode() : 0);
return result;
}
}
Loading

0 comments on commit 125aa5a

Please sign in to comment.