/// <summary>
/// resolves the reference entries of a translation that has already been created
/// </summary>
/// <param name="objectValue">the translation object whose key/value pairs are scanned</param>
/// <param name="translation">the existing translation that receives the resolved exons and transcript</param>
/// <param name="dataStore">passed through to the exon and transcript reference parsers</param>
/// <exception cref="GeneralException">thrown when a reference entry carries a key that is not handled here</exception>
public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Translation translation, ImportDataStore dataStore)
{
    foreach (AbstractData entry in objectValue)
    {
        // entries that are not references, or that cannot be viewed as a
        // ReferenceKeyValue, are skipped without comment
        var reference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
        if (reference == null)
        {
            continue;
        }

        switch (reference.Key)
        {
            case StartExonKey:
                translation.StartExon = Exon.ParseReference(reference.Value, dataStore);
                break;

            case EndExonKey:
                translation.EndExon = Exon.ParseReference(reference.Value, dataStore);
                break;

            case TranscriptKey:
                translation.Transcript = Transcript.ParseReference(reference.Value, dataStore);
                break;

            case AdaptorKey:
                // adaptor references are intentionally ignored
                break;

            default:
                throw new GeneralException(
                    $"Found an unhandled reference in the translation object: {entry.Key}");
        }
    }
}
/// <summary>
/// parses the relevant data from each transcript and adds the resulting
/// transcript to the data store
/// </summary>
/// <param name="objectValue">the dumper object holding the key/value pairs of one transcript</param>
/// <param name="transcriptIndex">index of the transcript; not read by this method</param>
/// <param name="dataStore">supplies the current reference index and receives the parsed transcript</param>
/// <exception cref="GeneralException">
/// thrown when an unknown key is encountered, or when the exon array, translation or
/// variant effect cache entry does not have the expected node type
/// </exception>
public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
{
    var bioType          = BioType.Unknown;
    var geneSymbolSource = GeneSymbolSource.Unknown; // HGNC

    SimpleInterval[] microRnas                                    = null;
    DataStructures.VEP.Exon[] transExons                          = null;
    DataStructures.VEP.Gene gene                                  = null;
    DataStructures.VEP.Translation translation                    = null;
    DataStructures.VEP.VariantEffectFeatureCache variantEffectCache = null;
    DataStructures.VEP.Slice slice                                = null;

    bool onReverseStrand = false;
    bool isCanonical     = false;

    int compDnaCodingStart = -1;
    int compDnaCodingEnd   = -1;

    int start    = -1;
    int end      = -1;
    byte version = 1;

    string ccdsId       = null;
    string databaseId   = null;
    string proteinId    = null;
    string refSeqId     = null;
    string geneStableId = null;
    string stableId     = null;

    string geneSymbol = null; // DDX11L1
    int hgncId        = -1;   // 37102

    // loop over all of the key/value pairs in the transcript object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: make sure we know what every key is used for
        if (!KnownKeys.Contains(ad.Key))
        {
            throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case CodingRegionStartKey:
            case CodingRegionEndKey:
            case CreatedDateKey:
            case DescriptionKey:
            case DisplayXrefKey:
            case ExternalDbKey:
            case ExternalDisplayNameKey:
            case ExternalNameKey:
            case ExternalStatusKey:
            case GenePhenotypeKey:
            case ModifiedDateKey:
            case SourceKey:
            case SwissProtKey:
            case TremblKey:
            case UniParcKey:
                // not used
                break;

            case AttributesKey:
                // entries that are not a list node leave microRnas at null
                var attributesList = ad as ListObjectKeyValue;
                if (attributesList != null)
                {
                    microRnas = Attribute.ParseList(attributesList.Values);
                }
                break;

            case BiotypeKey:
                bioType = TranscriptUtilities.GetBiotype(ad);
                break;

            case CcdsKey:
                ccdsId = NullIfMissing(DumperUtilities.GetString(ad));
                break;

            case CdnaCodingEndKey:
                compDnaCodingEnd = DumperUtilities.GetInt32(ad);
                break;

            case CdnaCodingStartKey:
                compDnaCodingStart = DumperUtilities.GetInt32(ad);
                break;

            case DbIdKey:
                databaseId = NullIfMissing(DumperUtilities.GetString(ad));
                break;

            case EndKey:
                end = DumperUtilities.GetInt32(ad);
                break;

            case GeneHgncIdKey:
                var hgnc = DumperUtilities.GetString(ad);

                // the prefix is a fixed ASCII token, so compare ordinally instead of
                // relying on the current culture's collation rules
                if (hgnc != null && hgnc.StartsWith("HGNC:", System.StringComparison.Ordinal))
                {
                    hgnc = hgnc.Substring(5);
                }

                hgnc = NullIfMissing(hgnc);

                // the ID is machine-readable data: parse it independently of the
                // culture the importer happens to run under
                if (hgnc != null)
                {
                    hgncId = int.Parse(hgnc, System.Globalization.CultureInfo.InvariantCulture);
                }
                break;

            case GeneSymbolKey:
            case GeneHgncKey: // older key
                geneSymbol = NullIfMissing(DumperUtilities.GetString(ad));
                break;

            case GeneSymbolSourceKey:
                geneSymbolSource = TranscriptUtilities.GetGeneSymbolSource(ad);
                break;

            case GeneKey:
                // entries that are not an object node leave gene at null
                var geneNode = ad as ObjectKeyValue;
                if (geneNode != null)
                {
                    gene = Gene.Parse(geneNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case GeneStableIdKey:
                geneStableId = NullIfMissing(DumperUtilities.GetString(ad));
                break;

            case IsCanonicalKey:
                isCanonical = DumperUtilities.GetBool(ad);
                break;

            case ProteinKey:
                proteinId = NullIfMissing(DumperUtilities.GetString(ad));
                break;

            case RefseqKey:
                refSeqId = NullIfMissing(DumperUtilities.GetString(ad));
                break;

            case SliceKey:
                // entries that are not an object node leave slice at null
                var sliceNode = ad as ObjectKeyValue;
                if (sliceNode != null)
                {
                    slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case StableIdKey:
                stableId = NullIfMissing(DumperUtilities.GetString(ad));
                break;

            case StartKey:
                start = DumperUtilities.GetInt32(ad);
                break;

            case StrandKey:
                onReverseStrand = TranscriptUtilities.GetStrand(ad);
                break;

            case TransExonArrayKey:
                var exonsList = ad as ListObjectKeyValue;
                if (exonsList != null)
                {
                    transExons = Exon.ParseList(exonsList.Values, dataStore);
                }
                else
                {
                    throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            case TranslationKey:
                var translationNode = ad as ObjectKeyValue;
                if (translationNode != null)
                {
                    translation = Translation.Parse(translationNode.Value, dataStore);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    // an undefined entry is accepted and yields no translation
                    translation = null;
                }
                else
                {
                    throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            case VariationEffectFeatureCacheKey:
                var cacheNode = ad as ObjectKeyValue;
                if (cacheNode == null)
                {
                    throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }
                variantEffectCache = VariantEffectFeatureCache.Parse(cacheNode.Value, dataStore);
                break;

            case VersionKey:
                version = (byte)DumperUtilities.GetInt32(ad);
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(bioType, transExons, gene, translation,
        variantEffectCache, slice, onReverseStrand, isCanonical, compDnaCodingStart, compDnaCodingEnd,
        dataStore.CurrentReferenceIndex, start, end, ccdsId, databaseId, proteinId, refSeqId, geneStableId,
        stableId, geneSymbol, geneSymbolSource, hgncId, version, microRnas));
}

/// <summary>
/// maps the placeholder values "-" and "" to null and returns every other
/// value (including null) unchanged
/// </summary>
private static string NullIfMissing(string value)
{
    return (value == "-" || value == "") ? null : value;
}
/// <summary>
/// parses the relevant data from each translation object
/// </summary>
/// <param name="objectValue">the dumper object holding the key/value pairs of one translation</param>
/// <param name="dataStore">supplies the current reference index used when parsing the exons</param>
/// <returns>a new translation populated with the start, end, version and exon entries that were found</returns>
/// <exception cref="GeneralException">
/// thrown when an unknown key is encountered or when the transcript entry is not a reference
/// </exception>
public static DataStructures.VEP.Translation Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var translation = new DataStructures.VEP.Translation();

    // loop over all of the key/value pairs in the translation object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: make sure we know what every key is used for
        if (!KnownKeys.Contains(ad.Key))
        {
            throw new GeneralException($"Encountered an unknown key in the dumper translation object: {ad.Key}");
        }

        // handle each key
        ObjectKeyValue exonNode;

        switch (ad.Key)
        {
            case AdaptorKey:
            case SequenceKey:
            case Transcript.DbIdKey:
            case Transcript.StableIdKey:
                // skip this key
                break;

            case EndExonKey:
                // only object nodes are parsed here; any other form of this entry
                // (ParseReference resolves the reference form) is left untouched
                exonNode = ad as ObjectKeyValue;
                if (exonNode != null)
                {
                    translation.EndExon = Exon.Parse(exonNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case StartExonKey:
                // same handling as the end exon
                exonNode = ad as ObjectKeyValue;
                if (exonNode != null)
                {
                    translation.StartExon = Exon.Parse(exonNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case TranscriptKey:
                // the transcript is resolved while parsing references, so the
                // entry is only validated here
                if (!DumperUtilities.IsReference(ad))
                {
                    throw new GeneralException("Found a Translation->Transcript entry that wasn't a reference.");
                }
                break;

            case Transcript.EndKey:
                translation.End = DumperUtilities.GetInt32(ad);
                break;

            case Transcript.StartKey:
                translation.Start = DumperUtilities.GetInt32(ad);
                break;

            case Transcript.VersionKey:
                translation.Version = (byte)DumperUtilities.GetInt32(ad);
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return translation;
}