/// <summary>
/// resolves the reference entries found in an intron object
/// </summary>
/// <param name="objectValue">the intron object whose key/value pairs are inspected</param>
/// <param name="intron">the intron that receives the resolved slice</param>
/// <param name="dataStore">the import data store handed to the slice reference parser</param>
/// <exception cref="GeneralException">thrown when a reference entry uses a key other than the slice key</exception>
private static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Intron intron, ImportDataStore dataStore)
{
    foreach (AbstractData entry in objectValue)
    {
        // only reference entries are of interest here
        if (!DumperUtilities.IsReference(entry))
        {
            continue;
        }

        switch (entry.Key)
        {
            case Transcript.SliceKey:
                var sliceReference = entry as ReferenceKeyValue;
                if (sliceReference == null)
                {
                    // a reference that is not a ReferenceKeyValue is silently ignored
                    break;
                }

                intron.Slice = Slice.ParseReference(sliceReference.Value, dataStore);
                break;

            default:
                throw new GeneralException($"Found an unhandled reference in the intron object: {entry.Key}");
        }
    }
}
/// <summary>
/// resolves the PolyPhen and SIFT references stored in a protein function prediction object
/// </summary>
/// <param name="objectValue">the object whose key/value pairs are scanned for references</param>
/// <param name="cache">the predictions container that receives the resolved PolyPhen and SIFT entries</param>
/// <param name="dataStore">the import data store passed on to the reference parsers</param>
/// <exception cref="GeneralException">thrown when a reference key is neither the PolyPhen HumVar key nor the SIFT key</exception>
public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.ProteinFunctionPredictions cache, ImportDataStore dataStore)
{
    foreach (AbstractData entry in objectValue)
    {
        // non-references, and references that are not a ReferenceKeyValue, are skipped
        var reference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
        if (reference == null)
        {
            continue;
        }

        switch (reference.Key)
        {
            case SiftKey:
                cache.Sift = Sift.ParseReference(reference.Value, dataStore);
                break;

            case PolyPhenHumVarKey:
                cache.PolyPhen = PolyPhen.ParseReference(reference.Value, dataStore);
                break;

            default:
                throw new GeneralException(
                    $"Found an unhandled reference in the protein function prediction object: {entry.Key}");
        }
    }
}
/// <summary>
/// builds a PolyPhen object from the matrix entry of a dumper PolyPhen object
/// </summary>
/// <param name="objectValue">the PolyPhen object whose key/value pairs are read</param>
/// <returns>a new PolyPhen built from the matrix string (null when no matrix key was present)</returns>
/// <exception cref="GeneralException">thrown when a key is not in KnownKeys or is not handled by the switch</exception>
public static DataStructures.VEP.PolyPhen Parse(ObjectValue objectValue)
{
    string predictionMatrix = null;

    foreach (AbstractData entry in objectValue)
    {
        var key = entry.Key;

        // reject anything we have never seen before
        if (!KnownKeys.Contains(key))
        {
            throw new GeneralException($"Encountered an unknown key in the dumper PolyPhen object: {key}");
        }

        switch (key)
        {
            case MatrixKey:
                predictionMatrix = DumperUtilities.GetString(entry);
                break;

            case AnalysisKey:
            case IsMatrixCompressedKey:
            case PeptideLengthKey:
            case SubAnalysisKey:
            case TranslationMD5Key:
                // recognised, but the value is not needed
                break;

            default:
                throw new GeneralException($"Unknown key found: {key}");
        }
    }

    return new DataStructures.VEP.PolyPhen(predictionMatrix);
}
/// <summary>
/// reads one regulatory element and appends the resulting regulatory feature to the data store
/// </summary>
/// <param name="objectValue">the regulatory element object whose key/value pairs are read</param>
/// <param name="regulatoryFeatureIndex">index of the regulatory feature (not used by this method)</param>
/// <param name="dataStore">supplies the current reference index and receives the new regulatory feature</param>
/// <exception cref="GeneralException">thrown when a key is not in KnownKeys or is not handled by the switch</exception>
public static void Parse(ObjectValue objectValue, int regulatoryFeatureIndex, ImportDataStore dataStore)
{
    string featureType = null;
    string featureId   = null;
    int featureStart   = -1;
    int featureEnd     = -1;

    foreach (AbstractData entry in objectValue)
    {
        var key = entry.Key;

        // reject anything we have never seen before
        if (!KnownKeys.Contains(key))
        {
            throw new GeneralException(
                $"Encountered an unknown key in the dumper regulatory element object: {key}");
        }

        switch (key)
        {
            case Transcript.StartKey:
                featureStart = DumperUtilities.GetInt32(entry);
                break;

            case Transcript.EndKey:
                featureEnd = DumperUtilities.GetInt32(entry);
                break;

            case Transcript.StableIdKey:
                featureId = DumperUtilities.GetString(entry);
                break;

            case FeatureTypeKey:
                featureType = DumperUtilities.GetString(entry);
                break;

            case BoundLengthsKey:
            case CellTypeCountKey:
            case CellTypesKey:
            case DbIdKey:
            case DisplayLabelKey:
            case HasEvidenceKey:
            case ProjectedKey:
            case SetKey:
            case Transcript.StrandKey:
            case Transcript.SliceKey:
                // recognised, but the value is not needed
                break;

            default:
                throw new GeneralException($"Unknown key found: {key}");
        }
    }

    var feature = new DataStructures.VEP.RegulatoryFeature(dataStore.CurrentReferenceIndex, featureStart,
        featureEnd, featureId, featureType);
    dataStore.RegulatoryFeatures.Add(feature);
}
/// <summary>
/// returns a new exon given an ObjectValue
/// </summary>
/// <param name="objectValue">the exon object whose key/value pairs are read</param>
/// <param name="currentReferenceIndex">the reference index passed through to the new exon</param>
/// <returns>a new exon built from the start, end, stable ID, strand and phase entries</returns>
/// <exception cref="GeneralException">thrown when a key is not in KnownKeys or is not handled by the switch</exception>
public static DataStructures.VEP.Exon Parse(ObjectValue objectValue, ushort currentReferenceIndex)
{
    bool onReverseStrand = false;
    int end              = -1;
    byte? phase          = null;
    int start            = -1;
    string stableId      = null;

    // loop over all of the key/value pairs in the exon object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: make sure we know what the keys are used for
        if (!KnownKeys.Contains(ad.Key))
        {
            // the diagnostic names the exon object (it previously said "mapper" by mistake)
            throw new GeneralException($"Encountered an unknown key in the dumper exon object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case Transcript.EndKey:
                end = DumperUtilities.GetInt32(ad);
                break;

            case EndPhaseKey:
                // not used
                break;

            case PhaseKey:
                // a phase of -1 leaves the nullable phase unset
                int phaseInt = DumperUtilities.GetInt32(ad);
                if (phaseInt != -1)
                {
                    phase = (byte)phaseInt;
                }
                break;

            case Transcript.StableIdKey:
                stableId = DumperUtilities.GetString(ad);
                break;

            case Transcript.StartKey:
                start = DumperUtilities.GetInt32(ad);
                break;

            case Transcript.StrandKey:
                onReverseStrand = TranscriptUtilities.GetStrand(ad);
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return new DataStructures.VEP.Exon(currentReferenceIndex, start, end, stableId, onReverseStrand, phase);
}
/// <summary>
/// converts a miRNA attribute object into an interval
/// </summary>
/// <param name="objectValue">the attribute object whose key/value pairs are read</param>
/// <returns>
/// the interval described by the attribute value, or null when the attribute code is not
/// "miRNA" or no value was present
/// </returns>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, or when the
/// miRNA value does not match RangeRegex
/// </exception>
private static SimpleInterval Parse(ObjectValue objectValue)
{
    string attributeCode  = null;
    string attributeValue = null;

    foreach (AbstractData entry in objectValue)
    {
        var key = entry.Key;

        // reject anything we have never seen before
        if (!KnownKeys.Contains(key))
        {
            throw new GeneralException(
                $"Encountered an unknown key in the dumper attribute object: {key}");
        }

        switch (key)
        {
            case CodeKey:
                attributeCode = DumperUtilities.GetString(entry);
                break;

            case ValueKey:
                attributeValue = DumperUtilities.GetString(entry);
                break;

            case NameKey:
            case DescriptionKey:
                // recognised, but the value is not needed
                break;

            default:
                throw new GeneralException($"Unknown key found: {key}");
        }
    }

    // only miRNA attributes that carry a value are converted
    bool isMicroRna = attributeCode == "miRNA" && attributeValue != null;
    if (!isMicroRna)
    {
        return null;
    }

    var match = RangeRegex.Match(attributeValue);
    if (!match.Success)
    {
        throw new GeneralException("Unable to convert the Attribute to a miRNA object. The value string failed the regex: " + attributeValue);
    }

    // capture groups 1 and 2 hold the start and end of the range
    int intervalStart = int.Parse(match.Groups[1].Value);
    int intervalEnd   = int.Parse(match.Groups[2].Value);

    return new SimpleInterval(intervalStart, intervalEnd);
}
/// <summary>
/// parses every non-reference entry of a mapper pair list
/// </summary>
/// <param name="abstractDataList">the raw mapper pair entries</param>
/// <param name="dataStore">supplies the current reference index for each parsed mapper pair</param>
/// <returns>
/// a list with one slot per input entry; slots that correspond to reference entries keep
/// whatever DumperUtilities.GetPopulatedList put there
/// </returns>
/// <exception cref="GeneralException">
/// thrown when an entry has an unexpected data type or cannot be treated as an ObjectValue
/// </exception>
public static List<DataStructures.VEP.MapperPair> ParseList(List<AbstractData> abstractDataList, ImportDataStore dataStore)
{
    var parsedPairs = DumperUtilities.GetPopulatedList<DataStructures.VEP.MapperPair>(abstractDataList.Count);

    for (int i = 0; i < abstractDataList.Count; i++)
    {
        var entry = abstractDataList[i];

        // references are left untouched by this method
        if (DumperUtilities.IsReference(entry))
        {
            continue;
        }

        if (entry.DataType != DataType)
        {
            throw new GeneralException(
                $"Expected a mapper pair data type, but found the following data type: [{entry.DataType}]");
        }

        var pairObject = entry as ObjectValue;
        if (pairObject == null)
        {
            throw new GeneralException(
                $"Could not transform the AbstractData object into an ObjectValue: [{entry.GetType()}]");
        }

        // each parsed pair is stored directly; no lookup against the data store is performed
        parsedPairs[i] = Parse(pairObject, dataStore.CurrentReferenceIndex);
    }

    return parsedPairs;
}
/// <summary>
/// fills the exon array slots that correspond to reference entries
/// </summary>
/// <param name="abstractDataList">the raw exon entries</param>
/// <param name="exons">the exon array that is updated in place at the index of each reference entry</param>
/// <param name="dataStore">the import data store handed to the exon reference parser</param>
public static void ParseListReference(List<AbstractData> abstractDataList, DataStructures.VEP.Exon[] exons, ImportDataStore dataStore)
{
    for (int i = 0; i < abstractDataList.Count; i++)
    {
        var entry = abstractDataList[i];

        // only ReferenceStringValue references are resolved; everything else is left as is
        var exonReference = DumperUtilities.IsReference(entry) ? entry as ReferenceStringValue : null;
        if (exonReference == null)
        {
            continue;
        }

        exons[i] = ParseReference(exonReference.Value, dataStore);
    }
}
/// <summary>
/// collects the miRNA intervals found in a list of attribute objects
/// </summary>
/// <param name="abstractDataList">the raw attribute entries</param>
/// <returns>an array of the parsed intervals, or null when none were found</returns>
/// <exception cref="GeneralException">thrown when a non-reference entry is not an ObjectValue</exception>
public static SimpleInterval[] ParseList(List<AbstractData> abstractDataList)
{
    var intervals = new List<SimpleInterval>();

    foreach (var entry in abstractDataList)
    {
        // references are not handled by this method
        if (DumperUtilities.IsReference(entry))
        {
            continue;
        }

        var attributeObject = entry as ObjectValue;
        if (attributeObject == null)
        {
            throw new GeneralException(
                $"Could not transform the AbstractData object into an ObjectValue: [{entry.GetType()}]");
        }

        // Parse returns null for attributes that are not miRNAs
        var interval = Parse(attributeObject);
        if (interval != null)
        {
            intervals.Add(interval);
        }
    }

    if (intervals.Count == 0)
    {
        return null;
    }

    return intervals.ToArray();
}
/// <summary>
/// resolves the exon and transcript references stored in a translation object
/// </summary>
/// <param name="objectValue">the translation object whose key/value pairs are scanned for references</param>
/// <param name="translation">the translation that receives the resolved start exon, end exon and transcript</param>
/// <param name="dataStore">the import data store passed on to the reference parsers</param>
/// <exception cref="GeneralException">thrown when a reference uses a key that is not handled by the switch</exception>
public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Translation translation, ImportDataStore dataStore)
{
    foreach (AbstractData entry in objectValue)
    {
        // non-references, and references that are not a ReferenceKeyValue, are skipped
        var reference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
        if (reference == null)
        {
            continue;
        }

        switch (reference.Key)
        {
            case StartExonKey:
                translation.StartExon = Exon.ParseReference(reference.Value, dataStore);
                break;

            case EndExonKey:
                translation.EndExon = Exon.ParseReference(reference.Value, dataStore);
                break;

            case TranscriptKey:
                translation.Transcript = Transcript.ParseReference(reference.Value, dataStore);
                break;

            case AdaptorKey:
                // recognised, but deliberately ignored
                break;

            default:
                throw new GeneralException(
                    $"Found an unhandled reference in the translation object: {entry.Key}");
        }
    }
}
/// <summary>
/// fills the mapper pair list slots that correspond to reference entries
/// </summary>
/// <param name="abstractDataList">the raw mapper pair entries</param>
/// <param name="mapperPairs">the list that is updated in place at the index of each reference entry</param>
/// <param name="dataStore">the import data store handed to the mapper pair reference parser</param>
public static void ParseListReference(List<AbstractData> abstractDataList, List<DataStructures.VEP.MapperPair> mapperPairs, ImportDataStore dataStore)
{
    for (int i = 0; i < abstractDataList.Count; i++)
    {
        var entry = abstractDataList[i];

        // only ReferenceStringValue references are resolved; everything else is left as is
        var pairReference = DumperUtilities.IsReference(entry) ? entry as ReferenceStringValue : null;
        if (pairReference == null)
        {
            continue;
        }

        mapperPairs[i] = ParseReference(pairReference.Value, dataStore);
    }
}
/// <summary>
/// parses the relevant data from each pair genomic object
/// </summary>
/// <param name="objectValue">the pair genomic object whose key/value pairs are read</param>
/// <param name="dataStore">the import data store passed on to the mapper pair list parser</param>
/// <returns>a new pair genomic object whose Genomic list is populated (or null when the entry is undefined)</returns>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, or when the genomic
/// entry is neither a ListObjectKeyValue nor undefined
/// </exception>
public static DataStructures.VEP.PairGenomic Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var pairGenomic = new DataStructures.VEP.PairGenomic();

    // loop over all of the key/value pairs in the pair genomic object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: make sure we know what the keys are used for
        if (!KnownKeys.Contains(ad.Key))
        {
            throw new GeneralException($"Encountered an unknown key in the pair genomic object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case GenomicKey:
                var genomicNode = ad as ListObjectKeyValue;
                if (genomicNode != null)
                {
                    pairGenomic.Genomic = MapperPair.ParseList(genomicNode.Values, dataStore);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    pairGenomic.Genomic = null;
                }
                else
                {
                    // the attempted cast is to ListObjectKeyValue, so the diagnostic names that type
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return pairGenomic;
}
/// <summary>
/// parses every non-reference entry of an exon list
/// </summary>
/// <param name="abstractDataList">the raw exon entries</param>
/// <param name="dataStore">supplies the current reference index for each parsed exon</param>
/// <returns>
/// an array with one slot per input entry; slots that correspond to reference entries stay null
/// </returns>
/// <exception cref="GeneralException">thrown when a non-reference entry is not an ObjectValue</exception>
public static DataStructures.VEP.Exon[] ParseList(List<AbstractData> abstractDataList, ImportDataStore dataStore)
{
    var parsedExons = new DataStructures.VEP.Exon[abstractDataList.Count];

    for (int i = 0; i < abstractDataList.Count; i++)
    {
        var entry = abstractDataList[i];

        // references are left untouched by this method
        if (DumperUtilities.IsReference(entry))
        {
            continue;
        }

        var exonObject = entry as ObjectValue;
        if (exonObject == null)
        {
            throw new GeneralException(
                $"Could not transform the AbstractData object into an ObjectValue: [{entry.GetType()}]");
        }

        // each parsed exon is stored directly; no lookup against the data store is performed
        parsedExons[i] = Parse(exonObject, dataStore.CurrentReferenceIndex);
    }

    return parsedExons;
}
/// <summary>
/// builds a mapper unit from the start, end and id entries of a mapper unit object
/// </summary>
/// <param name="objectValue">the mapper unit object whose key/value pairs are read</param>
/// <param name="currentReferenceIndex">the reference index passed through to the new mapper unit</param>
/// <returns>a new mapper unit</returns>
/// <exception cref="GeneralException">thrown when a key is not in KnownKeys or is not handled by the switch</exception>
public static DataStructures.VEP.MapperUnit Parse(ObjectValue objectValue, ushort currentReferenceIndex)
{
    int unitStart = -1;
    int unitEnd   = -1;
    var unitType  = MapperUnitType.Unknown;

    foreach (AbstractData entry in objectValue)
    {
        var key = entry.Key;

        // reject anything we have never seen before
        if (!KnownKeys.Contains(key))
        {
            throw new GeneralException($"Encountered an unknown key in the mapper unit object: {key}");
        }

        switch (key)
        {
            case Transcript.StartKey:
                unitStart = DumperUtilities.GetInt32(entry);
                break;

            case Transcript.EndKey:
                unitEnd = DumperUtilities.GetInt32(entry);
                break;

            case IdKey:
                unitType = TranscriptUtilities.GetMapperUnitType(entry);
                break;

            default:
                throw new GeneralException($"Unknown key found: {key}");
        }
    }

    return new DataStructures.VEP.MapperUnit(currentReferenceIndex, unitStart, unitEnd, unitType);
}
/// <summary>
/// returns a new intron given an ObjectValue
/// </summary>
/// <param name="objectValue">the intron object whose key/value pairs are read</param>
/// <param name="dataStore">supplies the current reference index used when parsing the slice</param>
/// <returns>a new intron with its start, end and (when present as an object) slice populated</returns>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, or when the slice
/// entry is neither an ObjectKeyValue nor a reference
/// </exception>
private static DataStructures.VEP.Intron Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var intron = new DataStructures.VEP.Intron();

    // loop over all of the key/value pairs in the intron object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: make sure we know what the keys are used for
        if (!KnownKeys.Contains(ad.Key))
        {
            // the diagnostic names the intron object (it previously said "mapper" by mistake)
            throw new GeneralException($"Encountered an unknown key in the dumper intron object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case Transcript.EndKey:
                intron.End = DumperUtilities.GetInt32(ad);
                break;

            case Transcript.SliceKey:
                var sliceNode = ad as ObjectKeyValue;
                if (sliceNode != null)
                {
                    // the parsed slice is stored directly; no lookup against the data store is performed
                    intron.Slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                }
                else if (DumperUtilities.IsReference(ad))
                {
                    // skip references until the second pass
                }
                else
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue or ReferenceKeyValue: [{ad.GetType()}]");
                }
                break;

            case Transcript.StartKey:
                intron.Start = DumperUtilities.GetInt32(ad);
                break;

            case Transcript.StrandKey:
                // the strand is still parsed, but its value is discarded
                TranscriptUtilities.GetStrand(ad);
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return intron;
}
/// <summary>
/// builds a mapper from an exon coordinate mapper object
/// </summary>
/// <param name="objectValue">the mapper object whose key/value pairs are read</param>
/// <param name="dataStore">the import data store passed on to the pair parsers</param>
/// <returns>a new mapper populated from the recognised keys</returns>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, when either coordinate
/// system entry is defined, or when a pair entry is neither an ObjectKeyValue nor undefined
/// </exception>
public static DataStructures.VEP.Mapper Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var result = new DataStructures.VEP.Mapper();

    foreach (AbstractData entry in objectValue)
    {
        // reject anything we have never seen before
        if (!KnownKeys.Contains(entry.Key))
        {
            throw new GeneralException($"Encountered an unknown key in the dumper mapper object: {entry.Key}");
        }

        switch (entry.Key)
        {
            case FromNameKey:
                result.FromType = DumperUtilities.GetString(entry);
                break;

            case ToNameKey:
                result.ToType = DumperUtilities.GetString(entry);
                break;

            case IsSortedKey:
                result.IsSorted = DumperUtilities.GetBool(entry);
                break;

            case PairCountKey:
                result.PairCount = DumperUtilities.GetInt32(entry);
                break;

            case FromCoordSystemKey:
                // this entry is only accepted when it is undefined
                if (!DumperUtilities.IsUndefined(entry))
                {
                    throw new GeneralException("Found an unexpected value in FromCoordSystemKey");
                }
                break;

            case ToCoordSystemKey:
                // this entry is only accepted when it is undefined
                if (!DumperUtilities.IsUndefined(entry))
                {
                    throw new GeneralException("Found an unexpected value in ToCoordSystemKey");
                }
                break;

            case PairCodingDnaKey:
                var codingDnaNode = entry as ObjectKeyValue;
                if (codingDnaNode == null)
                {
                    if (!DumperUtilities.IsUndefined(entry))
                    {
                        throw new GeneralException(
                            $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                    }

                    result.PairCodingDna = null;
                    break;
                }

                result.PairCodingDna = PairCodingDna.Parse(codingDnaNode.Value, dataStore);
                break;

            case PairGenomicKey:
                var genomicNode = entry as ObjectKeyValue;
                if (genomicNode == null)
                {
                    if (!DumperUtilities.IsUndefined(entry))
                    {
                        throw new GeneralException(
                            $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                    }

                    result.PairGenomic = null;
                    break;
                }

                result.PairGenomic = PairGenomic.Parse(genomicNode.Value, dataStore);
                break;

            default:
                throw new GeneralException($"Unknown key found: {entry.Key}");
        }
    }

    return result;
}
/// <summary>
/// builds a variant effect feature cache from a dumper cache object
/// </summary>
/// <param name="objectValue">the cache object whose key/value pairs are read</param>
/// <param name="dataStore">the import data store passed on to the intron, mapper and exon parsers</param>
/// <returns>a new cache populated from the recognised keys</returns>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, or when an entry does
/// not have the node type its key requires
/// </exception>
public static DataStructures.VEP.VariantEffectFeatureCache Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var result = new DataStructures.VEP.VariantEffectFeatureCache();

    foreach (AbstractData entry in objectValue)
    {
        // reject anything we have never seen before
        if (!KnownKeys.Contains(entry.Key))
        {
            throw new GeneralException(
                $"Encountered an unknown key in the dumper variant effect feature cache object: {entry.Key}");
        }

        switch (entry.Key)
        {
            case PeptideKey:
                result.Peptide = DumperUtilities.GetString(entry);
                break;

            case TranslateableSeqKey:
                result.TranslateableSeq = DumperUtilities.GetString(entry);
                break;

            case IntronsKey:
                // introns may be a list or undefined
                var intronNodes = entry as ListObjectKeyValue;
                if (intronNodes == null)
                {
                    if (!DumperUtilities.IsUndefined(entry))
                    {
                        throw new GeneralException(
                            $"Could not transform the AbstractData object into a ListObjectKeyValue: [{entry.GetType()}]");
                    }

                    result.Introns = null;
                    break;
                }

                result.Introns = Intron.ParseList(intronNodes.Values, dataStore);
                break;

            case MapperKey:
                var mapperObject = entry as ObjectKeyValue;
                if (mapperObject == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                }

                result.Mapper = TranscriptMapper.Parse(mapperObject.Value, dataStore);
                break;

            case ProteinFunctionPredictionsKey:
                var predictionsObject = entry as ObjectKeyValue;
                if (predictionsObject == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                }

                result.ProteinFunctionPredictions = ProteinFunctionPredictions.Parse(predictionsObject.Value);
                break;

            case SortedExonsKey:
                var exonNodes = entry as ListObjectKeyValue;
                if (exonNodes == null)
                {
                    throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{entry.GetType()}]");
                }

                result.Exons = Exon.ParseList(exonNodes.Values, dataStore);
                break;

            case SelenocysteinesKey:
            case ThreePrimeUtrKey:
            case SeqEditsKey:
            case CodonTableKey:
            case ProteinFeaturesKey:
                // recognised, but the value is not needed
                break;

            default:
                throw new GeneralException($"Unknown key found: {entry.Key}");
        }
    }

    return result;
}
/// <summary>
/// reads one transcript object and appends the resulting transcript to the data store
/// </summary>
/// <param name="objectValue">the transcript object whose key/value pairs are read</param>
/// <param name="transcriptIndex">index of the transcript (not used by this method)</param>
/// <param name="dataStore">supplies the current reference index and receives the new transcript</param>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, or when the exon array,
/// translation or feature cache entry does not have the node type its key requires
/// </exception>
public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
{
    // enumerations
    var biotype      = BioType.Unknown;
    var symbolSource = GeneSymbolSource.Unknown;

    // nested objects
    SimpleInterval[] miRnas                                   = null;
    DataStructures.VEP.Exon[] exons                           = null;
    DataStructures.VEP.Gene gene                              = null;
    DataStructures.VEP.Translation translation                = null;
    DataStructures.VEP.VariantEffectFeatureCache featureCache = null;
    DataStructures.VEP.Slice slice                            = null;

    // flags, coordinates and version
    bool isReverse      = false;
    bool canonical      = false;
    int cdnaCodingStart = -1;
    int cdnaCodingEnd   = -1;
    int start           = -1;
    int end             = -1;
    byte version        = 1;

    // identifiers
    string ccdsId       = null;
    string dbId         = null;
    string proteinId    = null;
    string refSeqId     = null;
    string geneStableId = null;
    string transcriptId = null;
    string geneSymbol   = null;
    int hgncId          = -1;

    foreach (AbstractData entry in objectValue)
    {
        // reject anything we have never seen before
        if (!KnownKeys.Contains(entry.Key))
        {
            throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {entry.Key}");
        }

        // for the identifier strings below, "-" and the empty string are stored as null
        switch (entry.Key)
        {
            case CodingRegionStartKey:
            case CodingRegionEndKey:
            case CreatedDateKey:
            case DescriptionKey:
            case DisplayXrefKey:
            case ExternalDbKey:
            case ExternalDisplayNameKey:
            case ExternalNameKey:
            case ExternalStatusKey:
            case GenePhenotypeKey:
            case ModifiedDateKey:
            case SourceKey:
            case SwissProtKey:
            case TremblKey:
            case UniParcKey:
                // recognised, but the value is not needed
                break;

            case AttributesKey:
                // anything that is not a list is ignored
                var attributeNodes = entry as ListObjectKeyValue;
                if (attributeNodes != null)
                {
                    miRnas = Attribute.ParseList(attributeNodes.Values);
                }
                break;

            case BiotypeKey:
                biotype = TranscriptUtilities.GetBiotype(entry);
                break;

            case CcdsKey:
                var rawCcds = DumperUtilities.GetString(entry);
                ccdsId = (rawCcds == "-" || rawCcds == "") ? null : rawCcds;
                break;

            case CdnaCodingEndKey:
                cdnaCodingEnd = DumperUtilities.GetInt32(entry);
                break;

            case CdnaCodingStartKey:
                cdnaCodingStart = DumperUtilities.GetInt32(entry);
                break;

            case DbIdKey:
                var rawDbId = DumperUtilities.GetString(entry);
                dbId = (rawDbId == "-" || rawDbId == "") ? null : rawDbId;
                break;

            case EndKey:
                end = DumperUtilities.GetInt32(entry);
                break;

            case GeneHgncIdKey:
                var hgncText = DumperUtilities.GetString(entry);
                if (hgncText == null)
                {
                    break;
                }

                // drop the 5-character "HGNC:" prefix when present
                if (hgncText.StartsWith("HGNC:"))
                {
                    hgncText = hgncText.Substring(5);
                }

                // "-" and the empty string leave the HGNC id untouched
                if (hgncText != "-" && hgncText != "")
                {
                    hgncId = int.Parse(hgncText);
                }
                break;

            case GeneSymbolKey:
            case GeneHgncKey: // older key
                var rawSymbol = DumperUtilities.GetString(entry);
                geneSymbol = (rawSymbol == "-" || rawSymbol == "") ? null : rawSymbol;
                break;

            case GeneSymbolSourceKey:
                symbolSource = TranscriptUtilities.GetGeneSymbolSource(entry);
                break;

            case GeneKey:
                // anything that is not an object node is ignored here
                var geneObject = entry as ObjectKeyValue;
                if (geneObject != null)
                {
                    gene = Gene.Parse(geneObject.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case GeneStableIdKey:
                var rawGeneId = DumperUtilities.GetString(entry);
                geneStableId = (rawGeneId == "-" || rawGeneId == "") ? null : rawGeneId;
                break;

            case IsCanonicalKey:
                canonical = DumperUtilities.GetBool(entry);
                break;

            case ProteinKey:
                var rawProtein = DumperUtilities.GetString(entry);
                proteinId = (rawProtein == "-" || rawProtein == "") ? null : rawProtein;
                break;

            case RefseqKey:
                var rawRefSeq = DumperUtilities.GetString(entry);
                refSeqId = (rawRefSeq == "-" || rawRefSeq == "") ? null : rawRefSeq;
                break;

            case SliceKey:
                // anything that is not an object node is ignored here
                var sliceObject = entry as ObjectKeyValue;
                if (sliceObject != null)
                {
                    slice = Slice.Parse(sliceObject.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case StableIdKey:
                var rawTranscriptId = DumperUtilities.GetString(entry);
                transcriptId = (rawTranscriptId == "-" || rawTranscriptId == "") ? null : rawTranscriptId;
                break;

            case StartKey:
                start = DumperUtilities.GetInt32(entry);
                break;

            case StrandKey:
                isReverse = TranscriptUtilities.GetStrand(entry);
                break;

            case TransExonArrayKey:
                var exonNodes = entry as ListObjectKeyValue;
                if (exonNodes == null)
                {
                    throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{entry.GetType()}]");
                }

                exons = Exon.ParseList(exonNodes.Values, dataStore);
                break;

            case TranslationKey:
                // the translation may be an object or undefined
                var translationObject = entry as ObjectKeyValue;
                if (translationObject == null)
                {
                    if (!DumperUtilities.IsUndefined(entry))
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                    }

                    translation = null;
                    break;
                }

                translation = Translation.Parse(translationObject.Value, dataStore);
                break;

            case VariationEffectFeatureCacheKey:
                var cacheObject = entry as ObjectKeyValue;
                if (cacheObject == null)
                {
                    throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                }

                featureCache = VariantEffectFeatureCache.Parse(cacheObject.Value, dataStore);
                break;

            case VersionKey:
                version = (byte)DumperUtilities.GetInt32(entry);
                break;

            default:
                throw new GeneralException($"Unknown key found: {entry.Key}");
        }
    }

    dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(
        biotype, exons, gene, translation, featureCache, slice,
        isReverse, canonical, cdnaCodingStart, cdnaCodingEnd,
        dataStore.CurrentReferenceIndex, start, end,
        ccdsId, dbId, proteinId, refSeqId, geneStableId, transcriptId,
        geneSymbol, symbolSource, hgncId, version, miRnas));
}
/// <summary>
/// resolves the references of an already parsed transcript
/// </summary>
/// <remarks>
/// references are handled for the gene, the slice, the exon array, the translation and the
/// variant effect feature cache; all other keys are ignored
/// </remarks>
/// <param name="objectValue">the transcript object whose key/value pairs are scanned</param>
/// <param name="transcriptIndex">index into dataStore.Transcripts of the transcript to update</param>
/// <param name="dataStore">holds the transcripts and is passed on to the reference parsers</param>
public static void ParseReferences(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
{
    var target = dataStore.Transcripts[transcriptIndex];

    foreach (AbstractData entry in objectValue)
    {
        // undefined entries carry nothing to resolve
        if (DumperUtilities.IsUndefined(entry))
        {
            continue;
        }

        switch (entry.Key)
        {
            case GeneKey:
                var geneReference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
                if (geneReference != null)
                {
                    target.Gene = Gene.ParseReference(geneReference.Value, dataStore);
                }
                break;

            case SliceKey:
                var sliceReference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
                if (sliceReference != null)
                {
                    target.Slice = Slice.ParseReference(sliceReference.Value, dataStore);
                }
                break;

            case TransExonArrayKey:
                // the exon array itself is a list; its elements may be references
                var exonNodes = entry as ListObjectKeyValue;
                if (exonNodes != null)
                {
                    Exon.ParseListReference(exonNodes.Values, target.TransExons, dataStore);
                }
                break;

            case TranslationKey:
                // references live inside the translation object
                var translationObject = entry as ObjectKeyValue;
                if (translationObject != null)
                {
                    Translation.ParseReference(translationObject.Value, target.Translation, dataStore);
                }
                break;

            case VariationEffectFeatureCacheKey:
                // references live inside the cache object
                var cacheObject = entry as ObjectKeyValue;
                if (cacheObject != null)
                {
                    VariantEffectFeatureCache.ParseReference(cacheObject.Value, target.VariantEffectCache, dataStore);
                }
                break;
        }
    }
}
/// <summary>
/// builds a slice from a dumper slice object
/// </summary>
/// <param name="objectValue">the slice object whose key/value pairs are read</param>
/// <param name="currentReferenceIndex">the reference index passed through to the new slice</param>
/// <returns>a new slice populated from the recognised keys</returns>
/// <exception cref="GeneralException">thrown when a key is not in KnownKeys or is not handled by the switch</exception>
public static DataStructures.VEP.Slice Parse(ObjectValue objectValue, ushort currentReferenceIndex)
{
    int sliceStart    = -1;
    int sliceEnd      = -1;
    bool reverse      = false;
    bool circular     = false;
    bool topLevel     = false;
    int regionLength  = -1;
    string regionName = null;
    DataStructures.VEP.CoordSystem coordSystem = null;

    foreach (AbstractData entry in objectValue)
    {
        var key = entry.Key;

        // reject anything we have never seen before
        if (!KnownKeys.Contains(key))
        {
            throw new GeneralException($"Encountered an unknown key in the dumper slice object: {key}");
        }

        switch (key)
        {
            case Transcript.StartKey:
                sliceStart = DumperUtilities.GetInt32(entry);
                break;

            case Transcript.EndKey:
                sliceEnd = DumperUtilities.GetInt32(entry);
                break;

            case Transcript.StrandKey:
                reverse = TranscriptUtilities.GetStrand(entry);
                break;

            case CircularKey:
                circular = DumperUtilities.GetBool(entry);
                break;

            case TopLevelSliceKey:
                topLevel = DumperUtilities.GetBool(entry);
                break;

            case SequenceRegionLenKey:
                regionLength = DumperUtilities.GetInt32(entry);
                break;

            case SequenceRegionNameKey:
                regionName = DumperUtilities.GetString(entry);
                break;

            case CoordSystemKey:
                // anything that is not an object node is ignored
                var coordSystemObject = entry as ObjectKeyValue;
                if (coordSystemObject != null)
                {
                    coordSystem = CoordSystem.Parse(coordSystemObject.Value);
                }
                break;

            default:
                throw new GeneralException($"Unknown key found: {key}");
        }
    }

    return new DataStructures.VEP.Slice(currentReferenceIndex, sliceStart, sliceEnd, reverse, circular,
        topLevel, coordSystem, regionLength, regionName);
}
/// <summary>
/// parses the relevant data from each translation object
/// </summary>
/// <param name="objectValue">the translation object whose key/value pairs are read</param>
/// <param name="dataStore">supplies the current reference index used when parsing the exons</param>
/// <returns>a new translation populated from the recognised keys</returns>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, or when the
/// transcript entry is not a reference
/// </exception>
public static DataStructures.VEP.Translation Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var translation = new DataStructures.VEP.Translation();

    // loop over all of the key/value pairs in the translation object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: make sure we know what the keys are used for
        if (!KnownKeys.Contains(ad.Key))
        {
            // the diagnostic names the translation object (it previously said "mapper" by mistake)
            throw new GeneralException($"Encountered an unknown key in the dumper translation object: {ad.Key}");
        }

        // handle each key
        ObjectKeyValue exonNode;

        switch (ad.Key)
        {
            case AdaptorKey:
            case SequenceKey:
            case Transcript.DbIdKey:
            case Transcript.StableIdKey:
                // skip this key
                break;

            case EndExonKey:
                // entries that are not object nodes are ignored here
                exonNode = ad as ObjectKeyValue;
                if (exonNode != null)
                {
                    translation.EndExon = Exon.Parse(exonNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case StartExonKey:
                // entries that are not object nodes are ignored here
                exonNode = ad as ObjectKeyValue;
                if (exonNode != null)
                {
                    translation.StartExon = Exon.Parse(exonNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case TranscriptKey:
                // parse this during the references
                if (!DumperUtilities.IsReference(ad))
                {
                    throw new GeneralException("Found a Translation->Transcript entry that wasn't a reference.");
                }
                break;

            case Transcript.EndKey:
                translation.End = DumperUtilities.GetInt32(ad);
                break;

            case Transcript.StartKey:
                translation.Start = DumperUtilities.GetInt32(ad);
                break;

            case Transcript.VersionKey:
                translation.Version = (byte)DumperUtilities.GetInt32(ad);
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return translation;
}
/// <summary>
/// parses the relevant data from each protein function predictions object
/// </summary>
/// <param name="objectValue">the predictions object whose key/value pairs are read</param>
/// <returns>a new predictions object with PolyPhen (HumVar) and SIFT populated when present as objects</returns>
/// <exception cref="GeneralException">
/// thrown when a key is not in KnownKeys, is not handled by the switch, when the PolyPhen key
/// is defined, or when a HumVar/SIFT entry is not an object, undefined or a reference
/// </exception>
public static DataStructures.VEP.ProteinFunctionPredictions Parse(ObjectValue objectValue)
{
    var predictions = new DataStructures.VEP.ProteinFunctionPredictions();

    // loop over all of the key/value pairs in the protein function predictions object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: make sure we know what the keys are used for
        if (!KnownKeys.Contains(ad.Key))
        {
            // the diagnostic names this object type (it previously said "mapper" by mistake)
            throw new GeneralException(
                $"Encountered an unknown key in the dumper protein function predictions object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case PolyPhenHumDivKey:
                // not used by default
                break;

            case PolyPhenKey:
                // only an undefined value is accepted for this key
                if (!DumperUtilities.IsUndefined(ad))
                {
                    throw new GeneralException($"Could not handle the PolyPhen key: [{ad.GetType()}]");
                }
                break;

            case PolyPhenHumVarKey:
                // used by default
                var polyPhenHumVarNode = ad as ObjectKeyValue;
                if (polyPhenHumVarNode != null)
                {
                    predictions.PolyPhen = PolyPhen.Parse(polyPhenHumVarNode.Value);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    predictions.PolyPhen = null;
                }
                else if (DumperUtilities.IsReference(ad))
                {
                    // skip references for now
                }
                else
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            case SiftKey:
                var siftNode = ad as ObjectKeyValue;
                if (siftNode != null)
                {
                    predictions.Sift = Sift.Parse(siftNode.Value);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    predictions.Sift = null;
                }
                else if (DumperUtilities.IsReference(ad))
                {
                    // skip references for now
                }
                else
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return predictions;
}