/// <summary>
/// Parses a pair genomic object into a new <c>DataStructures.VEP.PairGenomic</c>.
/// </summary>
/// <param name="objectValue">The key/value entries that make up the pair genomic object.</param>
/// <param name="dataStore">The import data store, passed through to <c>MapperPair.ParseList</c>.</param>
/// <returns>
/// A new pair genomic object. Its <c>Genomic</c> member holds the parsed list, or null when
/// the genomic entry is undefined.
/// </returns>
/// <exception cref="GeneralException">
/// Thrown when a key is not listed in <c>KnownKeys</c>, when a key is not handled by this
/// method, or when the genomic entry is neither a list nor undefined.
/// </exception>
public static DataStructures.VEP.PairGenomic Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var pairGenomic = new DataStructures.VEP.PairGenomic();

    // loop over all of the key/value pairs in the pair genomic object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: reject keys that this parser has never been told about
        if (!KnownKeys.Contains(ad.Key))
        {
            throw new GeneralException($"Encountered an unknown key in the pair genomic object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case GenomicKey:
                var genomicNode = ad as ListObjectKeyValue;

                if (genomicNode != null)
                {
                    pairGenomic.Genomic = MapperPair.ParseList(genomicNode.Values, dataStore);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    pairGenomic.Genomic = null;
                }
                else
                {
                    // the cast above targets ListObjectKeyValue, so the message names that type
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                }

                break;

            default:
                // a key that is in KnownKeys but has no handler here
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return pairGenomic;
}
/// <summary>
/// Builds a <c>DataStructures.VEP.VariantEffectFeatureCache</c> from the entries of a
/// variant effect feature cache object.
/// </summary>
/// <param name="objectValue">The key/value entries of the cache object.</param>
/// <param name="dataStore">The import data store, forwarded to the intron, exon and mapper parsers.</param>
/// <returns>The populated cache object.</returns>
/// <exception cref="GeneralException">
/// Thrown when a key is not in <c>KnownKeys</c>, when a key has no handler here, or when an
/// entry does not have the node type its key requires.
/// </exception>
public static DataStructures.VEP.VariantEffectFeatureCache Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var featureCache = new DataStructures.VEP.VariantEffectFeatureCache();

    foreach (AbstractData ad in objectValue)
    {
        // refuse anything that is not a recognised key
        bool isKnownKey = KnownKeys.Contains(ad.Key);
        if (!isKnownKey)
        {
            throw new GeneralException(
                $"Encountered an unknown key in the dumper variant effect feature cache object: {ad.Key}");
        }

        switch (ad.Key)
        {
            // recognised, but nothing is extracted from these entries
            case CodonTableKey:
            case ProteinFeaturesKey:
            case SelenocysteinesKey:
            case SeqEditsKey:
            case ThreePrimeUtrKey:
                break;

            case IntronsKey:
                var intronEntries = ad as ListObjectKeyValue;
                if (intronEntries == null)
                {
                    // only an undefined entry may stand in for the list
                    if (!DumperUtilities.IsUndefined(ad))
                    {
                        throw new GeneralException(
                            $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }

                    featureCache.Introns = null;
                }
                else
                {
                    featureCache.Introns = Intron.ParseList(intronEntries.Values, dataStore);
                }
                break;

            case MapperKey:
                var mapperEntry = ad as ObjectKeyValue;
                if (mapperEntry == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }

                featureCache.Mapper = TranscriptMapper.Parse(mapperEntry.Value, dataStore);
                break;

            case PeptideKey:
                featureCache.Peptide = DumperUtilities.GetString(ad);
                break;

            case ProteinFunctionPredictionsKey:
                var predictionsEntry = ad as ObjectKeyValue;
                if (predictionsEntry == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }

                featureCache.ProteinFunctionPredictions = ProteinFunctionPredictions.Parse(predictionsEntry.Value);
                break;

            case SortedExonsKey:
                var exonEntries = ad as ListObjectKeyValue;
                if (exonEntries == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                }

                featureCache.Exons = Exon.ParseList(exonEntries.Values, dataStore);
                break;

            case TranslateableSeqKey:
                featureCache.TranslateableSeq = DumperUtilities.GetString(ad);
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return featureCache;
}
/// <summary>
/// Walks a transcript object a second time and hands its reference-bearing entries to the
/// matching <c>ParseReference</c> / <c>ParseListReference</c> routines, updating the
/// transcript stored at <paramref name="transcriptIndex"/>.
/// </summary>
/// <param name="objectValue">The key/value entries of the transcript object.</param>
/// <param name="transcriptIndex">Index into <c>dataStore.Transcripts</c> of the transcript to update.</param>
/// <param name="dataStore">The import data store that holds the transcripts.</param>
public static void ParseReferences(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
{
    var target = dataStore.Transcripts[transcriptIndex];

    // Places where references occur:
    //   'transcript' -> '_variation_effect_feature_cache' -> 'introns' -> 'slice'
    //   'transcript' -> 'gene'
    //   'transcript' -> 'slice'
    //   'transcript' -> '_trans_exon_array' -> []
    //   'transcript' -> 'translation' -> 'end_exon'
    //   'transcript' -> 'translation' -> 'start_exon'
    //   'transcript' -> 'translation' -> 'transcript'
    foreach (AbstractData ad in objectValue)
    {
        // undefined entries carry nothing to resolve
        if (DumperUtilities.IsUndefined(ad))
        {
            continue;
        }

        switch (ad.Key)
        {
            case GeneKey:
                var geneRef = DumperUtilities.IsReference(ad) ? ad as ReferenceKeyValue : null;
                if (geneRef != null)
                {
                    target.Gene = Gene.ParseReference(geneRef.Value, dataStore);
                }
                break;

            case SliceKey:
                var sliceRef = DumperUtilities.IsReference(ad) ? ad as ReferenceKeyValue : null;
                if (sliceRef != null)
                {
                    target.Slice = Slice.ParseReference(sliceRef.Value, dataStore);
                }
                break;

            case TransExonArrayKey:
                var exonEntries = ad as ListObjectKeyValue;
                if (exonEntries != null)
                {
                    Exon.ParseListReference(exonEntries.Values, target.TransExons, dataStore);
                }
                break;

            case TranslationKey:
                var translationEntry = ad as ObjectKeyValue;
                if (translationEntry != null)
                {
                    Translation.ParseReference(translationEntry.Value, target.Translation, dataStore);
                }
                break;

            case VariationEffectFeatureCacheKey:
                var cacheEntry = ad as ObjectKeyValue;
                if (cacheEntry != null)
                {
                    VariantEffectFeatureCache.ParseReference(cacheEntry.Value, target.VariantEffectCache, dataStore);
                }
                break;

            // every other key is ignored in this pass
        }
    }
}
/// <summary>
/// Parses a transcript object and appends the resulting
/// <c>DataStructures.VEP.Transcript</c> to <c>dataStore.Transcripts</c>.
/// </summary>
/// <param name="objectValue">The key/value entries of the transcript object.</param>
/// <param name="transcriptIndex">Not read by this method; kept so the signature stays unchanged for callers.</param>
/// <param name="dataStore">
/// The import data store. Supplies <c>CurrentReferenceIndex</c>, is forwarded to the nested
/// parsers, and receives the new transcript.
/// </param>
/// <exception cref="GeneralException">
/// Thrown when a key is not in <c>KnownKeys</c>, when a key has no handler here, or when the
/// exon array, translation or feature cache entry does not have the expected node type.
/// </exception>
public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
{
    var bioType = BioType.Unknown;
    var geneSymbolSource = GeneSymbolSource.Unknown; // HGNC

    SimpleInterval[] microRnas = null;
    DataStructures.VEP.Exon[] transExons = null;
    DataStructures.VEP.Gene gene = null;
    DataStructures.VEP.Translation translation = null;
    DataStructures.VEP.VariantEffectFeatureCache variantEffectCache = null;
    DataStructures.VEP.Slice slice = null;

    bool onReverseStrand = false;
    bool isCanonical = false;

    int compDnaCodingStart = -1;
    int compDnaCodingEnd = -1;
    int start = -1;
    int end = -1;
    byte version = 1;

    string ccdsId = null;
    string databaseId = null;
    string proteinId = null;
    string refSeqId = null;
    string geneStableId = null;
    string stableId = null;

    string geneSymbol = null; // DDX11L1
    int hgncId = -1;          // 37102

    // loop over all of the key/value pairs in the transcript object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: reject keys that this parser has never been told about
        if (!KnownKeys.Contains(ad.Key))
        {
            throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case CodingRegionStartKey:
            case CodingRegionEndKey:
            case CreatedDateKey:
            case DescriptionKey:
            case DisplayXrefKey:
            case ExternalDbKey:
            case ExternalDisplayNameKey:
            case ExternalNameKey:
            case ExternalStatusKey:
            case GenePhenotypeKey:
            case ModifiedDateKey:
            case SourceKey:
            case SwissProtKey:
            case TremblKey:
            case UniParcKey:
                // not used
                break;

            case AttributesKey:
                var attributesList = ad as ListObjectKeyValue;
                if (attributesList != null)
                {
                    microRnas = Attribute.ParseList(attributesList.Values);
                }
                break;

            case BiotypeKey:
                bioType = TranscriptUtilities.GetBiotype(ad);
                break;

            case CcdsKey:
                ccdsId = GetStringOrNull(ad);
                break;

            case CdnaCodingEndKey:
                compDnaCodingEnd = DumperUtilities.GetInt32(ad);
                break;

            case CdnaCodingStartKey:
                compDnaCodingStart = DumperUtilities.GetInt32(ad);
                break;

            case DbIdKey:
                databaseId = GetStringOrNull(ad);
                break;

            case EndKey:
                end = DumperUtilities.GetInt32(ad);
                break;

            case GeneHgncIdKey:
                var hgnc = DumperUtilities.GetString(ad);

                // strip the optional "HGNC:" prefix; ordinal comparison keeps this culture-independent
                if (hgnc != null && hgnc.StartsWith("HGNC:", System.StringComparison.Ordinal))
                {
                    hgnc = hgnc.Substring(5);
                }

                // the placeholder check runs after stripping, so "HGNC:" and "HGNC:-" also count as missing
                if (hgnc == "-" || hgnc == "")
                {
                    hgnc = null;
                }

                if (hgnc != null)
                {
                    // machine-readable identifier: parse with the invariant culture
                    hgncId = int.Parse(hgnc, System.Globalization.CultureInfo.InvariantCulture);
                }
                break;

            case GeneSymbolKey:
            case GeneHgncKey: // older key
                geneSymbol = GetStringOrNull(ad);
                break;

            case GeneSymbolSourceKey:
                geneSymbolSource = TranscriptUtilities.GetGeneSymbolSource(ad);
                break;

            case GeneKey:
                var geneNode = ad as ObjectKeyValue;
                if (geneNode != null)
                {
                    gene = Gene.Parse(geneNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case GeneStableIdKey:
                geneStableId = GetStringOrNull(ad);
                break;

            case IsCanonicalKey:
                isCanonical = DumperUtilities.GetBool(ad);
                break;

            case ProteinKey:
                proteinId = GetStringOrNull(ad);
                break;

            case RefseqKey:
                refSeqId = GetStringOrNull(ad);
                break;

            case SliceKey:
                var sliceNode = ad as ObjectKeyValue;
                if (sliceNode != null)
                {
                    slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                }
                break;

            case StableIdKey:
                stableId = GetStringOrNull(ad);
                break;

            case StartKey:
                start = DumperUtilities.GetInt32(ad);
                break;

            case StrandKey:
                onReverseStrand = TranscriptUtilities.GetStrand(ad);
                break;

            case TransExonArrayKey:
                var exonsList = ad as ListObjectKeyValue;
                if (exonsList != null)
                {
                    transExons = Exon.ParseList(exonsList.Values, dataStore);
                }
                else
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            case TranslationKey:
                var translationNode = ad as ObjectKeyValue;
                if (translationNode != null)
                {
                    translation = Translation.Parse(translationNode.Value, dataStore);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    translation = null;
                }
                else
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            case VariationEffectFeatureCacheKey:
                var cacheNode = ad as ObjectKeyValue;
                if (cacheNode == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }

                variantEffectCache = VariantEffectFeatureCache.Parse(cacheNode.Value, dataStore);
                break;

            case VersionKey:
                // NOTE(review): narrowing int -> byte cast; a value outside 0-255 is not preserved.
                // Confirm that versions always fit in a byte.
                version = (byte)DumperUtilities.GetInt32(ad);
                break;

            default:
                // a key that is in KnownKeys but has no handler here
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(bioType, transExons, gene, translation,
        variantEffectCache, slice, onReverseStrand, isCanonical, compDnaCodingStart, compDnaCodingEnd,
        dataStore.CurrentReferenceIndex, start, end, ccdsId, databaseId, proteinId, refSeqId, geneStableId,
        stableId, geneSymbol, geneSymbolSource, hgncId, version, microRnas));
}

/// <summary>
/// Reads the entry as a string via <c>DumperUtilities.GetString</c> and maps the
/// placeholder values "-" and "" to null.
/// </summary>
/// <param name="ad">The entry to read.</param>
/// <returns>The string value, or null when it is one of the placeholders (or already null).</returns>
private static string GetStringOrNull(AbstractData ad)
{
    string value = DumperUtilities.GetString(ad);
    return value == "-" || value == "" ? null : value;
}
/// <summary>
/// Builds a <c>DataStructures.VEP.Mapper</c> from the entries of an exon coordinate mapper object.
/// </summary>
/// <param name="objectValue">The key/value entries of the mapper object.</param>
/// <param name="dataStore">The import data store, forwarded to the pair parsers.</param>
/// <returns>The populated mapper.</returns>
/// <exception cref="GeneralException">
/// Thrown when a key is not in <c>KnownKeys</c>, when a key has no handler here, when either
/// coordinate system entry is not undefined, or when a pair entry is neither an object nor undefined.
/// </exception>
public static DataStructures.VEP.Mapper Parse(ObjectValue objectValue, ImportDataStore dataStore)
{
    var result = new DataStructures.VEP.Mapper();

    foreach (AbstractData ad in objectValue)
    {
        // refuse anything that is not a recognised key
        bool isKnownKey = KnownKeys.Contains(ad.Key);
        if (!isKnownKey)
        {
            throw new GeneralException($"Encountered an unknown key in the dumper mapper object: {ad.Key}");
        }

        switch (ad.Key)
        {
            case FromCoordSystemKey:
                // this entry is only ever expected to be undefined
                if (DumperUtilities.IsUndefined(ad))
                {
                    break;
                }

                throw new GeneralException("Found an unexpected value in FromCoordSystemKey");

            case FromNameKey:
                result.FromType = DumperUtilities.GetString(ad);
                break;

            case IsSortedKey:
                result.IsSorted = DumperUtilities.GetBool(ad);
                break;

            case PairCodingDnaKey:
                var codingDnaEntry = ad as ObjectKeyValue;
                if (codingDnaEntry == null)
                {
                    // only an undefined entry may stand in for the object
                    if (!DumperUtilities.IsUndefined(ad))
                    {
                        throw new GeneralException(
                            $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }

                    result.PairCodingDna = null;
                }
                else
                {
                    result.PairCodingDna = PairCodingDna.Parse(codingDnaEntry.Value, dataStore);
                }
                break;

            case PairCountKey:
                result.PairCount = DumperUtilities.GetInt32(ad);
                break;

            case PairGenomicKey:
                var genomicEntry = ad as ObjectKeyValue;
                if (genomicEntry == null)
                {
                    // only an undefined entry may stand in for the object
                    if (!DumperUtilities.IsUndefined(ad))
                    {
                        throw new GeneralException(
                            $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }

                    result.PairGenomic = null;
                }
                else
                {
                    result.PairGenomic = PairGenomic.Parse(genomicEntry.Value, dataStore);
                }
                break;

            case ToCoordSystemKey:
                // this entry is only ever expected to be undefined
                if (DumperUtilities.IsUndefined(ad))
                {
                    break;
                }

                throw new GeneralException("Found an unexpected value in ToCoordSystemKey");

            case ToNameKey:
                result.ToType = DumperUtilities.GetString(ad);
                break;

            default:
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return result;
}
/// <summary>
/// Parses a protein function predictions object into a new
/// <c>DataStructures.VEP.ProteinFunctionPredictions</c>.
/// </summary>
/// <param name="objectValue">The key/value entries of the protein function predictions object.</param>
/// <returns>
/// The populated predictions object. <c>PolyPhen</c> is taken from the HumVar entry and
/// <c>Sift</c> from the SIFT entry; each is set to null when its entry is undefined and left
/// untouched when its entry is a reference.
/// </returns>
/// <exception cref="GeneralException">
/// Thrown when a key is not in <c>KnownKeys</c>, when a key has no handler here, when the
/// plain PolyPhen entry is anything other than undefined, or when a HumVar/SIFT entry is not
/// an object, undefined or a reference.
/// </exception>
public static DataStructures.VEP.ProteinFunctionPredictions Parse(ObjectValue objectValue)
{
    var predictions = new DataStructures.VEP.ProteinFunctionPredictions();

    // loop over all of the key/value pairs in the protein function predictions object
    foreach (AbstractData ad in objectValue)
    {
        // sanity check: reject keys that this parser has never been told about
        if (!KnownKeys.Contains(ad.Key))
        {
            // the message names this object (it previously said "mapper", a copy-paste slip)
            throw new GeneralException(
                $"Encountered an unknown key in the dumper protein function predictions object: {ad.Key}");
        }

        // handle each key
        switch (ad.Key)
        {
            case PolyPhenHumDivKey:
                // not used by default
                break;

            case PolyPhenKey:
                // only an undefined entry is tolerated for this key
                if (!DumperUtilities.IsUndefined(ad))
                {
                    throw new GeneralException($"Could not handle the PolyPhen key: [{ad.GetType()}]");
                }
                break;

            case PolyPhenHumVarKey:
                // used by default
                var polyPhenHumVarNode = ad as ObjectKeyValue;

                if (polyPhenHumVarNode != null)
                {
                    predictions.PolyPhen = PolyPhen.Parse(polyPhenHumVarNode.Value);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    predictions.PolyPhen = null;
                }
                else if (DumperUtilities.IsReference(ad))
                {
                    // skip references for now
                }
                else
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            case SiftKey:
                var siftNode = ad as ObjectKeyValue;

                if (siftNode != null)
                {
                    predictions.Sift = Sift.Parse(siftNode.Value);
                }
                else if (DumperUtilities.IsUndefined(ad))
                {
                    predictions.Sift = null;
                }
                else if (DumperUtilities.IsReference(ad))
                {
                    // skip references for now
                }
                else
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                }
                break;

            default:
                // a key that is in KnownKeys but has no handler here
                throw new GeneralException($"Unknown key found: {ad.Key}");
        }
    }

    return predictions;
}