Beispiel #1
0
        /// <summary>
        /// parses the relevant data from each pair genomic object
        /// </summary>
        public static DataStructures.VEP.PairGenomic Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var pairGenomic = new DataStructures.VEP.PairGenomic();

            // loop over all of the key/value pairs in the pair genomic object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the pair genomic object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case GenomicKey:
                    var genomicNode = ad as ListObjectKeyValue;
                    if (genomicNode != null)
                    {
                        pairGenomic.Genomic = MapperPair.ParseList(genomicNode.Values, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        pairGenomic.Genomic = null;
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return(pairGenomic);
        }
Beispiel #2
0
        /// <summary>
        /// parses the relevant data from each variant effect feature cache
        /// </summary>
        public static DataStructures.VEP.VariantEffectFeatureCache Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var cache = new DataStructures.VEP.VariantEffectFeatureCache();

            // loop over all of the key/value pairs in the cache object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException(
                              $"Encountered an unknown key in the dumper variant effect feature cache object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case SelenocysteinesKey:
                case ThreePrimeUtrKey:
                case SeqEditsKey:
                case CodonTableKey:
                case ProteinFeaturesKey:
                    // not used
                    break;

                case IntronsKey:
                    var intronsList = ad as ListObjectKeyValue;
                    if (intronsList != null)
                    {
                        cache.Introns = Intron.ParseList(intronsList.Values, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        cache.Introns = null;
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case MapperKey:
                    var mapperNode = ad as ObjectKeyValue;
                    if (mapperNode != null)
                    {
                        cache.Mapper = TranscriptMapper.Parse(mapperNode.Value, dataStore);
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case PeptideKey:
                    cache.Peptide = DumperUtilities.GetString(ad);
                    break;

                case ProteinFunctionPredictionsKey:
                    var predictionsNode = ad as ObjectKeyValue;
                    if (predictionsNode != null)
                    {
                        cache.ProteinFunctionPredictions = ProteinFunctionPredictions.Parse(predictionsNode.Value);
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }

                    break;

                case SortedExonsKey:
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList != null)
                    {
                        cache.Exons = Exon.ParseList(exonsList.Values, dataStore);
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case TranslateableSeqKey:
                    cache.TranslateableSeq = DumperUtilities.GetString(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return(cache);
        }
Beispiel #3
0
        /// <summary>
        /// parses the relevant data from each transcript
        /// </summary>
        public static void ParseReferences(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            // Console.WriteLine("*** ParseReferences {0} / {1} ***", transcriptIndex + 1, _tempTranscripts.Count);
            var transcript = dataStore.Transcripts[transcriptIndex];

            // loop over all of the key/value pairs in the transcript object
            foreach (AbstractData ad in objectValue)
            {
                // skip undefined keys
                if (DumperUtilities.IsUndefined(ad))
                {
                    continue;
                }

                // handle each key
                ReferenceKeyValue referenceKeyValue;

                // references found in:
                // 'transcript' -> '_variation_effect_feature_cache' -> 'introns' -> 'slice' has references
                // 'transcript' -> 'gene' has references
                // 'transcript' -> 'slice' has references
                // 'transcript' -> '_trans_exon_array' -> [] has references
                // 'transcript' -> 'translation'-> 'end_exon' has references
                // 'transcript' -> 'translation'-> 'start_exon' has references
                // 'transcript' -> 'translation'-> 'transcript' has references

                switch (ad.Key)
                {
                case GeneKey:
                    // works well
                    if (DumperUtilities.IsReference(ad))
                    {
                        referenceKeyValue = ad as ReferenceKeyValue;
                        if (referenceKeyValue != null)
                        {
                            transcript.Gene = Gene.ParseReference(referenceKeyValue.Value, dataStore);
                        }
                    }
                    break;

                case SliceKey:
                    if (DumperUtilities.IsReference(ad))
                    {
                        referenceKeyValue = ad as ReferenceKeyValue;
                        if (referenceKeyValue != null)
                        {
                            transcript.Slice = Slice.ParseReference(referenceKeyValue.Value, dataStore);
                        }
                    }
                    break;

                case TransExonArrayKey:
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList != null)
                    {
                        Exon.ParseListReference(exonsList.Values, transcript.TransExons, dataStore);
                    }
                    break;

                case TranslationKey:
                    var translationNode = ad as ObjectKeyValue;
                    if (translationNode != null)
                    {
                        Translation.ParseReference(translationNode.Value, transcript.Translation, dataStore);
                    }
                    break;

                case VariationEffectFeatureCacheKey:
                    var cacheNode = ad as ObjectKeyValue;
                    if (cacheNode != null)
                    {
                        VariantEffectFeatureCache.ParseReference(cacheNode.Value, transcript.VariantEffectCache, dataStore);
                    }
                    break;
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// parses the relevant data from each transcript
        /// </summary>
        public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            // Console.WriteLine("*** Parse {0} ***", transcriptIndex + 1);

            var bioType          = BioType.Unknown;
            var geneSymbolSource = GeneSymbolSource.Unknown; // HGNC

            SimpleInterval[]               microRnas   = null;
            DataStructures.VEP.Exon[]      transExons  = null;
            DataStructures.VEP.Gene        gene        = null;
            DataStructures.VEP.Translation translation = null;
            DataStructures.VEP.VariantEffectFeatureCache variantEffectCache = null;
            DataStructures.VEP.Slice slice = null;

            bool onReverseStrand = false;
            bool isCanonical     = false;

            int compDnaCodingStart = -1;
            int compDnaCodingEnd   = -1;

            int  start   = -1;
            int  end     = -1;
            byte version = 1;

            string ccdsId       = null;
            string databaseId   = null;
            string proteinId    = null;
            string refSeqId     = null;
            string geneStableId = null;
            string stableId     = null;

            string geneSymbol = null; // DDX11L1
            int    hgncId     = -1;   // 37102

            // loop over all of the key/value pairs in the transcript object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case CodingRegionStartKey:
                case CodingRegionEndKey:
                case CreatedDateKey:
                case DescriptionKey:
                case DisplayXrefKey:
                case ExternalDbKey:
                case ExternalDisplayNameKey:
                case ExternalNameKey:
                case ExternalStatusKey:
                case GenePhenotypeKey:
                case ModifiedDateKey:
                case SourceKey:
                case SwissProtKey:
                case TremblKey:
                case UniParcKey:
                    // not used
                    break;

                case AttributesKey:
                    var attributesList = ad as ListObjectKeyValue;
                    if (attributesList != null)
                    {
                        microRnas = Attribute.ParseList(attributesList.Values);
                    }
                    break;

                case BiotypeKey:
                    bioType = TranscriptUtilities.GetBiotype(ad);
                    break;

                case CcdsKey:
                    ccdsId = DumperUtilities.GetString(ad);
                    if (ccdsId == "-" || ccdsId == "")
                    {
                        ccdsId = null;
                    }
                    break;

                case CdnaCodingEndKey:
                    compDnaCodingEnd = DumperUtilities.GetInt32(ad);
                    break;

                case CdnaCodingStartKey:
                    compDnaCodingStart = DumperUtilities.GetInt32(ad);
                    break;

                case DbIdKey:
                    databaseId = DumperUtilities.GetString(ad);
                    if (databaseId == "-" || databaseId == "")
                    {
                        databaseId = null;
                    }
                    break;

                case EndKey:
                    end = DumperUtilities.GetInt32(ad);
                    break;

                case GeneHgncIdKey:
                    var hgnc = DumperUtilities.GetString(ad);
                    if (hgnc != null && hgnc.StartsWith("HGNC:"))
                    {
                        hgnc = hgnc.Substring(5);
                    }
                    if (hgnc == "-" || hgnc == "")
                    {
                        hgnc = null;
                    }

                    if (hgnc != null)
                    {
                        hgncId = int.Parse(hgnc);
                    }
                    break;

                case GeneSymbolKey:
                case GeneHgncKey:     // older key
                    geneSymbol = DumperUtilities.GetString(ad);
                    if (geneSymbol == "-" || geneSymbol == "")
                    {
                        geneSymbol = null;
                    }
                    break;

                case GeneSymbolSourceKey:
                    geneSymbolSource = TranscriptUtilities.GetGeneSymbolSource(ad);
                    break;

                case GeneKey:
                    var geneNode = ad as ObjectKeyValue;
                    if (geneNode != null)
                    {
                        gene = Gene.Parse(geneNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case GeneStableIdKey:
                    geneStableId = DumperUtilities.GetString(ad);
                    if (geneStableId == "-" || geneStableId == "")
                    {
                        geneStableId = null;
                    }
                    break;

                case IsCanonicalKey:
                    isCanonical = DumperUtilities.GetBool(ad);
                    break;

                case ProteinKey:
                    proteinId = DumperUtilities.GetString(ad);
                    if (proteinId == "-" || proteinId == "")
                    {
                        proteinId = null;
                    }
                    break;

                case RefseqKey:
                    refSeqId = DumperUtilities.GetString(ad);
                    if (refSeqId == "-" || refSeqId == "")
                    {
                        refSeqId = null;
                    }
                    break;

                case SliceKey:
                    var sliceNode = ad as ObjectKeyValue;
                    if (sliceNode != null)
                    {
                        slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case StableIdKey:
                    stableId = DumperUtilities.GetString(ad);
                    if (stableId == "-" || stableId == "")
                    {
                        stableId = null;
                    }
                    break;

                case StartKey:
                    start = DumperUtilities.GetInt32(ad);
                    break;

                case StrandKey:
                    onReverseStrand = TranscriptUtilities.GetStrand(ad);
                    break;

                case TransExonArrayKey:
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList != null)
                    {
                        transExons = Exon.ParseList(exonsList.Values, dataStore);
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case TranslationKey:
                    var translationNode = ad as ObjectKeyValue;
                    if (translationNode != null)
                    {
                        translation = Translation.Parse(translationNode.Value, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        translation = null;
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case VariationEffectFeatureCacheKey:
                    var cacheNode = ad as ObjectKeyValue;
                    if (cacheNode == null)
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    variantEffectCache = VariantEffectFeatureCache.Parse(cacheNode.Value, dataStore);
                    break;

                case VersionKey:
                    version = (byte)DumperUtilities.GetInt32(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(bioType, transExons, gene, translation, variantEffectCache, slice,
                                                                        onReverseStrand, isCanonical, compDnaCodingStart, compDnaCodingEnd, dataStore.CurrentReferenceIndex, start, end,
                                                                        ccdsId, databaseId, proteinId, refSeqId, geneStableId, stableId, geneSymbol, geneSymbolSource, hgncId, version,
                                                                        microRnas));
        }
Beispiel #5
0
        /// <summary>
        /// parses the relevant data from each exon coordinate mapper object
        /// </summary>
        public static DataStructures.VEP.Mapper Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var mapper = new DataStructures.VEP.Mapper();

            // loop over all of the key/value pairs in the exon coordinate mapper object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper mapper object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case FromCoordSystemKey:
                    if (!DumperUtilities.IsUndefined(ad))
                    {
                        throw new GeneralException("Found an unexpected value in FromCoordSystemKey");
                    }
                    break;

                case FromNameKey:
                    mapper.FromType = DumperUtilities.GetString(ad);
                    break;

                case IsSortedKey:
                    mapper.IsSorted = DumperUtilities.GetBool(ad);
                    break;

                case PairCodingDnaKey:
                    var pairCodingDnaNode = ad as ObjectKeyValue;
                    if (pairCodingDnaNode != null)
                    {
                        mapper.PairCodingDna = PairCodingDna.Parse(pairCodingDnaNode.Value, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        mapper.PairCodingDna = null;
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case PairCountKey:
                    mapper.PairCount = DumperUtilities.GetInt32(ad);
                    break;

                case PairGenomicKey:
                    var pairGenomicNode = ad as ObjectKeyValue;
                    if (pairGenomicNode != null)
                    {
                        mapper.PairGenomic = PairGenomic.Parse(pairGenomicNode.Value, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        mapper.PairGenomic = null;
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case ToCoordSystemKey:
                    if (!DumperUtilities.IsUndefined(ad))
                    {
                        throw new GeneralException("Found an unexpected value in ToCoordSystemKey");
                    }
                    break;

                case ToNameKey:
                    mapper.ToType = DumperUtilities.GetString(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return(mapper);
        }
        /// <summary>
        /// parses the relevant data from each protein function predictions object
        /// </summary>
        public static DataStructures.VEP.ProteinFunctionPredictions Parse(ObjectValue objectValue)
        {
            var predictions = new DataStructures.VEP.ProteinFunctionPredictions();

            // loop over all of the key/value pairs in the protein function predictions object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper mapper object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case PolyPhenHumDivKey:
                    // not used by default
                    break;

                case PolyPhenKey:
                    if (DumperUtilities.IsUndefined(ad))
                    {
                        // do nothing
                    }
                    else
                    {
                        throw new GeneralException($"Could not handle the PolyPhen key: [{ad.GetType()}]");
                    }
                    break;

                case PolyPhenHumVarKey:
                    // used by default
                    var polyPhenHumVarNode = ad as ObjectKeyValue;
                    if (polyPhenHumVarNode != null)
                    {
                        predictions.PolyPhen = PolyPhen.Parse(polyPhenHumVarNode.Value);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        predictions.PolyPhen = null;
                    }
                    else if (DumperUtilities.IsReference(ad))
                    {
                        // skip references for now
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case SiftKey:
                    var siftNode = ad as ObjectKeyValue;
                    if (siftNode != null)
                    {
                        predictions.Sift = Sift.Parse(siftNode.Value);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        predictions.Sift = null;
                    }
                    else if (DumperUtilities.IsReference(ad))
                    {
                        // skip references for now
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return(predictions);
        }