Exemple #1
0
        /// <summary>
        /// Resolves a gene reference string to the gene of an already stored transcript.
        /// </summary>
        /// <param name="reference">reference string; it must match ReferenceRegex, whose first group holds the transcript index</param>
        /// <param name="dataStore">data store whose Transcripts list is indexed by the captured transcript index</param>
        /// <returns>the Gene of the referenced transcript</returns>
        /// <exception cref="GeneralException">the string does not match, the index is not an integer, or the index is out of range</exception>
        public static DataStructures.VEP.Gene ParseReference(string reference, ImportDataStore dataStore)
        {
            var match = ReferenceRegex.Match(reference);
            if (!match.Success)
            {
                throw new GeneralException(
                    $"Unable to use the regular expression on the gene reference string: [{reference}]");
            }

            // the first capture group carries the transcript index
            string indexText = match.Groups[1].Value;
            int index;
            if (!int.TryParse(indexText, out index))
            {
                throw new GeneralException(
                    $"Unable to convert the transcript index from a string to an integer: [{indexText}]");
            }

            // the index must address an existing transcript
            var transcripts = dataStore.Transcripts;
            bool inRange = index >= 0 && index < transcripts.Count;
            if (!inRange)
            {
                throw new GeneralException(
                    $"Unable to link the gene reference: transcript index: [{index}], current # of transcripts: [{transcripts.Count}]");
            }

            return transcripts[index].Gene;
        }
Exemple #2
0
        /// <summary>
        /// Resolves an exon reference string to an exon.
        /// The string is tried against TransExonArrayReferenceRegex, SortedExonsReferenceRegex and
        /// TranslationReferenceRegex, in that order; the first pattern that matches decides which
        /// helper resolves the reference.
        /// </summary>
        /// <param name="reference">the reference string to resolve</param>
        /// <param name="dataStore">data store handed to the matching helper</param>
        /// <returns>the exon returned by the matching helper</returns>
        /// <exception cref="GeneralException">none of the three patterns matches</exception>
        public static DataStructures.VEP.Exon ParseReference(string reference, ImportDataStore dataStore)
        {
            var match = TransExonArrayReferenceRegex.Match(reference);
            if (match.Success)
            {
                return ParseTransExonArrayReference(match, dataStore);
            }

            match = SortedExonsReferenceRegex.Match(reference);
            if (match.Success)
            {
                return ParseSortedExonsReference(match, dataStore);
            }

            match = TranslationReferenceRegex.Match(reference);
            if (!match.Success)
            {
                throw new GeneralException($"Unable to use the regular expression on the exon translation reference string: [{reference}]");
            }

            return ParseTranslationReference(match, dataStore);
        }
        /// <summary>
        /// Appends the merged regulatory regions computed from <paramref name="originalDataStore"/>
        /// to <paramref name="mergedDataStore"/> and reports both feature counts to the statistics.
        /// </summary>
        /// <param name="originalDataStore">source passed to GetMergedRegulatoryRegions</param>
        /// <param name="mergedDataStore">store whose RegulatoryFeatures receives the merged values</param>
        /// <param name="statistics">incremented with the merged count and the original count, in that order</param>
        public void Merge(ImportDataStore originalDataStore, ImportDataStore mergedDataStore, FeatureStatistics statistics)
        {
            var mergedFeatures = GetMergedRegulatoryRegions(originalDataStore).Values.ToList();
            mergedDataStore.RegulatoryFeatures.AddRange(mergedFeatures);

            // counts are read after the append, so the first one includes the new entries
            var mergedCount   = mergedDataStore.RegulatoryFeatures.Count;
            var originalCount = originalDataStore.RegulatoryFeatures.Count;
            statistics.Increment(mergedCount, originalCount);
        }
        /// <summary>
        /// Appends the merged transcripts computed from <paramref name="originalDataStore"/>
        /// to <paramref name="mergedDataStore"/> and reports both transcript counts to the statistics.
        /// </summary>
        /// <param name="originalDataStore">source passed to GetMergedTranscripts</param>
        /// <param name="mergedDataStore">store whose Transcripts list receives the merged values</param>
        /// <param name="statistics">incremented with the merged count and the original count, in that order</param>
        public void Merge(ImportDataStore originalDataStore, ImportDataStore mergedDataStore,
                          FeatureStatistics statistics)
        {
            var mergedTranscripts = GetMergedTranscripts(originalDataStore).Values.ToList();
            mergedDataStore.Transcripts.AddRange(mergedTranscripts);

            // counts are read after the append, so the first one includes the new entries
            var mergedCount   = mergedDataStore.Transcripts.Count;
            var originalCount = originalDataStore.Transcripts.Count;
            statistics.Increment(mergedCount, originalCount);
        }
Exemple #5
0
        /// <summary>
        /// Creates a parser for the given transcript data source and sets up its
        /// three data stores and one statistics counter per feature category.
        /// </summary>
        /// <param name="ds">assigned to the static ImportDataStore.TranscriptSource before any data store is created</param>
        public VepCacheParser(TranscriptDataSource ds)
        {
            // static member: assigned through the type, not through an instance
            ImportDataStore.TranscriptSource = ds;

            // data stores
            _uniqueDataStore = new ImportDataStore();
            _nonUniquedataStore = new ImportDataStore();
            _tempDataStore = new ImportDataStore();

            // per-category statistics; the string is passed to the FeatureStatistics constructor
            _regulatoryStatistics = new FeatureStatistics("Regulatory");
            _transcriptStatistics = new FeatureStatistics("Transcripts");
            _geneStatistics = new FeatureStatistics("Genes");
            _intronStatistics = new FeatureStatistics("Introns");
            _exonStatistics = new FeatureStatistics("Exons");
            _mirnaStatistics = new FeatureStatistics("miRNAs");
            _siftStatistics = new FeatureStatistics("SIFT matrices");
            _polyphenStatistics = new FeatureStatistics("PolyPhen matrices");
            _cdnaStatistics = new FeatureStatistics("cDNA seqs");
            _peptideStatistics = new FeatureStatistics("Peptide seqs");
        }
Exemple #6
0
        /// <summary>
        /// Resolves a PolyPhen reference string to the PolyPhen object of an already stored transcript.
        /// </summary>
        /// <param name="reference">reference string; it must match PolyPhenReferenceRegex, whose first group holds the transcript index</param>
        /// <param name="dataStore">data store whose Transcripts list is indexed by the captured transcript index</param>
        /// <returns>the PolyPhen object found under the transcript's VariantEffectCache.ProteinFunctionPredictions</returns>
        /// <exception cref="GeneralException">the string does not match, the index is not an integer, or the index is out of range</exception>
        public static DataStructures.VEP.PolyPhen ParseReference(string reference, ImportDataStore dataStore)
        {
            var polyPhenReferenceMatch = PolyPhenReferenceRegex.Match(reference);

            // A failed match yields empty groups; without this check the caller would get a
            // misleading "unable to convert ... []" error that hides the offending reference string.
            if (!polyPhenReferenceMatch.Success)
            {
                throw new GeneralException(
                          $"Unable to use the regular expression on the PolyPhen reference string: [{reference}]");
            }

            int transcriptIndex;

            if (!int.TryParse(polyPhenReferenceMatch.Groups[1].Value, out transcriptIndex))
            {
                throw new GeneralException(
                          $"Unable to convert the transcript index from a string to an integer: [{polyPhenReferenceMatch.Groups[1].Value}]");
            }

            // sanity check: make sure we have at least that many transcripts in our list
            if (transcriptIndex < 0 || transcriptIndex >= dataStore.Transcripts.Count)
            {
                throw new GeneralException(
                          $"Unable to link the PolyPhen reference: transcript index: [{transcriptIndex}], current # of transcripts: [{dataStore.Transcripts.Count}]");
            }

            return(dataStore.Transcripts[transcriptIndex].VariantEffectCache.ProteinFunctionPredictions.PolyPhen);
        }
Exemple #7
0
        /// <summary>
        /// Builds a pair genomic object from the key/value pairs of a dumper object.
        /// </summary>
        /// <param name="objectValue">the object whose entries are inspected one by one</param>
        /// <param name="dataStore">passed through to MapperPair.ParseList</param>
        /// <returns>the populated pair genomic object; Genomic is null when the entry is undefined</returns>
        /// <exception cref="GeneralException">a key is unknown, or the genomic entry is neither a list nor undefined</exception>
        public static DataStructures.VEP.PairGenomic Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var result = new DataStructures.VEP.PairGenomic();

            foreach (AbstractData ad in objectValue)
            {
                // reject keys that are not in the known-key set
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the pair genomic object: {ad.Key}");
                }

                switch (ad.Key)
                {
                    case GenomicKey:
                    {
                        var listNode = ad as ListObjectKeyValue;
                        if (listNode == null)
                        {
                            // only an undefined entry is tolerated in place of a list
                            if (!DumperUtilities.IsUndefined(ad))
                            {
                                throw new GeneralException(
                                    $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                            }

                            result.Genomic = null;
                            break;
                        }

                        result.Genomic = MapperPair.ParseList(listNode.Values, dataStore);
                        break;
                    }

                    default:
                        throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return result;
        }
Exemple #8
0
        /// <summary>
        /// Fills the slots of <paramref name="exons"/> whose list entry is a reference
        /// with the exon that the reference resolves to. Other slots are left untouched.
        /// </summary>
        /// <param name="abstractDataList">list of exon nodes; position i corresponds to exons[i]</param>
        /// <param name="exons">array that receives the resolved exons</param>
        /// <param name="dataStore">passed through to ParseReference</param>
        public static void ParseListReference(List <AbstractData> abstractDataList, DataStructures.VEP.Exon[] exons, ImportDataStore dataStore)
        {
            for (int i = 0; i < abstractDataList.Count; i++)
            {
                var node = abstractDataList[i];

                // only entries flagged as references that are also ReferenceStringValue nodes are resolved
                var referenceNode = DumperUtilities.IsReference(node)
                    ? node as ReferenceStringValue
                    : null;

                if (referenceNode == null)
                {
                    continue;
                }

                exons[i] = ParseReference(referenceNode.Value, dataStore);
            }
        }
Exemple #9
0
        /// <summary>
        /// Converts a list of exon nodes into an array of exons of the same length.
        /// Entries that are references are skipped, so their slots keep the default value.
        /// </summary>
        /// <param name="abstractDataList">the exon nodes to convert</param>
        /// <param name="dataStore">supplies CurrentReferenceIndex for each parsed exon</param>
        /// <returns>the exon array; position i corresponds to abstractDataList[i]</returns>
        /// <exception cref="GeneralException">a non-reference entry is not an ObjectValue</exception>
        public static DataStructures.VEP.Exon[] ParseList(List <AbstractData> abstractDataList, ImportDataStore dataStore)
        {
            int count = abstractDataList.Count;
            var result = new DataStructures.VEP.Exon[count];

            for (int i = 0; i < count; i++)
            {
                var node = abstractDataList[i];

                // references are not handled here
                if (DumperUtilities.IsReference(node))
                {
                    continue;
                }

                var exonObject = node as ObjectValue;
                if (exonObject == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectValue: [{node.GetType()}]");
                }

                // every parsed exon is stored as-is; no lookup against previously seen exons is done
                result[i] = Parse(exonObject, dataStore.CurrentReferenceIndex);
            }

            return result;
        }
Exemple #10
0
        /// <summary>
        /// Resolves a translation reference match to the start or end exon of a transcript's translation.
        /// </summary>
        /// <param name="referenceMatch">match whose group 1 holds the transcript index and group 2 the exon key</param>
        /// <param name="dataStore">data store whose Transcripts list is indexed by the transcript index</param>
        /// <returns>Translation.EndExon or Translation.StartExon of the referenced transcript, depending on the exon key</returns>
        /// <exception cref="GeneralException">the index is not an integer or out of range, or the exon key is neither the start nor the end key</exception>
        private static DataStructures.VEP.Exon ParseTranslationReference(Match referenceMatch, ImportDataStore dataStore)
        {
            string indexText = referenceMatch.Groups[1].Value;
            int index;
            if (!int.TryParse(indexText, out index))
            {
                throw new GeneralException(
                    $"Unable to convert the transcript index from a string to an integer: [{indexText}]");
            }

            // the index must address an existing transcript
            var transcripts = dataStore.Transcripts;
            bool inRange = index >= 0 && index < transcripts.Count;
            if (!inRange)
            {
                throw new GeneralException(
                    $"Unable to link the exon reference: transcript index: [{index}], current # of transcripts: [{transcripts.Count}]");
            }

            var transcript = transcripts[index];
            string exonKey = referenceMatch.Groups[2].Value;

            switch (exonKey)
            {
                case Translation.EndExonKey:
                    return transcript.Translation.EndExon;

                case Translation.StartExonKey:
                    return transcript.Translation.StartExon;

                default:
                    throw new GeneralException($"Unable to determine the correct exon translation to use: {exonKey}");
            }
        }
Exemple #11
0
        /// <summary>
        /// Resolves a trans-exon-array reference match to an entry of a transcript's TransExons array.
        /// </summary>
        /// <param name="referenceMatch">match whose group 1 holds the transcript index and group 2 the exon index</param>
        /// <param name="dataStore">data store whose Transcripts list is indexed by the transcript index</param>
        /// <returns>the exon at the given position of the referenced transcript's TransExons array</returns>
        /// <exception cref="GeneralException">either index is not an integer or is out of range</exception>
        private static DataStructures.VEP.Exon ParseTransExonArrayReference(Match referenceMatch, ImportDataStore dataStore)
        {
            // first level: which transcript
            string transcriptText = referenceMatch.Groups[1].Value;
            int tIndex;
            if (!int.TryParse(transcriptText, out tIndex))
            {
                throw new GeneralException(
                    $"Unable to convert the transcript index from a string to an integer: [{transcriptText}]");
            }

            var transcripts = dataStore.Transcripts;
            if (!(tIndex >= 0 && tIndex < transcripts.Count))
            {
                throw new GeneralException(
                    $"Unable to link the exon reference: transcript index: [{tIndex}], current # of transcripts: [{transcripts.Count}]");
            }

            // second level: which exon inside that transcript
            string exonText = referenceMatch.Groups[2].Value;
            int eIndex;
            if (!int.TryParse(exonText, out eIndex))
            {
                throw new GeneralException(
                    $"Unable to convert the exon index from a string to an integer: [{exonText}]");
            }

            var transcript = transcripts[tIndex];
            if (eIndex < 0 || eIndex >= transcript.TransExons.Length)
            {
                throw new GeneralException(
                    $"Unable to link the exon reference: exon index: [{eIndex}], current # of exons: [{transcript.TransExons.Length}]");
            }

            return transcript.TransExons[eIndex];
        }
Exemple #12
0
        /// <summary>
        /// Walks the entries of a mapper object and forwards the pair coding DNA and pair genomic
        /// entries to their own ParseReference routines. All other entries are ignored.
        /// </summary>
        /// <param name="objectValue">the mapper object whose entries are inspected</param>
        /// <param name="mapper">supplies the PairCodingDna and PairGenomic targets handed to the sub-parsers</param>
        /// <param name="dataStore">passed through to the sub-parsers</param>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Mapper mapper, ImportDataStore dataStore)
        {
            foreach (AbstractData ad in objectValue)
            {
                // both handled keys require an ObjectKeyValue node; anything else is skipped silently
                var node = ad as ObjectKeyValue;
                if (node == null)
                {
                    continue;
                }

                switch (ad.Key)
                {
                    case PairCodingDnaKey:
                        PairCodingDna.ParseReference(node.Value, mapper.PairCodingDna, dataStore);
                        break;

                    case PairGenomicKey:
                        PairGenomic.ParseReference(node.Value, mapper.PairGenomic, dataStore);
                        break;
                }
            }
        }
Exemple #13
0
        /// <summary>
        /// Resolves the references inside one transcript object and stores the results on the
        /// transcript at <paramref name="transcriptIndex"/> in the data store.
        /// </summary>
        /// <remarks>
        /// Handled keys: gene and slice (only when the entry is a reference), the trans exon array,
        /// the translation and the variation effect feature cache. Undefined entries and entries
        /// whose node type does not fit are skipped silently.
        /// </remarks>
        /// <param name="objectValue">the transcript object whose entries are inspected</param>
        /// <param name="transcriptIndex">position of the target transcript in dataStore.Transcripts</param>
        /// <param name="dataStore">holds the transcripts and is passed through to the sub-parsers</param>
        public static void ParseReferences(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            var transcript = dataStore.Transcripts[transcriptIndex];

            foreach (AbstractData ad in objectValue)
            {
                if (DumperUtilities.IsUndefined(ad))
                {
                    continue;
                }

                switch (ad.Key)
                {
                    case GeneKey:
                    {
                        var geneReference = DumperUtilities.IsReference(ad) ? ad as ReferenceKeyValue : null;
                        if (geneReference != null)
                        {
                            transcript.Gene = Gene.ParseReference(geneReference.Value, dataStore);
                        }
                        break;
                    }

                    case SliceKey:
                    {
                        var sliceReference = DumperUtilities.IsReference(ad) ? ad as ReferenceKeyValue : null;
                        if (sliceReference != null)
                        {
                            transcript.Slice = Slice.ParseReference(sliceReference.Value, dataStore);
                        }
                        break;
                    }

                    case TransExonArrayKey:
                    {
                        var exonNodes = ad as ListObjectKeyValue;
                        if (exonNodes != null)
                        {
                            Exon.ParseListReference(exonNodes.Values, transcript.TransExons, dataStore);
                        }
                        break;
                    }

                    case TranslationKey:
                    {
                        var translationObject = ad as ObjectKeyValue;
                        if (translationObject != null)
                        {
                            Translation.ParseReference(translationObject.Value, transcript.Translation, dataStore);
                        }
                        break;
                    }

                    case VariationEffectFeatureCacheKey:
                    {
                        var cacheObject = ad as ObjectKeyValue;
                        if (cacheObject != null)
                        {
                            VariantEffectFeatureCache.ParseReference(cacheObject.Value, transcript.VariantEffectCache, dataStore);
                        }
                        break;
                    }
                }
            }
        }
Exemple #14
0
        /// <summary>
        /// Builds a variant effect feature cache from the key/value pairs of a dumper object.
        /// </summary>
        /// <param name="objectValue">the object whose entries are inspected one by one</param>
        /// <param name="dataStore">passed through to the intron, mapper and exon parsers</param>
        /// <returns>the populated cache; Introns is null when the introns entry is undefined</returns>
        /// <exception cref="GeneralException">a key is unknown, or an entry does not have the node type its key requires</exception>
        public static DataStructures.VEP.VariantEffectFeatureCache Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var result = new DataStructures.VEP.VariantEffectFeatureCache();

            foreach (AbstractData ad in objectValue)
            {
                // reject keys that are not in the known-key set
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException(
                        $"Encountered an unknown key in the dumper variant effect feature cache object: {ad.Key}");
                }

                switch (ad.Key)
                {
                    // recognised but deliberately ignored
                    case SelenocysteinesKey:
                    case ThreePrimeUtrKey:
                    case SeqEditsKey:
                    case CodonTableKey:
                    case ProteinFeaturesKey:
                        break;

                    case IntronsKey:
                    {
                        var intronNodes = ad as ListObjectKeyValue;
                        if (intronNodes == null)
                        {
                            // only an undefined entry is tolerated in place of a list
                            if (!DumperUtilities.IsUndefined(ad))
                            {
                                throw new GeneralException(
                                    $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                            }

                            result.Introns = null;
                            break;
                        }

                        result.Introns = Intron.ParseList(intronNodes.Values, dataStore);
                        break;
                    }

                    case MapperKey:
                    {
                        var mapperObject = ad as ObjectKeyValue;
                        if (mapperObject == null)
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                        }

                        result.Mapper = TranscriptMapper.Parse(mapperObject.Value, dataStore);
                        break;
                    }

                    case PeptideKey:
                        result.Peptide = DumperUtilities.GetString(ad);
                        break;

                    case ProteinFunctionPredictionsKey:
                    {
                        var predictionsObject = ad as ObjectKeyValue;
                        if (predictionsObject == null)
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                        }

                        result.ProteinFunctionPredictions = ProteinFunctionPredictions.Parse(predictionsObject.Value);
                        break;
                    }

                    case SortedExonsKey:
                    {
                        var exonNodes = ad as ListObjectKeyValue;
                        if (exonNodes == null)
                        {
                            throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                        }

                        result.Exons = Exon.ParseList(exonNodes.Values, dataStore);
                        break;
                    }

                    case TranslateableSeqKey:
                        result.TranslateableSeq = DumperUtilities.GetString(ad);
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return result;
        }
Exemple #15
0
        /// <summary>
        /// returns a new intron given an ObjectValue
        /// </summary>
        /// <param name="objectValue">the intron object whose key/value pairs are inspected</param>
        /// <param name="dataStore">supplies CurrentReferenceIndex when a slice is parsed</param>
        /// <returns>the intron with Start, End and (when present as an object) Slice populated</returns>
        /// <exception cref="GeneralException">a key is unknown, or the slice entry is neither an object nor a reference</exception>
        private static DataStructures.VEP.Intron Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var intron = new DataStructures.VEP.Intron();

            // loop over all of the key/value pairs in the intron object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know what the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    // the message names the intron object (it previously said "mapper", a copy-paste slip)
                    throw new GeneralException($"Encountered an unknown key in the dumper intron object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case Transcript.EndKey:
                    intron.End = DumperUtilities.GetInt32(ad);
                    break;

                case Transcript.SliceKey:
                    var sliceNode = ad as ObjectKeyValue;
                    if (sliceNode != null)
                    {
                        // every parsed slice is stored as-is; no lookup against previously seen slices is done
                        intron.Slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    else if (DumperUtilities.IsReference(ad))
                    {
                        // skip references until the second pass
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue or ReferenceKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case Transcript.StartKey:
                    intron.Start = DumperUtilities.GetInt32(ad);
                    break;

                case Transcript.StrandKey:
                    // NOTE(review): the result is discarded, so the strand is read but not stored
                    // on the intron - presumably only for validation; confirm this is intended
                    TranscriptUtilities.GetStrand(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return(intron);
        }
Exemple #16
0
        /// <summary>
        /// Converts a list of intron nodes into an array of introns of the same length.
        /// </summary>
        /// <param name="abstractDataList">the intron nodes; every entry must be an ObjectValue</param>
        /// <param name="dataStore">passed through to Parse for each intron</param>
        /// <returns>the intron array; position i corresponds to abstractDataList[i]</returns>
        /// <exception cref="GeneralException">an entry is not an ObjectValue</exception>
        public static DataStructures.VEP.Intron[] ParseList(List <AbstractData> abstractDataList, ImportDataStore dataStore)
        {
            int count = abstractDataList.Count;
            var result = new DataStructures.VEP.Intron[count];

            for (int i = 0; i < count; i++)
            {
                var node = abstractDataList[i];
                var intronObject = node as ObjectValue;

                if (intronObject != null)
                {
                    result[i] = Parse(intronObject, dataStore);
                    continue;
                }

                throw new GeneralException(
                    $"Could not transform the AbstractData object into an ObjectValue: [{node.GetType()}]");
            }

            return result;
        }
Exemple #17
0
        /// <summary>
        /// Builds a transcript mapper from the key/value pairs of a dumper object.
        /// Only the exon coordinate mapper entry is stored; the coding DNA start/end and
        /// start phase entries are recognised but ignored.
        /// </summary>
        /// <param name="objectValue">the object whose entries are inspected one by one</param>
        /// <param name="dataStore">passed through to Mapper.Parse</param>
        /// <returns>the populated transcript mapper</returns>
        /// <exception cref="GeneralException">a key is unknown, or the exon coordinate mapper entry is not an ObjectKeyValue</exception>
        public static DataStructures.VEP.TranscriptMapper Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var result = new DataStructures.VEP.TranscriptMapper();

            foreach (AbstractData ad in objectValue)
            {
                // reject keys that are not in the known-key set
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException(
                        $"Encountered an unknown key in the dumper transcript mapper object: {ad.Key}");
                }

                switch (ad.Key)
                {
                    // recognised but deliberately ignored
                    case CodingDnaCodingEndKey:
                    case CodingDnaCodingStartKey:
                    case StartPhaseKey:
                        break;

                    case ExonCoordinateMapperKey:
                    {
                        var mapperObject = ad as ObjectKeyValue;
                        if (mapperObject == null)
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                        }

                        result.ExonCoordinateMapper = Mapper.Parse(mapperObject.Value, dataStore);
                        break;
                    }

                    default:
                        throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return result;
        }
Exemple #18
0
        /// <summary>
        /// Groups the transcripts of <paramref name="other"/> by "StableId.Start.End".
        /// The first transcript seen for a key is stored; later ones with the same key are
        /// folded into it through MergeTranscript.
        /// </summary>
        /// <remarks>
        /// Transcripts are dropped when FoundPrefix rejects their stable ID or when the ID contains "dupl".
        /// </remarks>
        /// <param name="other">data store whose Transcripts are merged</param>
        /// <returns>dictionary from the composite key to the retained transcript</returns>
        /// <exception cref="GeneralException">a transcript has a null or empty stable ID</exception>
        private Dictionary <string, DataStructures.VEP.Transcript> GetMergedTranscripts(ImportDataStore other)
        {
            var merged = new Dictionary <string, DataStructures.VEP.Transcript>();

            foreach (var candidate in other.Transcripts)
            {
                var stableId = candidate.StableId;

                if (string.IsNullOrEmpty(stableId))
                {
                    throw new GeneralException("Found a transcript with no ID.");
                }

                // whitelist filter first, then the "dupl" exclusion
                if (!FoundPrefix(stableId) || stableId.Contains("dupl"))
                {
                    continue;
                }

                var key = $"{stableId}.{candidate.Start}.{candidate.End}";

                DataStructures.VEP.Transcript existing;
                if (!merged.TryGetValue(key, out existing))
                {
                    merged[key] = candidate;
                    continue;
                }

                MergeTranscript(existing, candidate);
            }

            return merged;
        }
Exemple #19
0
        /// <summary>
        /// Walks the entries of a transcript mapper object and forwards the exon coordinate mapper
        /// entry to Mapper.ParseReference. All other entries are ignored.
        /// </summary>
        /// <param name="objectValue">the transcript mapper object whose entries are inspected</param>
        /// <param name="transcriptMapper">supplies the ExonCoordinateMapper target handed to Mapper.ParseReference</param>
        /// <param name="dataStore">passed through to Mapper.ParseReference</param>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.TranscriptMapper transcriptMapper, ImportDataStore dataStore)
        {
            foreach (AbstractData ad in objectValue)
            {
                if (ad.Key == ExonCoordinateMapperKey)
                {
                    // entries of the right key but the wrong node type are skipped silently
                    var mapperObject = ad as ObjectKeyValue;
                    if (mapperObject != null)
                    {
                        Mapper.ParseReference(mapperObject.Value, transcriptMapper.ExonCoordinateMapper, dataStore);
                    }
                }
            }
        }
Exemple #20
0
        /// <summary>
        /// Resolves a cache reference string to the slice of an intron that has already been created.
        /// </summary>
        /// <param name="reference">reference string; it must match CacheReferenceRegex, with group 1 the transcript index and group 2 the intron index</param>
        /// <param name="dataStore">data store whose Transcripts list is indexed by the transcript index</param>
        /// <returns>the Slice of the addressed intron in the transcript's VariantEffectCache.Introns array</returns>
        /// <exception cref="GeneralException">the string does not match, or either index is not an integer or is out of range</exception>
        private static DataStructures.VEP.Slice ParseCacheReference(string reference, ImportDataStore dataStore)
        {
            var match = CacheReferenceRegex.Match(reference);
            if (!match.Success)
            {
                throw new GeneralException(
                    $"Unable to use the regular expression on the slice reference string: [{reference}]");
            }

            // first level: which transcript
            string transcriptText = match.Groups[1].Value;
            int tIndex;
            if (!int.TryParse(transcriptText, out tIndex))
            {
                throw new GeneralException(
                    $"Unable to convert the transcript index from a string to an integer: [{transcriptText}]");
            }

            var transcripts = dataStore.Transcripts;
            if (!(tIndex >= 0 && tIndex < transcripts.Count))
            {
                throw new GeneralException(
                    $"Unable to link the slice reference: transcript index: [{tIndex}], current # of transcripts: [{transcripts.Count}]");
            }

            // second level: which intron inside that transcript's cache
            string intronText = match.Groups[2].Value;
            int iIndex;
            if (!int.TryParse(intronText, out iIndex))
            {
                throw new GeneralException(
                    $"Unable to convert the intron index from a string to an integer: [{intronText}]");
            }

            var transcript = transcripts[tIndex];
            if (iIndex < 0 || iIndex >= transcript.VariantEffectCache.Introns.Length)
            {
                throw new GeneralException(
                    $"Unable to link the intron reference: intron index: [{iIndex}], current # of introns: [{transcript.VariantEffectCache.Introns.Length}]");
            }

            return transcript.VariantEffectCache.Introns[iIndex].Slice;
        }
        /// <summary>
        /// reads the start, end, stable ID and feature type from a regulatory element
        /// object and adds the resulting regulatory feature to the data store
        /// </summary>
        public static void Parse(ObjectValue objectValue, int regulatoryFeatureIndex, ImportDataStore dataStore)
        {
            string featureType = null;
            string id          = null;
            int    begin       = -1;
            int    finish      = -1;

            // visit every key/value pair in the regulatory element object
            foreach (AbstractData entry in objectValue)
            {
                var key = entry.Key;

                // reject anything that is not in the list of known keys
                if (!KnownKeys.Contains(key))
                {
                    throw new GeneralException(
                        $"Encountered an unknown key in the dumper regulatory element object: {key}");
                }

                switch (key)
                {
                    // values that are captured
                    case Transcript.StartKey:
                        begin = DumperUtilities.GetInt32(entry);
                        break;

                    case Transcript.EndKey:
                        finish = DumperUtilities.GetInt32(entry);
                        break;

                    case Transcript.StableIdKey:
                        id = DumperUtilities.GetString(entry);
                        break;

                    case FeatureTypeKey:
                        featureType = DumperUtilities.GetString(entry);
                        break;

                    // values that are recognised but deliberately ignored
                    case BoundLengthsKey:
                    case CellTypeCountKey:
                    case CellTypesKey:
                    case DbIdKey:
                    case DisplayLabelKey:
                    case HasEvidenceKey:
                    case ProjectedKey:
                    case SetKey:
                    case Transcript.StrandKey:
                    case Transcript.SliceKey:
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {key}");
                }
            }

            var regulatoryFeature = new DataStructures.VEP.RegulatoryFeature(
                dataStore.CurrentReferenceIndex, begin, finish, id, featureType);

            dataStore.RegulatoryFeatures.Add(regulatoryFeature);
        }
Exemple #22
0
        /// <summary>
        /// builds a mapper from an exon coordinate mapper object: the from/to names,
        /// the sorted flag, the pair count and the (optional) coding DNA and genomic pairs
        /// </summary>
        public static DataStructures.VEP.Mapper Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var result = new DataStructures.VEP.Mapper();

            // visit every key/value pair in the exon coordinate mapper object
            foreach (AbstractData ad in objectValue)
            {
                // reject anything that is not in the list of known keys
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper mapper object: {ad.Key}");
                }

                switch (ad.Key)
                {
                    // simple scalar values
                    case FromNameKey:
                        result.FromType = DumperUtilities.GetString(ad);
                        break;

                    case ToNameKey:
                        result.ToType = DumperUtilities.GetString(ad);
                        break;

                    case IsSortedKey:
                        result.IsSorted = DumperUtilities.GetBool(ad);
                        break;

                    case PairCountKey:
                        result.PairCount = DumperUtilities.GetInt32(ad);
                        break;

                    // the coordinate systems are only accepted when they are undefined
                    case FromCoordSystemKey:
                        if (!DumperUtilities.IsUndefined(ad))
                        {
                            throw new GeneralException("Found an unexpected value in FromCoordSystemKey");
                        }
                        break;

                    case ToCoordSystemKey:
                        if (!DumperUtilities.IsUndefined(ad))
                        {
                            throw new GeneralException("Found an unexpected value in ToCoordSystemKey");
                        }
                        break;

                    // nested pair objects: either an object, or explicitly undefined
                    case PairCodingDnaKey:
                        var codingDnaObject = ad as ObjectKeyValue;
                        if (codingDnaObject == null)
                        {
                            if (!DumperUtilities.IsUndefined(ad))
                            {
                                throw new GeneralException(
                                    $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                            }

                            result.PairCodingDna = null;
                            break;
                        }

                        result.PairCodingDna = PairCodingDna.Parse(codingDnaObject.Value, dataStore);
                        break;

                    case PairGenomicKey:
                        var genomicObject = ad as ObjectKeyValue;
                        if (genomicObject == null)
                        {
                            if (!DumperUtilities.IsUndefined(ad))
                            {
                                throw new GeneralException(
                                    $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                            }

                            result.PairGenomic = null;
                            break;
                        }

                        result.PairGenomic = PairGenomic.Parse(genomicObject.Value, dataStore);
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return result;
        }
Exemple #23
0
        /// <summary>
        /// walks a variant effect feature cache object and forwards its introns, mapper and
        /// protein function predictions to their respective reference parsers
        /// </summary>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.VariantEffectFeatureCache cache, ImportDataStore dataStore)
        {
            // visit every key/value pair in the cache object; keys other than the
            // three handled below are silently ignored
            foreach (AbstractData entry in objectValue)
            {
                var key = entry.Key;

                if (key == IntronsKey)
                {
                    var introns = entry as ListObjectKeyValue;
                    if (introns == null)
                    {
                        continue;
                    }

                    Intron.ParseListReference(introns.Values, cache.Introns, dataStore);
                }
                else if (key == MapperKey)
                {
                    var mapper = entry as ObjectKeyValue;
                    if (mapper == null)
                    {
                        continue;
                    }

                    TranscriptMapper.ParseReference(mapper.Value, cache.Mapper, dataStore);
                }
                else if (key == ProteinFunctionPredictionsKey)
                {
                    var predictions = entry as ObjectKeyValue;
                    if (predictions == null)
                    {
                        continue;
                    }

                    ProteinFunctionPredictions.ParseReference(predictions.Value, cache.ProteinFunctionPredictions, dataStore);
                }
            }
        }
Exemple #24
0
        /// <summary>
        /// replaces each slot in the mapper pair list whose source entry is a reference
        /// with the mapper pair that the reference points to
        /// </summary>
        public static void ParseListReference(List <AbstractData> abstractDataList, List <DataStructures.VEP.MapperPair> mapperPairs, ImportDataStore dataStore)
        {
            for (int i = 0; i < abstractDataList.Count; i++)
            {
                var node = abstractDataList[i];

                // only reference entries are of interest; normal mapper pairs are left alone
                if (DumperUtilities.IsReference(node))
                {
                    var referenceNode = node as ReferenceStringValue;
                    if (referenceNode == null)
                    {
                        continue;
                    }

                    var resolvedPair = ParseReference(referenceNode.Value, dataStore);
                    mapperPairs[i] = resolvedPair;
                }
            }
        }
Exemple #25
0
        /// <summary>
        /// resolves the mapper pair references found in the genomic list of a pair genomic object
        /// </summary>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.PairGenomic pairGenomic, ImportDataStore dataStore)
        {
            // visit every key/value pair in the pair genomic object
            foreach (AbstractData entry in objectValue)
            {
                // the genomic list is the only key that is accepted here
                if (entry.Key != GenomicKey)
                {
                    throw new GeneralException($"Unknown key found: {entry.Key}");
                }

                var genomicList = entry as ListObjectKeyValue;
                if (genomicList == null)
                {
                    continue;
                }

                MapperPair.ParseListReference(genomicList.Values, pairGenomic.Genomic, dataStore);
            }
        }
Exemple #26
0
        /// <summary>
        /// parses the relevant data from each transcript and appends the resulting
        /// transcript to the data store
        /// </summary>
        /// <param name="objectValue">the dumper object that holds the transcript key/value pairs</param>
        /// <param name="transcriptIndex">the index of the transcript being parsed (not used by this method)</param>
        /// <param name="dataStore">receives the new transcript and supplies the current reference index</param>
        /// <exception cref="GeneralException">
        /// thrown when an unknown key is encountered or when a node does not have the expected type
        /// </exception>
        public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            var bioType          = BioType.Unknown;
            var geneSymbolSource = GeneSymbolSource.Unknown; // HGNC

            SimpleInterval[]               microRnas   = null;
            DataStructures.VEP.Exon[]      transExons  = null;
            DataStructures.VEP.Gene        gene        = null;
            DataStructures.VEP.Translation translation = null;
            DataStructures.VEP.VariantEffectFeatureCache variantEffectCache = null;
            DataStructures.VEP.Slice slice = null;

            bool onReverseStrand = false;
            bool isCanonical     = false;

            int compDnaCodingStart = -1;
            int compDnaCodingEnd   = -1;

            int  start   = -1;
            int  end     = -1;
            byte version = 1;

            string ccdsId       = null;
            string databaseId   = null;
            string proteinId    = null;
            string refSeqId     = null;
            string geneStableId = null;
            string stableId     = null;

            string geneSymbol = null; // DDX11L1
            int    hgncId     = -1;   // 37102

            // loop over all of the key/value pairs in the transcript object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know what the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case CodingRegionStartKey:
                case CodingRegionEndKey:
                case CreatedDateKey:
                case DescriptionKey:
                case DisplayXrefKey:
                case ExternalDbKey:
                case ExternalDisplayNameKey:
                case ExternalNameKey:
                case ExternalStatusKey:
                case GenePhenotypeKey:
                case ModifiedDateKey:
                case SourceKey:
                case SwissProtKey:
                case TremblKey:
                case UniParcKey:
                    // not used
                    break;

                case AttributesKey:
                    var attributesList = ad as ListObjectKeyValue;
                    if (attributesList != null)
                    {
                        microRnas = Attribute.ParseList(attributesList.Values);
                    }
                    break;

                case BiotypeKey:
                    bioType = TranscriptUtilities.GetBiotype(ad);
                    break;

                case CcdsKey:
                    ccdsId = NullIfMissing(DumperUtilities.GetString(ad));
                    break;

                case CdnaCodingEndKey:
                    compDnaCodingEnd = DumperUtilities.GetInt32(ad);
                    break;

                case CdnaCodingStartKey:
                    compDnaCodingStart = DumperUtilities.GetInt32(ad);
                    break;

                case DbIdKey:
                    databaseId = NullIfMissing(DumperUtilities.GetString(ad));
                    break;

                case EndKey:
                    end = DumperUtilities.GetInt32(ad);
                    break;

                case GeneHgncIdKey:
                    var hgnc = DumperUtilities.GetString(ad);

                    // ordinal comparison: a culture-sensitive match does not guarantee that
                    // exactly five characters precede the numeric part removed by Substring(5)
                    if (hgnc != null && hgnc.StartsWith("HGNC:", System.StringComparison.Ordinal))
                    {
                        hgnc = hgnc.Substring(5);
                    }

                    hgnc = NullIfMissing(hgnc);

                    if (hgnc != null)
                    {
                        // machine-generated data: parse independently of the current culture
                        hgncId = int.Parse(hgnc, System.Globalization.CultureInfo.InvariantCulture);
                    }
                    break;

                case GeneSymbolKey:
                case GeneHgncKey:     // older key
                    geneSymbol = NullIfMissing(DumperUtilities.GetString(ad));
                    break;

                case GeneSymbolSourceKey:
                    geneSymbolSource = TranscriptUtilities.GetGeneSymbolSource(ad);
                    break;

                case GeneKey:
                    var geneNode = ad as ObjectKeyValue;
                    if (geneNode != null)
                    {
                        gene = Gene.Parse(geneNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case GeneStableIdKey:
                    geneStableId = NullIfMissing(DumperUtilities.GetString(ad));
                    break;

                case IsCanonicalKey:
                    isCanonical = DumperUtilities.GetBool(ad);
                    break;

                case ProteinKey:
                    proteinId = NullIfMissing(DumperUtilities.GetString(ad));
                    break;

                case RefseqKey:
                    refSeqId = NullIfMissing(DumperUtilities.GetString(ad));
                    break;

                case SliceKey:
                    var sliceNode = ad as ObjectKeyValue;
                    if (sliceNode != null)
                    {
                        slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case StableIdKey:
                    stableId = NullIfMissing(DumperUtilities.GetString(ad));
                    break;

                case StartKey:
                    start = DumperUtilities.GetInt32(ad);
                    break;

                case StrandKey:
                    onReverseStrand = TranscriptUtilities.GetStrand(ad);
                    break;

                case TransExonArrayKey:
                    // the exon array is mandatory: anything other than a list is an error
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList == null)
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    transExons = Exon.ParseList(exonsList.Values, dataStore);
                    break;

                case TranslationKey:
                    // the translation is optional: it may be an object or explicitly undefined
                    var translationNode = ad as ObjectKeyValue;
                    if (translationNode != null)
                    {
                        translation = Translation.Parse(translationNode.Value, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        translation = null;
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case VariationEffectFeatureCacheKey:
                    var cacheNode = ad as ObjectKeyValue;
                    if (cacheNode == null)
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    variantEffectCache = VariantEffectFeatureCache.Parse(cacheNode.Value, dataStore);
                    break;

                case VersionKey:
                    // NOTE(review): this narrowing cast is not range-checked unless the project
                    // enables overflow checking; a version above 255 would wrap - confirm intent
                    version = (byte)DumperUtilities.GetInt32(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(bioType, transExons, gene, translation, variantEffectCache, slice,
                                                                        onReverseStrand, isCanonical, compDnaCodingStart, compDnaCodingEnd, dataStore.CurrentReferenceIndex, start, end,
                                                                        ccdsId, databaseId, proteinId, refSeqId, geneStableId, stableId, geneSymbol, geneSymbolSource, hgncId, version,
                                                                        microRnas));
        }

        /// <summary>
        /// returns null when the value is one of the placeholders used for a missing
        /// entry ("-" or the empty string), otherwise returns the value unchanged
        /// </summary>
        private static string NullIfMissing(string value)
        {
            return (value == "-" || value == "") ? null : value;
        }
Exemple #27
0
        /// <summary>
        /// resolves the references inside each intron object, updating the introns
        /// that have already been created (matched by position in the list)
        /// </summary>
        public static void ParseListReference(List <AbstractData> abstractDataList, DataStructures.VEP.Intron[] introns, ImportDataStore dataStore)
        {
            for (int i = 0; i < abstractDataList.Count; i++)
            {
                // every entry in the list is expected to be an intron object
                var intronObject = abstractDataList[i] as ObjectValue;

                if (intronObject != null)
                {
                    ParseReference(intronObject, introns[i], dataStore);
                    continue;
                }

                throw new GeneralException(
                    $"Could not transform the AbstractData object into an ObjectValue: [{abstractDataList[i].GetType()}]");
            }
        }
Exemple #28
0
        /// <summary>
        /// points to a coding DNA mapper pair that has already been created
        /// </summary>
        /// <param name="reference">the reference string; capture group 1 is the transcript index, capture group 2 is the mapper pair index</param>
        /// <param name="dataStore">holds the transcripts that have been parsed so far</param>
        /// <returns>the coding DNA mapper pair at the referenced position of the referenced transcript</returns>
        /// <exception cref="GeneralException">
        /// thrown when the reference string does not match, when an index is not an integer,
        /// or when an index is out of range
        /// </exception>
        private static DataStructures.VEP.MapperPair ParseCodingDnaReference(string reference, ImportDataStore dataStore)
        {
            var mapperPairReferenceMatch = ReferenceCodingDnaRegex.Match(reference);

            if (!mapperPairReferenceMatch.Success)
            {
                throw new GeneralException(
                          $"Unable to use the regular expression on the mapper pair reference string: [{reference}]");
            }

            int transcriptIndex;

            if (!int.TryParse(mapperPairReferenceMatch.Groups[1].Value, out transcriptIndex))
            {
                throw new GeneralException(
                          $"Unable to convert the transcript index from a string to an integer: [{mapperPairReferenceMatch.Groups[1].Value}]");
            }

            // sanity check: make sure we have at least that many transcripts in our list
            if (transcriptIndex < 0 || transcriptIndex >= dataStore.Transcripts.Count)
            {
                throw new GeneralException(
                          $"Unable to link the mapper pair reference: transcript index: [{transcriptIndex}], current # of transcripts: [{dataStore.Transcripts.Count}]");
            }

            int codingDnaMapperPairIndex;

            if (!int.TryParse(mapperPairReferenceMatch.Groups[2].Value, out codingDnaMapperPairIndex))
            {
                throw new GeneralException(
                          $"Unable to convert the mapper pair index from a string to an integer: [{mapperPairReferenceMatch.Groups[2].Value}]");
            }

            // sanity check: make sure we have at least that many mapper pairs in our list.
            // The bound is taken from the coding DNA list because that is the list indexed
            // below; checking the genomic list would validate against the wrong collection.
            var codingDnaMapperPairs  = dataStore.Transcripts[transcriptIndex].VariantEffectCache.Mapper.ExonCoordinateMapper.PairCodingDna.CodingDna;
            int numCodingDnaMapperPairs = codingDnaMapperPairs.Count;

            if (codingDnaMapperPairIndex < 0 || codingDnaMapperPairIndex >= numCodingDnaMapperPairs)
            {
                throw new GeneralException(
                          $"Unable to link the mapper pair reference: mapper pair index: [{codingDnaMapperPairIndex}], current # of mapper pairs: [{numCodingDnaMapperPairs}]");
            }

            return codingDnaMapperPairs[codingDnaMapperPairIndex];
        }
Exemple #29
0
        /// <summary>
        /// resolves the reference entries of an intron object; the slice is the only
        /// reference that is supported
        /// </summary>
        private static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Intron intron, ImportDataStore dataStore)
        {
            // visit every key/value pair in the intron object
            foreach (AbstractData entry in objectValue)
            {
                // non-reference entries are left alone
                if (DumperUtilities.IsReference(entry))
                {
                    // any reference other than the slice is an error
                    if (entry.Key != Transcript.SliceKey)
                    {
                        throw new GeneralException($"Found an unhandled reference in the intron object: {entry.Key}");
                    }

                    var sliceReference = entry as ReferenceKeyValue;
                    if (sliceReference == null)
                    {
                        continue;
                    }

                    intron.Slice = Slice.ParseReference(sliceReference.Value, dataStore);
                }
            }
        }
Exemple #30
0
        /// <summary>
        /// parses every non-reference entry of a mapper pairs list into a mapper pair;
        /// slots that correspond to reference entries are not assigned here
        /// </summary>
        public static List <DataStructures.VEP.MapperPair> ParseList(List <AbstractData> abstractDataList, ImportDataStore dataStore)
        {
            // the result is filled by index, so it is created with one slot per input entry
            var result = DumperUtilities.GetPopulatedList <DataStructures.VEP.MapperPair>(abstractDataList.Count);

            for (int i = 0; i < abstractDataList.Count; i++)
            {
                var entry = abstractDataList[i];

                // only concrete mapper pairs are handled here; references are skipped
                if (!DumperUtilities.IsReference(entry))
                {
                    if (entry.DataType != DataType)
                    {
                        throw new GeneralException(
                            $"Expected a mapper pair data type, but found the following data type: [{entry.DataType}]");
                    }

                    var pairObject = entry as ObjectValue;
                    if (pairObject == null)
                    {
                        throw new GeneralException(
                            $"Could not transform the AbstractData object into an ObjectValue: [{entry.GetType()}]");
                    }

                    result[i] = Parse(pairObject, dataStore.CurrentReferenceIndex);
                }
            }

            return result;
        }