Exemple #1
0
        /// <summary>
        /// Resolves the reference entries of a dumped intron object and stores the
        /// result on the supplied intron.
        /// </summary>
        /// <param name="objectValue">the dumped intron object whose entries are inspected</param>
        /// <param name="intron">the intron whose <c>Slice</c> is assigned</param>
        /// <param name="dataStore">handed to <c>Slice.ParseReference</c></param>
        /// <exception cref="GeneralException">a reference entry carries a key other than the slice key</exception>
        private static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Intron intron, ImportDataStore dataStore)
        {
            foreach (AbstractData entry in objectValue)
            {
                // entries that are not references are not handled by this method
                if (!DumperUtilities.IsReference(entry))
                {
                    continue;
                }

                // the slice is the only reference an intron is expected to carry
                if (entry.Key != Transcript.SliceKey)
                {
                    throw new GeneralException($"Found an unhandled reference in the intron object: {entry.Key}");
                }

                // a slice entry that is not a ReferenceKeyValue is silently ignored
                var sliceReference = entry as ReferenceKeyValue;
                if (sliceReference == null)
                {
                    continue;
                }

                intron.Slice = Slice.ParseReference(sliceReference.Value, dataStore);
            }
        }
        /// <summary>
        /// Resolves the reference entries of a dumped protein function prediction
        /// object and stores the resolved PolyPhen / Sift objects on <paramref name="cache"/>.
        /// </summary>
        /// <param name="objectValue">the dumped protein function prediction object</param>
        /// <param name="cache">receives the resolved <c>PolyPhen</c> and <c>Sift</c> values</param>
        /// <param name="dataStore">handed to the PolyPhen / Sift reference parsers</param>
        /// <exception cref="GeneralException">a reference entry carries a key that is not handled here</exception>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.ProteinFunctionPredictions cache, ImportDataStore dataStore)
        {
            foreach (AbstractData entry in objectValue)
            {
                // an entry is processed only if it is flagged as a reference AND
                // is actually a ReferenceKeyValue; everything else is skipped
                var reference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
                if (reference == null)
                {
                    continue;
                }

                switch (reference.Key)
                {
                    case SiftKey:
                        cache.Sift = Sift.ParseReference(reference.Value, dataStore);
                        break;

                    case PolyPhenHumVarKey:
                        cache.PolyPhen = PolyPhen.ParseReference(reference.Value, dataStore);
                        break;

                    default:
                        throw new GeneralException(
                            $"Found an unhandled reference in the protein function prediction object: {entry.Key}");
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Builds a PolyPhen object from a dumped PolyPhen object. Only the matrix
        /// entry is read; the remaining recognised entries are ignored.
        /// </summary>
        /// <param name="objectValue">the dumped PolyPhen object; every entry key must be in <c>KnownKeys</c></param>
        /// <returns>a PolyPhen constructed from the matrix string (null when no matrix entry was present)</returns>
        /// <exception cref="GeneralException">an entry has a key that is unknown or not handled</exception>
        public static DataStructures.VEP.PolyPhen Parse(ObjectValue objectValue)
        {
            string matrixText = null;

            foreach (AbstractData entry in objectValue)
            {
                var key = entry.Key;

                // reject anything outside the set of keys this parser was written for
                if (KnownKeys.Contains(key) == false)
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper PolyPhen object: {key}");
                }

                switch (key)
                {
                    case MatrixKey:
                        matrixText = DumperUtilities.GetString(entry);
                        break;

                    // recognised but not needed
                    case AnalysisKey:
                    case IsMatrixCompressedKey:
                    case PeptideLengthKey:
                    case SubAnalysisKey:
                    case TranslationMD5Key:
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {key}");
                }
            }

            return new DataStructures.VEP.PolyPhen(matrixText);
        }
        /// <summary>
        /// Builds a regulatory feature from a dumped regulatory element object and
        /// appends it to <c>dataStore.RegulatoryFeatures</c>.
        /// </summary>
        /// <param name="objectValue">the dumped regulatory element; every entry key must be in <c>KnownKeys</c></param>
        /// <param name="regulatoryFeatureIndex">not read by this method</param>
        /// <param name="dataStore">supplies the current reference index and receives the new feature</param>
        /// <exception cref="GeneralException">an entry has a key that is unknown or not handled</exception>
        public static void Parse(ObjectValue objectValue, int regulatoryFeatureIndex, ImportDataStore dataStore)
        {
            // -1 / null are the values used when the corresponding entry is absent
            int featureStart = -1;
            int featureEnd = -1;
            string featureId = null;
            string featureType = null;

            foreach (AbstractData entry in objectValue)
            {
                // reject anything outside the set of keys this parser was written for
                if (KnownKeys.Contains(entry.Key) == false)
                {
                    throw new GeneralException(
                        $"Encountered an unknown key in the dumper regulatory element object: {entry.Key}");
                }

                switch (entry.Key)
                {
                    case Transcript.StartKey:
                        featureStart = DumperUtilities.GetInt32(entry);
                        break;

                    case Transcript.EndKey:
                        featureEnd = DumperUtilities.GetInt32(entry);
                        break;

                    case Transcript.StableIdKey:
                        featureId = DumperUtilities.GetString(entry);
                        break;

                    case FeatureTypeKey:
                        featureType = DumperUtilities.GetString(entry);
                        break;

                    // recognised but not needed
                    case BoundLengthsKey:
                    case CellTypeCountKey:
                    case CellTypesKey:
                    case DbIdKey:
                    case DisplayLabelKey:
                    case HasEvidenceKey:
                    case ProjectedKey:
                    case SetKey:
                    case Transcript.StrandKey:
                    case Transcript.SliceKey:
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {entry.Key}");
                }
            }

            dataStore.RegulatoryFeatures.Add(
                new DataStructures.VEP.RegulatoryFeature(
                    dataStore.CurrentReferenceIndex,
                    featureStart,
                    featureEnd,
                    featureId,
                    featureType));
        }
Exemple #5
0
        /// <summary>
        /// Builds a new exon from a dumped exon object.
        /// </summary>
        /// <param name="objectValue">the dumped exon object; every entry key must be in <c>KnownKeys</c></param>
        /// <param name="currentReferenceIndex">reference index passed to the exon constructor</param>
        /// <returns>
        /// an exon built from the start, end, stable ID, strand and phase entries;
        /// start/end stay at -1 and stable ID / phase stay null when their entry is absent
        /// </returns>
        /// <exception cref="GeneralException">an entry has a key that is unknown or not handled</exception>
        public static DataStructures.VEP.Exon Parse(ObjectValue objectValue, ushort currentReferenceIndex)
        {
            bool onReverseStrand = false;

            int   end   = -1;
            byte? phase = null;
            int   start = -1;

            string stableId = null;

            // loop over all of the key/value pairs in the exon object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about every key that is used
                if (!KnownKeys.Contains(ad.Key))
                {
                    // the message names the exon object (it previously said "mapper",
                    // which pointed at the wrong parser when diagnosing a bad dump)
                    throw new GeneralException($"Encountered an unknown key in the dumper exon object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                    case Transcript.EndKey:
                        end = DumperUtilities.GetInt32(ad);
                        break;

                    case EndPhaseKey:
                        // recognised but not needed
                        break;

                    case PhaseKey:
                        // a dumped phase of -1 leaves the phase unset (null)
                        int phaseInt = DumperUtilities.GetInt32(ad);
                        if (phaseInt != -1)
                        {
                            phase = (byte)phaseInt;
                        }
                        break;

                    case Transcript.StableIdKey:
                        stableId = DumperUtilities.GetString(ad);
                        break;

                    case Transcript.StartKey:
                        start = DumperUtilities.GetInt32(ad);
                        break;

                    case Transcript.StrandKey:
                        onReverseStrand = TranscriptUtilities.GetStrand(ad);
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return new DataStructures.VEP.Exon(currentReferenceIndex, start, end, stableId, onReverseStrand, phase);
        }
Exemple #6
0
        /// <summary>
        /// Converts a dumped attribute object into an interval, but only when the
        /// attribute's code is "miRNA" and it has a value.
        /// </summary>
        /// <param name="objectValue">the dumped attribute object; every entry key must be in <c>KnownKeys</c></param>
        /// <returns>
        /// the interval whose bounds are capture groups 1 and 2 of <c>RangeRegex</c>
        /// applied to the attribute value, or null for any non-miRNA attribute
        /// </returns>
        /// <exception cref="GeneralException">
        /// an entry has a key that is unknown or not handled, or the miRNA value does not match <c>RangeRegex</c>
        /// </exception>
        private static SimpleInterval Parse(ObjectValue objectValue)
        {
            string attributeCode = null;
            string attributeValue = null;

            foreach (AbstractData entry in objectValue)
            {
                // reject anything outside the set of keys this parser was written for
                if (KnownKeys.Contains(entry.Key) == false)
                {
                    throw new GeneralException(
                        $"Encountered an unknown key in the dumper attribute object: {entry.Key}");
                }

                switch (entry.Key)
                {
                    case CodeKey:
                        attributeCode = DumperUtilities.GetString(entry);
                        break;

                    case ValueKey:
                        attributeValue = DumperUtilities.GetString(entry);
                        break;

                    // recognised but not needed
                    case NameKey:
                    case DescriptionKey:
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {entry.Key}");
                }
            }

            // every attribute other than a miRNA with a value yields no interval
            bool isMicroRna = attributeCode == "miRNA" && attributeValue != null;
            if (!isMicroRna)
            {
                return null;
            }

            var match = RangeRegex.Match(attributeValue);
            if (match.Success == false)
            {
                throw new GeneralException(
                    $"Unable to convert the Attribute to a miRNA object. The value string failed the regex: {attributeValue}");
            }

            int intervalStart = int.Parse(match.Groups[1].Value);
            int intervalEnd = int.Parse(match.Groups[2].Value);

            return new SimpleInterval(intervalStart, intervalEnd);
        }
Exemple #7
0
        /// <summary>
        /// Parses every non-reference element of a dumped mapper pair list.
        /// </summary>
        /// <param name="abstractDataList">the dumped list of mapper pair nodes</param>
        /// <param name="dataStore">supplies the current reference index for each parsed pair</param>
        /// <returns>
        /// the list obtained from <c>DumperUtilities.GetPopulatedList</c> with one slot per
        /// input element; slots belonging to reference elements are left as that helper created them
        /// </returns>
        /// <exception cref="GeneralException">
        /// a non-reference element has an unexpected data type or is not an <c>ObjectValue</c>
        /// </exception>
        public static List<DataStructures.VEP.MapperPair> ParseList(List<AbstractData> abstractDataList, ImportDataStore dataStore)
        {
            var parsedPairs = DumperUtilities.GetPopulatedList<DataStructures.VEP.MapperPair>(abstractDataList.Count);

            for (int i = 0; i < abstractDataList.Count; i++)
            {
                var node = abstractDataList[i];

                // references are not handled by this method
                if (DumperUtilities.IsReference(node))
                {
                    continue;
                }

                if (node.DataType != DataType)
                {
                    throw new GeneralException(
                        $"Expected a mapper pair data type, but found the following data type: [{node.DataType}]");
                }

                var pairObject = node as ObjectValue;
                if (pairObject == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectValue: [{node.GetType()}]");
                }

                // each parsed pair is stored directly; nothing is looked up in the data store
                parsedPairs[i] = Parse(pairObject, dataStore.CurrentReferenceIndex);
            }

            return parsedPairs;
        }
Exemple #8
0
        /// <summary>
        /// Fills the slots of <paramref name="exons"/> that correspond to reference
        /// elements of the dumped exon list with the exon each reference resolves to.
        /// </summary>
        /// <param name="abstractDataList">the dumped list of exon nodes</param>
        /// <param name="exons">array indexed in parallel with <paramref name="abstractDataList"/>; reference slots are overwritten</param>
        /// <param name="dataStore">handed to <c>ParseReference</c></param>
        public static void ParseListReference(List<AbstractData> abstractDataList, DataStructures.VEP.Exon[] exons, ImportDataStore dataStore)
        {
            for (int i = 0; i < abstractDataList.Count; i++)
            {
                var node = abstractDataList[i];

                // only reference elements are handled; the others are left untouched
                if (DumperUtilities.IsReference(node))
                {
                    // a reference that is not a ReferenceStringValue is silently ignored
                    var reference = node as ReferenceStringValue;
                    if (reference == null)
                    {
                        continue;
                    }

                    exons[i] = ParseReference(reference.Value, dataStore);
                }
            }
        }
Exemple #9
0
        /// <summary>
        /// Collects the miRNA intervals found in a dumped list of attribute objects.
        /// </summary>
        /// <param name="abstractDataList">the dumped list of attribute nodes</param>
        /// <returns>
        /// the intervals for which <c>Parse</c> returned a non-null result, in input order;
        /// null (not an empty array) when there are none
        /// </returns>
        /// <exception cref="GeneralException">a non-reference element is not an <c>ObjectValue</c></exception>
        public static SimpleInterval[] ParseList(List<AbstractData> abstractDataList)
        {
            var intervals = new List<SimpleInterval>();

            foreach (var node in abstractDataList)
            {
                // references are not handled by this method
                if (DumperUtilities.IsReference(node))
                {
                    continue;
                }

                var attributeObject = node as ObjectValue;
                if (attributeObject == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectValue: [{node.GetType()}]");
                }

                // Parse yields null for attributes that are not miRNAs; those are dropped
                var interval = Parse(attributeObject);
                if (interval != null)
                {
                    intervals.Add(interval);
                }
            }

            if (intervals.Count == 0)
            {
                return null;
            }

            return intervals.ToArray();
        }
Exemple #10
0
        /// <summary>
        /// Resolves the reference entries of a dumped translation object and stores
        /// the resolved exons / transcript on <paramref name="translation"/>.
        /// </summary>
        /// <param name="objectValue">the dumped translation object</param>
        /// <param name="translation">receives <c>StartExon</c>, <c>EndExon</c> and <c>Transcript</c></param>
        /// <param name="dataStore">handed to the exon / transcript reference parsers</param>
        /// <exception cref="GeneralException">a reference entry carries a key that is not handled here</exception>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Translation translation, ImportDataStore dataStore)
        {
            foreach (AbstractData entry in objectValue)
            {
                // an entry is processed only if it is flagged as a reference AND
                // is actually a ReferenceKeyValue; everything else is skipped
                var reference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
                if (reference == null)
                {
                    continue;
                }

                switch (reference.Key)
                {
                    case StartExonKey:
                        translation.StartExon = Exon.ParseReference(reference.Value, dataStore);
                        break;

                    case EndExonKey:
                        translation.EndExon = Exon.ParseReference(reference.Value, dataStore);
                        break;

                    case TranscriptKey:
                        translation.Transcript = Transcript.ParseReference(reference.Value, dataStore);
                        break;

                    case AdaptorKey:
                        // recognised but deliberately ignored
                        break;

                    default:
                        throw new GeneralException(
                            $"Found an unhandled reference in the translation object: {entry.Key}");
                }
            }
        }
Exemple #11
0
        /// <summary>
        /// Fills the slots of <paramref name="mapperPairs"/> that correspond to reference
        /// elements of the dumped mapper pair list with the pair each reference resolves to.
        /// </summary>
        /// <param name="abstractDataList">the dumped list of mapper pair nodes</param>
        /// <param name="mapperPairs">list indexed in parallel with <paramref name="abstractDataList"/>; reference slots are overwritten</param>
        /// <param name="dataStore">handed to <c>ParseReference</c></param>
        public static void ParseListReference(List<AbstractData> abstractDataList, List<DataStructures.VEP.MapperPair> mapperPairs, ImportDataStore dataStore)
        {
            for (int i = 0; i < abstractDataList.Count; i++)
            {
                var node = abstractDataList[i];

                // only reference elements are handled; the others are left untouched
                if (DumperUtilities.IsReference(node))
                {
                    // a reference that is not a ReferenceStringValue is silently ignored
                    var reference = node as ReferenceStringValue;
                    if (reference == null)
                    {
                        continue;
                    }

                    mapperPairs[i] = ParseReference(reference.Value, dataStore);
                }
            }
        }
Exemple #12
0
        /// <summary>
        /// Builds a pair genomic object from a dumped pair genomic object.
        /// </summary>
        /// <param name="objectValue">the dumped pair genomic object; every entry key must be in <c>KnownKeys</c></param>
        /// <param name="dataStore">handed to <c>MapperPair.ParseList</c></param>
        /// <returns>
        /// a pair genomic whose <c>Genomic</c> member is the parsed mapper pair list,
        /// or null when the genomic entry is undefined
        /// </returns>
        /// <exception cref="GeneralException">
        /// an entry has a key that is unknown or not handled, or the genomic entry is
        /// neither a <c>ListObjectKeyValue</c> nor undefined
        /// </exception>
        public static DataStructures.VEP.PairGenomic Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var pairGenomic = new DataStructures.VEP.PairGenomic();

            // loop over all of the key/value pairs in the pair genomic object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about every key that is used
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the pair genomic object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                    case GenomicKey:
                        var genomicNode = ad as ListObjectKeyValue;
                        if (genomicNode != null)
                        {
                            pairGenomic.Genomic = MapperPair.ParseList(genomicNode.Values, dataStore);
                        }
                        else if (DumperUtilities.IsUndefined(ad))
                        {
                            pairGenomic.Genomic = null;
                        }
                        else
                        {
                            // the message names the type that was actually attempted
                            // (it previously said ObjectKeyValue, which is not the cast used here)
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                        }
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return pairGenomic;
        }
Exemple #13
0
        /// <summary>
        /// Parses every non-reference element of a dumped exon list.
        /// </summary>
        /// <param name="abstractDataList">the dumped list of exon nodes</param>
        /// <param name="dataStore">supplies the current reference index for each parsed exon</param>
        /// <returns>
        /// an array with one slot per input element; slots belonging to reference
        /// elements are left null
        /// </returns>
        /// <exception cref="GeneralException">a non-reference element is not an <c>ObjectValue</c></exception>
        public static DataStructures.VEP.Exon[] ParseList(List<AbstractData> abstractDataList, ImportDataStore dataStore)
        {
            var parsedExons = new DataStructures.VEP.Exon[abstractDataList.Count];

            for (int i = 0; i < abstractDataList.Count; i++)
            {
                var node = abstractDataList[i];

                // references are not handled by this method
                if (DumperUtilities.IsReference(node))
                {
                    continue;
                }

                var exonObject = node as ObjectValue;
                if (exonObject == null)
                {
                    throw new GeneralException(
                        $"Could not transform the AbstractData object into an ObjectValue: [{node.GetType()}]");
                }

                // each parsed exon is stored directly; nothing is looked up in the data store
                parsedExons[i] = Parse(exonObject, dataStore.CurrentReferenceIndex);
            }

            return parsedExons;
        }
Exemple #14
0
        /// <summary>
        /// Builds a mapper unit from a dumped mapper unit object.
        /// </summary>
        /// <param name="objectValue">the dumped mapper unit object; every entry key must be in <c>KnownKeys</c></param>
        /// <param name="currentReferenceIndex">reference index passed to the mapper unit constructor</param>
        /// <returns>
        /// a mapper unit built from the start, end and id entries; start/end stay at -1
        /// and the id stays <c>MapperUnitType.Unknown</c> when their entry is absent
        /// </returns>
        /// <exception cref="GeneralException">an entry has a key that is unknown or not handled</exception>
        public static DataStructures.VEP.MapperUnit Parse(ObjectValue objectValue, ushort currentReferenceIndex)
        {
            int unitStart = -1;
            int unitEnd = -1;
            var unitType = MapperUnitType.Unknown;

            foreach (AbstractData entry in objectValue)
            {
                // reject anything outside the set of keys this parser was written for
                if (KnownKeys.Contains(entry.Key) == false)
                {
                    throw new GeneralException($"Encountered an unknown key in the mapper unit object: {entry.Key}");
                }

                switch (entry.Key)
                {
                    case Transcript.StartKey:
                        unitStart = DumperUtilities.GetInt32(entry);
                        break;

                    case Transcript.EndKey:
                        unitEnd = DumperUtilities.GetInt32(entry);
                        break;

                    case IdKey:
                        unitType = TranscriptUtilities.GetMapperUnitType(entry);
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {entry.Key}");
                }
            }

            return new DataStructures.VEP.MapperUnit(currentReferenceIndex, unitStart, unitEnd, unitType);
        }
Exemple #15
0
        /// <summary>
        /// Builds a new intron from a dumped intron object.
        /// </summary>
        /// <param name="objectValue">the dumped intron object; every entry key must be in <c>KnownKeys</c></param>
        /// <param name="dataStore">supplies the current reference index used when parsing the slice</param>
        /// <returns>
        /// an intron with <c>Start</c>, <c>End</c> and (when the slice entry is an inline object)
        /// <c>Slice</c> populated; a slice stored as a reference is left unset by this method
        /// </returns>
        /// <exception cref="GeneralException">
        /// an entry has a key that is unknown or not handled, or the slice entry is neither
        /// an <c>ObjectKeyValue</c> nor a reference
        /// </exception>
        private static DataStructures.VEP.Intron Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var intron = new DataStructures.VEP.Intron();

            // loop over all of the key/value pairs in the intron object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about every key that is used
                if (!KnownKeys.Contains(ad.Key))
                {
                    // the message names the intron object (it previously said "mapper",
                    // which pointed at the wrong parser when diagnosing a bad dump)
                    throw new GeneralException($"Encountered an unknown key in the dumper intron object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                    case Transcript.EndKey:
                        intron.End = DumperUtilities.GetInt32(ad);
                        break;

                    case Transcript.SliceKey:
                        var sliceNode = ad as ObjectKeyValue;
                        if (sliceNode != null)
                        {
                            // the parsed slice is stored directly; nothing is looked up in the data store
                            intron.Slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                        }
                        else if (DumperUtilities.IsReference(ad))
                        {
                            // skip references until the second pass
                        }
                        else
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue or ReferenceKeyValue: [{ad.GetType()}]");
                        }
                        break;

                    case Transcript.StartKey:
                        intron.Start = DumperUtilities.GetInt32(ad);
                        break;

                    case Transcript.StrandKey:
                        // the strand is read but its value is not stored on the intron;
                        // the call is kept in case GetStrand validates the entry - TODO confirm
                        TranscriptUtilities.GetStrand(ad);
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return intron;
        }
Exemple #16
0
        /// <summary>
        /// Builds a mapper from a dumped exon coordinate mapper object.
        /// </summary>
        /// <param name="objectValue">the dumped mapper object; every entry key must be in <c>KnownKeys</c></param>
        /// <param name="dataStore">handed to the pair coding DNA / pair genomic parsers</param>
        /// <returns>a mapper populated from the entries that were present</returns>
        /// <exception cref="GeneralException">
        /// an entry has a key that is unknown or not handled, a coordinate system entry is
        /// not undefined, or a pair entry is neither an <c>ObjectKeyValue</c> nor undefined
        /// </exception>
        public static DataStructures.VEP.Mapper Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var result = new DataStructures.VEP.Mapper();

            foreach (AbstractData entry in objectValue)
            {
                // reject anything outside the set of keys this parser was written for
                if (KnownKeys.Contains(entry.Key) == false)
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper mapper object: {entry.Key}");
                }

                switch (entry.Key)
                {
                    // the two coordinate system entries are only accepted when undefined
                    case FromCoordSystemKey:
                        if (DumperUtilities.IsUndefined(entry) == false)
                        {
                            throw new GeneralException("Found an unexpected value in FromCoordSystemKey");
                        }
                        break;

                    case ToCoordSystemKey:
                        if (DumperUtilities.IsUndefined(entry) == false)
                        {
                            throw new GeneralException("Found an unexpected value in ToCoordSystemKey");
                        }
                        break;

                    case FromNameKey:
                        result.FromType = DumperUtilities.GetString(entry);
                        break;

                    case ToNameKey:
                        result.ToType = DumperUtilities.GetString(entry);
                        break;

                    case IsSortedKey:
                        result.IsSorted = DumperUtilities.GetBool(entry);
                        break;

                    case PairCountKey:
                        result.PairCount = DumperUtilities.GetInt32(entry);
                        break;

                    case PairCodingDnaKey:
                        var codingDnaNode = entry as ObjectKeyValue;
                        if (codingDnaNode != null)
                        {
                            result.PairCodingDna = PairCodingDna.Parse(codingDnaNode.Value, dataStore);
                            break;
                        }

                        // not an inline object: only an undefined entry is acceptable
                        if (!DumperUtilities.IsUndefined(entry))
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                        }

                        result.PairCodingDna = null;
                        break;

                    case PairGenomicKey:
                        var genomicNode = entry as ObjectKeyValue;
                        if (genomicNode != null)
                        {
                            result.PairGenomic = PairGenomic.Parse(genomicNode.Value, dataStore);
                            break;
                        }

                        // not an inline object: only an undefined entry is acceptable
                        if (!DumperUtilities.IsUndefined(entry))
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                        }

                        result.PairGenomic = null;
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {entry.Key}");
                }
            }

            return result;
        }
Exemple #17
0
        /// <summary>
        /// Builds a variant effect feature cache from a dumped cache object.
        /// </summary>
        /// <param name="objectValue">the dumped cache object; every entry key must be in <c>KnownKeys</c></param>
        /// <param name="dataStore">handed to the intron, mapper and exon parsers</param>
        /// <returns>a cache populated from the entries that were present</returns>
        /// <exception cref="GeneralException">
        /// an entry has a key that is unknown or not handled, or a structured entry does not
        /// have the node type its parser requires
        /// </exception>
        public static DataStructures.VEP.VariantEffectFeatureCache Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var result = new DataStructures.VEP.VariantEffectFeatureCache();

            foreach (AbstractData entry in objectValue)
            {
                // reject anything outside the set of keys this parser was written for
                if (KnownKeys.Contains(entry.Key) == false)
                {
                    throw new GeneralException(
                        $"Encountered an unknown key in the dumper variant effect feature cache object: {entry.Key}");
                }

                switch (entry.Key)
                {
                    case PeptideKey:
                        result.Peptide = DumperUtilities.GetString(entry);
                        break;

                    case TranslateableSeqKey:
                        result.TranslateableSeq = DumperUtilities.GetString(entry);
                        break;

                    case IntronsKey:
                        var intronNodes = entry as ListObjectKeyValue;
                        if (intronNodes != null)
                        {
                            result.Introns = Intron.ParseList(intronNodes.Values, dataStore);
                            break;
                        }

                        // not a list: only an undefined entry is acceptable
                        if (!DumperUtilities.IsUndefined(entry))
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into a ListObjectKeyValue: [{entry.GetType()}]");
                        }

                        result.Introns = null;
                        break;

                    case SortedExonsKey:
                        var exonNodes = entry as ListObjectKeyValue;
                        if (exonNodes == null)
                        {
                            throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{entry.GetType()}]");
                        }

                        result.Exons = Exon.ParseList(exonNodes.Values, dataStore);
                        break;

                    case MapperKey:
                        var mapperObject = entry as ObjectKeyValue;
                        if (mapperObject == null)
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                        }

                        result.Mapper = TranscriptMapper.Parse(mapperObject.Value, dataStore);
                        break;

                    case ProteinFunctionPredictionsKey:
                        var predictionsObject = entry as ObjectKeyValue;
                        if (predictionsObject == null)
                        {
                            throw new GeneralException(
                                $"Could not transform the AbstractData object into an ObjectKeyValue: [{entry.GetType()}]");
                        }

                        result.ProteinFunctionPredictions = ProteinFunctionPredictions.Parse(predictionsObject.Value);
                        break;

                    // recognised but not needed
                    case SelenocysteinesKey:
                    case ThreePrimeUtrKey:
                    case SeqEditsKey:
                    case CodonTableKey:
                    case ProteinFeaturesKey:
                        break;

                    default:
                        throw new GeneralException($"Unknown key found: {entry.Key}");
                }
            }

            return result;
        }
Exemple #18
0
        /// <summary>
        /// parses one dumper transcript object and adds the resulting transcript to the data store
        /// </summary>
        /// <param name="objectValue">the key/value pairs that make up the transcript object</param>
        /// <param name="transcriptIndex">the index of the transcript (not read by this method)</param>
        /// <param name="dataStore">provides the current reference index and receives the new transcript</param>
        /// <exception cref="GeneralException">
        /// thrown when a key is unknown or when a node has a type that cannot be handled
        /// </exception>
        /// <exception cref="System.FormatException">
        /// thrown when the HGNC ID is present but is not an integer
        /// </exception>
        public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            // optional prefix in front of the numeric HGNC ID
            const string hgncPrefix = "HGNC:";

            var bioType          = BioType.Unknown;
            var geneSymbolSource = GeneSymbolSource.Unknown; // HGNC

            SimpleInterval[]               microRnas   = null;
            DataStructures.VEP.Exon[]      transExons  = null;
            DataStructures.VEP.Gene        gene        = null;
            DataStructures.VEP.Translation translation = null;
            DataStructures.VEP.VariantEffectFeatureCache variantEffectCache = null;
            DataStructures.VEP.Slice slice = null;

            bool onReverseStrand = false;
            bool isCanonical     = false;

            int compDnaCodingStart = -1;
            int compDnaCodingEnd   = -1;

            int  start   = -1;
            int  end     = -1;
            byte version = 1;

            string ccdsId       = null;
            string databaseId   = null;
            string proteinId    = null;
            string refSeqId     = null;
            string geneStableId = null;
            string stableId     = null;

            string geneSymbol = null; // DDX11L1
            int    hgncId     = -1;   // 37102

            // loop over all of the key/value pairs in the transcript object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know what each key is used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case CodingRegionStartKey:
                case CodingRegionEndKey:
                case CreatedDateKey:
                case DescriptionKey:
                case DisplayXrefKey:
                case ExternalDbKey:
                case ExternalDisplayNameKey:
                case ExternalNameKey:
                case ExternalStatusKey:
                case GenePhenotypeKey:
                case ModifiedDateKey:
                case SourceKey:
                case SwissProtKey:
                case TremblKey:
                case UniParcKey:
                    // not used
                    break;

                case AttributesKey:
                    // anything that is not a list node is ignored here
                    var attributesList = ad as ListObjectKeyValue;
                    if (attributesList != null)
                    {
                        microRnas = Attribute.ParseList(attributesList.Values);
                    }
                    break;

                case BiotypeKey:
                    bioType = TranscriptUtilities.GetBiotype(ad);
                    break;

                case CcdsKey:
                    ccdsId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case CdnaCodingEndKey:
                    compDnaCodingEnd = DumperUtilities.GetInt32(ad);
                    break;

                case CdnaCodingStartKey:
                    compDnaCodingStart = DumperUtilities.GetInt32(ad);
                    break;

                case DbIdKey:
                    databaseId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case EndKey:
                    end = DumperUtilities.GetInt32(ad);
                    break;

                case GeneHgncIdKey:
                    var hgnc = DumperUtilities.GetString(ad);

                    // the ID is machine-readable, so the prefix is matched ordinally
                    // rather than with the current culture's linguistic rules
                    if (hgnc != null && hgnc.StartsWith(hgncPrefix, System.StringComparison.Ordinal))
                    {
                        hgnc = hgnc.Substring(hgncPrefix.Length);
                    }

                    // the placeholder check comes after the prefix has been stripped so
                    // that a bare prefix also counts as missing
                    hgnc = NullIfPlaceholder(hgnc);
                    if (hgnc != null)
                    {
                        hgncId = int.Parse(hgnc, System.Globalization.CultureInfo.InvariantCulture);
                    }
                    break;

                case GeneSymbolKey:
                case GeneHgncKey:     // older key
                    geneSymbol = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case GeneSymbolSourceKey:
                    geneSymbolSource = TranscriptUtilities.GetGeneSymbolSource(ad);
                    break;

                case GeneKey:
                    // anything that is not an object node (e.g. a reference) is ignored here
                    var geneNode = ad as ObjectKeyValue;
                    if (geneNode != null)
                    {
                        gene = Gene.Parse(geneNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case GeneStableIdKey:
                    geneStableId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case IsCanonicalKey:
                    isCanonical = DumperUtilities.GetBool(ad);
                    break;

                case ProteinKey:
                    proteinId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case RefseqKey:
                    refSeqId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case SliceKey:
                    // anything that is not an object node (e.g. a reference) is ignored here
                    var sliceNode = ad as ObjectKeyValue;
                    if (sliceNode != null)
                    {
                        slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case StableIdKey:
                    stableId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case StartKey:
                    start = DumperUtilities.GetInt32(ad);
                    break;

                case StrandKey:
                    onReverseStrand = TranscriptUtilities.GetStrand(ad);
                    break;

                case TransExonArrayKey:
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList != null)
                    {
                        transExons = Exon.ParseList(exonsList.Values, dataStore);
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case TranslationKey:
                    var translationNode = ad as ObjectKeyValue;
                    if (translationNode != null)
                    {
                        translation = Translation.Parse(translationNode.Value, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        // an undefined translation is allowed and leaves the transcript without one
                        translation = null;
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case VariationEffectFeatureCacheKey:
                    var cacheNode = ad as ObjectKeyValue;
                    if (cacheNode == null)
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    variantEffectCache = VariantEffectFeatureCache.Parse(cacheNode.Value, dataStore);
                    break;

                case VersionKey:
                    version = (byte)DumperUtilities.GetInt32(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(bioType, transExons, gene, translation, variantEffectCache, slice,
                                                                        onReverseStrand, isCanonical, compDnaCodingStart, compDnaCodingEnd, dataStore.CurrentReferenceIndex, start, end,
                                                                        ccdsId, databaseId, proteinId, refSeqId, geneStableId, stableId, geneSymbol, geneSymbolSource, hgncId, version,
                                                                        microRnas));
        }

        /// <summary>
        /// returns null when the value is missing (null, empty, or the "-" placeholder);
        /// otherwise returns the value unchanged
        /// </summary>
        private static string NullIfPlaceholder(string value)
        {
            return (string.IsNullOrEmpty(value) || value == "-") ? null : value;
        }
Exemple #19
0
        /// <summary>
        /// resolves the references inside a transcript object and applies them to the
        /// transcript stored at <paramref name="transcriptIndex"/> in the data store
        /// </summary>
        /// <param name="objectValue">the key/value pairs that make up the transcript object</param>
        /// <param name="transcriptIndex">position of the target transcript in dataStore.Transcripts</param>
        /// <param name="dataStore">holds the transcripts and is handed to every nested reference parser</param>
        public static void ParseReferences(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            var currentTranscript = dataStore.Transcripts[transcriptIndex];

            // paths that are known to contain references:
            //   'transcript' -> '_variation_effect_feature_cache' -> 'introns' -> 'slice'
            //   'transcript' -> 'gene'
            //   'transcript' -> 'slice'
            //   'transcript' -> '_trans_exon_array' -> []
            //   'transcript' -> 'translation' -> 'end_exon'
            //   'transcript' -> 'translation' -> 'start_exon'
            //   'transcript' -> 'translation' -> 'transcript'

            foreach (AbstractData entry in objectValue)
            {
                // undefined entries have nothing to resolve
                if (DumperUtilities.IsUndefined(entry))
                {
                    continue;
                }

                // keys that are not listed below are ignored
                switch (entry.Key)
                {
                case GeneKey:
                    if (!DumperUtilities.IsReference(entry))
                    {
                        break;
                    }

                    var geneReference = entry as ReferenceKeyValue;
                    if (geneReference != null)
                    {
                        currentTranscript.Gene = Gene.ParseReference(geneReference.Value, dataStore);
                    }
                    break;

                case SliceKey:
                    if (!DumperUtilities.IsReference(entry))
                    {
                        break;
                    }

                    var sliceReference = entry as ReferenceKeyValue;
                    if (sliceReference != null)
                    {
                        currentTranscript.Slice = Slice.ParseReference(sliceReference.Value, dataStore);
                    }
                    break;

                case TransExonArrayKey:
                    var exonNodes = entry as ListObjectKeyValue;
                    if (exonNodes == null)
                    {
                        break;
                    }

                    Exon.ParseListReference(exonNodes.Values, currentTranscript.TransExons, dataStore);
                    break;

                case TranslationKey:
                    var translationObject = entry as ObjectKeyValue;
                    if (translationObject == null)
                    {
                        break;
                    }

                    Translation.ParseReference(translationObject.Value, currentTranscript.Translation, dataStore);
                    break;

                case VariationEffectFeatureCacheKey:
                    var cacheObject = entry as ObjectKeyValue;
                    if (cacheObject == null)
                    {
                        break;
                    }

                    VariantEffectFeatureCache.ParseReference(cacheObject.Value, currentTranscript.VariantEffectCache, dataStore);
                    break;
                }
            }
        }
Exemple #20
0
        /// <summary>
        /// builds a slice from the key/value pairs of a dumper slice object
        /// </summary>
        /// <param name="objectValue">the key/value pairs that make up the slice object</param>
        /// <param name="currentReferenceIndex">the reference index passed on to the new slice</param>
        /// <returns>a slice holding the values that were found; missing values keep their defaults</returns>
        /// <exception cref="GeneralException">thrown when an unknown key is encountered</exception>
        public static DataStructures.VEP.Slice Parse(ObjectValue objectValue, ushort currentReferenceIndex)
        {
            // defaults used when a key is absent from the slice object
            DataStructures.VEP.CoordSystem coordSystem = null;
            string regionName   = null;
            int    regionLength = -1;
            int    sliceStart   = -1;
            int    sliceEnd     = -1;
            bool   circular      = false;
            bool   topLevel      = false;
            bool   reverseStrand = false;

            // walk through every key/value pair in the slice object
            foreach (AbstractData entry in objectValue)
            {
                // refuse keys that this parser has never been told about
                if (!KnownKeys.Contains(entry.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper slice object: {entry.Key}");
                }

                switch (entry.Key)
                {
                case CoordSystemKey:
                    // nodes that are not objects leave the coordinate system untouched
                    var coordSystemObject = entry as ObjectKeyValue;
                    if (coordSystemObject == null)
                    {
                        break;
                    }

                    coordSystem = CoordSystem.Parse(coordSystemObject.Value);
                    break;

                case CircularKey:
                    circular = DumperUtilities.GetBool(entry);
                    break;

                case Transcript.EndKey:
                    sliceEnd = DumperUtilities.GetInt32(entry);
                    break;

                case SequenceRegionLenKey:
                    regionLength = DumperUtilities.GetInt32(entry);
                    break;

                case SequenceRegionNameKey:
                    regionName = DumperUtilities.GetString(entry);
                    break;

                case Transcript.StartKey:
                    sliceStart = DumperUtilities.GetInt32(entry);
                    break;

                case Transcript.StrandKey:
                    reverseStrand = TranscriptUtilities.GetStrand(entry);
                    break;

                case TopLevelSliceKey:
                    topLevel = DumperUtilities.GetBool(entry);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {entry.Key}");
                }
            }

            return new DataStructures.VEP.Slice(
                currentReferenceIndex,
                sliceStart,
                sliceEnd,
                reverseStrand,
                circular,
                topLevel,
                coordSystem,
                regionLength,
                regionName);
        }
Exemple #21
0
        /// <summary>
        /// parses the relevant data from a dumper translation object
        /// </summary>
        /// <param name="objectValue">the key/value pairs that make up the translation object</param>
        /// <param name="dataStore">provides the current reference index used when parsing the exons</param>
        /// <returns>a translation holding the start/end exons, start, end, and version that were found</returns>
        /// <exception cref="GeneralException">
        /// thrown when a key is unknown or when the transcript entry is not a reference
        /// </exception>
        public static DataStructures.VEP.Translation Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var translation = new DataStructures.VEP.Translation();

            // loop over all of the key/value pairs in the translation object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know what each key is used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper translation object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case AdaptorKey:
                case SequenceKey:
                case Transcript.DbIdKey:
                case Transcript.StableIdKey:
                    // skip this key
                    break;

                case EndExonKey:
                    // anything that is not an object node is ignored here
                    var endExonNode = ad as ObjectKeyValue;
                    if (endExonNode != null)
                    {
                        translation.EndExon = Exon.Parse(endExonNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case StartExonKey:
                    // anything that is not an object node is ignored here
                    var startExonNode = ad as ObjectKeyValue;
                    if (startExonNode != null)
                    {
                        translation.StartExon = Exon.Parse(startExonNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case TranscriptKey:
                    // the transcript is only validated here; it is parsed during the references
                    if (!DumperUtilities.IsReference(ad))
                    {
                        throw new GeneralException("Found a Translation->Transcript entry that wasn't a reference.");
                    }
                    break;

                case Transcript.EndKey:
                    translation.End = DumperUtilities.GetInt32(ad);
                    break;

                case Transcript.StartKey:
                    translation.Start = DumperUtilities.GetInt32(ad);
                    break;

                case Transcript.VersionKey:
                    translation.Version = (byte)DumperUtilities.GetInt32(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return translation;
        }
        /// <summary>
        /// parses the relevant data from a dumper protein function predictions object
        /// </summary>
        /// <param name="objectValue">the key/value pairs that make up the predictions object</param>
        /// <returns>the predictions, with PolyPhen and Sift set from the object nodes that were found</returns>
        /// <exception cref="GeneralException">
        /// thrown when a key is unknown or when a node has a type that cannot be handled
        /// </exception>
        public static DataStructures.VEP.ProteinFunctionPredictions Parse(ObjectValue objectValue)
        {
            var predictions = new DataStructures.VEP.ProteinFunctionPredictions();

            // loop over all of the key/value pairs in the protein function predictions object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know what each key is used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper protein function predictions object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case PolyPhenHumDivKey:
                    // not used by default
                    break;

                case PolyPhenKey:
                    // only an undefined entry is accepted for this key
                    if (!DumperUtilities.IsUndefined(ad))
                    {
                        throw new GeneralException($"Could not handle the PolyPhen key: [{ad.GetType()}]");
                    }
                    break;

                case PolyPhenHumVarKey:
                    // used by default
                    var polyPhenHumVarNode = ad as ObjectKeyValue;
                    if (polyPhenHumVarNode != null)
                    {
                        predictions.PolyPhen = PolyPhen.Parse(polyPhenHumVarNode.Value);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        predictions.PolyPhen = null;
                    }
                    else if (!DumperUtilities.IsReference(ad))
                    {
                        // references are not resolved here; every other node type is an error
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case SiftKey:
                    var siftNode = ad as ObjectKeyValue;
                    if (siftNode != null)
                    {
                        predictions.Sift = Sift.Parse(siftNode.Value);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        predictions.Sift = null;
                    }
                    else if (!DumperUtilities.IsReference(ad))
                    {
                        // references are not resolved here; every other node type is an error
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return predictions;
        }