Beispiel #1
0
        /// <summary>
        /// parses the relevant data from each variant effect feature cache
        /// </summary>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.VariantEffectFeatureCache cache, ImportDataStore dataStore)
        {
            // loop over all of the key/value pairs in the cache object
            foreach (AbstractData ad in objectValue)
            {
                switch (ad.Key)
                {
                case IntronsKey:
                    var intronsList = ad as ListObjectKeyValue;
                    if (intronsList != null)
                    {
                        Intron.ParseListReference(intronsList.Values, cache.Introns, dataStore);
                    }
                    break;

                case MapperKey:
                    var mapperNode = ad as ObjectKeyValue;
                    if (mapperNode != null)
                    {
                        TranscriptMapper.ParseReference(mapperNode.Value, cache.Mapper, dataStore);
                    }
                    break;

                case ProteinFunctionPredictionsKey:
                    var predictionsNode = ad as ObjectKeyValue;
                    if (predictionsNode != null)
                    {
                        ProteinFunctionPredictions.ParseReference(predictionsNode.Value, cache.ProteinFunctionPredictions, dataStore);
                    }
                    break;
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// parses the relevant data from each variant effect feature cache
        /// </summary>
        public static DataStructures.VEP.VariantEffectFeatureCache Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var cache = new DataStructures.VEP.VariantEffectFeatureCache();

            // loop over all of the key/value pairs in the cache object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException(
                              $"Encountered an unknown key in the dumper variant effect feature cache object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case SelenocysteinesKey:
                case ThreePrimeUtrKey:
                case SeqEditsKey:
                case CodonTableKey:
                case ProteinFeaturesKey:
                    // not used
                    break;

                case IntronsKey:
                    var intronsList = ad as ListObjectKeyValue;
                    if (intronsList != null)
                    {
                        cache.Introns = Intron.ParseList(intronsList.Values, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        cache.Introns = null;
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case MapperKey:
                    var mapperNode = ad as ObjectKeyValue;
                    if (mapperNode != null)
                    {
                        cache.Mapper = TranscriptMapper.Parse(mapperNode.Value, dataStore);
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case PeptideKey:
                    cache.Peptide = DumperUtilities.GetString(ad);
                    break;

                case ProteinFunctionPredictionsKey:
                    var predictionsNode = ad as ObjectKeyValue;
                    if (predictionsNode != null)
                    {
                        cache.ProteinFunctionPredictions = ProteinFunctionPredictions.Parse(predictionsNode.Value);
                    }
                    else
                    {
                        throw new GeneralException(
                                  $"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }

                    break;

                case SortedExonsKey:
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList != null)
                    {
                        cache.Exons = Exon.ParseList(exonsList.Values, dataStore);
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case TranslateableSeqKey:
                    cache.TranslateableSeq = DumperUtilities.GetString(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return(cache);
        }
Beispiel #3
0
        /// <summary>
        /// parses the relevant data from each transcript
        /// </summary>
        public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            // Console.WriteLine("*** Parse {0} ***", transcriptIndex + 1);

            var bioType          = BioType.Unknown;
            var geneSymbolSource = GeneSymbolSource.Unknown; // HGNC

            SimpleInterval[]               microRnas   = null;
            DataStructures.VEP.Exon[]      transExons  = null;
            DataStructures.VEP.Gene        gene        = null;
            DataStructures.VEP.Translation translation = null;
            DataStructures.VEP.VariantEffectFeatureCache variantEffectCache = null;
            DataStructures.VEP.Slice slice = null;

            bool onReverseStrand = false;
            bool isCanonical     = false;

            int compDnaCodingStart = -1;
            int compDnaCodingEnd   = -1;

            int  start   = -1;
            int  end     = -1;
            byte version = 1;

            string ccdsId       = null;
            string databaseId   = null;
            string proteinId    = null;
            string refSeqId     = null;
            string geneStableId = null;
            string stableId     = null;

            string geneSymbol = null; // DDX11L1
            int    hgncId     = -1;   // 37102

            // loop over all of the key/value pairs in the transcript object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case CodingRegionStartKey:
                case CodingRegionEndKey:
                case CreatedDateKey:
                case DescriptionKey:
                case DisplayXrefKey:
                case ExternalDbKey:
                case ExternalDisplayNameKey:
                case ExternalNameKey:
                case ExternalStatusKey:
                case GenePhenotypeKey:
                case ModifiedDateKey:
                case SourceKey:
                case SwissProtKey:
                case TremblKey:
                case UniParcKey:
                    // not used
                    break;

                case AttributesKey:
                    var attributesList = ad as ListObjectKeyValue;
                    if (attributesList != null)
                    {
                        microRnas = Attribute.ParseList(attributesList.Values);
                    }
                    break;

                case BiotypeKey:
                    bioType = TranscriptUtilities.GetBiotype(ad);
                    break;

                case CcdsKey:
                    ccdsId = DumperUtilities.GetString(ad);
                    if (ccdsId == "-" || ccdsId == "")
                    {
                        ccdsId = null;
                    }
                    break;

                case CdnaCodingEndKey:
                    compDnaCodingEnd = DumperUtilities.GetInt32(ad);
                    break;

                case CdnaCodingStartKey:
                    compDnaCodingStart = DumperUtilities.GetInt32(ad);
                    break;

                case DbIdKey:
                    databaseId = DumperUtilities.GetString(ad);
                    if (databaseId == "-" || databaseId == "")
                    {
                        databaseId = null;
                    }
                    break;

                case EndKey:
                    end = DumperUtilities.GetInt32(ad);
                    break;

                case GeneHgncIdKey:
                    var hgnc = DumperUtilities.GetString(ad);
                    if (hgnc != null && hgnc.StartsWith("HGNC:"))
                    {
                        hgnc = hgnc.Substring(5);
                    }
                    if (hgnc == "-" || hgnc == "")
                    {
                        hgnc = null;
                    }

                    if (hgnc != null)
                    {
                        hgncId = int.Parse(hgnc);
                    }
                    break;

                case GeneSymbolKey:
                case GeneHgncKey:     // older key
                    geneSymbol = DumperUtilities.GetString(ad);
                    if (geneSymbol == "-" || geneSymbol == "")
                    {
                        geneSymbol = null;
                    }
                    break;

                case GeneSymbolSourceKey:
                    geneSymbolSource = TranscriptUtilities.GetGeneSymbolSource(ad);
                    break;

                case GeneKey:
                    var geneNode = ad as ObjectKeyValue;
                    if (geneNode != null)
                    {
                        gene = Gene.Parse(geneNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case GeneStableIdKey:
                    geneStableId = DumperUtilities.GetString(ad);
                    if (geneStableId == "-" || geneStableId == "")
                    {
                        geneStableId = null;
                    }
                    break;

                case IsCanonicalKey:
                    isCanonical = DumperUtilities.GetBool(ad);
                    break;

                case ProteinKey:
                    proteinId = DumperUtilities.GetString(ad);
                    if (proteinId == "-" || proteinId == "")
                    {
                        proteinId = null;
                    }
                    break;

                case RefseqKey:
                    refSeqId = DumperUtilities.GetString(ad);
                    if (refSeqId == "-" || refSeqId == "")
                    {
                        refSeqId = null;
                    }
                    break;

                case SliceKey:
                    var sliceNode = ad as ObjectKeyValue;
                    if (sliceNode != null)
                    {
                        slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case StableIdKey:
                    stableId = DumperUtilities.GetString(ad);
                    if (stableId == "-" || stableId == "")
                    {
                        stableId = null;
                    }
                    break;

                case StartKey:
                    start = DumperUtilities.GetInt32(ad);
                    break;

                case StrandKey:
                    onReverseStrand = TranscriptUtilities.GetStrand(ad);
                    break;

                case TransExonArrayKey:
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList != null)
                    {
                        transExons = Exon.ParseList(exonsList.Values, dataStore);
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case TranslationKey:
                    var translationNode = ad as ObjectKeyValue;
                    if (translationNode != null)
                    {
                        translation = Translation.Parse(translationNode.Value, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        translation = null;
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case VariationEffectFeatureCacheKey:
                    var cacheNode = ad as ObjectKeyValue;
                    if (cacheNode == null)
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    variantEffectCache = VariantEffectFeatureCache.Parse(cacheNode.Value, dataStore);
                    break;

                case VersionKey:
                    version = (byte)DumperUtilities.GetInt32(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(bioType, transExons, gene, translation, variantEffectCache, slice,
                                                                        onReverseStrand, isCanonical, compDnaCodingStart, compDnaCodingEnd, dataStore.CurrentReferenceIndex, start, end,
                                                                        ccdsId, databaseId, proteinId, refSeqId, geneStableId, stableId, geneSymbol, geneSymbolSource, hgncId, version,
                                                                        microRnas));
        }