Ejemplo n.º 1
0
        /// <summary>
        /// Resolves the reference entries of a translation object: the end exon, start exon and
        /// transcript references are parsed and assigned to a translation that already exists.
        /// </summary>
        /// <param name="objectValue">the key/value pairs of the translation object</param>
        /// <param name="translation">the previously created translation whose members are filled in</param>
        /// <param name="dataStore">the import data store handed to the individual reference parsers</param>
        /// <exception cref="GeneralException">thrown when a reference carries a key that is not handled here</exception>
        public static void ParseReference(ObjectValue objectValue, DataStructures.VEP.Translation translation, ImportDataStore dataStore)
        {
            foreach (AbstractData entry in objectValue)
            {
                // only entries that are flagged as references and are ReferenceKeyValue instances are processed
                var reference = DumperUtilities.IsReference(entry) ? entry as ReferenceKeyValue : null;
                if (reference == null)
                {
                    continue;
                }

                var target = reference.Value;

                switch (reference.Key)
                {
                case StartExonKey:
                    translation.StartExon = Exon.ParseReference(target, dataStore);
                    break;

                case EndExonKey:
                    translation.EndExon = Exon.ParseReference(target, dataStore);
                    break;

                case TranscriptKey:
                    translation.Transcript = Transcript.ParseReference(target, dataStore);
                    break;

                case AdaptorKey:
                    // the adaptor reference is intentionally ignored
                    break;

                default:
                    throw new GeneralException(
                              $"Found an unhandled reference in the translation object: {entry.Key}");
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses the relevant data from a dumper transcript object and adds the resulting
        /// transcript to the Transcripts collection of <paramref name="dataStore"/>.
        /// </summary>
        /// <param name="objectValue">the key/value pairs of the transcript object</param>
        /// <param name="transcriptIndex">index of the transcript; not used by the parsing logic</param>
        /// <param name="dataStore">supplies the current reference index and receives the parsed transcript</param>
        /// <exception cref="GeneralException">
        /// thrown when an unknown key is encountered, or when the exon array, translation or
        /// variation effect feature cache entry does not have the expected node type
        /// </exception>
        /// <exception cref="System.FormatException">thrown by int.Parse when the HGNC id is not a valid integer</exception>
        public static void Parse(ObjectValue objectValue, int transcriptIndex, ImportDataStore dataStore)
        {
            var bioType          = BioType.Unknown;
            var geneSymbolSource = GeneSymbolSource.Unknown; // HGNC

            SimpleInterval[]               microRnas   = null;
            DataStructures.VEP.Exon[]      transExons  = null;
            DataStructures.VEP.Gene        gene        = null;
            DataStructures.VEP.Translation translation = null;
            DataStructures.VEP.VariantEffectFeatureCache variantEffectCache = null;
            DataStructures.VEP.Slice slice = null;

            bool onReverseStrand = false;
            bool isCanonical     = false;

            int compDnaCodingStart = -1;
            int compDnaCodingEnd   = -1;

            int  start   = -1;
            int  end     = -1;
            byte version = 1;

            string ccdsId       = null;
            string databaseId   = null;
            string proteinId    = null;
            string refSeqId     = null;
            string geneStableId = null;
            string stableId     = null;

            string geneSymbol = null; // DDX11L1
            int    hgncId     = -1;   // 37102

            // loop over all of the key/value pairs in the transcript object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure that every key we encounter is a known key
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper transcript object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case CodingRegionStartKey:
                case CodingRegionEndKey:
                case CreatedDateKey:
                case DescriptionKey:
                case DisplayXrefKey:
                case ExternalDbKey:
                case ExternalDisplayNameKey:
                case ExternalNameKey:
                case ExternalStatusKey:
                case GenePhenotypeKey:
                case ModifiedDateKey:
                case SourceKey:
                case SwissProtKey:
                case TremblKey:
                case UniParcKey:
                    // not used
                    break;

                case AttributesKey:
                    // entries that are not a list are silently skipped (microRnas stays as it was)
                    var attributesList = ad as ListObjectKeyValue;
                    if (attributesList != null)
                    {
                        microRnas = Attribute.ParseList(attributesList.Values);
                    }
                    break;

                case BiotypeKey:
                    bioType = TranscriptUtilities.GetBiotype(ad);
                    break;

                case CcdsKey:
                    ccdsId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case CdnaCodingEndKey:
                    compDnaCodingEnd = DumperUtilities.GetInt32(ad);
                    break;

                case CdnaCodingStartKey:
                    compDnaCodingStart = DumperUtilities.GetInt32(ad);
                    break;

                case DbIdKey:
                    databaseId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case EndKey:
                    end = DumperUtilities.GetInt32(ad);
                    break;

                case GeneHgncIdKey:
                    var hgnc = DumperUtilities.GetString(ad);

                    // the id may carry an "HGNC:" prefix; strip it before parsing the number.
                    // An ordinal comparison is used because this is an identifier, not linguistic text.
                    if (hgnc != null && hgnc.StartsWith("HGNC:", System.StringComparison.Ordinal))
                    {
                        hgnc = hgnc.Substring(5);
                    }

                    hgnc = NullIfPlaceholder(hgnc);

                    if (hgnc != null)
                    {
                        // machine-readable data: parse independently of the current culture
                        hgncId = int.Parse(hgnc, System.Globalization.CultureInfo.InvariantCulture);
                    }
                    break;

                case GeneSymbolKey:
                case GeneHgncKey:     // older key
                    geneSymbol = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case GeneSymbolSourceKey:
                    geneSymbolSource = TranscriptUtilities.GetGeneSymbolSource(ad);
                    break;

                case GeneKey:
                    var geneNode = ad as ObjectKeyValue;
                    if (geneNode != null)
                    {
                        gene = Gene.Parse(geneNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case GeneStableIdKey:
                    geneStableId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case IsCanonicalKey:
                    isCanonical = DumperUtilities.GetBool(ad);
                    break;

                case ProteinKey:
                    proteinId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case RefseqKey:
                    refSeqId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case SliceKey:
                    var sliceNode = ad as ObjectKeyValue;
                    if (sliceNode != null)
                    {
                        slice = Slice.Parse(sliceNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case StableIdKey:
                    stableId = NullIfPlaceholder(DumperUtilities.GetString(ad));
                    break;

                case StartKey:
                    start = DumperUtilities.GetInt32(ad);
                    break;

                case StrandKey:
                    onReverseStrand = TranscriptUtilities.GetStrand(ad);
                    break;

                case TransExonArrayKey:
                    // unlike the attributes, the exon array must be a list
                    var exonsList = ad as ListObjectKeyValue;
                    if (exonsList != null)
                    {
                        transExons = Exon.ParseList(exonsList.Values, dataStore);
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into a ListObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case TranslationKey:
                    // a translation is either an object node or explicitly undefined
                    var translationNode = ad as ObjectKeyValue;
                    if (translationNode != null)
                    {
                        translation = Translation.Parse(translationNode.Value, dataStore);
                    }
                    else if (DumperUtilities.IsUndefined(ad))
                    {
                        translation = null;
                    }
                    else
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    break;

                case VariationEffectFeatureCacheKey:
                    var cacheNode = ad as ObjectKeyValue;
                    if (cacheNode == null)
                    {
                        throw new GeneralException($"Could not transform the AbstractData object into an ObjectKeyValue: [{ad.GetType()}]");
                    }
                    variantEffectCache = VariantEffectFeatureCache.Parse(cacheNode.Value, dataStore);
                    break;

                case VersionKey:
                    // NOTE(review): unchecked narrowing cast; values outside 0-255 are truncated
                    version = (byte)DumperUtilities.GetInt32(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            dataStore.Transcripts.Add(new DataStructures.VEP.Transcript(bioType, transExons, gene, translation, variantEffectCache, slice,
                                                                        onReverseStrand, isCanonical, compDnaCodingStart, compDnaCodingEnd, dataStore.CurrentReferenceIndex, start, end,
                                                                        ccdsId, databaseId, proteinId, refSeqId, geneStableId, stableId, geneSymbol, geneSymbolSource, hgncId, version,
                                                                        microRnas));
        }

        /// <summary>
        /// returns null when the string is one of the placeholders "-" or "", otherwise
        /// returns the string unchanged (a null input stays null)
        /// </summary>
        private static string NullIfPlaceholder(string value)
        {
            return (value == "-" || value == "") ? null : value;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses the relevant data from a translation object: the start/end positions, the
        /// version and any start/end exon that is stored inline as an object node.
        /// </summary>
        /// <param name="objectValue">the key/value pairs of the translation object</param>
        /// <param name="dataStore">supplies the current reference index used when parsing exons</param>
        /// <returns>a new translation populated from the parsed keys</returns>
        /// <exception cref="GeneralException">
        /// thrown when an unknown key is encountered or when the transcript entry is not a reference
        /// </exception>
        public static DataStructures.VEP.Translation Parse(ObjectValue objectValue, ImportDataStore dataStore)
        {
            var translation = new DataStructures.VEP.Translation();

            // loop over all of the key/value pairs in the translation object
            foreach (AbstractData ad in objectValue)
            {
                // sanity check: make sure that every key we encounter is a known key
                if (!KnownKeys.Contains(ad.Key))
                {
                    throw new GeneralException($"Encountered an unknown key in the dumper translation object: {ad.Key}");
                }

                // handle each key
                switch (ad.Key)
                {
                case AdaptorKey:
                case SequenceKey:
                case Transcript.DbIdKey:
                case Transcript.StableIdKey:
                    // skip this key
                    break;

                case EndExonKey:
                    // entries that are not object nodes are left untouched here
                    // (presumably references that are resolved in a later pass - TODO confirm)
                    var endExonNode = ad as ObjectKeyValue;
                    if (endExonNode != null)
                    {
                        translation.EndExon = Exon.Parse(endExonNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case StartExonKey:
                    // same handling as the end exon: only inline object nodes are parsed
                    var startExonNode = ad as ObjectKeyValue;
                    if (startExonNode != null)
                    {
                        translation.StartExon = Exon.Parse(startExonNode.Value, dataStore.CurrentReferenceIndex);
                    }
                    break;

                case TranscriptKey:
                    // parse this during the references
                    if (!DumperUtilities.IsReference(ad))
                    {
                        throw new GeneralException("Found a Translation->Transcript entry that wasn't a reference.");
                    }
                    break;

                case Transcript.EndKey:
                    translation.End = DumperUtilities.GetInt32(ad);
                    break;

                case Transcript.StartKey:
                    translation.Start = DumperUtilities.GetInt32(ad);
                    break;

                case Transcript.VersionKey:
                    // NOTE(review): unchecked narrowing cast; values outside 0-255 are truncated
                    translation.Version = (byte)DumperUtilities.GetInt32(ad);
                    break;

                default:
                    throw new GeneralException($"Unknown key found: {ad.Key}");
                }
            }

            return translation;
        }