コード例 #1
0
ファイル: ImportExon.cs プロジェクト: zhouhufeng/Nirvana
        /// <summary>
        /// Converts every member of a list import node into a <c>MutableExon</c>.
        /// </summary>
        /// <param name="importNode">node whose list members are parsed</param>
        /// <param name="chromosome">chromosome forwarded to <c>Parse</c> for each exon</param>
        /// <returns>an array with one exon per list member, in list order</returns>
        /// <exception cref="InvalidDataException">
        /// thrown when the node yields no member list, or when a member is not an <c>ObjectValueNode</c>
        /// </exception>
        public static MutableExon[] ParseList(IImportNode importNode, IChromosome chromosome)
        {
            var listMembers = importNode.GetListMembers();

            if (listMembers == null)
            {
                throw new InvalidDataException("Encountered an exon node that could not be converted to a member list.");
            }

            int memberCount = listMembers.Count;
            var exons       = new MutableExon[memberCount];

            for (var exonIndex = 0; exonIndex < memberCount; exonIndex++)
            {
                var member = listMembers[exonIndex];

                if (!(member is ObjectValueNode objectValue))
                {
                    // a null member also fails the type test; it has no runtime type, so report it
                    // as "null" instead of hitting a NullReferenceException while building the message
                    string memberType = member?.GetType().ToString() ?? "null";
                    throw new InvalidDataException($"Could not transform the AbstractData object into an ObjectValue: [{memberType}]");
                }

                exons[exonIndex] = Parse(objectValue, chromosome);
            }

            return exons;
        }
コード例 #2
0
        /// <summary>
        /// Reads the "Exons" columns and builds one <c>MutableExon</c> per (start, end, phase) triple.
        /// Column 1 holds the exon count; the triples follow from column 2 onwards.
        /// </summary>
        /// <param name="chromosome">chromosome passed to every exon that is created</param>
        /// <returns>the parsed exons, or null when the exon count is 0</returns>
        private MutableExon[] ReadExons(IChromosome chromosome)
        {
            // the columns are machine-written, so parse them culture-independently: under the
            // current culture, negative text such as "-1" is rejected when the negative sign is not '-'
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            var cols = GetColumns("Exons");

            int numExons = int.Parse(cols[1], culture);

            if (numExons == 0)
            {
                // callers receive null (not an empty array) when there are no exons
                return null;
            }

            var exons    = new MutableExon[numExons];
            var colIndex = 2;

            for (var i = 0; i < numExons; i++)
            {
                int start = int.Parse(cols[colIndex++], culture);
                int end   = int.Parse(cols[colIndex++], culture);

                // the stored phase is shifted up by one before being narrowed to a byte
                var phase = (byte)(int.Parse(cols[colIndex++], culture) + 1);

                exons[i] = new MutableExon(chromosome, start, end, phase);
            }

            return exons;
        }
コード例 #3
0
                       endExon) Parse(IImportNode importNode, IChromosome currentChromosome)
        {
            var objectValue = importNode.GetObjectValueNode();

            if (objectValue == null)
            {
                throw new InvalidDataException("Encountered a translation import node that could not be converted to an object value node.");
            }

            int         start          = -1;
            int         end            = -1;
            string      proteinId      = null;
            byte        proteinVersion = 0;
            MutableExon startExon      = null;
            MutableExon endExon        = null;

            foreach (var node in objectValue.Values)
            {
                // sanity check: make sure we know about the keys are used for
                if (!KnownKeys.Contains(node.Key))
                {
                    throw new InvalidDataException($"Encountered an unknown key in the dumper mapper object: {node.Key}");
                }

                ObjectKeyValueNode exonNode;

                switch (node.Key)
                {
                case ImportKeys.Adaptor:
                case ImportKeys.Sequence:
                case ImportKeys.DbId:
                case ImportKeys.Transcript:
                    // skip this key
                    break;

                case ImportKeys.StartExon:
                    exonNode = node as ObjectKeyValueNode;
                    if (exonNode != null)
                    {
                        startExon = ImportExon.Parse(exonNode.Value, currentChromosome);
                    }
                    break;

                case ImportKeys.EndExon:
                    exonNode = node as ObjectKeyValueNode;
                    if (exonNode != null)
                    {
                        endExon = ImportExon.Parse(exonNode.Value, currentChromosome);
                    }
                    break;

                case ImportKeys.StableId:
                    proteinId = node.GetString();
                    break;

                case ImportKeys.End:
                    end = node.GetInt32();
                    break;

                case ImportKeys.Start:
                    start = node.GetInt32();
                    break;

                case ImportKeys.Version:
                    proteinVersion = (byte)node.GetInt32();
                    break;

                default:
                    throw new InvalidDataException($"Unknown key found: {node.Key}");
                }
            }

            return(start, end, proteinId, proteinVersion, startExon, endExon);
        }
コード例 #4
0
        /// <summary>
        /// Builds a <c>MutableTranscript</c> from the key/value pairs of a dumper transcript object.
        /// </summary>
        /// <param name="objectValue">transcript object whose <c>Values</c> are visited key by key</param>
        /// <param name="chromosome">chromosome handed to the exon, translation, gene, and transcript builders</param>
        /// <param name="source">transcript source passed through to the resulting transcript</param>
        /// <returns>the transcript assembled from the visited keys</returns>
        /// <exception cref="InvalidDataException">
        /// thrown when a key is missing from <c>KnownKeys</c> or has no matching case in the switch
        /// </exception>
        public static MutableTranscript Parse(ObjectValueNode objectValue, IChromosome chromosome, Source source)
        {
            // transcript-level values
            int     start     = -1;
            int     end       = -1;
            string  id        = null;
            byte    version   = 1;
            string  ccds      = null;
            string  refSeq    = null;
            bool    canonical = false;
            BioType biotype   = BioType.other;
            string  bamEdit   = null;

            // gene-level values
            int              geneStart       = -1;
            int              geneEnd         = -1;
            string           geneId          = null;
            int              hgncId          = -1;
            bool             onReverseStrand = false;
            string           symbol          = null;
            GeneSymbolSource symbolSource    = GeneSymbolSource.Unknown;

            // translation values
            int         translationStart = -1;
            int         translationEnd   = -1;
            string      proteinId        = null;
            byte        proteinVersion   = 0;
            MutableExon startExon        = null;
            MutableExon endExon          = null;
            int         cdnaCodingStart  = -1;
            int         cdnaCodingEnd    = -1;

            // structure
            MutableExon[]             exons    = null;
            IInterval[]               introns  = null;
            MutableTranscriptRegion[] cdnaMaps = null;

            // attributes
            IInterval[] mirnas           = null;
            IRnaEdit[]  rnaEdits         = null;
            bool        cdsStartNotFound = false;
            bool        cdsEndNotFound   = false;

            // variant effect feature cache
            string peptide         = null;
            string translatableSeq = null;
            string sift            = null;
            string polyphen        = null;
            int[]  selenocysteines = null;

            foreach (var node in objectValue.Values)
            {
                var key = node.Key;

                // reject any key that is not on the list of known keys before dispatching
                if (!KnownKeys.Contains(key))
                {
                    throw new InvalidDataException($"Encountered an unknown key in the dumper transcript object: {key}");
                }

                switch (key)
                {
                    // ----- transcript -----
                    case ImportKeys.StableId:
                        id = node.GetString();
                        break;

                    case ImportKeys.Version:
                        version = (byte)node.GetInt32();
                        break;

                    case ImportKeys.Start:
                        start = node.GetInt32();
                        break;

                    case ImportKeys.End:
                        end = node.GetInt32();
                        break;

                    case ImportKeys.Biotype:
                        biotype = TranscriptUtilities.GetBiotype(node);
                        break;

                    case ImportKeys.IsCanonical:
                        canonical = node.GetBool();
                        break;

                    case ImportKeys.Ccds:
                        ccds = node.GetString();
                        break;

                    case ImportKeys.Refseq:
                        refSeq = node.GetString();
                        break;

                    case ImportKeys.BamEditStatus:
                        bamEdit = node.GetString();
                        break;

                    // ----- gene -----
                    case ImportKeys.Gene:
                        (geneStart, geneEnd, geneId, onReverseStrand) = ImportGene.Parse(node);
                        break;

                    case ImportKeys.GeneHgncId:
                        hgncId = node.GetHgncId();
                        break;

                    case ImportKeys.GeneHgnc: // older key
                    case ImportKeys.GeneSymbol:
                        symbol = node.GetString();
                        break;

                    case ImportKeys.GeneSymbolSource:
                        symbolSource = GeneSymbolSourceHelper.GetGeneSymbolSource(node.GetString());
                        break;

                    // ----- translation -----
                    case ImportKeys.Translation:
                        (translationStart, translationEnd, proteinId, proteinVersion, startExon, endExon) =
                            ImportTranslation.Parse(node, chromosome);
                        break;

                    case ImportKeys.CdnaCodingStart:
                        cdnaCodingStart = node.GetInt32();
                        break;

                    case ImportKeys.CdnaCodingEnd:
                        cdnaCodingEnd = node.GetInt32();
                        break;

                    // ----- structure, attributes, cache -----
                    case ImportKeys.TransExonArray:
                        exons = ImportExon.ParseList(node, chromosome);
                        break;

                    case ImportKeys.Attributes:
                        (mirnas, rnaEdits, cdsStartNotFound, cdsEndNotFound) = Attribute.ParseList(node);
                        break;

                    case ImportKeys.VariationEffectFeatureCache:
                        (cdnaMaps, introns, peptide, translatableSeq, sift, polyphen, selenocysteines) =
                            ImportVariantEffectFeatureCache.Parse(node);
                        break;

                    // ----- known keys that are deliberately ignored -----
                    case ImportKeys.CodingRegionEnd:
                    case ImportKeys.CodingRegionStart:
                    case ImportKeys.CreatedDate:
                    case ImportKeys.DbId:
                    case ImportKeys.Description:
                    case ImportKeys.DisplayXref:
                    case ImportKeys.ExternalDb:
                    case ImportKeys.ExternalDisplayName:
                    case ImportKeys.ExternalName:
                    case ImportKeys.ExternalStatus:
                    case ImportKeys.GenePhenotype:
                    case ImportKeys.GeneStableId:
                    case ImportKeys.ModifiedDate:
                    case ImportKeys.Protein:
                    case ImportKeys.Slice:
                    case ImportKeys.Source:
                    case ImportKeys.Strand:
                    case ImportKeys.SwissProt:
                    case ImportKeys.Trembl:
                    case ImportKeys.UniParc:
                    case ImportKeys.VepLazyLoaded:
                        break;

                    default:
                        throw new InvalidDataException($"Unknown key found: {key}");
                }
            }

            var transcriptAccession = AccessionUtilities.GetMaxVersion(id, version);
            var proteinAccession    = AccessionUtilities.GetMaxVersion(proteinId, proteinVersion);

            var gene = new MutableGene(chromosome, geneStart, geneEnd, onReverseStrand, symbol, symbolSource, geneId,
                                       hgncId);

            // both bounds are derived from the same strand / exon / translation inputs
            var codingRegionStart = GetCodingRegionStart(onReverseStrand, startExon, endExon, translationStart, translationEnd);
            var codingRegionEnd   = GetCodingRegionEnd(onReverseStrand, startExon, endExon, translationStart, translationEnd);
            var codingRegion      = new CodingRegion(codingRegionStart, codingRegionEnd, cdnaCodingStart, cdnaCodingEnd, 0);

            int exonLengthSum = GetTotalExonLength(exons);

            // int.MinValue is used when no translation start exon was found
            int startExonPhase = startExon?.Phase ?? int.MinValue;

            return new MutableTranscript(
                chromosome, start, end, transcriptAccession.Id, transcriptAccession.Version, ccds,
                refSeq, biotype, canonical, codingRegion, proteinAccession.Id, proteinAccession.Version,
                peptide, source, gene, exons, startExonPhase, exonLengthSum, introns, cdnaMaps,
                sift, polyphen, translatableSeq, mirnas, cdsStartNotFound, cdsEndNotFound,
                selenocysteines, rnaEdits, bamEdit);
        }
コード例 #5
0
 /// <summary>
 /// Compares two exons by their Start, End, and Phase values.
 /// </summary>
 /// <param name="x">first exon</param>
 /// <param name="y">second exon</param>
 /// <returns>true when all three values match; false at the first mismatch</returns>
 private static bool ExonEquals(MutableExon x, MutableExon y)
 {
     if (x.Start != y.Start)
     {
         return false;
     }

     if (x.End != y.End)
     {
         return false;
     }

     return x.Phase == y.Phase;
 }