예제 #1
0
        /// <summary>
        /// Fix coding domain sequences whose start or end is out of frame.
        /// The first CDS (in strand order) is asked for a start-frame correction and the last CDS
        /// for an end-frame correction; any UTR returned by those corrections is added to <c>UTRs</c>.
        /// The cached coding sequence is discarded afterwards so it is rebuilt on the next request.
        /// </summary>
        public void FrameCorrection()
        {
            // No coding domains? Nothing to do
            if (CdsSortedStrand == null || CdsSortedStrand.Count == 0)
            {
                return;
            }

            CDS cdsFirst = CdsSortedStrand.First();
            if (cdsFirst != null)
            {
                UTR5Prime utr5 = cdsFirst.StartFrameCorrection();
                if (utr5 != null)
                {
                    UTRs.Add(utr5);

                    // The coding sequence length depends on the total 5' UTR length, so a coding
                    // sequence cached before this point is stale. Drop it so the end-frame
                    // correction below is computed from the updated length.
                    _CodingSequence = null;
                }
            }

            CDS cdsLast = CdsSortedStrand.Last();
            if (cdsLast != null)
            {
                UTR3Prime utr3 = cdsLast.EndFrameCorrection(RetrieveCodingSequence().Count);
                if (utr3 != null)
                {
                    UTRs.Add(utr3);
                }
            }

            // Force a rebuild of the coding sequence after the frame updates
            _CodingSequence = null;
        }
예제 #2
0
        /// <summary>
        /// Get the coding sequence for this transcript.
        /// The sequences of all exons (in strand order) are concatenated, then the combined lengths of the
        /// 5' UTRs and 3' UTRs are trimmed from the start and end respectively.
        /// The result is cached in <c>_CodingSequence</c> and returned directly on subsequent calls.
        /// </summary>
        /// <returns>
        /// The coding sequence, or an empty sequence when any exon has no sequence or when the UTRs
        /// cover more than the concatenated exon sequence.
        /// </returns>
        public ISequence RetrieveCodingSequence()
        {
            if (_CodingSequence != null)
            {
                return _CodingSequence;
            }

            // Concatenate all exon sequences; stop at the first exon without one, because a null
            // sequence must not be handed to the string conversion and the result is empty anyway.
            StringBuilder sequence = new StringBuilder();
            bool missingSequence = false;

            foreach (Exon exon in ExonsSortedStrand)
            {
                if (exon.Sequence == null)
                {
                    missingSequence = true;
                    break;
                }

                // Per the original note: exon sequences are reverse complemented for the reverse strand during loading
                sequence.Append(SequenceExtensions.ConvertToString(exon.Sequence));
            }

            if (missingSequence)
            {
                // One or more exons does not have sequence. Nothing to do
                _CodingSequence = new Sequence(Alphabets.DNA, "");
                return _CodingSequence;
            }

            // OK, all exons have sequences: total up the UTR lengths to trim from each end
            int utr5len = UTRs.OfType<UTR5Prime>().Sum(utr => (int)utr.Length());
            int utr3len = UTRs.OfType<UTR3Prime>().Sum(utr => (int)utr.Length());

            // Cut 5 prime UTR and 3 prime UTR points
            string dnaSequence = sequence.ToString();
            int subEnd = dnaSequence.Length - utr3len;

            if (utr5len > subEnd)
            {
                // UTRs overlap or exceed the exon sequence: no coding bases remain
                _CodingSequence = new Sequence(Alphabets.DNA, "");
            }
            else
            {
                _CodingSequence = new Sequence(Alphabets.AmbiguousDNA, dnaSequence.Substring(utr5len, subEnd - utr5len));
            }

            return _CodingSequence;
        }
예제 #3
0
        /// <summary>
        /// Create UTR regions for this transcript.
        /// Every exon stretch that is not already covered by an existing UTR or a coding domain
        /// sequence is classified as 5' or 3' UTR (depending on strand and on which side of the
        /// coding range it lies) and added to <c>UTRs</c>.
        /// </summary>
        /// <returns>The transcript's UTR list, including any newly created UTRs.</returns>
        /// <exception cref="ArgumentException">No exon could be found for an uncovered interval.</exception>
        public List <UTR> CreateUTRs()
        {
            // Without coding domains there is nothing to measure the UTRs against
            if (CodingDomainSequences.Count == 0)
            {
                return UTRs;
            }

            // Start from the exons and carve out everything already annotated as UTR or CDS
            List <Interval> uncovered = Exons.OfType <Interval>().ToList();
            foreach (Interval annotated in UTRs.Concat(CodingDomainSequences.OfType <Interval>().ToList()))
            {
                uncovered = uncovered.SelectMany(piece => piece.Minus(annotated)).ToList();
            }

            // Outer bounds of the coding region
            long codingMin = CodingDomainSequences.Select(cds => cds.OneBasedStart).Min();
            long codingMax = CodingDomainSequences.Select(cds => cds.OneBasedEnd).Max();

            foreach (Interval gap in uncovered)
            {
                Exon exon = FindExon(gap);
                if (exon == null)
                {
                    throw new ArgumentException("Cannot find exon for UTR: " + gap.ToString());
                }

                bool plusStrand  = IsStrandPlus();
                bool belowCoding = gap.OneBasedEnd <= codingMin;
                bool aboveCoding = gap.OneBasedStart >= codingMax;

                // On the plus strand the 5' side is below the coding range; on the minus strand it is above.
                // The 5' test takes priority, the 3' test is only considered when it fails.
                bool isFivePrime  = plusStrand ? belowCoding : aboveCoding;
                bool isThreePrime = !isFivePrime && (plusStrand ? aboveCoding : belowCoding);

                if (isFivePrime)
                {
                    UTRs.Add(new UTR5Prime(exon, exon.ChromosomeID, exon.Source, exon.Strand, gap.OneBasedStart, gap.OneBasedEnd));
                }
                else if (isThreePrime)
                {
                    UTRs.Add(new UTR3Prime(exon, exon.ChromosomeID, exon.Source, exon.Strand, gap.OneBasedStart, gap.OneBasedEnd));
                }
                // Otherwise the gap lies inside the coding range and no UTR is created for it
            }

            return UTRs;
        }
예제 #4
0
        /// <summary>
        /// Calculate the base number within the CDS that the genomic position 'pos' maps to.
        /// Bases are counted from 0 along the exons in strand order, skipping the part of each exon
        /// that lies before the CDS start.
        ///
        /// When 'pos' falls between exons (intronic) the result depends on usePrevBaseIntron:
        ///             - false => the first coding base of the exon after 'pos'
        ///             - true  => the last coding base of the exon before 'pos'
        /// </summary>
        /// <param name="pos">Genomic position to map</param>
        /// <param name="usePrevBaseIntron">For intronic positions, report the previous coding base instead of the next one</param>
        /// <returns>Base number, or -1 when 'pos' is outside this transcript or inside a UTR</returns>
        public long BaseNumberCds(long pos, bool usePrevBaseIntron)
        {
            // Outside the transcript, or inside a UTR rather than the CDS: no coding base
            if (!Intersects(pos) || UTRs.Any(utr => utr.Intersects(pos)))
            {
                return -1;
            }

            // Make sure the CDS start/end have been calculated
            CalcCdsStartEnd();

            // CDS base number that the first coding base of the current exon maps to
            long cdsBasesSoFar = 0;

            foreach (Exon exon in ExonsSortedStrand)
            {
                if (exon.Intersects(pos))
                {
                    // Offset of 'pos' from the start of the coding part of this exon (5' UTR bases excluded)
                    long offsetInExon;
                    if (IsStrandPlus())
                    {
                        offsetInExon = pos - Math.Max(exon.OneBasedStart, CdsOneBasedStart);
                    }
                    else
                    {
                        offsetInExon = Math.Min(exon.OneBasedEnd, CdsOneBasedStart) - pos;
                    }

                    return cdsBasesSoFar + Math.Max(0, offsetInExon);
                }

                // 'pos' lies before this exon begins (in strand direction), i.e. in the preceding intron
                bool beforeExonOnPlus = IsStrandPlus() && pos < exon.OneBasedStart;
                if (beforeExonOnPlus || (IsStrandMinus() && pos > exon.OneBasedEnd))
                {
                    return usePrevBaseIntron ? cdsBasesSoFar - 1 : cdsBasesSoFar;
                }

                // Skip past this exon: add the number of its bases at or after the CDS start
                if (IsStrandPlus())
                {
                    cdsBasesSoFar += Math.Max(0, exon.OneBasedEnd - Math.Max(exon.OneBasedStart, CdsOneBasedStart) + 1);
                }
                else
                {
                    cdsBasesSoFar += Math.Max(0, Math.Min(CdsOneBasedStart, exon.OneBasedEnd) - exon.OneBasedStart + 1);
                }
            }

            // Past the last exon: report the last base counted
            return cdsBasesSoFar - 1;
        }
예제 #5
0
        /// <summary>
        /// Get the 5' UTRs of the parent transcript, sorted in strand order.
        /// The sorted list is built on the first call and kept in <c>UTRs</c> for later calls.
        /// </summary>
        /// <returns>5' UTRs ordered by ascending start on the plus strand, otherwise by descending end.</returns>
        public List <UTR5Prime> get5primeUtrs()
        {
            // Already computed? Reuse it
            if (UTRs != null)
            {
                return UTRs;
            }

            Transcript transcript = (Transcript)FindParent(typeof(Transcript));

            // Collect the transcript's 5' UTRs, then order them along the strand
            List <UTR5Prime> fivePrimeUtrs = transcript.UTRs.OfType <UTR5Prime>().ToList();

            UTRs = IsStrandPlus()
                ? fivePrimeUtrs.OrderBy(utr => utr.OneBasedStart).ToList()          // Sort by start position
                : fivePrimeUtrs.OrderByDescending(utr => utr.OneBasedEnd).ToList(); // Sort by end position (reversed)

            return UTRs;
        }