/// <summary>
/// Apply frame correction to the first and last coding domain sequences (in strand order).
/// Any UTR produced by a correction is appended to <c>UTRs</c>, and the cached coding
/// sequence is cleared so that it is rebuilt on the next request.
/// </summary>
public void FrameCorrection()
{
    // Without coding domains there is nothing to correct
    bool hasCodingDomains = CdsSortedStrand != null && CdsSortedStrand.Count != 0;
    if (!hasCodingDomains)
    {
        return;
    }

    // Start of the first CDS: the correction may hand back a 5' UTR
    CDS leadingCds = CdsSortedStrand.First();
    UTR5Prime fivePrimePatch = leadingCds == null ? null : leadingCds.StartFrameCorrection();
    if (fivePrimePatch != null)
    {
        UTRs.Add(fivePrimePatch);
    }

    // End of the last CDS: the correction is given the current coding sequence length
    CDS trailingCds = CdsSortedStrand.Last();
    UTR3Prime threePrimePatch = trailingCds == null
        ? null
        : trailingCds.EndFrameCorrection(RetrieveCodingSequence().Count);
    if (threePrimePatch != null)
    {
        UTRs.Add(threePrimePatch);
    }

    // Drop the cached coding sequence; it is recomputed lazily after this frame update
    _CodingSequence = null;
}
/// <summary>
/// Get the coding sequence for this transcript.
/// The sequence is built by concatenating the sequences of all exons (in strand order) and
/// then trimming the total 5' UTR length from the front and the total 3' UTR length from the back.
/// The result is cached in <c>_CodingSequence</c> and returned directly on later calls.
/// </summary>
/// <returns>
/// The coding sequence. An empty sequence is returned when at least one exon has no sequence,
/// or when the UTR lengths leave no bases between them.
/// </returns>
public ISequence RetrieveCodingSequence()
{
    // Serve the cached value when available
    if (_CodingSequence != null)
    {
        return(_CodingSequence);
    }

    List <Exon> exons = ExonsSortedStrand;
    StringBuilder sequence = new StringBuilder();
    int utr5len = 0;
    int utr3len = 0;

    // 5 prime UTR length
    foreach (UTR utr in UTRs.OfType <UTR5Prime>())
    {
        utr5len += (int)utr.Length();
    }

    // Append all exon sequences
    IAlphabet alphabet = Alphabets.AmbiguousDNA;
    bool missingSequence = false;
    foreach (Exon exon in exons)
    {
        if (exon.Sequence == null)
        {
            // If there is no sequence, we are in trouble. Stop here instead of handing a
            // null sequence to ConvertToString; the concatenation is discarded in this case anyway.
            missingSequence = true;
            break;
        }
        sequence.Append(SequenceExtensions.ConvertToString(exon.Sequence)); // reverse complemented for reverse strand during loading
    }

    if (missingSequence)
    {
        // One or more exons does not have sequence. Nothing to do
        _CodingSequence = new Sequence(Alphabets.DNA, "");
    }
    else
    {
        // OK, all exons have sequences

        // 3 prime UTR length
        foreach (UTR utr in UTRs.OfType <UTR3Prime>())
        {
            utr3len += (int)utr.Length();
        }

        // Cut 5 prime UTR and 3 prime UTR points
        string dnaSequence = sequence.ToString();
        int subEnd = dnaSequence.Length - utr3len;
        int subLen = subEnd - utr5len;

        if (utr5len > subEnd)
        {
            // The UTRs overlap or exceed the exonic sequence: no coding bases remain
            _CodingSequence = new Sequence(Alphabets.DNA, "");
        }
        else
        {
            _CodingSequence = new Sequence(alphabet, dnaSequence.Substring(utr5len, subLen));
        }
    }
    return(_CodingSequence);
}
/// <summary>
/// Create UTR regions for this transcript.
/// Exonic intervals that are covered neither by an existing UTR nor by a coding domain
/// sequence are turned into 5' or 3' UTRs, depending on which side of the coding range
/// they fall on and on the strand of the transcript.
/// </summary>
/// <returns>The transcript's UTR list, including any UTRs added by this call</returns>
/// <exception cref="ArgumentException">If no exon can be found for an uncovered interval</exception>
public List <UTR> CreateUTRs()
{
    // Without coding domains nothing can be classified as UTR
    if (CodingDomainSequences.Count == 0)
    {
        return(UTRs);
    }

    // Start from the exons and carve away everything already annotated as UTR or CDS
    List <Interval> uncovered = Exons.OfType <Interval>().ToList();
    List <Interval> codingIntervals = CodingDomainSequences.OfType <Interval>().ToList();
    foreach (Interval annotated in UTRs.Concat(codingIntervals))
    {
        uncovered = uncovered.SelectMany(piece => piece.Minus(annotated)).ToList();
    }

    // Extent of the coding region
    long codingMin = CodingDomainSequences.Min(cds => cds.OneBasedStart);
    long codingMax = CodingDomainSequences.Max(cds => cds.OneBasedEnd);

    foreach (Interval gap in uncovered)
    {
        Exon host = FindExon(gap);
        if (host == null)
        {
            throw new ArgumentException("Cannot find exon for UTR: " + gap.ToString());
        }

        bool plusStrand = IsStrandPlus();
        bool belowCoding = gap.OneBasedEnd <= codingMin;
        bool aboveCoding = gap.OneBasedStart >= codingMax;

        // On the plus strand the 5' side is below the coding range; on the minus strand it is above.
        // The 5' test takes precedence over the 3' test.
        bool isFivePrime = plusStrand ? belowCoding : aboveCoding;
        bool isThreePrime = !isFivePrime && (plusStrand ? aboveCoding : belowCoding);

        if (isFivePrime)
        {
            UTRs.Add(new UTR5Prime(host, host.ChromosomeID, host.Source, host.Strand, gap.OneBasedStart, gap.OneBasedEnd));
        }
        else if (isThreePrime)
        {
            UTRs.Add(new UTR3Prime(host, host.ChromosomeID, host.Source, host.Strand, gap.OneBasedStart, gap.OneBasedEnd));
        }
        // Intervals on neither side of the coding range are left alone
    }

    return(UTRs);
}
/// <summary>
/// Calculate the base number in the CDS that 'pos' maps to.
///
/// When 'pos' is intronic, the result depends on usePrevBaseIntron:
///   - false => the first base in the exon after 'pos' (i.e. first coding base after the intron)
///   - true  => the last base in the exon before 'pos' (i.e. last coding base before the intron)
/// </summary>
/// <param name="pos">Position to map</param>
/// <param name="usePrevBaseIntron">Selects which neighbouring coding base is reported for an intronic position</param>
/// <returns>Base number, or '-1' if 'pos' is outside this transcript or inside a UTR</returns>
public long BaseNumberCds(long pos, bool usePrevBaseIntron)
{
    // Positions outside the transcript, or inside any UTR, do not map to a coding base
    if (!Intersects(pos) || UTRs.Any(region => region.Intersects(pos)))
    {
        return(-1);
    }

    // Calculate cdsStart and cdsEnd (if not already done)
    CalcCdsStartEnd();

    // Number of CDS bases contributed by the exons visited so far, i.e. the CDS base number
    // that the first coding base of the current exon maps to
    long cdsBasesBefore = 0;

    foreach (Exon exon in ExonsSortedStrand)
    {
        if (exon.Intersects(pos))
        {
            // Offset relative to the beginning of the coding part of this exon (i.e. excluding 5'UTRs)
            long offsetInExon;
            if (IsStrandPlus())
            {
                offsetInExon = pos - Math.Max(exon.OneBasedStart, CdsOneBasedStart);
            }
            else
            {
                offsetInExon = Math.Min(exon.OneBasedEnd, CdsOneBasedStart) - pos;
            }

            if (offsetInExon < 0)
            {
                offsetInExon = 0;
            }
            return(cdsBasesBefore + offsetInExon);
        }

        // Does 'pos' lie before this exon begins (in strand direction)?
        bool beforeExon = (IsStrandPlus() && pos < exon.OneBasedStart) ||
                          (IsStrandMinus() && pos > exon.OneBasedEnd);
        if (beforeExon)
        {
            return(usePrevBaseIntron ? cdsBasesBefore - 1 : cdsBasesBefore);
        }

        // Add the coding length of this exon; exons with no coding bases contribute nothing
        long codingLength;
        if (IsStrandPlus())
        {
            codingLength = exon.OneBasedEnd - Math.Max(exon.OneBasedStart, CdsOneBasedStart) + 1;
        }
        else
        {
            codingLength = Math.Min(CdsOneBasedStart, exon.OneBasedEnd) - exon.OneBasedStart + 1;
        }

        if (codingLength > 0)
        {
            cdsBasesBefore += codingLength;
        }
    }

    // 'pos' is past every exon: report the last accumulated base
    return(cdsBasesBefore - 1);
}
/// <summary>
/// Get the 5' UTRs of the parent transcript, ordered along the strand.
/// The list is built on first use and stored in <c>UTRs</c>; later calls return the stored list.
/// </summary>
/// <returns>5' UTRs sorted by ascending start on the plus strand, otherwise by descending end</returns>
public List <UTR5Prime> get5primeUtrs()
{
    // Already computed
    if (UTRs != null)
    {
        return(UTRs);
    }

    // Collect the 5' UTRs of the enclosing transcript
    Transcript owner = (Transcript)FindParent(typeof(Transcript));
    UTRs = owner.UTRs.OfType <UTR5Prime>().ToList();

    // Order them in strand direction
    IEnumerable <UTR5Prime> ordered;
    if (IsStrandPlus())
    {
        ordered = UTRs.OrderBy(candidate => candidate.OneBasedStart); // Sort by start position
    }
    else
    {
        ordered = UTRs.OrderByDescending(candidate => candidate.OneBasedEnd); // Sort by end position (reversed)
    }
    UTRs = ordered.ToList();

    return(UTRs);
}