/// <summary>
/// Builds UTR regions for this transcript from the exonic stretches that are not already covered by a UTR or a CDS
/// </summary>
/// <returns>The UTRs of this transcript, including any that were added by this call</returns>
/// <exception cref="ArgumentException">Thrown when no exon can be found for an uncovered stretch</exception>
public List<UTR> CreateUTRs()
{
    // Without coding domains there is nothing to position UTRs against
    if (CodingDomainSequences.Count == 0)
    {
        return UTRs;
    }

    // Start from the exons and carve out everything already annotated as UTR or CDS
    List<Interval> uncovered = Exons.OfType<Interval>().ToList();
    List<Interval> codingIntervals = CodingDomainSequences.OfType<Interval>().ToList();
    foreach (Interval annotated in UTRs.Concat(codingIntervals))
    {
        List<Interval> remainder = new List<Interval>();
        foreach (Interval piece in uncovered)
        {
            remainder.AddRange(piece.Minus(annotated));
        }
        uncovered = remainder;
    }

    // Outer bounds of the coding region on the chromosome
    long codingMin = CodingDomainSequences.Min(c => c.OneBasedStart);
    long codingMax = CodingDomainSequences.Max(c => c.OneBasedEnd);

    foreach (Interval gap in uncovered)
    {
        Exon exon = FindExon(gap);
        if (exon == null)
        {
            throw new ArgumentException("Cannot find exon for UTR: " + gap.ToString());
        }

        bool liesBeforeCoding = gap.OneBasedEnd <= codingMin;
        bool liesAfterCoding = gap.OneBasedStart >= codingMax;

        // On the plus strand the 5' side is before the coding region; on the other strand it is after it
        bool isFivePrime = IsStrandPlus() ? liesBeforeCoding : liesAfterCoding;
        bool isThreePrime = !isFivePrime && (liesBeforeCoding || liesAfterCoding);

        // Stretches that are neither fully before nor fully after the coding region are skipped
        if (isFivePrime)
        {
            UTRs.Add(new UTR5Prime(exon, exon.ChromosomeID, exon.Source, exon.Strand, gap.OneBasedStart, gap.OneBasedEnd));
        }
        else if (isThreePrime)
        {
            UTRs.Add(new UTR3Prime(exon, exon.ChromosomeID, exon.Source, exon.Strand, gap.OneBasedStart, gap.OneBasedEnd));
        }
    }

    return UTRs;
}
/// <summary>
/// Creates coding domains based on another annotated transcript: the coding start is taken from the
/// first CDS (in strand order) of that transcript, and this transcript's spliced RNA is translated
/// from there up to and including the first stop codon.
/// </summary>
/// <param name="withCDS">Annotated transcript whose first CDS supplies the coding start position</param>
/// <returns>true if this transcript was annotated; false if the transcript with CDS did not lead to an annotation</returns>
public bool CreateCDSFromAnnotatedStartCodons(Transcript withCDS)
{
    // Nothing to do if null input
    if (withCDS == null)
    {
        return false;
    }

    // Figure out the start position; a transcript without any CDS cannot provide one,
    // so report "not annotated" instead of throwing from First()
    CDS firstCds = withCDS.CdsSortedStrand.FirstOrDefault();
    if (firstCds == null)
    {
        return false;
    }

    long cdsStartInChrom = IsStrandPlus() ? firstCds.OneBasedStart : firstCds.OneBasedEnd;
    long cdsStartInMrna = BaseNumber2MRnaPos(cdsStartInChrom);
    if (cdsStartInMrna < 0)
    {
        // the coding start wasn't within any of the exons of this transcript
        return false;
    }

    // Figure out the stop codon from translation
    ISequence spliced = SplicedRNA();
    ISequence translateThis = spliced.GetSubSequence(cdsStartInMrna, spliced.Count - cdsStartInMrna);
    ISequence proteinSequence = Translation.OneFrameTranslation(translateThis, Gene.Chromosome.Mitochondrial);
    int stopIdx = proteinSequence.ToList().IndexOf(Alphabets.Protein.Ter);
    if (stopIdx < 0)
    {
        // no stop codon in sight
        return false;
    }

    long endInMrna = cdsStartInMrna + (stopIdx + 1) * GeneModel.CODON_SIZE - 1; // include the stop codon in CDS
    long lengthInMrna = endInMrna - cdsStartInMrna + 1;

    // Figure out the stop index on the chromosome:
    // remove everything on the 5' side of the coding start from the exons, then walk the
    // remaining pieces in strand order until the coding length has been used up
    long utr5ishstart = IsStrandPlus() ? Exons.Min(x => x.OneBasedStart) : cdsStartInChrom + 1;
    long utr5ishend = IsStrandPlus() ? cdsStartInChrom - 1 : Exons.Max(x => x.OneBasedEnd);
    Interval utr5ish = new Interval(null, "", Source, Strand, utr5ishstart, utr5ishend);
    var intervals = SortedStrand(Exons.SelectMany(x => x.Minus(utr5ish)).ToList());

    long lengthSoFar = 0;
    foreach (Interval y in intervals)
    {
        long lengthSum = lengthSoFar + y.Length();
        if (lengthSum <= lengthInMrna)
        {
            // add this whole interval
            var toAdd = new CDS(this, ChromosomeID, Source, Strand, y.OneBasedStart, y.OneBasedEnd, 0);
            CodingDomainSequences.Add(toAdd);
            lengthSoFar += toAdd.Length();
        }
        else if (lengthSoFar < lengthInMrna)
        {
            // chop off the part of this interval that lies beyond the stop codon;
            // on the plus strand that is the high-coordinate end, otherwise the low-coordinate end
            long chopLength = lengthSum - lengthInMrna;
            long start = IsStrandPlus() ? y.OneBasedStart : y.OneBasedStart + chopLength;
            long end = IsStrandPlus() ? y.OneBasedEnd - chopLength : y.OneBasedEnd;
            var toAdd = new CDS(this, ChromosomeID, Source, Strand, start, end, 0);
            CodingDomainSequences.Add(toAdd);
            lengthSoFar += toAdd.Length();
        }
    }

    SetRegions(this);
    return true;
}
/// <summary>
/// Finds the first coding domain sequence of this transcript that the given exon includes
/// </summary>
/// <param name="exon">Exon whose Includes check is applied to each CDS in turn</param>
/// <returns>The first CDS for which exon.Includes is true, or null if there is none</returns>
public CDS FindCds(Exon exon)
{
    foreach (CDS candidate in CodingDomainSequences)
    {
        if (exon.Includes(candidate))
        {
            return candidate;
        }
    }

    return null;
}