/// <summary>
/// Converts a position into a base number within the CDS.
/// The value comes from <c>Transcript.BaseNumberCds(pos, true)</c>; when that call
/// reports a negative number, the result is clamped to one end of the coding sequence.
/// </summary>
/// <param name="pos">Position to convert (compared against <c>Transcript.CdsOneBasedStart</c>)</param>
/// <returns>
/// The non-negative value from <c>Transcript.BaseNumberCds</c>, otherwise either 0 or
/// the number of bases in the coding sequence, depending on strand and on which side
/// of the CDS start 'pos' lies
/// </returns>
protected long CdsBaseNumber(long pos)
{
    long baseNumber = Transcript.BaseNumberCds(pos, true);

    // A non-negative result is used directly
    if (baseNumber >= 0)
    {
        return baseNumber;
    }

    // Negative result: pick the end of the coding sequence to report.
    // At or before the CDS start on the plus strand, or after it on the minus strand => 0.
    // The two remaining combinations => number of bases in the coding sequence.
    bool atOrBeforeCdsStart = pos <= Transcript.CdsOneBasedStart;
    bool plusStrand = Transcript.IsStrandPlus();
    if (atOrBeforeCdsStart == plusStrand)
    {
        return 0;
    }
    return Transcript.RetrieveCodingSequence().Count;
}
/// <summary>
/// Converts a position into a base number within the CDS, clamping positions that
/// fall outside the [cdsStart, cdsEnd] interval to the first (0) or last (Count - 1)
/// index of the coding sequence according to the transcript strand.
/// </summary>
/// <param name="pos">Position to convert</param>
/// <param name="usePrevBaseIntron">Forwarded unchanged to <c>Transcript.BaseNumberCds</c></param>
/// <returns>Index into the coding sequence</returns>
private long cdsBaseNumber(long pos, bool usePrevBaseIntron)
{
    // Below the CDS interval
    if (pos < cdsStart)
    {
        if (Transcript.IsStrandPlus())
        {
            return 0;
        }
        return Transcript.RetrieveCodingSequence().Count - 1;
    }

    // Above the CDS interval
    if (pos > cdsEnd)
    {
        if (Transcript.IsStrandPlus())
        {
            return Transcript.RetrieveCodingSequence().Count - 1;
        }
        return 0;
    }

    // Inside the interval: let the transcript do the mapping
    return Transcript.BaseNumberCds(pos, usePrevBaseIntron);
}
/// <summary>
/// Computes 'netCdsChange', i.e. the effect of the variant on the CDS.
/// Note: a deletion or a MNP might affect several exons, so for variants longer
/// than one base the per-exon net changes are concatenated in
/// <c>Transcript.ExonsSortedStrand</c> order.
/// </summary>
/// <returns>The net change to the coding sequence</returns>
protected override string NetCdsChange()
{
    // Short variant: a single call is enough
    // NOTE(review): the flag passed is IsStrandPlus(); confirm that Variant.NetChange(bool)
    // expects the plus-strand flag rather than a "reverse strand" flag.
    if (Variant.Length() <= 1)
    {
        return Variant.NetChange(Transcript.IsStrandPlus());
    }

    // Longer variant: collect the change exon by exon
    StringBuilder netChange = new StringBuilder();
    foreach (Exon exon in Transcript.ExonsSortedStrand)
    {
        string exonChange = Variant.NetChange(exon);
        if (!exon.IsStrandPlus())
        {
            // Exons not on the plus strand contribute the reverse complement
            exonChange = SequenceExtensions.ConvertToString(
                new Sequence(Alphabets.AmbiguousDNA, exonChange).GetReverseComplementedSequence());
        }
        netChange.Append(exonChange);
    }
    return netChange.ToString();
}
/// <summary>
/// Calculate codons old / codons new.
/// Sets <c>cdsStart</c>/<c>cdsEnd</c>, <c>CodonStartNumber</c>, <c>CodonStartIndex</c>,
/// <c>CodonsReference</c> and <c>CodonsAlternate</c>. Returns without setting the codons
/// when the variant does not intersect the transcript or its CDS interval.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">
/// The codon-rounded end lies more than two bases past the end of the coding sequence
/// </exception>
protected void codonOldNew()
{
    if (!Transcript.Intersects(Variant))
    {
        return;
    }

    // CDS coordinates
    cdsStart = Transcript.IsStrandPlus() ? Transcript.CdsOneBasedStart : Transcript.CdsOneBasedEnd;
    cdsEnd = Transcript.IsStrandPlus() ? Transcript.CdsOneBasedEnd : Transcript.CdsOneBasedStart;

    // Does it intersect CDS?
    if (cdsStart > Variant.OneBasedEnd)
    {
        return;
    }
    if (cdsEnd < Variant.OneBasedStart)
    {
        return;
    }

    // Base number relative to CDS start
    long scStart, scEnd;
    if (Transcript.IsStrandPlus())
    {
        scStart = cdsBaseNumber(Variant.OneBasedStart, false);
        scEnd = cdsBaseNumber(Variant.OneBasedEnd, true);
    }
    else
    {
        scEnd = cdsBaseNumber(Variant.OneBasedStart, true);
        scStart = cdsBaseNumber(Variant.OneBasedEnd, false);
    }

    // Update coordinates
    CodonStartNumber = (int)(scStart / CODON_SIZE);
    CodonStartIndex = (int)(scStart % CODON_SIZE);

    // MNP overlap in coding part
    if (scEnd - scStart < 0)
    {
        return;
    }

    // Round to codon position
    long scStart3 = round3(scStart, false);
    long scEnd3 = round3(scEnd, true);
    // NOTE(review): kept from the original; whether round3 can ever return equal values
    // for start and end is not visible from here.
    if (scEnd3 == scStart3)
    {
        scEnd3 += 3; // At least one codon
    }

    // Append 'N' when the rounded end runs past the last base of the coding sequence
    var codingSequence = Transcript.RetrieveCodingSequence();
    long lastCdsIndex = codingSequence.Count - 1;
    string padN = "";
    long diff = scEnd3 - lastCdsIndex;
    if (diff > 0)
    {
        scEnd3 = lastCdsIndex;

        // Pad with 'N'
        switch (diff)
        {
            case 1:
                padN = "N";
                break;

            case 2:
                padN = "NN";
                break;

            default:
                // Use the (paramName, actualValue, message) overload: the single-string
                // constructor would treat the text as a parameter name, not as the message.
                throw new ArgumentOutOfRangeException(nameof(diff), diff,
                    "Sanity check failed. Number of 'N' pading is :" + diff + ". This should not happen!");
        }
    }

    // Get old codon (reference).
    // scEnd3 is an inclusive index (it is clamped to the last valid index above, and 'append'
    // below takes scEnd3 - scEnd trailing bases), so the span holds scEnd3 - scStart3 + 1 bases.
    // The length must be computed only AFTER scEnd3 has been adjusted/clamped; otherwise the
    // request can run past the end of the coding sequence and the reference comes out one base
    // shorter than the alternate.
    long scLen3 = scEnd3 - scStart3 + 1;
    CodonsReference = SequenceExtensions.ConvertToString(codingSequence.GetSubSequence(scStart3, scLen3));

    // Get new codon (change): unchanged bases before + net change + unchanged bases after
    string prepend = CodonsReference.Substring(0, (int)(scStart - scStart3));
    string append = scEnd3 > scEnd
        ? CodonsReference.Substring(CodonsReference.Length - (int)(scEnd3 - scEnd))
        : "";
    CodonsAlternate = prepend + NetCdsChange() + append;

    // Pad codons with 'N' if required
    CodonsReference += padN;
    CodonsAlternate += padN;

    //---
    // Can we simplify codons?
    // Drop leading codons that are identical in reference and alternate,
    // advancing the start codon number for each one removed.
    //---
    while ((CodonsReference.Length >= 3) && (CodonsAlternate.Length >= 3))
    {
        // First codon
        string cold = CodonsReference.Substring(0, 3);
        string cnew = CodonsAlternate.Substring(0, 3);

        // Codons differ => nothing more to simplify
        if (!cold.Equals(cnew, StringComparison.InvariantCultureIgnoreCase))
        {
            break;
        }

        CodonsReference = CodonsReference.Substring(3);
        CodonsAlternate = CodonsAlternate.Substring(3);
        CodonStartNumber++;
    }
}
/// <summary>
/// Processes a feature from a GFF3 gene model file.
/// Depending on which of the "gene_id", "transcript_id", "exon_id" and "protein_id"
/// attributes are present, this starts a new gene, starts a new transcript (finishing
/// the previous one first), and/or adds an exon or CDS to the current transcript.
/// Features without a strand are ignored, as are transcript records seen before any gene
/// and exon/CDS records seen before any transcript.
/// </summary>
/// <param name="feature">Feature record; its "source", "strand" and "frame" sub-items are read</param>
/// <param name="oneBasedStart">One-based start position of the feature</param>
/// <param name="oneBasedEnd">One-based end position of the feature</param>
/// <param name="chrom">Chromosome the feature lies on; its sequence is read for exon and CDS features</param>
/// <param name="attributes">Attribute key/value pairs of the feature</param>
public void ProcessGff3Feature(MetadataListItem<List<string>> feature, long oneBasedStart, long oneBasedEnd, Chromosome chrom, Dictionary<string, string> attributes)
{
    bool hasGeneId = attributes.TryGetValue("gene_id", out string geneId);
    bool hasTranscriptId = attributes.TryGetValue("transcript_id", out string transcriptId);
    bool hasExonId = attributes.ContainsKey("exon_id"); // only presence matters; the value is not used
    bool hasProteinId = attributes.TryGetValue("protein_id", out string proteinId);
    bool hasSource = feature.SubItems.TryGetValue("source", out List<string> sourceish); // false if empty ("." in GFF format)
    bool hasStrand = feature.SubItems.TryGetValue("strand", out List<string> strandish); // false if empty ("." in GFF format)
    bool hasFrame = feature.SubItems.TryGetValue("frame", out List<string> framey); // false if empty ("." in GFF format)

    string source = hasSource ? sourceish[0] : "";
    if (!hasStrand)
    {
        return; // strand is a required to do anything in this program
    }
    string strand = strandish[0];

    // Frame stays 0 when it is missing or not an integer
    int frame = 0;
    if (hasFrame)
    {
        int.TryParse(framey[0], out frame);
    }

    // A gene id different from the current gene's starts a new gene
    if (hasGeneId && (currentGene == null || geneId != currentGene.ID))
    {
        currentGene = new Gene(geneId, chrom, source, strand, oneBasedStart, oneBasedEnd, feature);
        Genes.Add(currentGene);
        GenomeForest.Add(currentGene);
    }

    // A transcript id different from the current transcript's starts a new transcript
    if (hasTranscriptId && (currentTranscript == null || transcriptId != currentTranscript.ID))
    {
        // A transcript needs a parent gene; skip the record instead of dereferencing null
        if (currentGene == null)
        {
            return;
        }

        // Finish the previous transcript before replacing it
        if (currentTranscript != null)
        {
            Transcript.SetRegions(currentTranscript);
            currentTranscript.FrameCorrection();
        }

        currentTranscript = new Transcript(transcriptId, currentGene, source, strand, oneBasedStart, oneBasedEnd, null, null, feature);
        currentGene.Transcripts.Add(currentTranscript);
        GenomeForest.Add(currentTranscript);
    }

    // Neither an exon nor a CDS: nothing more to do
    if (!hasExonId && !hasProteinId)
    {
        return;
    }

    // Exons and CDS are attached to the current transcript; skip orphan records
    if (currentTranscript == null)
    {
        return;
    }

    if (hasExonId)
    {
        // Zero-based start and inclusive length of the exon on the chromosome sequence
        ISequence exon_dna = chrom.Sequence.GetSubSequence(oneBasedStart - 1, oneBasedEnd - oneBasedStart + 1);

        // 'chrom' was already dereferenced above, so no null fallback is needed for its ID
        Exon exon = new Exon(
            currentTranscript,
            currentTranscript.IsStrandPlus() ? exon_dna : exon_dna.GetReverseComplementedSequence(),
            source,
            oneBasedStart,
            oneBasedEnd,
            chrom.ChromosomeID,
            strand,
            null,
            feature);
        if (exon.Length() > 0)
        {
            currentTranscript.Exons.Add(exon);
        }
    }
    else
    {
        // hasProteinId is true here: the record describes a coding region
        CDS cds = new CDS(currentTranscript, chrom.Sequence.ID, source, strand, oneBasedStart, oneBasedEnd, null, frame);
        if (cds.Length() > 0)
        {
            currentTranscript.CodingDomainSequences.Add(cds);
            currentTranscript.ProteinID = proteinId;
        }
    }
}
/// <summary>
/// Calculate a list of codon changes.
/// Walks the transcript's exons in <c>Transcript.ExonsSortedStrand</c> order, tracks which
/// CDS base each exon starts at, and calls <c>ChangeCodon(exon)</c> for every exon that
/// intersects the variant.
/// </summary>
public virtual void ChangeCodon()
{
    if (!Transcript.Intersects(Variant))
    {
        return;
    }

    // Get coding start (after 5 prime UTR)
    long codingStart = Transcript.CdsOneBasedStart;

    // We may have to calculate 'netCdsChange', which is the effect on the CDS
    NetCodingSequenceChange = NetCdsChange();
    if (RequireNetCdsChange && NetCodingSequenceChange == "")
    {
        // This can happen on mixed changes where the 'InDel' part lies outside the transcript's exons
        CodonsReference = "";
        CodonsAlternate = "";
        return;
    }

    //---
    // Concatenate all exons
    //---

    // CDS base number that the first base of the current exon maps to
    int exonCdsOffset = 0;
    foreach (Exon exon in Transcript.ExonsSortedStrand)
    {
        Exon = exon;

        if (exon.Intersects(Variant))
        {
            // Base number relative to the beginning of the coding part of this exon (i.e. excluding 5'UTRs)
            long offsetInExon;
            if (Transcript.IsStrandPlus())
            {
                long codingStartInExon = Math.Max(exon.OneBasedStart, codingStart);
                long firstVariantBase = Math.Max(Variant.OneBasedStart, codingStartInExon);
                offsetInExon = firstVariantBase - codingStartInExon;
            }
            else
            {
                long codingStartInExon = Math.Min(exon.OneBasedEnd, codingStart);
                long lastVariantBase = Math.Min(Variant.OneBasedEnd, codingStartInExon);
                offsetInExon = codingStartInExon - lastVariantBase;
            }
            if (offsetInExon < 0)
            {
                offsetInExon = 0;
            }

            // Get codon number and index within codon, only if not already set
            if (CodonStartNumber < 0)
            {
                int cdsBase = (int)(exonCdsOffset + offsetInExon);
                CodonStartNumber = cdsBase / CODON_SIZE;
                CodonStartIndex = cdsBase % CODON_SIZE;
            }

            // This is overriden for the specific type of codon change
            bool exonChanged = ChangeCodon(exon);

            // Any change? => It is affecting this exon, so we set the marker (unless one is already set)
            if (exonChanged && !VariantEffects.HasMarker())
            {
                VariantEffects.SetMarker(exon);
            }

            // Can we finish after effect of first exon is added?
            if (ReturnNow)
            {
                return;
            }
        }

        // Advance the CDS offset by the number of coding bases in this exon (never negative)
        if (Transcript.IsStrandPlus())
        {
            exonCdsOffset += (int)Math.Max(0, exon.OneBasedEnd - Math.Max(exon.OneBasedStart, codingStart) + 1);
        }
        else
        {
            exonCdsOffset += (int)Math.Max(0, Math.Min(codingStart, exon.OneBasedEnd) - exon.OneBasedStart + 1);
        }
    }
}