コード例 #1
0
        /// <summary>
        /// Looks up the CDS base number for <paramref name="pos"/>, substituting one of the
        /// two CDS extremes when the transcript lookup reports a negative value.
        /// </summary>
        /// <param name="pos">Position handed to <c>Transcript.BaseNumberCds</c> and compared with <c>Transcript.CdsOneBasedStart</c>.</param>
        /// <returns>
        /// The result of <c>Transcript.BaseNumberCds(pos, true)</c> when it is non-negative;
        /// otherwise 0 or the length of the coding sequence, chosen by strand and by the side
        /// of the CDS start on which <paramref name="pos"/> lies.
        /// </returns>
        protected long CdsBaseNumber(long pos)
        {
            long baseNumber = Transcript.BaseNumberCds(pos, true);

            // A non-negative answer is used as is
            if (baseNumber >= 0)
            {
                return baseNumber;
            }

            // Negative answer: decide which end of the CDS to report instead
            bool atOrBeforeCdsStart = pos <= Transcript.CdsOneBasedStart;
            bool plusStrand         = Transcript.IsStrandPlus();

            // Plus strand at/before the start, or minus strand past it => first base (0)
            if (atOrBeforeCdsStart == plusStrand)
            {
                return 0;
            }

            // Remaining combinations => length of the coding sequence
            return Transcript.RetrieveCodingSequence().Count;
        }
コード例 #2
0
 /// <summary>
 /// Returns the CDS base number for <paramref name="pos"/>. Positions outside the
 /// [cdsStart, cdsEnd] interval are mapped to the first (0) or last (Count - 1)
 /// index of the coding sequence, depending on strand.
 /// </summary>
 /// <param name="pos">Position to convert.</param>
 /// <param name="usePrevBaseIntron">Forwarded unchanged to <c>Transcript.BaseNumberCds</c>.</param>
 /// <returns>Base number within the coding sequence.</returns>
 private long cdsBaseNumber(long pos, bool usePrevBaseIntron)
 {
     bool beforeCds = pos < cdsStart;
     bool afterCds  = !beforeCds && pos > cdsEnd;

     // Inside the interval: let the transcript do the mapping
     if (!beforeCds && !afterCds)
     {
         return Transcript.BaseNumberCds(pos, usePrevBaseIntron);
     }

     // Outside the interval: "before" on the plus strand and "after" on the
     // minus strand both map to the first base
     if (beforeCds == Transcript.IsStrandPlus())
     {
         return 0;
     }

     // The other two combinations map to the last base
     return Transcript.RetrieveCodingSequence().Count - 1;
 }
コード例 #3
0
        /// <summary>
        /// Builds the net change of the variant on the coding sequence.
        /// When the variant is longer than one base, the per-exon net changes are
        /// concatenated in the order given by <c>Transcript.ExonsSortedStrand</c>
        /// (a deletion or an MNP might affect several exons).
        /// </summary>
        /// <returns>
        /// The concatenated per-exon changes (reverse-complemented for exons that are not on
        /// the plus strand), or <c>Variant.NetChange(Transcript.IsStrandPlus())</c> for
        /// variants whose length is not greater than one.
        /// </returns>
        protected override string NetCdsChange()
        {
            // Short variant: a single strand-aware call is enough
            if (Variant.Length() <= 1)
            {
                return Variant.NetChange(Transcript.IsStrandPlus());
            }

            StringBuilder netChange = new StringBuilder();

            foreach (Exon exon in Transcript.ExonsSortedStrand)
            {
                string exonChange = Variant.NetChange(exon);

                // Exons that are not on the plus strand contribute the reverse complement
                if (!exon.IsStrandPlus())
                {
                    exonChange = SequenceExtensions.ConvertToString(new Sequence(Alphabets.AmbiguousDNA, exonChange).GetReverseComplementedSequence());
                }

                netChange.Append(exonChange);
            }

            return netChange.ToString();
        }
コード例 #4
0
        /// <summary>
        /// Calculates the reference codons (<c>CodonsReference</c>) and the alternate codons
        /// (<c>CodonsAlternate</c>) affected by the variant, and updates
        /// <c>CodonStartNumber</c> / <c>CodonStartIndex</c>.
        /// Does nothing when the variant does not intersect the transcript or its CDS.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The codon-rounded end lies more than two bases past the end of the coding sequence.
        /// </exception>
        protected void codonOldNew()
        {
            if (!Transcript.Intersects(Variant))
            {
                return;
            }

            // CDS coordinates, ordered so that cdsStart is taken from CdsOneBasedStart on the
            // plus strand and from CdsOneBasedEnd otherwise
            cdsStart = Transcript.IsStrandPlus() ? Transcript.CdsOneBasedStart : Transcript.CdsOneBasedEnd;
            cdsEnd   = Transcript.IsStrandPlus() ? Transcript.CdsOneBasedEnd : Transcript.CdsOneBasedStart;

            // Does it intersect CDS?
            if (cdsStart > Variant.OneBasedEnd)
            {
                return;
            }
            if (cdsEnd < Variant.OneBasedStart)
            {
                return;
            }

            // Base number relative to CDS start
            long scStart, scEnd;

            if (Transcript.IsStrandPlus())
            {
                scStart = cdsBaseNumber(Variant.OneBasedStart, false);
                scEnd   = cdsBaseNumber(Variant.OneBasedEnd, true);
            }
            else
            {
                scEnd   = cdsBaseNumber(Variant.OneBasedStart, true);
                scStart = cdsBaseNumber(Variant.OneBasedEnd, false);
            }

            // Update coordinates
            CodonStartNumber = (int)(scStart / CODON_SIZE);
            CodonStartIndex  = (int)(scStart % CODON_SIZE);

            // MNP overlap in coding part: nothing to do when the range is inverted
            if (scEnd < scStart)
            {
                return;
            }

            // Round to codon position
            long scStart3 = round3(scStart, false);
            long scEnd3   = round3(scEnd, true);

            if (scEnd3 == scStart3)
            {
                scEnd3 += 3; // At least one codon
            }

            // Fetch the coding sequence once; it is needed for both the bounds and the bases
            var  codingSequence = Transcript.RetrieveCodingSequence();
            long lastCdsIndex   = codingSequence.Count - 1;

            // Append 'N' when the last codon runs past the end of the coding sequence
            string padN = "";
            long   diff = scEnd3 - lastCdsIndex;

            if (diff > 0)
            {
                scEnd3 = lastCdsIndex;
                // Pad with 'N'
                switch (diff)
                {
                case 1:
                    padN = "N";
                    break;

                case 2:
                    padN = "NN";
                    break;

                default:
                    // Three-argument constructor: the single-string overload would treat the
                    // message as a parameter name
                    throw new ArgumentOutOfRangeException(nameof(diff), diff, "Sanity check failed. Number of 'N' pading is :" + diff + ". This should not happen!");
                }
            }

            // Get old codon (reference).
            // The length is computed only now, after scEnd3 has received its final value, and
            // counts both end points: scEnd3 is treated as an inclusive index (it is clamped to
            // Count - 1 above, and 'append' below counts the bases strictly after scEnd).
            // NOTE(review): assumes round3(x, true) yields the index of the last base of the
            // codon - confirm against round3.
            long scLen3 = scEnd3 - scStart3 + 1;

            CodonsReference = SequenceExtensions.ConvertToString(codingSequence.GetSubSequence(scStart3, scLen3));

            // Get new codon (change): untouched reference bases before and after the variant
            // surround the net CDS change
            string prepend = CodonsReference.Substring(0, (int)(scStart - scStart3));
            string append  = scEnd3 > scEnd ? CodonsReference.Substring(CodonsReference.Length - (int)(scEnd3 - scEnd)) : "";

            CodonsAlternate = prepend + NetCdsChange() + append;

            // Pad codons with 'N' if required
            CodonsReference += padN;
            CodonsAlternate += padN;

            //---
            // Can we simplify codons?
            // Drop leading codons that are identical in reference and alternate
            //---
            while ((CodonsReference.Length >= 3) && (CodonsAlternate.Length >= 3))
            {
                // First codon
                string cold = CodonsReference.Substring(0, 3);
                string cnew = CodonsAlternate.Substring(0, 3);

                // Different codons => nothing more to simplify
                if (!cold.Equals(cnew, StringComparison.OrdinalIgnoreCase))
                {
                    break;
                }

                CodonsReference = CodonsReference.Substring(3);
                CodonsAlternate = CodonsAlternate.Substring(3);
                CodonStartNumber++;
            }
        }
コード例 #5
0
        /// <summary>
        /// Processes a feature from a GFF3 gene model file.
        /// Depending on the attributes present, this starts a new gene, starts a new transcript
        /// (finishing the previous one), and adds an exon or a CDS to the current transcript.
        /// Features without a strand, and features whose parent gene / transcript or chromosome
        /// sequence is not available, are skipped.
        /// </summary>
        /// <param name="feature">Parsed feature line; its <c>SubItems</c> are read for "source", "strand" and "frame".</param>
        /// <param name="oneBasedStart">One-based start of the feature.</param>
        /// <param name="oneBasedEnd">One-based end of the feature.</param>
        /// <param name="chrom">Chromosome the feature belongs to; its sequence is used for exons and CDS.</param>
        /// <param name="attributes">Feature attributes; "gene_id", "transcript_id", "exon_id" and "protein_id" are read.</param>
        public void ProcessGff3Feature(MetadataListItem <List <string> > feature, long oneBasedStart, long oneBasedEnd, Chromosome chrom, Dictionary <string, string> attributes)
        {
            bool hasGeneId       = attributes.TryGetValue("gene_id", out string geneId);
            bool hasTranscriptId = attributes.TryGetValue("transcript_id", out string transcriptId);
            bool hasExonId       = attributes.ContainsKey("exon_id");
            bool hasProteinId    = attributes.TryGetValue("protein_id", out string proteinId);
            bool hasSource       = feature.SubItems.TryGetValue("source", out List <string> sourceish); // false if empty ("." in GFF format)
            bool hasStrand       = feature.SubItems.TryGetValue("strand", out List <string> strandish); // false if empty ("." in GFF format)
            bool hasFrame        = feature.SubItems.TryGetValue("frame", out List <string> framey);     // false if empty ("." in GFF format)

            string source = hasSource && sourceish != null && sourceish.Count > 0 ? sourceish[0] : "";

            // Strand is required to do anything in this program
            if (!hasStrand || strandish == null || strandish.Count == 0)
            {
                return;
            }
            string strand = strandish[0];
            int    frame  = 0;

            if (hasFrame && framey != null && framey.Count > 0)
            {
                // Leaves 'frame' at 0 when the value is not an integer
                int.TryParse(framey[0], out frame);
            }

            // A gene id different from the current one starts a new gene
            if (hasGeneId && (currentGene == null || geneId != currentGene.ID))
            {
                currentGene = new Gene(geneId, chrom, source, strand, oneBasedStart, oneBasedEnd, feature);
                Genes.Add(currentGene);
                GenomeForest.Add(currentGene);
            }

            // A transcript id different from the current one starts a new transcript
            if (hasTranscriptId && (currentTranscript == null || transcriptId != currentTranscript.ID))
            {
                // A transcript cannot be attached without a parent gene
                if (currentGene == null)
                {
                    return;
                }

                // Finish the previous transcript before replacing it
                if (currentTranscript != null)
                {
                    Transcript.SetRegions(currentTranscript);
                    currentTranscript.FrameCorrection();
                }
                currentTranscript = new Transcript(transcriptId, currentGene, source, strand, oneBasedStart, oneBasedEnd, null, null, feature);
                currentGene.Transcripts.Add(currentTranscript);
                GenomeForest.Add(currentTranscript);
            }

            if (!hasExonId && !hasProteinId)
            {
                return; // nothing to do
            }

            // Exons and CDS need both a parent transcript and the chromosome sequence
            if (currentTranscript == null || chrom?.Sequence == null)
            {
                return;
            }

            if (hasExonId)
            {
                ISequence exonDna = chrom.Sequence.GetSubSequence(oneBasedStart - 1, oneBasedEnd - oneBasedStart + 1);
                Exon      exon    = new Exon(currentTranscript, currentTranscript.IsStrandPlus() ? exonDna : exonDna.GetReverseComplementedSequence(),
                                             source, oneBasedStart, oneBasedEnd, chrom.ChromosomeID, strand, null, feature);
                if (exon.Length() > 0)
                {
                    currentTranscript.Exons.Add(exon);
                }
            }
            else
            {
                CDS cds = new CDS(currentTranscript, chrom.Sequence.ID, source, strand, oneBasedStart, oneBasedEnd, null, frame);
                if (cds.Length() > 0)
                {
                    currentTranscript.CodingDomainSequences.Add(cds);
                    currentTranscript.ProteinID = proteinId;
                }
            }
        }
コード例 #6
0
        /// <summary>
        /// Walks the exons of the transcript in <c>Transcript.ExonsSortedStrand</c> order and,
        /// for every exon intersecting the variant, delegates to <c>ChangeCodon(exon)</c>.
        /// Sets <c>CodonStartNumber</c> / <c>CodonStartIndex</c> the first time an intersecting
        /// exon is seen while <c>CodonStartNumber</c> is still negative.
        /// Does nothing when the variant does not intersect the transcript.
        /// </summary>
        public virtual void ChangeCodon()
        {
            if (!Transcript.Intersects(Variant))
            {
                return;
            }

            // Coding start (after 5 prime UTR)
            long codingStart = Transcript.CdsOneBasedStart;

            // Effect of the variant on the CDS
            NetCodingSequenceChange = NetCdsChange();
            if (RequireNetCdsChange && NetCodingSequenceChange == "")
            {
                // This can happen on mixed changes where the 'InDel' part lies outside the transcript's exons
                CodonsReference = "";
                CodonsAlternate = "";
                return;
            }

            // Running count of CDS bases contributed by the exons already visited,
            // i.e. the CDS base number the first coding base of the current exon maps to
            int cdsBasesBeforeExon = 0;

            foreach (Exon exon in Transcript.ExonsSortedStrand)
            {
                Exon = exon;
                if (exon.Intersects(Variant))
                {
                    // Offset of the variant from the beginning of the coding part of this exon
                    long offsetInExon;

                    if (Transcript.IsStrandPlus())
                    {
                        long codingStartInExon = Math.Max(exon.OneBasedStart, codingStart);
                        offsetInExon = Math.Max(Variant.OneBasedStart, codingStartInExon) - codingStartInExon;
                    }
                    else
                    {
                        long codingStartInExon = Math.Min(exon.OneBasedEnd, codingStart);
                        offsetInExon = codingStartInExon - Math.Min(Variant.OneBasedEnd, codingStartInExon);
                    }

                    // Never report a negative offset
                    offsetInExon = Math.Max(0L, offsetInExon);

                    // Codon number and index within the codon, set only once
                    if (CodonStartNumber < 0)
                    {
                        int cdsBase = (int)(cdsBasesBeforeExon + offsetInExon);
                        CodonStartNumber = cdsBase / CODON_SIZE;
                        CodonStartIndex  = cdsBase % CODON_SIZE;
                    }

                    // Overridden for the specific type of codon change
                    bool changed = ChangeCodon(exon);

                    // First exon with a change becomes the marker
                    if (changed && !VariantEffects.HasMarker())
                    {
                        VariantEffects.SetMarker(exon);
                    }

                    // Can we finish after effect of first exon is added?
                    if (ReturnNow)
                    {
                        return;
                    }
                }

                // Number of coding bases of this exon (may be non-positive for exons before the coding start)
                long codingBasesInExon;

                if (Transcript.IsStrandPlus())
                {
                    codingBasesInExon = exon.OneBasedEnd - Math.Max(exon.OneBasedStart, codingStart) + 1;
                }
                else
                {
                    codingBasesInExon = Math.Min(codingStart, exon.OneBasedEnd) - exon.OneBasedStart + 1;
                }

                cdsBasesBeforeExon += (int)Math.Max(0L, codingBasesInExon);
            }
        }