Ejemplo n.º 1
0
        public void TranslateHardReverseStrand()
        {
            // Protein expected from translating the first transcript of the reverse-strand gene model
            const string expectedProtein =
                "MNLQAQPKAQNKRKRCLFGGQEPAPKEQPPPLQPPQQSIRVKEEQYLGHEGPGGAVSTSQ" +
                "PVELPPPSSLALLNSVVYGPERTSAAMLSQQVASVKWPNSVMAPGRGPERGGGGGVSDSS" +
                "WQQQPGQPPPHSTWNCHSLSLYSATKGSPHPGVGVPTYYNHPEALKREKAGGPQLDRYVR" +
                "PMMPQKVQLEVGRPQAPLNSFHAAKKPPNQSLPLQPFQLAFGHQVNRQVFRQGPPPPNPV" +
                "AAFPPQKQQQQQQPQQQQQQQQAALPQMPLFENFYSMPQQPSQQPQDFGLQPAGPLGQSH" +
                "LAHHSMAPYPFPPNPDMNPELRKALLQDSAPQPALPQVQIPFPRRSRRLSKEGILPPSAL" +
                "DGAGTQPGQEATGNLFLHHWPLQQPPPGSLGQPHPEALGFPLELRESQLLPDGERLAPNG" +
                "REREAPAMGSEEGMRAVSTGDCGQVLRGGVIQSTRRRRRASQEANLLTLAQKAVELASLQ" +
                "NAKDGSGSEEKRKSVLASTTKCGVEFSEPSLATKRAREDSGMVPLIIPVSVPVRTVDPTE" +
                "AAQAGGLDEDGKGPEQNPAEHKPSVIVTRRRSTRIPGTDAQAQAEDMNVKLEGEPSVRKP" +
                "KQRPRPEPLIIPTKAGTFIAPPVYSNITPYQSHLRSPVRLADHPSERSFELPPYTPPPIL" +
                "SPVREGSGLYFNAIISTSTIPAPPPITPKSAHRTLLRTNSAEVTPPVLSVMGEATPVSIE" +
                "PRINVGSRFQAEIPLMRDRALAAADPHKADLVWQPWEDLESSREKQRQVEDLLTAACSSI" +
                "FPGAGTNQELALHCLHESRGDILETLNKLLLKKPLRPHNHPLATYHYTGSDQWKMAERKL" +
                "FNKGIAIYKKDFFLVQKLIQTKTVAQCVEFYYTYKKQVKIGRNGTLTFGDVDTSDEKSAQ" +
                "EEVEVDIKTSQKFPRVPLPRRESPSEERLEPKREVKEPRKEGEEEVPEIQEKEEQEEGRE" +
                "RSRRAAAVKATQTLQANESASDILILRSHESNAPGSAGGQASEKPREGTGKSRRALPFSE" +
                "KKKKTETFSKTQNQENTFPCKKCGR";

            // Load the chromosome, build the gene model from the GFF3 and translate it
            string genomePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Homo_sapiens.GRCh38.dna.chromosome.14.fa");
            Genome referenceGenome = new Genome(genomePath);

            string annotationPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "HardReverseStrand", "reverse.gff3");
            GeneModel model = new GeneModel(referenceGenome, annotationPath);
            List <Protein> translated = model.Translate(true).ToList();

            // Coding sequence stored alongside the test data
            string codingSequencePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "HardReverseStrand", "codingSeq.fa");
            ISequence expectedCds = new FastAParser().Parse(codingSequencePath).First();

            // The retrieved coding sequence must match the FASTA file
            string expectedCdsText = SequenceExtensions.ConvertToString(expectedCds);
            string actualCdsText = SequenceExtensions.ConvertToString(model.Genes[0].Transcripts[0].RetrieveCodingSequence());
            Assert.AreEqual(expectedCdsText, actualCdsText);

            // The first translated protein must match the expected amino-acid sequence
            Assert.AreEqual(expectedProtein, translated[0].BaseSequence);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Calculate 'reference' codons: the bases of the transcript's coding sequence
        /// covering <paramref name="numCodons"/> codons, starting at codon <c>CodonStartNumber</c>.
        /// The requested window is clamped to the coding sequence, so a window that runs past
        /// the end yields a shorter string, which is then padded with 'N'.
        /// </summary>
        /// <param name="numCodons">Number of codons to retrieve</param>
        /// <returns>
        /// The codon bases, padded with 'N' to a multiple of three; an empty string when the
        /// clamped window contains no bases.
        /// </returns>
        protected string CodonsRef(int numCodons)
        {
            ISequence cds = Transcript.RetrieveCodingSequence();
            int       len = (int)cds.Count;

            // Requested window [start, end) in CDS coordinates
            int start = CodonStartNumber * CODON_SIZE;
            int end   = start + numCodons * CODON_SIZE;

            // Clamp the window to the coding sequence
            if (start < 0)
            {
                start = 0;
            }
            if (start > len)
            {
                start = len;
            }
            if (end > len)
            {
                end = len;
            }

            // Use the clamped length (end - start); the unclamped requested length
            // could extend past the end of the coding sequence.
            string codon = "";
            if (end > start)
            {
                codon = SequenceExtensions.ConvertToString(cds.GetSubSequence(start, end - start));
            }

            // Codon not multiple of three? Add missing bases as 'N'
            if (codon.Length % 3 == 1)
            {
                codon += "NN";
            }
            else if (codon.Length % 3 == 2)
            {
                codon += "N";
            }

            return(codon);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Apply a multi-nucleotide polymorphism to this marker's sequence and store the
        /// result in <paramref name="markerSeq"/>.
        /// </summary>
        /// <param name="variant">Variant whose second allele replaces the overlapping bases</param>
        /// <param name="markerSeq">Marker that receives the modified sequence</param>
        protected void ApplyMnp(Variant variant, IntervalSequence markerSeq)
        {
            // Offset of the variant within this marker, and offset into the ALT allele
            long offsetInMarker = variant.OneBasedStart - OneBasedStart;
            long offsetInAlt    = 0;

            if (offsetInMarker < 0)
            {
                // Variant starts before this marker: skip the leading ALT bases that fall outside
                offsetInAlt    = -offsetInMarker;
                offsetInMarker = 0;
            }

            long overlapSize = variant.IntersectSize(this);
            long tailStart   = offsetInMarker + overlapSize;

            // Work on the sequence in positive strand direction
            ISequence seq;
            if (IsStrandPlus())
            {
                seq = Sequence;
            }
            else
            {
                seq = Sequence.GetReverseComplementedSequence();
            }

            // Unchanged head + replacement bases + unchanged tail
            string head        = SequenceExtensions.ConvertToString(seq, 0, offsetInMarker);
            String replacement = variant.SecondAlleleString.Substring((int)offsetInAlt, (int)overlapSize);
            string tail        = SequenceExtensions.ConvertToString(seq, tailStart);

            seq = new Sequence(seq.Alphabet, head + replacement + tail);

            // Store the result in the marker's own strand orientation
            if (IsStrandPlus())
            {
                markerSeq.Sequence = seq;
            }
            else
            {
                markerSeq.Sequence = seq.GetReverseComplementedSequence();
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Get the coding sequence for this transcript: the concatenated exon sequences
        /// (in strand order) with the 5' and 3' UTR lengths trimmed from the two ends.
        /// The result is cached in <c>_CodingSequence</c> after the first call.
        /// </summary>
        /// <returns>
        /// The coding sequence, or an empty DNA sequence when any exon lacks a sequence or
        /// when the UTRs leave no coding bases.
        /// </returns>
        public ISequence RetrieveCodingSequence()
        {
            if (_CodingSequence != null)
            {
                return(_CodingSequence);
            }

            // 5 prime UTR length
            int utr5len = 0;

            foreach (UTR utr in UTRs.OfType <UTR5Prime>())
            {
                utr5len += (int)utr.Length();
            }

            // Concatenate all exons; exon sequences are reverse complemented for the reverse strand during loading
            StringBuilder sequence = new StringBuilder();

            foreach (Exon exon in ExonsSortedStrand)
            {
                if (exon.Sequence == null)
                {
                    // One or more exons does not have sequence. Nothing to do.
                    // Stop here instead of handing a null sequence to the string conversion.
                    _CodingSequence = new Sequence(Alphabets.DNA, "");
                    return(_CodingSequence);
                }
                sequence.Append(SequenceExtensions.ConvertToString(exon.Sequence));
            }

            // OK, all exons have sequences

            // 3 prime UTR length
            int utr3len = 0;

            foreach (UTR utr in UTRs.OfType <UTR3Prime>())
            {
                utr3len += (int)utr.Length();
            }

            // Cut 5 prime UTR and 3 prime UTR points
            string dnaSequence = sequence.ToString();
            int    subEnd      = dnaSequence.Length - utr3len;
            int    subLen      = subEnd - utr5len;

            if (utr5len > subEnd)
            {
                // UTRs cover the whole transcript: no coding bases left
                _CodingSequence = new Sequence(Alphabets.DNA, "");
            }
            else
            {
                _CodingSequence = new Sequence(Alphabets.AmbiguousDNA, dnaSequence.Substring(utr5len, subLen));
            }
            return(_CodingSequence);
        }
        /// <summary>
        /// Derive reference and alternate codons by applying the variant to the transcript and
        /// comparing the two resulting coding sequences.
        /// Slow; intended for complex variants only.
        /// </summary>
        protected void CodonsRefAlt()
        {
            // Transcript with the variant applied
            Transcript variantTranscript = Transcript.ApplyVariant(Variant) as Transcript;

            // Alternate CDS first, then the reference CDS
            ISequence alternateCds = variantTranscript.RetrieveCodingSequence();
            cdsAlt = SequenceExtensions.ConvertToString(alternateCds);

            ISequence referenceCds = Transcript.RetrieveCodingSequence();
            cdsRef = SequenceExtensions.ConvertToString(referenceCds);

            // Calculate differences: CDS
            cdsDiff();
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Net sequence change of this variant: the reference allele for a deletion,
 /// otherwise the second allele.
 /// </summary>
 /// <param name="reverseStrand">When true, the change is returned reverse complemented</param>
 /// <returns>The net change as a string</returns>
 public string NetChange(bool reverseStrand)
 {
     // Deletions have an empty 'alt', so the reference allele carries the change
     string allele = isDel() ? ReferenceAlleleString : SecondAlleleString;

     if (!reverseStrand)
     {
         return(allele);
     }

     Sequence forward = new Sequence(Alphabets.DNA, allele);

     return(SequenceExtensions.ConvertToString(forward.GetReverseComplementedSequence()));
 }
Ejemplo n.º 7
0
        public void SequenceIndexOfSingleSegment()
        {
            // Single contiguous segment holding the values 1..5
            byte[] values = new byte[] { 1, 2, 3, 4, 5 };
            var    rob    = new ReadOnlyBytes(values);

            Assert.Equal(values.Length, rob.Length);

            // Call the static method explicitly so ReadOnlyBytes.IndexOf is not picked instead.
            // A value that is not present yields -1.
            Assert.Equal(-1, SequenceExtensions.IndexOf(rob, 0));

            // Each stored value (index + 1) is found at its own index
            for (int index = 0; index < values.Length; index++)
            {
                byte needle = (byte)(index + 1);
                Assert.Equal(index, SequenceExtensions.IndexOf(rob, needle));
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// We may have to calculate 'netCdsChange', which is the effect on the CDS.
        /// Note: A deletion or a MNP might affect several exons
        /// </summary>
        /// <returns>
        /// The net change in transcript strand orientation: the per-exon changes concatenated
        /// in strand order for variants longer than one base, otherwise the variant's net change.
        /// </returns>
        protected override string NetCdsChange()
        {
            if (Variant.Length() > 1)
            {
                StringBuilder sb = new StringBuilder();
                foreach (Exon exon in Transcript.ExonsSortedStrand)
                {
                    // Reverse complement the per-exon change for exons that are not on the plus strand
                    string seq = Variant.NetChange(exon);
                    sb.Append(exon.IsStrandPlus() ? seq : SequenceExtensions.ConvertToString(new Sequence(Alphabets.AmbiguousDNA, seq).GetReverseComplementedSequence()));
                }
                return(sb.ToString());
            }

            // The flag asks NetChange to reverse complement, so it must be set for
            // transcripts that are NOT on the plus strand (matching the per-exon branch above).
            return(Variant.NetChange(!Transcript.IsStrandPlus()));
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Apply a single-nucleotide change to this marker's sequence and store the result
        /// in <paramref name="markerSeq"/>.
        /// </summary>
        /// <param name="variant">Variant whose second allele replaces the base at its start position</param>
        /// <param name="markerSeq">Marker that receives the modified sequence</param>
        protected void ApplySnp(Variant variant, IntervalSequence markerSeq)
        {
            // Work on the sequence in positive strand direction
            ISequence seq;

            if (IsStrandPlus())
            {
                seq = Sequence;
            }
            else
            {
                seq = Sequence.GetReverseComplementedSequence();
            }

            // Zero-based position of the changed base within the marker
            long position = variant.OneBasedStart - OneBasedStart;

            // Bases before the change (none when the change is at the very start)
            string prefix = "";
            if (position > 0)
            {
                prefix = SequenceExtensions.ConvertToString(seq, 0, position);
            }

            string replacement = variant.SecondAlleleString;

            // Bases after the change (none when the change is the last base)
            string suffix = "";
            if (position + 1 < seq.Count)
            {
                suffix = SequenceExtensions.ConvertToString(seq, position + 1);
            }

            seq = new Sequence(seq.Alphabet, prefix + replacement + suffix);

            // Store the result in the marker's own strand orientation
            if (IsStrandPlus())
            {
                markerSeq.Sequence = seq;
            }
            else
            {
                markerSeq.Sequence = seq.GetReverseComplementedSequence();
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Translate the concatenated exon sequences in three reading frames and keep the
        /// longest stretch between stop symbols. Not used or tested right now...
        /// </summary>
        /// <param name="exons">Exons whose sequences are concatenated in list order</param>
        /// <param name="proteinID">Identifier passed to the resulting protein</param>
        /// <returns>The protein for the longest open reading frame, or null when the DNA contains 'N'</returns>
        public static Protein ThreeFrameTranslation(List <Exon> exons, string proteinID)
        {
            string seq = string.Concat(exons.Select(exon => SequenceExtensions.ConvertToString(exon.Sequence)));

            if (seq.Contains('N'))
            {
                return(null);
            }

            // Transcribe in the orientation given by the first exon's strand
            ISequence dna = new Sequence(Alphabets.DNA, seq);
            ISequence template;

            if (exons[0].IsStrandPlus())
            {
                template = dna;
            }
            else
            {
                template = dna.GetReverseComplementedSequence();
            }
            ISequence rna = Transcription.Transcribe(template);

            // One translation per reading-frame offset 0, 1 and 2
            ISequence[] frames = new ISequence[3];

            for (int offset = 0; offset < frames.Length; offset++)
            {
                frames[offset] = ProteinTranslation.Translate(rna, offset);
            }

            // Split every frame at '*' and keep the longest piece
            string longestOrf = frames
                                .SelectMany(frame => SequenceExtensions.ConvertToString(frame).Split('*'))
                                .OrderByDescending(orf => orf.Length)
                                .FirstOrDefault();

            return(new Protein(longestOrf, proteinID));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Build the 5' UTR sequence by concatenating, for each 5' UTR, the leading part of
        /// its parent exon's sequence.
        /// </summary>
        /// <returns>The concatenated UTR sequence as a string</returns>
        public string getSequence()
        {
            StringBuilder utrText = new StringBuilder();

            foreach (UTR5Prime utr in get5primeUtrs())
            {
                Exon      parentExon = (Exon)utr.Parent;
                ISequence exonSeq    = parentExon.Sequence;
                var       utrLength  = utr.Length();

                // UTR5' may stop before end of exon: keep only the first 'utrLength' bases then
                ISequence piece = utrLength < exonSeq.Count
                                  ? exonSeq.GetSubSequence(0, utrLength)
                                  : exonSeq;

                utrText.Append(SequenceExtensions.ConvertToString(piece));
            }
            return(utrText.ToString());
        }
Ejemplo n.º 12
0
        public void SequenceIndexOfMultiSegment()
        {
            // Two segments holding the values 1..4
            var firstSegment  = new byte[] { 1, 2 };
            var secondSegment = new byte[] { 3, 4 };

            ReadOnlyBytes rob = ListHelper.CreateRob(firstSegment, secondSegment);

            Assert.Equal(4, rob.Length);

            // Call the static method explicitly so ReadOnlyBytes.IndexOf is not picked instead.
            // A value that is not present yields -1.
            Assert.Equal(-1, SequenceExtensions.IndexOf(rob, 0));

            // Each stored value (index + 1) is found at its own index across segments
            for (int index = 0; index < rob.Length; index++)
            {
                byte needle = (byte)(index + 1);
                Assert.Equal(index, SequenceExtensions.IndexOf(rob, needle));
            }
        }
        /// <summary>
        /// Get original codons in CDS: the codon at <c>CodonStartNumber</c> in lower case,
        /// with the base at <c>CodonStartIndex</c> capitalized.
        /// A codon truncated by the end of the coding sequence is returned with the bases available.
        /// </summary>
        /// <returns>The codon string, or an empty string when the CDS is empty or the codon lies past its end</returns>
        protected override string CodonsRef()
        {
            int numCodons = 1;

            // Get CDS
            ISequence cdsStr = Transcript.RetrieveCodingSequence();
            long      cdsLen = cdsStr.Count;

            // Calculate minBase (first codon base in the CDS)
            int minBase = CodonStartNumber * CODON_SIZE;

            if (minBase < 0)
            {
                minBase = 0;
            }

            // Calculate maxBase (one past the last codon base), clamped to the CDS
            long maxBase = (long)minBase + numCodons * CODON_SIZE;

            if (maxBase > cdsLen)
            {
                maxBase = cdsLen;
            }

            // Sanity checks
            if (cdsLen == 0 ||      // Empty CDS => Cannot get codon (e.g. one or more exons are missing their sequences)
                (cdsLen <= minBase) // Codon past CDS sequence => Cannot get codon
                )
            {
                return("");
            }

            // Create codon sequence. Use the clamped length: asking for a full CODON_SIZE would
            // throw when the last codon of the CDS is incomplete.
            int    codonLength = (int)(maxBase - minBase);
            char[] codonChars  = SequenceExtensions.ConvertToString(cdsStr).Substring(minBase, codonLength).ToLower(CultureInfo.InvariantCulture).ToCharArray();

            // Capitalize changed base (invariant culture, matching the lower-casing above)
            if (CodonStartIndex < codonChars.Length)
            {
                codonChars[CodonStartIndex] = char.ToUpperInvariant(codonChars[CodonStartIndex]);
            }

            return(new String(codonChars));
        }
Ejemplo n.º 14
0
        public void SequencePositionOfMultiSegment()
        {
            // Two segments holding the values 1..4
            var firstSegment  = new byte[] { 1, 2 };
            var secondSegment = new byte[] { 3, 4 };

            var(list, length) = MemoryList.Create(firstSegment, secondSegment);
            var rob = new ReadOnlyBytes(list, length);

            Assert.Equal(4, length);
            Assert.Equal(4, rob.Length);

            // Call the static method explicitly so instance methods are not picked instead.
            // A value that is not present yields Position.End.
            Assert.Equal(Position.End, SequenceExtensions.PositionOf(list, 0));

            for (int i = 0; i < rob.Length; i++)
            {
                var needle = (byte)(i + 1);

                // The position found in the list must point at the searched value
                var listPosition = SequenceExtensions.PositionOf(list, needle);
                var(node, index) = listPosition.Get <IMemoryList <byte> >();
                Assert.Equal(needle, node.Memory.Span[index]);

                // ReadOnlyBytes must report the same position
                var bytesPosition = rob.PositionOf(needle);
                Assert.Equal(listPosition, bytesPosition);

                // After slicing off the first byte, only the first value is no longer found
                var sliced = rob.Slice(1);
                bytesPosition = sliced.PositionOf(needle);
                if (i == 0)
                {
                    Assert.Equal(Position.End, bytesPosition);
                }
                else
                {
                    Assert.Equal(listPosition, bytesPosition);
                }

                // Slicing at the found position must start with the searched value
                if (listPosition != Position.End)
                {
                    sliced = rob.Slice(listPosition);
                    Assert.Equal(needle, sliced.First.Span[0]);
                }
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Check whether the first codon of the coding sequence fails to be a START codon.
        /// </summary>
        /// <returns>
        /// False when the transcript is not protein coding; true when the coding sequence is
        /// shorter than one codon or its first codon is not in the applicable START codon set.
        /// </returns>
        public bool isErrorStartCodon()
        {
            // Only protein coding transcripts can have this error
            if (!IsProteinCoding())
            {
                return(false);
            }

            // Not even one codon in this protein? Error
            ISequence cds = RetrieveCodingSequence();

            if (cds.Count < 3)
            {
                return(true);
            }

            ISequence firstCodon = cds.GetSubSequence(0, 3);
            string    codon      = SequenceExtensions.ConvertToString(firstCodon).ToUpper(CultureInfo.InvariantCulture);

            // Mitochondrial chromosomes use the vertebrate mitochondrial codon table
            bool isStartCodon;

            if (Gene.Chromosome.Mitochondrial)
            {
                isStartCodon = CodonsVertebrateMitochondrial.START_CODONS.Contains(codon);
            }
            else
            {
                isStartCodon = CodonsStandard.START_CODONS.Contains(codon);
            }

            return(!isStartCodon);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Translate <paramref name="dnaSeq"/> into a protein, truncated at the first
        /// termination symbol, and store it in the <c>protein</c> member.
        /// </summary>
        /// <param name="dnaSeq">DNA sequence to translate</param>
        /// <param name="selenocysteineContaining">
        /// Optional map from protein ID to a sequence containing 'U' at selenocysteine positions;
        /// null is treated as an empty map.
        /// </param>
        /// <returns>The translated protein</returns>
        private Protein Protein(ISequence dnaSeq, Dictionary <string, string> selenocysteineContaining)
        {
            selenocysteineContaining = selenocysteineContaining ?? new Dictionary <string, string>();
            bool          hasSelenocysteine = selenocysteineContaining.TryGetValue(ProteinID, out string selenocysteineContainingSeq);
            HashSet <int> uIndices          = !hasSelenocysteine ?
                                              new HashSet <int>() :
                                              new HashSet <int>(Enumerable.Range(0, selenocysteineContainingSeq.Length).Where(i => selenocysteineContainingSeq[i] == 'U'));

            // Translate protein sequence, and replace stop symbols with selenocysteines at the
            // indices where the known sequence for this protein ID has a 'U'
            ISequence proteinSequence = Translation.OneFrameTranslation(dnaSeq, Gene.Chromosome.Mitochondrial);
            string    proteinBases    = !hasSelenocysteine?
                                        SequenceExtensions.ConvertToString(proteinSequence) :
                                            new string(Enumerable.Range(0, (int)proteinSequence.Count).Select(i => uIndices.Contains(i) && proteinSequence[i] == Alphabets.Protein.Ter ? (char)Alphabets.Protein.U : (char)proteinSequence[i]).ToArray());

            // Keep only the part before the first remaining termination symbol
            string proteinSequenceString = proteinBases.Split((char)Alphabets.Protein.Ter)[0];
            string annotations           = String.Join(" ", VariantAnnotations);
            string accession             = Translation.GetSafeProteinAccession(ProteinID);

            // Organism name restored: the literal had been corrupted to "H**o sapiens"
            protein = new Protein(proteinSequenceString, accession, organism: "Homo sapiens", name: annotations, fullName: annotations, sequenceVariations: ProteinSequenceVariations.ToList());
            return(protein);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Apply a duplication to this marker's sequence and store the result in
        /// <paramref name="markerSeq"/>.
        /// </summary>
        /// <param name="variant">Duplication variant; the duplicated bases are those where it intersects this marker</param>
        /// <param name="markerSeq">Marker that receives the modified sequence</param>
        protected void ApplyDup(Variant variant, IntervalSequence markerSeq)
        {
            // Work on the sequence in positive strand direction
            ISequence seq;

            if (IsStrandPlus())
            {
                seq = Sequence;
            }
            else
            {
                seq = Sequence.GetReverseComplementedSequence();
            }

            // Bases to duplicate, and the index of the base preceding the variant start
            String duplicated = SequenceExtensions.ConvertToString(GetSequence(Intersect(variant)));
            long   precedingIndex = variant.OneBasedStart - OneBasedStart - 1;

            string combined;

            if (precedingIndex < 0)
            {
                // Variant starts at or before the marker start: put the copy in front
                combined = duplicated + SequenceExtensions.ConvertToString(seq);
            }
            else
            {
                // Insert the copy after the base at 'precedingIndex'
                string head = SequenceExtensions.ConvertToString(seq, 0, precedingIndex + 1);
                string tail = SequenceExtensions.ConvertToString(seq, precedingIndex + 1);
                combined = head + duplicated + tail;
            }
            seq = new Sequence(seq.Alphabet, combined);

            // Store the result in the marker's own strand orientation
            if (IsStrandPlus())
            {
                markerSeq.Sequence = seq;
            }
            else
            {
                markerSeq.Sequence = seq.GetReverseComplementedSequence();
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Apply an insertion to this marker's sequence and store the result in
        /// <paramref name="markerSeq"/>.
        /// </summary>
        /// <param name="variant">Insertion variant; its net change relative to this marker is inserted</param>
        /// <param name="markerSeq">Marker that receives the modified sequence</param>
        protected void ApplyIns(Variant variant, IntervalSequence markerSeq)
        {
            // Work on the sequence in positive strand direction
            ISequence seq;

            if (IsStrandPlus())
            {
                seq = Sequence;
            }
            else
            {
                seq = Sequence.GetReverseComplementedSequence();
            }

            // Bases to insert, and the index of the base preceding the variant start
            String inserted       = variant.NetChange(this);
            long   precedingIndex = variant.OneBasedStart - OneBasedStart - 1;

            string combined;

            if (precedingIndex < 0)
            {
                // Variant starts at or before the marker start: put the inserted bases in front
                combined = inserted + SequenceExtensions.ConvertToString(seq);
            }
            else
            {
                // Insert after the base at 'precedingIndex'
                string head = SequenceExtensions.ConvertToString(seq, 0, precedingIndex + 1);
                string tail = SequenceExtensions.ConvertToString(seq, precedingIndex + 1);
                combined = head + inserted + tail;
            }
            seq = new Sequence(seq.Alphabet, combined);

            // Store the result in the marker's own strand orientation
            if (IsStrandPlus())
            {
                markerSeq.Sequence = seq;
            }
            else
            {
                markerSeq.Sequence = seq.GetReverseComplementedSequence();
            }
        }
        public void TestMissenseMutation()
        {
            // Build a minimal plus-strand gene model: one chromosome "1" with sequence AAA,
            // one gene, one transcript, one exon and one CDS, all spanning bases 1-3
            byte[]   bases = "AAA".Select(cc => (byte)cc).ToArray();
            Sequence seq   = new Sequence(Alphabets.DNA, bases, false);

            seq.ID = "1";

            Chromosome chromosome = new Chromosome(seq, null);
            Gene       gene       = new Gene("", chromosome, "", "+", 1, 3, null);
            Transcript transcript = new Transcript("", gene, "", "+", 1, 3, "", null, null);
            Exon       exon       = new Exon(transcript, seq, "", 1, 3, seq.ID, "+", null, null);

            transcript.Exons = new List <Exon> {
                exon
            };

            CDS codingDomain = new CDS(transcript, seq.ID, "", "+", 1, 3, null, 0);

            transcript.CodingDomainSequences = new List <CDS> {
                codingDomain
            };

            // ugh.vcf holds a homozygous A -> G change at position 2 of chromosome 1,
            // turning the codon AAA (K) into AGA (R)
            string         vcfPath  = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestVcfs", "ugh.vcf");
            List <Variant> variants = new VCFParser(vcfPath).Select(v => new Variant(null, v, new Chromosome(seq, null))).ToList();

            // Apply the variants to the transcript
            transcript.Variants = new HashSet <Variant>(variants);
            List <Transcript> variantTranscripts = GeneModel.ApplyVariantsCombinitorially(transcript);
            Transcript        mutated            = variantTranscripts[0];

            // The original transcript is untouched and still codes for K
            Assert.AreEqual("AAA", SequenceExtensions.ConvertToString(transcript.Exons[0].Sequence));
            Assert.AreEqual("K", transcript.Protein().BaseSequence);

            // The variant transcript carries the change and codes for R
            Assert.AreEqual("AGA", SequenceExtensions.ConvertToString(mutated.Exons[0].Sequence));
            Assert.AreEqual("R", mutated.Protein().BaseSequence);

            // Make sure it gets annotated as a missense mutation
            Assert.IsTrue(variantTranscripts[0].VariantAnnotations.Any(str => str.Contains(FunctionalClass.MISSENSE.ToString())));
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Get original codons in CDS: all whole codons touched by the variant's span,
        /// or everything from the first touched codon to the end of the coding sequence
        /// when the span's last codon runs past it.
        /// </summary>
        /// <returns>The reference codons, or an empty string when there is no net coding sequence change</returns>
        protected override string CodonsRef()
        {
            if (NetCodingSequenceChange == "")
            {
                return("");
            }

            long min = Variant.OneBasedStart;
            long max = Variant.OneBasedEnd;

            // CDS base numbers of the two variant ends
            long baseAtMin = CdsBaseNumber(min);
            long baseAtMax = CdsBaseNumber(max);

            // On the minus strand the two ends swap roles
            bool minusStrand = Transcript.IsStrandMinus();
            long cdsBaseMin  = minusStrand ? baseAtMax : baseAtMin;
            long cdsBaseMax  = minusStrand ? baseAtMin : baseAtMax;

            if (cdsBaseMax < cdsBaseMin)
            {
                throw new ArgumentOutOfRangeException("This should never happen!\n\tcdsBaseMin: " + cdsBaseMin + "\n\tcdsBaseMax: " + cdsBaseMax + "\n\tmin: " + min + "\n\tmax: " + max + "\n\tSeqChange: " + Variant + "\n\ttranscript: " + Transcript + "\n\tCDS.len: " + Transcript.RetrieveCodingSequence().Count);
            }

            // Codon numbers and the CDS index range they cover (end is inclusive)
            long minCodon         = cdsBaseMin / CODON_SIZE;
            long maxCodon         = cdsBaseMax / CODON_SIZE;
            long oldCodonCdsStart = (CODON_SIZE * minCodon);
            long oldCodonCdsEnd   = (CODON_SIZE * (maxCodon + 1)) - 1;

            ISequence cds = Transcript.RetrieveCodingSequence();

            if (oldCodonCdsEnd >= cds.Count)
            {
                // Last codon runs past the CDS: take everything from the first codon onwards
                return(SequenceExtensions.ConvertToString(cds).Substring((int)oldCodonCdsStart));
            }

            ISequence codonSpan = cds.GetSubSequence(oldCodonCdsStart, (maxCodon - minCodon + 1) * CODON_SIZE);

            return(SequenceExtensions.ConvertToString(codonSpan));
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Apply a deletion to this marker's sequence and store the result in
        /// <paramref name="markerSeq"/>. The bases from the variant start up to (not including)
        /// start + variant length are removed.
        /// </summary>
        /// <param name="variant">Deletion variant</param>
        /// <param name="markerSeq">Marker that receives the modified sequence</param>
        protected void ApplyDel(Variant variant, IntervalSequence markerSeq)
        {
            // Get sequence in positive strand direction
            ISequence seq = IsStrandPlus() ? Sequence : Sequence.GetReverseComplementedSequence();

            // Zero-based range [idxStart, idxEnd) of the deleted bases within the marker
            long idxStart = variant.OneBasedStart - OneBasedStart;
            long idxEnd   = idxStart + variant.Length();

            StringBuilder newSeq = new StringBuilder();

            // Keep the bases before the deletion (nothing when it starts before the marker)
            if (idxStart >= 0)
            {
                newSeq.Append(SequenceExtensions.ConvertToString(seq, 0, idxStart));
            }

            // Keep the bases after the deletion. Only the tail starting at idxEnd is appended;
            // appending the whole sequence would leave the deleted bases in place.
            if (idxEnd >= 0 && idxEnd < seq.Count)
            {
                newSeq.Append(SequenceExtensions.ConvertToString(seq, idxEnd));
            }

            // Update sequence
            seq = new Sequence(seq.Alphabet, newSeq.ToString());
            markerSeq.Sequence = IsStrandPlus() ? seq : seq.GetReverseComplementedSequence();
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Calculate codons old / codons new: sets <c>CodonStartNumber</c>, <c>CodonStartIndex</c>,
        /// <c>CodonsReference</c> and <c>CodonsAlternate</c> for a variant that overlaps the
        /// coding sequence. Returns without setting the codons when the variant does not
        /// intersect the transcript or its CDS.
        /// </summary>
        protected void codonOldNew()
        {
            if (!Transcript.Intersects(Variant))
            {
                return;
            }

            // CDS coordinates
            cdsStart = Transcript.IsStrandPlus() ? Transcript.CdsOneBasedStart : Transcript.CdsOneBasedEnd;
            cdsEnd   = Transcript.IsStrandPlus() ? Transcript.CdsOneBasedEnd : Transcript.CdsOneBasedStart;

            // Does it intersect CDS?
            if (cdsStart > Variant.OneBasedEnd)
            {
                return;
            }
            if (cdsEnd < Variant.OneBasedStart)
            {
                return;
            }

            // Base number relative to CDS start
            long scStart, scEnd;

            if (Transcript.IsStrandPlus())
            {
                scStart = cdsBaseNumber(Variant.OneBasedStart, false);
                scEnd   = cdsBaseNumber(Variant.OneBasedEnd, true);
            }
            else
            {
                scEnd   = cdsBaseNumber(Variant.OneBasedStart, true);
                scStart = cdsBaseNumber(Variant.OneBasedEnd, false);
            }

            // Update coordinates
            CodonStartNumber = (int)(scStart / CODON_SIZE);
            CodonStartIndex  = (int)(scStart % CODON_SIZE);

            // MNP overlap in coding part
            long scLen = scEnd - scStart;

            if (scLen < 0)
            {
                return;
            }

            // Round to codon position
            long scStart3 = round3(scStart, false);
            long scEnd3   = round3(scEnd, true);

            if (scEnd3 == scStart3)
            {
                scEnd3 += 3;
            }                                       // At least one codon

            // Append 'N'
            ISequence cds          = Transcript.RetrieveCodingSequence();
            long      lastCdsIndex = cds.Count - 1;
            string    padN         = "";
            long      diff         = scEnd3 - lastCdsIndex;

            if (diff > 0)
            {
                scEnd3 = lastCdsIndex;
                // Pad with 'N'
                switch (diff)
                {
                case 1:
                    padN = "N";
                    break;

                case 2:
                    padN = "NN";
                    break;

                default:
                    throw new ArgumentOutOfRangeException("Sanity check failed. Number of 'N' pading is :" + diff + ". This should not happen!");
                }
            }

            // Get old codon (reference).
            // scEnd3 is handled as an inclusive index (it is compared with and clamped to
            // the last CDS index above), so the length is end - start + 1, and it must be
            // taken after the adjustments to scEnd3 so the range never runs past the CDS.
            // NOTE(review): relies on round3(x, true) returning the last base index of the codon - confirm.
            long scLen3 = scEnd3 - scStart3 + 1;

            CodonsReference = SequenceExtensions.ConvertToString(cds.GetSubSequence(scStart3, scLen3));

            // Get new codon (change): unchanged leading bases + net change + unchanged trailing bases
            string prepend = CodonsReference.Substring(0, (int)(scStart - scStart3));
            string append  = scEnd3 > scEnd ? CodonsReference.Substring(CodonsReference.Length - (int)(scEnd3 - scEnd)) : "";

            CodonsAlternate = prepend + NetCdsChange() + append;

            // Pad codons with 'N' if required
            CodonsReference += padN;
            CodonsAlternate += padN;

            //---
            // Can we simplify codons?
            //---
            if ((CodonsReference != null) && (CodonsAlternate != null))
            {
                while ((CodonsReference.Length >= 3) && (CodonsAlternate.Length >= 3))
                {
                    // First codon
                    string cold = CodonsReference.Substring(0, 3);
                    string cnew = CodonsAlternate.Substring(0, 3);

                    // Are codons equal? => Simplify by dropping the shared leading codon
                    if (cold.Equals(cnew, StringComparison.InvariantCultureIgnoreCase))
                    {
                        CodonsReference = CodonsReference.Substring(3);
                        CodonsAlternate = CodonsAlternate.Substring(3);
                        CodonStartNumber++;
                    }
                    else
                    {
                        break;
                    }
                }
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Concatenate the exon sequences in strand order into a single sequence.
        /// </summary>
        /// <returns>
        /// The joined sequence, using the ambiguous DNA alphabet when any exon's alphabet
        /// has ambiguity and the plain DNA alphabet otherwise.
        /// </returns>
        public ISequence SplicedRNA()
        {
            bool ambiguity = ExonsSortedStrand.Any(exon => exon.Sequence.Alphabet.HasAmbiguity);

            IAlphabet alphabet;

            if (ambiguity)
            {
                alphabet = Alphabets.AmbiguousDNA;
            }
            else
            {
                alphabet = Alphabets.DNA;
            }

            string spliced = string.Concat(ExonsSortedStrand.Select(exon => SequenceExtensions.ConvertToString(exon.Sequence)));

            return(new Sequence(alphabet, spliced));
        }