Exemple #1
0
        public void TrimPrefix()
        {
            // RSS/R: both alleles begin with R, so only the leading residue can be trimmed
            var notation = new HgvsProteinNomenclature.HgvsNotation("RSS", "R", "bob", 100, 102)
            {
                Type = ProteinChange.Deletion
            };

            AminoAcids.RemovePrefixAndSuffix(notation);

            // the reference keeps the two unshared residues, the alternate is trimmed away entirely
            Assert.Equal("SS", notation.ReferenceAminoAcids);
            Assert.Null(notation.AlternateAminoAcids);

            // the start moves past the trimmed residue while the end stays where it was
            Assert.Equal(101, notation.Start);
            Assert.Equal(102, notation.End);
        }
Exemple #2
0
        public void TrimBothPrefixAndSuffix()
        {
            // RT/RMLMLT: the alleles share the leading R and the trailing T
            var notation = new HgvsProteinNomenclature.HgvsNotation("RT", "RMLMLT", "bob", 100, 101)
            {
                Type = ProteinChange.Insertion
            };

            AminoAcids.RemovePrefixAndSuffix(notation);

            // the reference is trimmed away entirely, the alternate keeps only the inserted residues
            Assert.Null(notation.ReferenceAminoAcids);
            Assert.Equal("MLML", notation.AlternateAminoAcids);

            // one residue trimmed on each side: start moves up by one, end moves down by one
            Assert.Equal(101, notation.Start);
            Assert.Equal(100, notation.End);
        }
Exemple #3
0
        /// <summary>
        /// Trims the amino acids that the reference and alternate alleles have in common, first
        /// from the front and then from the back, updating the notation in place. The start
        /// position advances by one for each trimmed leading residue and the end position moves
        /// back by one for each trimmed trailing residue. An allele that is trimmed away
        /// completely is set to null. Nothing is changed when either allele is null, when both
        /// alleles are identical, or when they share no leading or trailing residues.
        /// </summary>
        /// <param name="hn">the notation whose alleles and coordinates are adjusted in place</param>
        internal static void RemovePrefixAndSuffix(HgvsProteinNomenclature.HgvsNotation hn)
        {
            var reference = hn.ReferenceAminoAcids;
            var alternate = hn.AlternateAminoAcids;

            // a missing allele (pure insertion or deletion) or identical alleles leave nothing to trim
            if (reference == null || alternate == null || reference == alternate)
            {
                return;
            }

            // count the leading residues shared by both alleles
            var maxPrefix    = Math.Min(reference.Length, alternate.Length);
            var sharedPrefix = 0;

            while (sharedPrefix < maxPrefix && reference[sharedPrefix] == alternate[sharedPrefix])
            {
                sharedPrefix++;
            }

            // count the shared trailing residues, limited to what the prefix has not already consumed
            var maxSuffix    = maxPrefix - sharedPrefix;
            var sharedSuffix = 0;

            while (sharedSuffix < maxSuffix &&
                   reference[reference.Length - 1 - sharedSuffix] == alternate[alternate.Length - 1 - sharedSuffix])
            {
                sharedSuffix++;
            }

            var numTrimmed = sharedPrefix + sharedSuffix;

            if (numTrimmed == 0)
            {
                return;
            }

            // move the coordinates inward by the number of residues removed on each side
            hn.Start += sharedPrefix;
            hn.End   -= sharedSuffix;

            // keep only the unshared middle of each allele; an empty remainder becomes null
            var refRemaining = reference.Length - numTrimmed;
            var altRemaining = alternate.Length - numTrimmed;

            hn.SetReferenceAminoAcids(refRemaining == 0 ? null : reference.Substring(sharedPrefix, refRemaining));
            hn.SetAlternateAminoAcids(altRemaining == 0 ? null : alternate.Substring(sharedPrefix, altRemaining));
        }
Exemple #4
0
        // ReSharper disable once InconsistentNaming
        public void Shift3PrimeSS()
        {
            // the SS/- deletion starts as RS[SS]SSS and should end up at the 3' end: RSSSS[SS]
            var notation = new HgvsProteinNomenclature.HgvsNotation("SS", null, "bob", 3, 4)
            {
                Type = ProteinChange.Deletion
            };

            AminoAcids.Rotate3Prime(notation, "RSSSSSS");

            const int expectedStart = 6;
            const int expectedEnd   = 7;

            Assert.Equal(expectedStart, notation.Start);
            Assert.Equal(expectedEnd, notation.End);
        }
Exemple #5
0
        // ReSharper disable once InconsistentNaming
        public void Shift3PrimeSTM()
        {
            // the STM/- deletion starts as R[STM]STMP and should move one repeat unit downstream: RSTM[STM]P
            var notation = new HgvsProteinNomenclature.HgvsNotation("STM", null, "bob", 2, 4)
            {
                Type = ProteinChange.Deletion
            };

            AminoAcids.Rotate3Prime(notation, "RSTMSTMP");

            const int expectedStart = 5;
            const int expectedEnd   = 7;

            Assert.Equal(expectedStart, notation.Start);
            Assert.Equal(expectedEnd, notation.End);
        }
Exemple #6
0
        /// <summary>
        /// Shifts a deletion, duplication, or insertion toward the 3' end of the peptide sequence
        /// for as long as the residue entering the window matches the residue leaving it, and
        /// updates the notation in place. An insertion that was shifted and now sits directly
        /// after a copy of the inserted residues is reclassified as a duplication.
        /// Other protein change types are left untouched.
        /// </summary>
        /// <param name="hn">the notation to rotate; its Start, End, Type, and alleles may be modified</param>
        /// <param name="peptides">the peptide sequence; hn.End is used as the offset of the first downstream residue</param>
        internal static void Rotate3Prime(HgvsProteinNomenclature.HgvsNotation hn, string peptides)
        {
            if (hn.Type != ProteinChange.Deletion &&
                hn.Type != ProteinChange.Duplication &&
                hn.Type != ProteinChange.Insertion)
            {
                return;
            }

            var isInsertion = hn.Type == ProteinChange.Insertion;

            // for insertion, the reference bases will be empty string. The shift should happen on the alternate allele
            var rotatingPeptides = isInsertion ? hn.AlternateAminoAcids : hn.ReferenceAminoAcids;

            // with no residues to rotate, the comparison below would match every residue against
            // itself and push the start to the end of the sequence (or fail on a null allele)
            if (string.IsNullOrEmpty(rotatingPeptides))
            {
                return;
            }

            // nothing can be rotated when the end position lies outside the peptide sequence
            if (hn.End < 0 || hn.End > peptides.Length)
            {
                return;
            }

            var numBases         = rotatingPeptides.Length;
            var combinedSequence = rotatingPeptides + peptides.Substring(hn.End);

            // slide the window one residue at a time while the residue that would enter the
            // window equals the one that would leave it
            var shiftStart = 0;

            for (var shiftEnd = numBases; shiftEnd < combinedSequence.Length; shiftStart++, shiftEnd++)
            {
                if (combinedSequence[shiftStart] != combinedSequence[shiftEnd])
                {
                    break;
                }

                hn.Start++;
            }

            var hasShifted = shiftStart > 0;

            if (hasShifted)
            {
                rotatingPeptides = combinedSequence.Substring(shiftStart, numBases);
            }

            if (isInsertion)
            {
                hn.AlternateAminoAcids = rotatingPeptides;
            }
            else
            {
                hn.ReferenceAminoAcids = rotatingPeptides;
            }

            // an insertion ends one position before its start; the other types span numBases residues
            hn.End = isInsertion ? hn.Start - 1 : hn.Start + numBases - 1;

            if (!isInsertion || !hasShifted)
            {
                return;
            }

            // the residues that now sit upstream of the shifted insertion
            var newUpstreamSeq = combinedSequence.Substring(0, shiftStart);

            // residues are plain character codes, so compare ordinally rather than with culture rules
            if (newUpstreamSeq.EndsWith(rotatingPeptides, StringComparison.Ordinal))
            {
                hn.Type = ProteinChange.Duplication;

                // the original author noted that ending the range at Start + numBases - 1 mirrors VEP
                hn.End = hn.Start + numBases - 1;

                hn.ReferenceAminoAcids = hn.AlternateAminoAcids;
            }
        }