// Deletion RSS/R: the shared leading "R" is clipped, leaving "SS" as the reference,
// no alternate allele, and a start position moved forward by one.
public void TrimPrefix()
{
    var notation = new HgvsProteinNomenclature.HgvsNotation("RSS", "R", "bob", 100, 102)
    {
        Type = ProteinChange.Deletion
    };

    AminoAcids.RemovePrefixAndSuffix(notation);

    // alleles after clipping
    Assert.Equal("SS", notation.ReferenceAminoAcids);
    Assert.Null(notation.AlternateAminoAcids);

    // coordinates after clipping: only the start moves
    Assert.Equal(101, notation.Start);
    Assert.Equal(102, notation.End);
}
// Insertion RT/RMLMLT: the shared leading "R" and trailing "T" are both clipped, leaving
// no reference allele and "MLML" as the alternate. The start moves forward by one and the
// end moves back by one, so the end lands just before the start.
public void TrimBothPrefixAndSuffix()
{
    var notation = new HgvsProteinNomenclature.HgvsNotation("RT", "RMLMLT", "bob", 100, 101)
    {
        Type = ProteinChange.Insertion
    };

    AminoAcids.RemovePrefixAndSuffix(notation);

    // alleles after clipping
    Assert.Null(notation.ReferenceAminoAcids);
    Assert.Equal("MLML", notation.AlternateAminoAcids);

    // coordinates after clipping
    Assert.Equal(101, notation.Start);
    Assert.Equal(100, notation.End);
}
/// <summary>
/// Clips the amino acids shared by the reference and alternate alleles, first from the
/// front and then from the back, moving the start forward for every clipped leading
/// residue and the end backward for every clipped trailing residue. An allele that is
/// completely consumed by the clipping is set to null. Nothing is returned; the notation
/// is modified in place.
/// </summary>
/// <param name="hn">the notation whose alleles and coordinates are trimmed</param>
internal static void RemovePrefixAndSuffix(HgvsProteinNomenclature.HgvsNotation hn)
{
    var reference = hn.ReferenceAminoAcids;
    var alternate = hn.AlternateAminoAcids;

    // a missing allele (pure insertion or deletion) or identical alleles leave nothing to clip
    if (reference == null || alternate == null || reference == alternate)
    {
        return;
    }

    var remainingRef = reference.Length;
    var remainingAlt = alternate.Length;

    // walk forward over the shared leading residues, advancing the start for each one
    var prefixLength = 0;
    var prefixLimit  = Math.Min(remainingRef, remainingAlt);

    while (prefixLength < prefixLimit && reference[prefixLength] == alternate[prefixLength])
    {
        remainingRef--;
        remainingAlt--;
        prefixLength++;
        hn.Start++;
    }

    // walk backward over the shared trailing residues; the limit is recomputed from what
    // is left so the suffix can never overlap the prefix that was already clipped
    var suffixLength = 0;
    var suffixLimit  = Math.Min(remainingRef, remainingAlt);

    while (suffixLength < suffixLimit &&
           reference[reference.Length - suffixLength - 1] == alternate[alternate.Length - suffixLength - 1])
    {
        remainingRef--;
        remainingAlt--;
        suffixLength++;
        hn.End--;
    }

    // leave the alleles untouched when nothing was shared at either end
    if (prefixLength == 0 && suffixLength == 0)
    {
        return;
    }

    hn.SetReferenceAminoAcids(remainingRef == 0 ? null : reference.Substring(prefixLength, remainingRef));
    hn.SetAlternateAminoAcids(remainingAlt == 0 ? null : alternate.Substring(prefixLength, remainingAlt));
}
// Deletion SS/- in RS[SS]SSS: the deleted pair sits inside a run of S residues, so the
// rotation is expected to push it to the end of the run: RSSSS[SS].
// ReSharper disable once InconsistentNaming
public void Shift3PrimeSS()
{
    const string transcriptPeptides = "RSSSSSS";

    var notation = new HgvsProteinNomenclature.HgvsNotation("SS", null, "bob", 3, 4)
    {
        Type = ProteinChange.Deletion
    };

    AminoAcids.Rotate3Prime(notation, transcriptPeptides);

    Assert.Equal(6, notation.Start);
    Assert.Equal(7, notation.End);
}
// Deletion STM/- in R[STM]STMP: the deleted triplet is followed by an identical copy, so
// the rotation is expected to move it one full repeat downstream: RSTM[STM]P.
// ReSharper disable once InconsistentNaming
public void Shift3PrimeSTM()
{
    const string transcriptPeptides = "RSTMSTMP";

    var notation = new HgvsProteinNomenclature.HgvsNotation("STM", null, "bob", 2, 4)
    {
        Type = ProteinChange.Deletion
    };

    AminoAcids.Rotate3Prime(notation, transcriptPeptides);

    Assert.Equal(5, notation.Start);
    Assert.Equal(7, notation.End);
}
/// <summary>
/// Shifts a deletion, duplication, or insertion as far downstream (3') along the peptide
/// sequence as it can go while still describing the same change. The notation's start,
/// end, and rotated allele are updated in place. An insertion that, after shifting, sits
/// directly behind an identical copy of itself is reclassified as a duplication.
/// </summary>
/// <param name="hn">
/// the notation to rotate; any other protein change type is left untouched, as is a
/// notation whose rotating allele is null or empty
/// </param>
/// <param name="peptides">
/// the peptide sequence the notation refers to; the residues from offset hn.End onward
/// are treated as the downstream sequence
/// </param>
internal static void Rotate3Prime(HgvsProteinNomenclature.HgvsNotation hn, string peptides)
{
    var isInsertion = hn.Type == ProteinChange.Insertion;

    if (!isInsertion && hn.Type != ProteinChange.Deletion && hn.Type != ProteinChange.Duplication)
    {
        return;
    }

    // an insertion rotates its alternate allele; deletions and duplications rotate the reference
    var rotatingPeptides = isInsertion ? hn.AlternateAminoAcids : hn.ReferenceAminoAcids;

    // nothing to rotate: a null allele would throw below, and an empty one would compare
    // every downstream residue with itself and run the start to the end of the sequence
    if (string.IsNullOrEmpty(rotatingPeptides))
    {
        return;
    }

    // hn.End must be a usable offset into the peptide sequence
    if (hn.End < 0 || hn.End > peptides.Length)
    {
        return;
    }

    var numBases         = rotatingPeptides.Length;
    var combinedSequence = rotatingPeptides + peptides.Substring(hn.End);

    // slide a numBases-wide window downstream for as long as the residue leaving the
    // window equals the residue entering it
    var shiftStart = 0;
    for (var shiftEnd = numBases; shiftEnd < combinedSequence.Length; shiftStart++, shiftEnd++)
    {
        if (combinedSequence[shiftStart] != combinedSequence[shiftEnd])
        {
            break;
        }

        hn.Start++;
    }

    var hasShifted = shiftStart > 0;

    if (hasShifted)
    {
        rotatingPeptides = combinedSequence.Substring(shiftStart, numBases);
    }

    if (isInsertion)
    {
        // an insertion's end is the position just before its start
        hn.AlternateAminoAcids = rotatingPeptides;
        hn.End                 = hn.Start - 1;
    }
    else
    {
        hn.ReferenceAminoAcids = rotatingPeptides;
        hn.End                 = hn.Start + numBases - 1;
    }

    if (!isInsertion || !hasShifted)
    {
        return;
    }

    // a shifted insertion preceded by an identical copy of itself is really a duplication;
    // residue codes are compared ordinally so the result does not depend on the current culture
    var newUpstreamSeq = combinedSequence.Substring(0, shiftStart);
    if (!newUpstreamSeq.EndsWith(rotatingPeptides, System.StringComparison.Ordinal))
    {
        return;
    }

    hn.Type = ProteinChange.Duplication;

    // per the original author's note, this end coordinate follows what VEP reports;
    // the start is intentionally left where the rotation put it
    hn.End                 = hn.Start + numBases - 1;
    hn.ReferenceAminoAcids = hn.AlternateAminoAcids;
}