/// <summary>
/// Classifies a PrSM from its candidate sequences and genes by working out whether the
/// base sequence, the PTM identities, the PTM positions and the gene are each unambiguous.
/// All input strings are delimited with "|".
/// PTMs are annotated with [].
/// </summary>
/// <param name="fullSequenceString">All possible sequences (with modifications) for this PrSM</param>
/// <param name="geneString">All possible genes for this PrSM</param>
/// <returns>
/// The string produced by GetProteoformClassification for the four flags computed here
/// (PTMs localized, PTMs identified, sequence identified, gene identified).
/// </returns>
public static string ClassifyPrSM(string fullSequenceString, string geneString)
{
    //separate delimited input
    string[] sequences = fullSequenceString.Split('|');
    string[] genes = geneString.Split('|');

    //determine sequence ambiguity
    //get the first sequence with modifications removed; invariant casing keeps the comparison culture-independent
    string firstBaseSequence = PeptideWithSetModifications.GetBaseSequenceFromFullSequence(sequences[0]).ToUpperInvariant();

    //check for ambiguous amino acids (the original note lists B, J, X, Z; the check itself lives in SequenceContainsUnknownAminoAcids)
    bool sequenceIdentified = !SequenceContainsUnknownAminoAcids(firstBaseSequence);

    //only compare against the other sequences if the first one had no unknown amino acids
    if (sequenceIdentified)
    {
        for (int i = 1; i < sequences.Length; i++)
        {
            string currentBaseSequence = PeptideWithSetModifications.GetBaseSequenceFromFullSequence(sequences[i]).ToUpperInvariant();

            //if the unmodified sequences don't match, then there's sequence ambiguity
            if (firstBaseSequence != currentBaseSequence)
            {
                sequenceIdentified = false;
                break;
            }
        }
    }

    //determine PTM localization and identification
    List<(int index, string ptm)> firstPTMsSortedByIndex = GetPTMs(sequences[0]); //ptms from the first sequence reported
    List<string> firstPTMsSortedByPTM = firstPTMsSortedByIndex.Select(x => x.ptm).OrderBy(x => x).ToList(); //sorted by ptm name

    //check if there are unknown mass shifts
    bool ptmsIdentified = !PtmsContainUnknownMassShifts(firstPTMsSortedByPTM);
    bool ptmsLocalized = true; //assume these are localized unless we determine otherwise

    //ptms of every other sequence, kept so the edge-case pass below does not have to parse them again
    List<List<(int index, string ptm)>> otherPTMsSortedByIndex = new List<List<(int index, string ptm)>>();

    //for every other sequence reported
    for (int seqIndex = 1; seqIndex < sequences.Length; seqIndex++)
    {
        List<(int index, string ptm)> currentPTMsSortedByIndex = GetPTMs(sequences[seqIndex]);
        otherPTMsSortedByIndex.Add(currentPTMsSortedByIndex);

        //a different number of PTMs means neither identity nor position is settled
        if (firstPTMsSortedByIndex.Count != currentPTMsSortedByIndex.Count)
        {
            ptmsIdentified = false;
            ptmsLocalized = false;
            continue;
        }

        //check localization (are indexes conserved?)
        for (int i = 0; i < firstPTMsSortedByIndex.Count; i++)
        {
            if (firstPTMsSortedByIndex[i].index != currentPTMsSortedByIndex[i].index)
            {
                ptmsLocalized = false;
                break;
            }
        }

        //check PTM identification (is the same set of ptms present, regardless of position?)
        List<string> currentPTMsSortedByPTM = currentPTMsSortedByIndex.Select(x => x.ptm).OrderBy(x => x).ToList();
        for (int i = 0; i < firstPTMsSortedByPTM.Count; i++)
        {
            if (firstPTMsSortedByPTM[i] != currentPTMsSortedByPTM[i])
            {
                ptmsIdentified = false;
                break;
            }
        }
    }

    //handle an edge case where two PTMs are identified and localized to two residues,
    //but it's unclear which PTM is localized to which residue.
    //This must stay a separate pass: it only applies when every sequence passed both checks above,
    //which also guarantees that all PTM lists have the same length as the first one.
    if (ptmsIdentified && ptmsLocalized)
    {
        for (int listIndex = 0; listIndex < otherPTMsSortedByIndex.Count && ptmsLocalized; listIndex++)
        {
            List<(int index, string ptm)> currentPTMsSortedByIndex = otherPTMsSortedByIndex[listIndex];

            //check that the same mod sits at the same position
            for (int ptmIndex = 0; ptmIndex < currentPTMsSortedByIndex.Count; ptmIndex++)
            {
                //compare ptm name with ptm name; comparing the name against the whole tuple is never equal
                if (firstPTMsSortedByIndex[ptmIndex].ptm != currentPTMsSortedByIndex[ptmIndex].ptm)
                {
                    ptmsLocalized = false;
                    break;
                }
            }
        }
    }

    //determine gene ambiguity: more than one reported gene means the gene is ambiguous
    bool geneIdentified = genes.Length == 1;

    return GetProteoformClassification(ptmsLocalized, ptmsIdentified, sequenceIdentified, geneIdentified);
}