Exemplo n.º 1
0
        /// <summary>
        /// Classifies a PrSM by computing four flags from its reported sequences and genes
        /// (PTMs localized, PTMs identified, sequence identified, gene identified) and passing
        /// them to GetProteoformClassification.
        /// All input strings are delimited with "|"
        /// PTMs are annotated with []
        /// </summary>
        /// <param name="fullSequenceString">All possible sequences (with modifications) for this PrSM</param>
        /// <param name="geneString">All possible genes for this PrSM</param>
        /// <returns>The string returned by GetProteoformClassification for the four computed flags.</returns>
        public static string ClassifyPrSM(string fullSequenceString, string geneString)
        {
            //separate delimited input
            string[] sequences = fullSequenceString.Split('|');
            string[] genes     = geneString.Split('|');

            //determine sequence ambiguity
            //the base sequence is upper-cased with the invariant culture so the comparison does not depend on the current locale
            string firstBaseSequence  = PeptideWithSetModifications.GetBaseSequenceFromFullSequence(sequences[0]).ToUpperInvariant(); //first sequence with modifications removed
            bool   sequenceIdentified = !SequenceContainsUnknownAminoAcids(firstBaseSequence);                                        //check for ambiguous amino acids (e.g. B, J, X, Z)

            //for every other sequence reported
            if (sequenceIdentified) //only needed if there weren't any unknown amino acids reported
            {
                for (int i = 1; i < sequences.Length; i++)
                {
                    string currentBaseSequence = PeptideWithSetModifications.GetBaseSequenceFromFullSequence(sequences[i]).ToUpperInvariant();

                    //if the unmodified sequences don't match, then there's sequence ambiguity
                    if (!firstBaseSequence.Equals(currentBaseSequence))
                    {
                        sequenceIdentified = false;
                        break;
                    }
                }
            }

            //determine PTM localization and identification
            List<(int index, string ptm)> firstPTMsSortedByIndex = GetPTMs(sequences[0]);                           //ptms from the first sequence reported
            List<string> firstPTMsSortedByPTM = firstPTMsSortedByIndex.Select(x => x.ptm).OrderBy(x => x).ToList(); //same ptms, sorted alphabetically
            //check if there are unknown mass shifts
            bool ptmsIdentified = !PtmsContainUnknownMassShifts(firstPTMsSortedByPTM);
            bool ptmsLocalized  = true; //assume these are localized unless we determine otherwise

            //keep the parsed PTMs of every other sequence so the edge-case pass below does not have to parse them again
            List<List<(int index, string ptm)>> otherPTMsSortedByIndex = new List<List<(int index, string ptm)>>();

            //for every other sequence reported
            for (int seqIndex = 1; seqIndex < sequences.Length; seqIndex++)
            {
                List<(int index, string ptm)> currentPTMsSortedByIndex = GetPTMs(sequences[seqIndex]);                      //ptms from this sequence
                List<string> currentPTMsSortedByPTM = currentPTMsSortedByIndex.Select(x => x.ptm).OrderBy(x => x).ToList(); //sorted alphabetically
                otherPTMsSortedByIndex.Add(currentPTMsSortedByIndex);

                //are number of PTMs the same?
                if (firstPTMsSortedByIndex.Count == currentPTMsSortedByIndex.Count)
                {
                    //check localization (are indexes conserved?)
                    for (int i = 0; i < firstPTMsSortedByIndex.Count; i++)
                    {
                        if (firstPTMsSortedByIndex[i].index != currentPTMsSortedByIndex[i].index)
                        {
                            ptmsLocalized = false;
                            break;
                        }
                    }
                    //check PTM identification (is the alphabetically sorted set of PTMs conserved?)
                    for (int i = 0; i < firstPTMsSortedByPTM.Count; i++)
                    {
                        if (!firstPTMsSortedByPTM[i].Equals(currentPTMsSortedByPTM[i]))
                        {
                            ptmsIdentified = false;
                            break;
                        }
                    }
                }
                else
                {
                    //a different number of PTMs means neither identity nor position is conserved
                    ptmsIdentified = false;
                    ptmsLocalized  = false;
                }
            }

            //handle an edge case where two PTMs are identified and localized to two residues, but it's unclear which PTM is localized to which residue.
            //reaching this point with both flags true means every sequence has the same PTM count, so indexing both lists with ptmIndex is safe.
            if (ptmsIdentified && ptmsLocalized)
            {
                foreach (List<(int index, string ptm)> currentPTMsSortedByIndex in otherPTMsSortedByIndex)
                {
                    //check that the same mod is in the same position (compare PTM names, not the whole (index, ptm) tuple)
                    for (int ptmIndex = 0; ptmIndex < currentPTMsSortedByIndex.Count; ptmIndex++)
                    {
                        if (!firstPTMsSortedByIndex[ptmIndex].ptm.Equals(currentPTMsSortedByIndex[ptmIndex].ptm))
                        {
                            ptmsLocalized = false;
                            break;
                        }
                    }

                    //one mismatch is enough; no need to inspect the remaining sequences
                    if (!ptmsLocalized)
                    {
                        break;
                    }
                }
            }

            //determine gene ambiguity
            bool geneIdentified = genes.Length == 1;

            return GetProteoformClassification(ptmsLocalized, ptmsIdentified, sequenceIdentified, geneIdentified);
        }