Beispiel #1
0
        /// <summary>
        /// Strips parenthesised PTM annotations from <paramref name="seq"/> and records each
        /// annotation as a <c>PTM</c> in <c>this.ptms</c>.
        /// </summary>
        /// <param name="seq">Sequence text in which a modification is written as <c>(name)</c>.</param>
        /// <param name="error_message">Concatenation of the messages returned by <c>MassCalculator.getPTMMass</c>.</param>
        /// <returns>The characters of <paramref name="seq"/> that lie outside parentheses; empty for null or empty input.</returns>
        public string cleanSeq(string seq, out string error_message)
        {
            error_message = "";
            if (string.IsNullOrEmpty(seq))
            {
                return "";
            }

            bool modificationOn   = false;
            var  modificationName = new System.Text.StringBuilder();
            var  cleanedSeq       = new System.Text.StringBuilder(seq.Length);

            // NOTE(review): this index is never advanced, so every PTM is created with position 0.
            // Left unchanged because the index convention expected by PTM is not visible here - confirm.
            int aaIndex = 0;

            foreach (char amino_acid in seq)
            {
                if (amino_acid == ')') //only occurs at end of mod
                {
                    modificationOn = false;
                    string name    = modificationName.ToString();
                    double modMass = MassCalculator.getPTMMass(name, out string e);
                    error_message += e;
                    this.ptms.Add(new PTM(name, aaIndex, modMass));

                    // Start the next annotation from scratch; otherwise its name would be
                    // prefixed with the names of all earlier modifications.
                    modificationName.Clear();
                }
                else if (modificationOn) //inside "(...)": collect the PTM name (including any nested '(')
                {
                    modificationName.Append(amino_acid);
                }
                else if (amino_acid == '(') //start collecting PTM name
                {
                    modificationOn = true;
                }
                else //ordinary residue outside any annotation
                {
                    cleanedSeq.Append(amino_acid);
                }
            }
            return cleanedSeq.ToString();
        }
        /// <summary>
        /// Normalises a full sequence string: drops the fixed carbamidomethyl tag and the
        /// "ptmlist:" label, passes the result through
        /// <c>MassCalculator.RemoveNestedParentheses</c> (second argument <c>false</c>),
        /// and finally removes single and double quote characters.
        /// </summary>
        /// <param name="fullSeq">Raw sequence text.</param>
        /// <returns>The sequence text after all of the removals above.</returns>
        public static string cleanSeq(string fullSeq)
        {
            string withoutLabels = fullSeq
                                   .Replace("[Common Fixed:Carbamidomethyl of C]", "")
                                   .Replace("ptmlist:", "");

            string withoutParentheses = MassCalculator.RemoveNestedParentheses(withoutLabels, false);

            return withoutParentheses
                   .Replace("'", "")
                   .Replace("\"", "");
        }
Beispiel #3
0
        /// <summary>
        /// Removes <paramref name="FragNumber"/> characters from one terminus of
        /// <paramref name="IonSequence"/>: from the end for <c>IonType.b</c>, from the start for any
        /// other ion type (treated as y). A cut that lands on a parenthesised PTM annotation is
        /// widened so the annotation is removed as a whole; for b ions the residue in front of the
        /// annotation is removed as well.
        /// If <paramref name="checkToRemoveExtraAA"/> is true, further characters are removed one at a
        /// time until the mass reported by <c>MassCalculator.MonoIsoptopicMass</c> is below
        /// <paramref name="ExperimentalMass"/>.
        /// </summary>
        /// <param name="IonSequence">Sequence to crop; may contain <c>(name)</c> PTM annotations.</param>
        /// <param name="ExperimentalMass">Upper bound for the fragment mass when extra removal is requested.</param>
        /// <param name="FragNumber">Number of characters to remove initially; must not be negative.</param>
        /// <param name="ion">Selects the terminus that is cropped.</param>
        /// <param name="checkToRemoveExtraAA">Whether to keep cropping until the mass criterion is met.</param>
        /// <param name="error_message">Concatenation of the messages returned by the mass calculations.</param>
        /// <returns>
        /// The cropped fragment. An empty string is returned when nothing is left, or when a PTM
        /// annotation has no matching parenthesis; the previous implementation threw in those cases.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="FragNumber"/> is negative.</exception>
        public string IonCrop(string IonSequence, double ExperimentalMass, int FragNumber, IonType ion, bool checkToRemoveExtraAA, out string error_message)
        {
            error_message = "";
            if (string.IsNullOrEmpty(IonSequence))
            {
                return "";
            }
            if (FragNumber < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(FragNumber), "The number of removed characters cannot be negative.");
            }

            int sequenceLength = IonSequence.Length;

            // Each pass corresponds to one call of the former recursive implementation.
            while (true)
            {
                if (FragNumber >= sequenceLength) //everything has been cropped away
                {
                    return "";
                }

                string IonFrag;
                if (ion == IonType.b)
                {
                    int keep = sequenceLength - FragNumber;
                    if (IonSequence[keep - 1] == ')') //cut lands on the end of a PTM annotation
                    {
                        int open = IonSequence.LastIndexOf('(', keep - 1);
                        if (open < 1) //no opening parenthesis, or no residue in front of it
                        {
                            return "";
                        }
                        keep       = open - 1; //drop the annotation, its "(" and the AA it was attached to
                        FragNumber = sequenceLength - keep;
                    }
                    IonFrag = IonSequence.Substring(0, keep);
                }
                else //Ion==Y
                {
                    if (IonSequence[FragNumber] == '(') //cut lands on the start of a PTM annotation
                    {
                        int close = IonSequence.IndexOf(')', FragNumber);
                        if (close < 0) //annotation is never closed
                        {
                            return "";
                        }
                        FragNumber = close + 1; //drop everything up to and including ")"
                    }
                    IonFrag = IonSequence.Substring(FragNumber);
                }

                if (!checkToRemoveExtraAA)
                {
                    return IonFrag;
                }

                double IonMass = MassCalculator.MonoIsoptopicMass(IonFrag, out string e);
                error_message += e;
                if (IonMass < ExperimentalMass) //end if the mass of the fragment is lower than the experimental
                {
                    return IonFrag;
                }

                FragNumber++; //remove one more character and try again
            }
        }
        };                                                                                                                                                  //20 common AA, ordered by mass assuming carbamido

        /// <summary>
        /// Enumerates amino-acid strings built from <c>AANames</c>, stores every string whose mass
        /// (as reported by <c>MassCalculator.MonoIsoptopicMass</c>) is below the maximum mass in
        /// <c>massDict</c> under its rounded mass, fills <c>keys</c> with the sorted new masses and
        /// writes the qualifying entries to a tab-separated text file.
        /// </summary>
        /// <param name="dictionaryOutputPath">
        /// File to write the dictionary to. When null, the path that used to be hard-coded in this
        /// method is used, so existing callers behave as before.
        /// </param>
        /// <returns>Concatenation of the messages returned by the mass calculations.</returns>
        public string PopulateMassDictionary(string dictionaryOutputPath = null)
        {
            const string legacyOutputPath = @"C:\Users\Zach Rolfs\Desktop\Chemistry\Smith Research\Fusion Peptides\Dictionary3.txt";

            string        error_message = "";
            List <double> tempKeys      = new List <double>();
            //If one sequence knocks it out of range, the next might not (VVVVW vs VVVVV where the first is out of range but the second is not) W-G prevents this
            //peaks+1 converts missed peaks into number of ambiguous aa
            double maxMass = (maxMissingConsecutivePeaks + 1) * 186.079313 + (Constants.WATER_MONOISOTOPIC_MASS + 3);
            //maximum length allowed (4>W/G>3)
            int mer     = 4 * (maxMissingConsecutivePeaks + 1);
            int aaCount = AANames.Count();

            // indexes[n] is the position in AANames of the n-th residue of the current string;
            // only the first 'length' entries are in use. A new int[] is already zero-filled.
            int[] indexes = new int[mer];
            int   length  = 1;

            while (indexes[0] < aaCount)
            {
                //get new seq
                string seq = "";
                for (int n = 0; n < length; n++)
                {
                    seq += AANames[indexes[n]];
                }

                double fragMass = MassCalculator.MonoIsoptopicMass(seq, out string e);
                error_message += e;

                bool inRange = fragMass < maxMass;
                if (inRange)
                {
                    double rounded = Math.Round(fragMass, decimalDigitsForFragmentMassRounding);
                    if (massDict.TryGetValue(rounded, out List <string> known))
                    {
                        if (!known.Contains(seq))
                        {
                            known.Add(seq);
                        }
                    }
                    else
                    {
                        massDict.Add(rounded, new List <string> { seq });
                        tempKeys.Add(rounded);
                    }
                }

                if (inRange && mer != length && fragMass + 57.0214 < maxMass)
                {
                    length++; //room for at least one more residue: extend the string
                }
                else if (inRange && indexes[length - 1] < aaCount - 1)
                {
                    indexes[length - 1]++; //try the next residue at the last position
                }
                else if (length > 1)
                {
                    //last position exhausted or out of range: reset it and advance the one before
                    indexes[length - 1] = 0;
                    indexes[length - 2]++;
                    length--;
                }
                else
                {
                    //Nothing to carry into at length 1 (the old code indexed position -1 here):
                    //advance the first position, which ends the loop once it passes the last residue.
                    indexes[0]++;
                }

                //important catch to make sure going back doesn't result in an out of index exception
                for (int i = indexes.Length - 1; i >= 0; i--)
                {
                    if (indexes[i] == aaCount && i > 0)
                    {
                        length--;
                        indexes[i] = 0;
                        indexes[i - 1]++;
                    }
                }
            }

            tempKeys.Sort();
            keys = tempKeys.ToArray();

            using (System.IO.StreamWriter file = new System.IO.StreamWriter(dictionaryOutputPath ?? legacyOutputPath))
            {
                foreach (double key in keys)
                {
                    if (!massDict.TryGetValue(key, out List <string> sequences))
                    {
                        continue;
                    }

                    //only write masses reachable by a string short enough for the missed-peak limit
                    if (sequences.Any(s => s.Length <= maxMissingConsecutivePeaks + 1))
                    {
                        file.WriteLine(key.ToString() + '\t' + string.Join(";", sequences));
                    }
                }
            }
            return error_message;
        }
Beispiel #5
0
        /// <summary>
        /// Removes from <paramref name="psms"/> every PSM for which a database protein explains the
        /// precursor mass. For each fusion candidate, its first and last <c>ionsUsedDigFilter</c>
        /// residues are looked up in <paramref name="database"/>; each hit is extended one residue
        /// at a time and the PSM is removed as soon as an extended product matches the experimental mass.
        /// </summary>
        /// <param name="psms">List to filter; matching entries are removed in place.</param>
        /// <param name="database">Proteins whose <c>seq</c> is searched.</param>
        /// <param name="error_message">Concatenation of the messages returned by the mass calculations.</param>
        public void removeTranslatedPeptides(List <PSM> psms, List <TheoreticalProtein> database, out string error_message)
        {
            error_message = "";
            for (int i = 0; i < psms.Count; i++)
            {
                this.worker.ReportProgress(Convert.ToInt16((Convert.ToDouble(i) / Convert.ToDouble(psms.Count)) * 100));

                if (FusionCandidateIsTranslated(psms[i], database, ref error_message))
                {
                    // Remove exactly this entry and revisit the same index on the next pass.
                    // The old code kept scanning with psms[i] after decrementing i, so it could
                    // remove the preceding PSM too, or index position -1.
                    psms.RemoveAt(i);
                    i--;
                }
            }
        }

        // True when any fusion candidate of the PSM has an N- or C-terminal stretch that can be
        // extended inside a database protein to a product matching the PSM's experimental mass.
        private bool FusionCandidateIsTranslated(PSM psm, List <TheoreticalProtein> database, ref string error_message)
        {
            double expMass = psm.getExpMass();
            foreach (FusionCandidate fc in psm.getFusionCandidates())
            {
                string seq = fc.seq;
                if (seq.Length < ionsUsedDigFilter)
                {
                    continue;
                }

                string Nterm = seq.Substring(0, ionsUsedDigFilter);
                string Cterm = seq.Substring(seq.Length - ionsUsedDigFilter, ionsUsedDigFilter);

                if (TerminusExtendsToPrecursorMass(Nterm, true, expMass, database, ref error_message) ||
                    TerminusExtendsToPrecursorMass(Cterm, false, expMass, database, ref error_message))
                {
                    return true;
                }
            }
            return false;
        }

        // Looks at every (possibly overlapping) occurrence of 'terminus' in every database protein and
        // grows the product from it: to the right for an N-terminal stretch, to the left for a
        // C-terminal one. Stops growing at the protein boundary or once the product is 200 or more
        // above the experimental mass.
        private bool TerminusExtendsToPrecursorMass(string terminus, bool extendRight, double expMass, List <TheoreticalProtein> database, ref string error_message)
        {
            List <TheoreticalProtein> matches = database.AsParallel().Where(x => x.seq.Contains(terminus)).ToList();
            foreach (TheoreticalProtein prot in matches)
            {
                string protSeq = prot.seq;
                int    at      = protSeq.IndexOf(terminus, StringComparison.Ordinal);
                while (at >= 0)
                {
                    // Reset per occurrence; the old code carried the mass over from the previous
                    // occurrence, which made every later occurrence in the same protein a no-op.
                    double prodMass = 0;
                    for (int numAAused = ionsUsedDigFilter; prodMass < expMass + 200; numAAused++)
                    {
                        int start = extendRight ? at : at + ionsUsedDigFilter - numAAused;
                        if (start < 0 || start + numAAused > protSeq.Length) //hit the end of the protein
                        {
                            break;
                        }

                        prodMass       = MassCalculator.MonoIsoptopicMass(protSeq.Substring(start, numAAused), out string e);
                        error_message += e;
                        if (IsPrecursorMassMatch(prodMass, expMass))
                        {
                            return true;
                        }
                    }

                    at = at + 1 < protSeq.Length ? protSeq.IndexOf(terminus, at + 1, StringComparison.Ordinal) : -1;
                }
            }
            return false;
        }

        // Mass criterion. With generateDecoys set, the product must fall in one of two offset windows
        // (-9.5..-4.5 or +5.5..+7.5 around the experimental mass); otherwise it must be within
        // precursorMassTolerancePpm of it.
        private bool IsPrecursorMassMatch(double prodMass, double expMass)
        {
            if (generateDecoys)
            {
                return (prodMass > expMass - 9.5 && prodMass < expMass - 4.5) ||
                       (prodMass > expMass + 5.5 && prodMass < expMass + 7.5);
            }

            // Floating-point divisor so the tolerance is not truncated to 0 should
            // precursorMassTolerancePpm be declared as an integer (its declaration is not visible here).
            double tolerance = precursorMassTolerancePpm / 1000000.0;
            return prodMass > expMass * (1 - tolerance) && prodMass < expMass * (1 + tolerance);
        }
Beispiel #6
0
        public void FindCommonFalsePositives(List <PSM> psms, List <TheoreticalProtein> database, out string error_message)
        {
            error_message = "";
            int i = 0;

            foreach (PSM psm in psms)
            {
                this.worker.ReportProgress(Convert.ToInt16((Convert.ToDouble(i) / Convert.ToDouble(psms.Count())) * 100));
                i++;
                foreach (FusionCandidate fusionCandidate in psm.getFusionCandidates())
                {
                    foreach (ParentInfo info in fusionCandidate.parentInfo)
                    {
                        if (info.fragFound.Length >= 6)
                        {
                            foreach (TheoreticalProtein protein in info.theoreticalProteins)
                            {
                                string protSeq      = protein.seq;
                                char[] candidateSeq = fusionCandidate.seq.ToCharArray();
                                int    index        = protSeq.IndexOf(info.fragFound);
                                int    fragLength   = info.fragFound.Length;
                                string possibleTranslatedSequence = protSeq.Substring(index, fragLength);
                                if (!possibleTranslatedSequence.Equals(fusionCandidate.seq)) //if not already found as translated
                                {
                                    double fragMass = MassCalculator.MonoIsoptopicMass(possibleTranslatedSequence, out string e);
                                    error_message += e;
                                    double expMass = psm.getExpMass();
                                    while (fragMass < expMass + 187.079 - 57.021 + 1)
                                    {
                                        //Find SNPs
                                        if (candidateSeq.Count() == possibleTranslatedSequence.Length)
                                        {
                                            char[] possibleSeqArray = possibleTranslatedSequence.ToCharArray();
                                            if (IsSNP(candidateSeq, possibleSeqArray))
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence, Variant.variantType.SNP, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                                psm.variants.Add(new Variant(fusionCandidate.seq, Variant.variantType.SNP, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                        }

                                        if (generateDecoys)
                                        {
                                            //search for unmodified sequences

                                            if (((fragMass) > (expMass - 9.5) && (fragMass) < (expMass - 4.5)) | ((fragMass) > (expMass + 5.5) && (fragMass) < (expMass + 7.5))) //if match, add it!
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence, Variant.variantType.UM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            //fragMass, expMass
                                            //search for PTMs
                                            else if (((fragMass + .984016) > (expMass - 9.5) && (fragMass + .98402) < (expMass - 4.5)) | ((fragMass + .98402) > (expMass + 5.5) && (fragMass + .98402) < (expMass + 7.5))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Deamidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if (((fragMass + 15.99491463) > (expMass - 9.5) && (fragMass + 15.99491463) < (expMass - 4.5)) | ((fragMass + 15.99491463) > (expMass + 5.5) && (fragMass + 15.99491463) < (expMass + 7.5))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Oxidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if (((fragMass + 15.99491463 * 2) > (expMass - 9.5) && (fragMass + 15.99491463 * 2) < (expMass - 4.5)) | ((fragMass + 15.99491463 * 2) > (expMass + 5.5) && (fragMass + 15.99491463 * 2) < (expMass + 7.5))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+DiOxidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if (((fragMass + 15.99491463 * 3) > (expMass - 9.5) && (fragMass + 15.99491463 * 3) < (expMass - 4.5)) | ((fragMass + 15.99491463 * 3) > (expMass + 5.5) && (fragMass + 15.99491463 * 3) < (expMass + 7.5))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+TriOxidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            //else if ((fragMass + 14.01565) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 14.01565) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            //{
                                            //    psm.variants.Add(new Variant(Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            //}                                            else if (((TheoreticalMass + .984016) > (ExperimentalMass - 9.5) && (TheoreticalMass + .98402) < (ExperimentalMass - 4.5)) | ((TheoreticalMass + .98402) > (ExperimentalMass + 5.5) && (TheoreticalMass) < (ExperimentalMass + 7.5))) //if match
                                            else if (((fragMass + 42.01056) > (expMass - 9.5) && (fragMass + 42.01056) < (expMass - 4.5)) | ((fragMass + 42.01056) > (expMass + 5.5) && (fragMass + 42.01056) < (expMass + 7.5))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Acetylation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if (((fragMass + 79.96633) > (expMass - 9.5) && (fragMass + 79.96633) < (expMass - 4.5)) | ((fragMass + 79.96633) > (expMass + 5.5) && (fragMass + 79.96633) < (expMass + 7.5))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Phosphorylation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                        }
                                        else
                                        {
                                            //search for unmodified sequences
                                            if ((fragMass) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match, add it!
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence, Variant.variantType.UM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }

                                            //search for PTMs
                                            else if ((fragMass + .984016) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + .98402) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Deamidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 15.99491463) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 15.99491) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Oxidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 15.99491463 * 2) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 15.99491 * 2) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Dioxidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 15.99491463 * 3) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 15.99491 * 3) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Trioxidation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            //else if ((fragMass + 14.01565) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 14.01565) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            //{
                                            //    psm.variants.Add(new Variant(Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            //}
                                            else if ((fragMass + 42.01056) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 42.01056) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Acetyl", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 79.96633) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 79.96633) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Phospho", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 61.913495) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 61.913495) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Zinc", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 37.955588) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 37.955588) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Potassium", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 21.981944) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 21.981944) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Sodium", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 57.021464) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 57.021464) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Carbamidomethyl", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 79.956815) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 79.956815) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Sulfonation", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 14.01565) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 14.01565) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Methyl", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 28.0313) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 28.0313) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+DiMethyl", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass + 42.04695) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass + 42.04695) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+TriMethyl", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                            else if ((fragMass - -17.026549) > (expMass * (1 - precursorMassTolerancePpm / 1000000)) && (fragMass - -17.026549) < (expMass * (1 + precursorMassTolerancePpm / 1000000))) //if match
                                            {
                                                psm.variants.Add(new Variant(possibleTranslatedSequence + "+Ammonia loss", Variant.variantType.PTM, protein.id, protSeq, protSeq.IndexOf(info.fragFound), info.fragFound.Length));
                                            }
                                        }
                                        //-17, 22,

                                        //update the sequence
                                        fragLength++;
                                        if (info.parentType.Equals(ParentInfo.terminal.N))
                                        {
                                            if (index + fragLength < protSeq.Length)
                                            {
                                                possibleTranslatedSequence = protSeq.Substring(index, fragLength);
                                                fragMass       = MassCalculator.MonoIsoptopicMass(possibleTranslatedSequence, out string e2);
                                                error_message += e2;
                                            }
                                            else
                                            {
                                                fragMass = expMass + 187.079 - 57.021 + 1;
                                            }
                                        }
                                        else
                                        {
                                            index--;
                                            if (index >= 0)
                                            {
                                                possibleTranslatedSequence = protSeq.Substring(index, fragLength);
                                                fragMass       = MassCalculator.MonoIsoptopicMass(possibleTranslatedSequence, out string e2);
                                                error_message += e2;
                                            }
                                            else
                                            {
                                                fragMass = expMass + 187.079 - 57.021 + 1;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                List <Variant> variants = psm.variants;
                for (int v = variants.Count() - 1; v >= 0; v--)
                {
                    for (int v2 = 0; v2 < v; v2++)
                    {
                        if (variants[v].pepSeq.Equals(variants[v2].pepSeq) && variants[v].id.Equals(variants[v2].id))
                        {
                            variants.Remove(variants[v]);
                            v2 = variants.Count();
                        }
                    }
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Workhorse of SpliceFragments: crops the N-terminal (B) and C-terminal (Y) sequences with
        /// <c>IonCrop</c>, computes the theoretical mass of the spliced B+Y peptide and, if that mass
        /// falls inside the accepted precursor window, records B+Y as a fusion candidate on the PSM
        /// (unless the same sequence is already recorded).
        /// </summary>
        /// <remarks>
        /// The method was originally written recursively, but large peptides resulted in stack overflow
        /// exceptions; each call now evaluates exactly one (BIndex, YIndex) pair.
        /// </remarks>
        /// <param name="B">N-terminal ion sequence.</param>
        /// <param name="Y">C-terminal ion sequence.</param>
        /// <param name="psm">Spectrum match supplying the experimental mass, PTMs and candidate list.</param>
        /// <param name="BIndex">Fragment number forwarded to <c>IonCrop</c> for the B sequence.</param>
        /// <param name="YIndex">Fragment number forwarded to <c>IonCrop</c> for the Y sequence.</param>
        /// <param name="error_message">Concatenated error text reported by <c>IonCrop</c> and the mass calculator.</param>
        public void MassMatch(string B, string Y, PSM psm, int BIndex, int YIndex, out string error_message)
        {
            error_message = "";
            test = psm.getScan().ToString();

            double precursorMass = psm.getExpMass();
            string nTermFrag = IonCrop(B, precursorMass, BIndex, IonType.b, false, out string bCropError); //B sequence cropped from its C terminus
            string cTermFrag = IonCrop(Y, precursorMass, YIndex, IonType.y, false, out string yCropError); //Y sequence cropped from its N terminus

            //water is included once in the b mass and once in the y mass, so one copy is removed
            double candidateMass = MassCalculator.MonoIsoptopicMass(nTermFrag, out string bMassError) + MassCalculator.MonoIsoptopicMass(cTermFrag, out string yMassError) - Constants.WATER_MONOISOTOPIC_MASS + fixedModMass;

            error_message += yCropError + bCropError + bMassError + yMassError;

            //add the mass of every PTM that still lies inside the cropped fragments
            foreach (PTM ptm in psm.getNInfo().getPTMs())
            {
                if (ptm.index < nTermFrag.Length)
                {
                    candidateMass += ptm.mass;
                }
            }
            foreach (PTM ptm in psm.getCInfo().getPTMs())
            {
                if (Y.Length - ptm.index < cTermFrag.Length)
                {
                    candidateMass += ptm.mass;
                }
            }

            //a fragment shorter than the required number of residues ends the search for this pair
            if (cTermFrag.Length < ionsUsedMassVer || nTermFrag.Length < ionsUsedMassVer)
            {
                return;
            }

            bool withinWindow;
            if (FalsePositives.generateDecoys)
            {
                //decoy mode accepts only masses in (exp - 9.5, exp - 4.5) or (exp + 5.5, exp + 7.5)
                bool belowWindow = (candidateMass) > (precursorMass - 9.5) && (candidateMass) < (precursorMass - 4.5);
                bool aboveWindow = (candidateMass) > (precursorMass + 5.5) && (candidateMass) < (precursorMass + 7.5);
                withinWindow = belowWindow | aboveWindow;
            }
            else
            {
                //NOTE(review): if precursorMassTolerancePpm is an integral type, "/ 1000000" truncates to 0 - confirm it is a double
                withinWindow = (candidateMass) > (precursorMass * (1 - FalsePositives.precursorMassTolerancePpm / 1000000)) && (candidateMass) < (precursorMass * (1 + FalsePositives.precursorMassTolerancePpm / 1000000));
            }

            if (!withinWindow)
            {
                return;
            }

            //only record the spliced sequence if this psm does not already hold it
            string splicedSeq = nTermFrag + cTermFrag;
            bool alreadyRecorded = false;
            foreach (FusionCandidate existing in psm.getFusionCandidates())
            {
                if (splicedSeq.Equals(existing.seq))
                {
                    alreadyRecorded = true;
                    break;
                }
            }
            if (!alreadyRecorded)
            {
                psm.addFusionCandidate(new FusionCandidate(splicedSeq));
            }
        }
        /// <summary>
        /// Expands the fusion candidates of <paramref name="psm"/> with alternative sequences for every
        /// stretch that lies between two positions where an ion was found. For each such stretch, all
        /// fragments from <c>massDict</c> whose mass key lies within <c>productMassToleranceDa</c> of the
        /// stretch's monoisotopic mass are substituted in, and every resulting sequence not seen before
        /// is appended to the PSM's candidate list (so it is itself expanded later in the same run).
        /// </summary>
        /// <param name="psm">Spectrum match whose fusion candidate list is read and extended in place.</param>
        /// <param name="error_message">Concatenated error text reported by <c>findIons</c> and the mass calculator.</param>
        /// <returns>
        /// <c>false</c> as soon as the candidate list holds more than <c>maxNumPossibleSequences</c>
        /// entries (the search is abandoned); <c>true</c> once every position has been processed.
        /// </returns>
        public bool GeneratePossibleSequences(PSM psm, out string error_message)
        {
            error_message = "";

            //Adds every fragment stored under peakMass to sink when peakMass lies strictly inside the
            //product tolerance window around targetMass; returns false once the window has been left
            bool CollectIfWithinTolerance(double peakMass, double targetMass, List<string> sink)
            {
                bool inWindow = peakMass > targetMass - productMassToleranceDa && peakMass < targetMass + productMassToleranceDa;
                if (inWindow && massDict.TryGetValue(peakMass, out string[] fragments))
                {
                    foreach (string fragment in fragments)
                    {
                        sink.Add(fragment);
                    }
                }
                return inWindow;
            }

            //every sequence seen so far; a hash set keeps the membership test O(1)
            HashSet<string> foundSeq = new HashSet<string>();

            foreach (FusionCandidate fusionCandidate in psm.getFusionCandidates())
            {
                findIons(fusionCandidate, psm, out string error_message1); //populate the foundIons array
                error_message += error_message1;
                foundSeq.Add(fusionCandidate.seq);
            }

            //if there are more than a set number of possible sequences, this is junk and we are not searching them all
            if (psm.getFusionCandidates().Count() > maxNumPossibleSequences)
            {
                return(false);
            }

            bool done        = false;
            int  globalIndex = 0;

            while (!done)
            {
                done = true; //assume we're done; corrected below if any candidate still reaches globalIndex

                //index loop on purpose: candidates appended below are visited within the same pass
                for (int fc = 0; fc < psm.getFusionCandidates().Count(); fc++)
                {
                    FusionCandidate fusionCandidate = psm.getFusionCandidates()[fc];
                    bool[] ionFound = fusionCandidate.getFoundIons();
                    if (ionFound.Count() <= globalIndex) //this candidate is shorter than the current position
                    {
                        continue;
                    }
                    done = false; //at least one candidate still reaches the global index

                    if (!ionFound[globalIndex]) //only look for ambiguity if a peak was found to provide the stop point
                    {
                        continue;
                    }

                    string fusionSeq = fusionCandidate.seq;

                    //most recent ion found prior to this one (start point, exclusive); -1 if none
                    int mostRecent = globalIndex - 1;
                    while (mostRecent >= 0 && !ionFound[mostRecent])
                    {
                        mostRecent--;
                    }

                    string ambiguousFrag = fusionSeq.Substring(mostRecent + 1, globalIndex - mostRecent);
                    double key           = MassCalculator.MonoIsoptopicMass(ambiguousFrag, out string error_message2);
                    error_message += error_message2;

                    //locate the insertion point of the mass, then gather neighbours in both directions
                    int ipos = Array.BinarySearch(keys, key);
                    if (ipos < 0)
                    {
                        ipos = ~ipos;
                    }

                    List <string> combinations = new List <string>();
                    for (int down = ipos - 1; down >= 0; down--) //try down
                    {
                        if (!CollectIfWithinTolerance(keys[down], key, combinations))
                        {
                            break;
                        }
                    }
                    for (int up = ipos; up < keys.Length; up++) //try here and up
                    {
                        if (!CollectIfWithinTolerance(keys[up], key, combinations))
                        {
                            break;
                        }
                    }

                    //the flanking sequences are the same for every replacement, so build them once
                    string nTermSeq = fusionSeq.Substring(0, mostRecent + 1);
                    string cTermSeq = fusionSeq.Substring(globalIndex + 1, fusionSeq.Length - globalIndex - 1);

                    foreach (string replacement in combinations)
                    {
                        FusionCandidate newfc = new FusionCandidate(nTermSeq + replacement + cTermSeq);
                        if (foundSeq.Add(newfc.seq)) //Add returns true only for a sequence not seen before
                        {
                            findIons(newfc, psm, out string error_message3);
                            error_message += error_message3;
                            psm.getFusionCandidates().Add(newfc);
                        }
                    }
                }

                globalIndex++;
                if (psm.getFusionCandidates().Count() > maxNumPossibleSequences)
                {
                    return(false);
                }
            }
            return(true);
        }
        /// <summary>
        /// Uses the PSM's ion peak hits to record where peaks were found along the candidate sequence
        /// and, by omission, where the sequence is ambiguous. After <c>makeFoundIons()</c> is called on
        /// the candidate, slot <c>i</c> of its found-ions array is set to true when an experimental peak
        /// lies strictly within <c>productMassToleranceDa</c> of a theoretical fragment that is cleaved
        /// after residue <c>i</c>. Only the ion types listed in <c>ionsUsed</c> are considered.
        /// </summary>
        /// <param name="fusionCandidate">Candidate whose found-ions array is (re)created and filled.</param>
        /// <param name="psm">Spectrum match supplying N- and C-terminal peak hits and PTMs.</param>
        /// <param name="error_message">Concatenated error text reported by the mass calculator.</param>
        public static void findIons(FusionCandidate fusionCandidate, PSM psm, out string error_message)
        {
            error_message = "";

            //true when any experimental peak lies strictly inside the tolerance window around theoMass
            bool AnyPeakNear(double[] peaks, double theoMass)
            {
                foreach (double peak in peaks)
                {
                    if (peak > theoMass - productMassToleranceDa && peak < theoMass + productMassToleranceDa)
                    {
                        return true;
                    }
                }
                return false;
            }

            double[] nTermPeaks = psm.getNInfo().getPeakHits();
            double[] cTermPeaks = psm.getCInfo().getPeakHits();
            fusionCandidate.makeFoundIons();
            string sequence = fusionCandidate.seq;

            bool[] hits      = fusionCandidate.getFoundIons();
            int    slotCount = hits.Count();

            //the final slot is handled after the loop, so only slots 0 .. slotCount - 2 are tested
            for (int i = 0; i < slotCount - 1; i++)
            {
                int fragLength  = i + 1;                        //residues in the current prefix/suffix
                int suffixStart = sequence.Length - 1 - i;      //first residue of the C-terminal fragment
                int suffixSlot  = slotCount - 2 - i;            //slot of the cleavage preceding that suffix

                //B IONS: prefix mass without water
                if (ionsUsed.Contains(IonType.b))
                {
                    double bMass = MassCalculator.MonoIsoptopicMass(sequence.Substring(0, fragLength), out string bError) - Constants.WATER_MONOISOTOPIC_MASS;
                    error_message += bError;
                    foreach (PTM ptm in psm.getNInfo().getPTMs())
                    {
                        if (ptm.index <= i)
                        {
                            bMass += ptm.mass;
                        }
                    }
                    if (AnyPeakNear(nTermPeaks, bMass))
                    {
                        hits[i] = true;
                    }
                }

                //Y IONS: suffix mass
                if (ionsUsed.Contains(IonType.y))
                {
                    double yMass = MassCalculator.MonoIsoptopicMass(sequence.Substring(suffixStart, fragLength), out string yError);
                    error_message += yError;
                    foreach (PTM ptm in psm.getCInfo().getPTMs())
                    {
                        //NOTE(review): threshold is one position before suffixStart - confirm the PTM index convention
                        if (ptm.index >= sequence.Length - 2 - i)
                        {
                            yMass += ptm.mass;
                        }
                    }
                    if (AnyPeakNear(cTermPeaks, yMass))
                    {
                        hits[suffixSlot] = true;
                    }
                }

                //C IONS: prefix mass without water, plus one nitrogen and three hydrogens
                if (ionsUsed.Contains(IonType.c))
                {
                    double cMass = MassCalculator.MonoIsoptopicMass(sequence.Substring(0, fragLength), out string cError) - Constants.WATER_MONOISOTOPIC_MASS + Constants.nitrogenMonoisotopicMass + 3 * Constants.hydrogenMonoisotopicMass;
                    error_message += cError;
                    foreach (PTM ptm in psm.getNInfo().getPTMs())
                    {
                        if (ptm.index <= i)
                        {
                            cMass += ptm.mass;
                        }
                    }
                    if (AnyPeakNear(nTermPeaks, cMass))
                    {
                        hits[i] = true;
                    }
                }

                //ZDOT IONS: suffix mass minus one nitrogen and two hydrogens
                if (ionsUsed.Contains(IonType.zdot))
                {
                    double zdotMass = MassCalculator.MonoIsoptopicMass(sequence.Substring(suffixStart, fragLength), out string zdotError) - Constants.nitrogenMonoisotopicMass - 2 * Constants.hydrogenMonoisotopicMass;
                    error_message += zdotError;
                    foreach (PTM ptm in psm.getCInfo().getPTMs())
                    {
                        //NOTE(review): same threshold as for y ions - confirm the PTM index convention
                        if (ptm.index >= sequence.Length - 2 - i)
                        {
                            zdotMass += ptm.mass;
                        }
                    }
                    if (AnyPeakNear(cTermPeaks, zdotMass))
                    {
                        hits[suffixSlot] = true;
                    }
                }
            }

            //A|B|C|D|E|F|K| - the whole-peptide peak is always placed arbitrarily at the C terminus
            hits[slotCount - 1] = true;
        }