Пример #1
0
        //method was originally written recursively, but large peptides result in stackoverflow exceptions
        /// <summary>
        /// Checks whether one cropped b-fragment/y-fragment pair adds up to the PSM's experimental mass and, if it
        /// does, records the spliced sequence (b-fragment followed by y-fragment) as a fusion candidate on the PSM.
        /// This is the workhorse of SpliceFragments.
        /// </summary>
        /// <remarks>
        /// Only the single (BIndex, YIndex) combination passed in is evaluated. The method used to recurse over
        /// further crops, but the recursive calls were removed because large peptides caused stack overflows.
        /// As a side effect the <c>test</c> field is overwritten with the PSM's scan converted to a string.
        /// </remarks>
        /// <param name="B">Sequence handed to <c>IonCrop</c> as the b-ion (N-terminal) side.</param>
        /// <param name="Y">Sequence handed to <c>IonCrop</c> as the y-ion (C-terminal) side.</param>
        /// <param name="psm">Spectrum match supplying the experimental mass, the PTM lists and the candidate list.</param>
        /// <param name="BIndex">Index forwarded to <c>IonCrop</c> for the b side.</param>
        /// <param name="YIndex">Index forwarded to <c>IonCrop</c> for the y side.</param>
        /// <param name="error_message">Concatenation of the messages reported by the <c>IonCrop</c> and <c>MassCalculator</c> calls.</param>
        public void MassMatch(string B, string Y, PSM psm, int BIndex, int YIndex, out string error_message)
        {
            error_message = "";
            test = psm.getScan().ToString();
            double experimentalMass = psm.getExpMass();

            // Per the original notes: IonCrop returns a b (resp. y) ion sequence lighter than the experimental mass
            // by cleaving C-terminal (resp. N-terminal) amino acids.
            string bFrag = IonCrop(B, experimentalMass, BIndex, IonType.b, false, out string bCropError);
            string yFrag = IonCrop(Y, experimentalMass, YIndex, IonType.y, false, out string yCropError);

            // Water is included once in the b mass and once in the y mass, so one copy is subtracted.
            double theoreticalMass = MassCalculator.MonoIsoptopicMass(bFrag, out string bMassError)
                                     + MassCalculator.MonoIsoptopicMass(yFrag, out string yMassError)
                                     - Constants.WATER_MONOISOTOPIC_MASS
                                     + fixedModMass;

            error_message += yCropError + bCropError + bMassError + yMassError;

            // Add the mass of every PTM whose position is still covered by the cropped fragments.
            foreach (PTM ptm in psm.getNInfo().getPTMs())
            {
                if (ptm.index < bFrag.Length)
                {
                    theoreticalMass += ptm.mass;
                }
            }
            foreach (PTM ptm in psm.getCInfo().getPTMs())
            {
                if (Y.Length - ptm.index < yFrag.Length)
                {
                    theoreticalMass += ptm.mass;
                }
            }

            // Too few residues left on either side: nothing to record for this pair.
            if (yFrag.Length < ionsUsedMassVer || bFrag.Length < ionsUsedMassVer)
            {
                return;
            }

            bool isMatch;
            if (FalsePositives.generateDecoys)
            {
                // Decoy mode accepts masses in two fixed windows offset from the experimental mass:
                // (exp - 9.5, exp - 4.5) and (exp + 5.5, exp + 7.5).
                isMatch = (theoreticalMass > experimentalMass - 9.5 && theoreticalMass < experimentalMass - 4.5)
                          || (theoreticalMass > experimentalMass + 5.5 && theoreticalMass < experimentalMass + 7.5);
            }
            else
            {
                // Dividing by 1e6 (a double) keeps the ppm-to-fraction conversion in floating point even if the
                // tolerance is declared as an integral type; an integer division would truncate it to zero.
                double relativeTolerance = FalsePositives.precursorMassTolerancePpm / 1e6;
                isMatch = theoreticalMass > experimentalMass * (1 - relativeTolerance)
                          && theoreticalMass < experimentalMass * (1 + relativeTolerance);
            }

            if (!isMatch)
            {
                return;
            }

            // Record the spliced sequence unless this PSM already carries it.
            string fusionSeq = bFrag + yFrag;
            foreach (FusionCandidate oldCandidate in psm.getFusionCandidates())
            {
                if (fusionSeq.Equals(oldCandidate.seq))
                {
                    return;
                }
            }
            psm.addFusionCandidate(new FusionCandidate(fusionSeq));
        }
        //returns false if no candidate is left because none could be built from the database; returns true otherwise, including when a candidate's full sequence was found in the database (translated rather than a novel fusion).
        /// <summary>
        /// Filters the PSM's fusion candidates against the database and classifies the survivors.
        /// </summary>
        /// <remarks>
        /// Resets <c>foundSequences</c> and <c>notFoundSequences</c>, then works in two passes:
        /// 1. If <c>foundParent</c> reports a candidate's whole sequence as present, that candidate is marked
        ///    <c>FusionType.TL</c>, its translated parents are recorded, every other candidate is dropped and the
        ///    method returns true.
        /// 2. Otherwise each candidate failing <c>isViable</c> is removed; the rest are typed via
        ///    <c>DetermineFusionCandidateType</c> and the PSM takes the lowest type seen ("golf scoring"). As soon as
        ///    the PSM's type is TL, only the current candidate is kept and the method returns true.
        /// </remarks>
        /// <param name="psm">Spectrum match whose candidate list and fusion type are updated in place.</param>
        /// <returns>False when no candidate remains; true otherwise.</returns>
        public bool PossibleCandidate(PSM psm)
        {
            foundSequences = new Dictionary<string, List<TheoreticalProtein>>(); // used for finding longer fragments than those previously identified; also populates ParentInfo
            notFoundSequences = new HashSet<string>();                           // fragments already known to be absent, so they are not searched again

            var candidates = psm.getFusionCandidates();

            // Replaces the whole candidate list with the single candidate to keep.
            void KeepOnly(FusionCandidate keeper)
            {
                candidates.Clear();
                candidates.Add(keeper);
            }

            // Pass 1: quick check whether any candidate exists as-is in the database (terminal C chosen arbitrarily).
            for (int i = 0; i < candidates.Count(); i++)
            {
                FusionCandidate candidate = candidates[i];
                string novelSeq = candidate.seq;
                if (!foundParent(novelSeq, ParentInfo.terminal.C, candidate, false))
                {
                    continue;
                }

                foreach (ParentInfo info in candidate.parentInfo)
                {
                    foreach (TheoreticalProtein protein in info.theoreticalProteins)
                    {
                        // One ordinal search yields both "is it contained" and "where".
                        int start = protein.seq.IndexOf(novelSeq, StringComparison.Ordinal);
                        if (start >= 0)
                        {
                            candidate.translatedParents.Add(new TranslatedParent(protein.id, protein.seq, start, novelSeq.Length));
                        }
                    }
                }

                candidate.fusionType = FusionCandidate.FusionType.TL;
                psm.fusionType = candidate.fusionType;
                KeepOnly(candidate);
                return true;
            }

            // Pass 2: drop candidates whose parent fragments cannot be found, classify the others.
            for (int i = 0; i < candidates.Count(); i++)
            {
                FusionCandidate candidate = candidates[i];
                if (!isViable(candidate))
                {
                    candidates.RemoveAt(i);
                    i--; // the next candidate has shifted into slot i
                    continue;
                }

                DetermineFusionCandidateType(candidate); // cis, trans?
                if (psm.fusionType > candidate.fusionType) // lower value wins (golf scoring)
                {
                    psm.fusionType = candidate.fusionType;
                }

                // A sequence present in the database is taken as correct; identifying parents of the remaining
                // candidates is not worth the effort.
                if (psm.fusionType.Equals(FusionCandidate.FusionType.TL))
                {
                    KeepOnly(candidate);
                    return true;
                }
            }

            // No candidates left means the sequence could not be made from the database and is discarded.
            return candidates.Count() != 0;
        }
        /// <summary>
        /// Keeps only the best-supported fusion candidates of a PSM and reports whether none survived.
        /// </summary>
        /// <remarks>
        /// For each candidate <c>findIons</c> is run and its found-ion flags are scanned: the score is the number of
        /// found ions, and the candidate is flagged bad when, at the start of an iteration, the current run of
        /// missed ions already exceeds <c>maxMissingConsecutivePeaks</c> (so a run that only exceeds the limit on
        /// the final flag is not flagged). A candidate that beats the best score evicts all earlier ones;
        /// a candidate that scores lower, is bad, or repeats an already kept sequence is removed; ties are kept.
        /// </remarks>
        /// <param name="psm">Spectrum match whose candidate list is pruned in place.</param>
        /// <param name="error_message">Concatenation of the messages reported by <c>findIons</c>.</param>
        /// <returns>True if no candidate is left (too messy for a confident identification); false otherwise.</returns>
        public bool IsTooMessy(PSM psm, out string error_message)
        {
            error_message = "";
            var candidates = psm.getFusionCandidates();
            HashSet<string> keptSequences = new HashSet<string>(); // sequences of the candidates currently kept
            int currentBestScore = 0;

            for (int index = 0; index < candidates.Count(); index++)
            {
                FusionCandidate fc = candidates[index];
                findIons(fc, psm, out string findIonsError);
                error_message += findIonsError;

                bool badID = false;
                int consecutiveMissedCounter = 0;
                int totalHitCounter = 0;
                foreach (bool found in fc.getFoundIons())
                {
                    if (consecutiveMissedCounter > maxMissingConsecutivePeaks)
                    {
                        // Too many permutations possible because of an unmapped region; the score no longer
                        // matters because a bad candidate is always removed below.
                        badID = true;
                        break;
                    }

                    if (found)
                    {
                        totalHitCounter++;
                        consecutiveMissedCounter = 0; // only consecutive misses count
                    }
                    else
                    {
                        consecutiveMissedCounter++;
                    }
                }

                bool isRepeat = keptSequences.Contains(fc.seq);

                if (totalHitCounter > currentBestScore && !badID)
                {
                    // Everything kept so far is worse: drop all candidates in front of this one.
                    while (index > 0)
                    {
                        candidates.RemoveAt(0);
                        index--;
                    }
                    currentBestScore = totalHitCounter;
                    keptSequences = new HashSet<string> { fc.seq };
                }
                else if (totalHitCounter < currentBestScore || badID || isRepeat)
                {
                    candidates.RemoveAt(index);
                    index--; // the next candidate has shifted into this slot
                }
                else
                {
                    // Tie with the current best: keep it and remember its sequence so that a later duplicate
                    // of this tie is recognised as a repeat.
                    keptSequences.Add(fc.seq);
                }
            }

            // Nothing left: this might be a fusion peptide, but the spectrum yields no usable information.
            return candidates.Count() == 0;
        }
        /// <summary>
        /// Expands the PSM's fusion candidates with every alternative sequence that is indistinguishable by mass
        /// in the stretches between found ions.
        /// </summary>
        /// <remarks>
        /// Positions are visited in increasing order (<c>globalIndex</c>). For each candidate with a found ion at
        /// that position, the stretch from just after the previous found ion through the current position is
        /// weighed, every fragment in <c>massDict</c> whose key lies strictly within <c>productMassToleranceDa</c> of
        /// that mass is substituted for the stretch, and each resulting sequence not seen before is appended to the
        /// PSM's candidate list (after <c>findIons</c> has been run on it). Candidates appended during a pass are
        /// themselves processed in the same pass. The walk ends when the position is past the end of every candidate.
        /// </remarks>
        /// <param name="psm">Spectrum match whose candidate list is extended in place.</param>
        /// <param name="error_message">Concatenation of the messages reported by <c>findIons</c> and <c>MassCalculator</c>.</param>
        /// <returns>False if the number of candidates exceeds <c>maxNumPossibleSequences</c>; true otherwise.</returns>
        public bool GeneratePossibleSequences(PSM psm, out string error_message)
        {
            error_message = "";
            var candidates = psm.getFusionCandidates();
            HashSet<string> foundSeq = new HashSet<string>(); // every candidate sequence seen so far

            // Walks keys from startIndex in the given direction and collects the fragments of every key strictly
            // inside the tolerance window around targetMass, stopping at the first key outside the window.
            void CollectFragments(double targetMass, int startIndex, int step, List<string> destination)
            {
                for (int k = startIndex; k >= 0 && k < keys.Length; k += step)
                {
                    double peak = keys[k];
                    if (!(peak > targetMass - productMassToleranceDa && peak < targetMass + productMassToleranceDa))
                    {
                        break;
                    }

                    if (massDict.TryGetValue(peak, out string[] fragments))
                    {
                        foreach (string frag in fragments)
                        {
                            destination.Add(frag);
                        }
                    }
                }
            }

            foreach (FusionCandidate fusionCandidate in candidates)
            {
                findIons(fusionCandidate, psm, out string findIonsError); // populate the foundIons array
                error_message += findIonsError;
                foundSeq.Add(fusionCandidate.seq);
            }

            // More than the allowed number of sequences: this is junk and will not be searched.
            if (candidates.Count() > maxNumPossibleSequences)
            {
                return false;
            }

            bool done = false;
            for (int globalIndex = 0; !done; globalIndex++)
            {
                done = true; // assume finished; corrected below if any candidate is still longer than globalIndex

                for (int fc = 0; fc < candidates.Count(); fc++)
                {
                    FusionCandidate fusionCandidate = candidates[fc];
                    bool[] ionFound = fusionCandidate.getFoundIons();
                    if (ionFound.Length <= globalIndex)
                    {
                        continue; // past the end of this candidate
                    }

                    done = false;

                    // Ambiguity is only examined where a found peak provides the stop point.
                    if (!ionFound[globalIndex])
                    {
                        continue;
                    }

                    // Start point: the most recent found ion before the current index (-1 if none).
                    int mostRecent = -1;
                    for (int i = globalIndex - 1; i >= 0; i--)
                    {
                        if (ionFound[i])
                        {
                            mostRecent = i;
                            break;
                        }
                    }

                    string fusionSeq = fusionCandidate.seq;
                    string ambiguousFrag = fusionSeq.Substring(mostRecent + 1, globalIndex - mostRecent);
                    double key = MassCalculator.MonoIsoptopicMass(ambiguousFrag, out string massError);
                    error_message += massError;

                    // BinarySearch returns the bitwise complement of the insertion point when the key is absent.
                    int ipos = Array.BinarySearch(keys, key);
                    if (ipos < 0)
                    {
                        ipos = ~ipos;
                    }

                    List<string> combinations = new List<string>();
                    CollectFragments(key, ipos - 1, -1, combinations); // below the insertion point
                    CollectFragments(key, ipos, +1, combinations);     // at and above the insertion point

                    if (combinations.Count == 0)
                    {
                        continue;
                    }

                    // The flanks around the ambiguous stretch are the same for every substitution.
                    string nTermSeq = fusionSeq.Substring(0, mostRecent + 1);
                    string cTermSeq = fusionSeq.Substring(globalIndex + 1, fusionSeq.Length - globalIndex - 1);

                    foreach (string replacement in combinations)
                    {
                        FusionCandidate newCandidate = new FusionCandidate(nTermSeq + replacement + cTermSeq);
                        if (foundSeq.Add(newCandidate.seq)) // Add returns false for an already known sequence
                        {
                            findIons(newCandidate, psm, out string newFindIonsError);
                            error_message += newFindIonsError;
                            candidates.Add(newCandidate);
                        }
                    }
                }

                if (candidates.Count() > maxNumPossibleSequences)
                {
                    return false;
                }
            }

            return true;
        }