// This is the workhorse of SpliceFragments.
// Splices one b-ion (N-terminal) fragment and one y-ion (C-terminal) fragment together and, when the
// theoretical mass of the spliced sequence matches the experimental mass of the scan, records the
// sequence on the PSM as a fusion candidate (unless that sequence is already recorded).
//
// The method was originally written recursively, but large peptides resulted in stack overflow
// exceptions; this version makes no recursive calls and evaluates a single (BIndex, YIndex) pair.
//
// B, Y           : N-terminal and C-terminal sequences to crop and splice.
// psm            : scan being explained; supplies the experimental mass, PTMs and candidate list.
// BIndex, YIndex : forwarded to IonCrop for the b and y fragment respectively.
// error_message  : concatenation of the messages returned by IonCrop and the mass calculator.
public void MassMatch(string B, string Y, PSM psm, int BIndex, int YIndex, out string error_message)
{
    error_message = "";
    test = psm.getScan().ToString(); // 'test' is a member declared outside this block
    double ExperimentalMass = psm.getExpMass();

    // Returns a B ion sequence that has a mass smaller than the experimental mass by cleaving C-term AA.
    string BFrag = IonCrop(B, ExperimentalMass, BIndex, IonType.b, false, out string bCropError);
    // Returns a Y ion sequence that has a mass smaller than the experimental mass by cleaving N-term AA.
    string YFrag = IonCrop(Y, ExperimentalMass, YIndex, IonType.y, false, out string yCropError);

    // Water is added once in b and once in y, so one copy is subtracted from the sum.
    double TheoreticalMass = MassCalculator.MonoIsoptopicMass(BFrag, out string bMassError)
                             + MassCalculator.MonoIsoptopicMass(YFrag, out string yMassError)
                             - Constants.WATER_MONOISOTOPIC_MASS
                             + fixedModMass;
    error_message += yCropError + bCropError + bMassError + yMassError;

    // Add PTM masses. An N-side PTM counts when its index is below the retained b-fragment length.
    foreach (PTM ptm in psm.getNInfo().getPTMs())
    {
        if (ptm.index < BFrag.Length)
        {
            TheoreticalMass += ptm.mass;
        }
    }

    // A C-side PTM counts when its distance from the end of Y is below the retained y-fragment length.
    foreach (PTM ptm in psm.getCInfo().getPTMs())
    {
        if (Y.Length - ptm.index < YFrag.Length)
        {
            TheoreticalMass += ptm.mass;
        }
    }

    // If either fragment has fewer amino acids than the desired amount there is nothing to record.
    if (YFrag.Length < ionsUsedMassVer || BFrag.Length < ionsUsedMassVer)
    {
        return;
    }

    bool isMatch;
    if (FalsePositives.generateDecoys)
    {
        // Decoy mode: a match is a theoretical mass inside one of two windows that are offset from
        // the experimental mass, (exp - 9.5, exp - 4.5) or (exp + 5.5, exp + 7.5).
        bool inLowerWindow = TheoreticalMass > ExperimentalMass - 9.5 && TheoreticalMass < ExperimentalMass - 4.5;
        bool inUpperWindow = TheoreticalMass > ExperimentalMass + 5.5 && TheoreticalMass < ExperimentalMass + 7.5;
        isMatch = inLowerWindow || inUpperWindow;
    }
    else
    {
        // Target mode: a match is a theoretical mass within the precursor ppm tolerance.
        // The divisor is a floating-point literal so the ppm fraction cannot be truncated to zero
        // by integer division if the tolerance is stored as an integral type.
        double ppmFraction = FalsePositives.precursorMassTolerancePpm / 1000000.0;
        isMatch = TheoreticalMass > ExperimentalMass * (1 - ppmFraction)
                  && TheoreticalMass < ExperimentalMass * (1 + ppmFraction);
    }

    if (isMatch)
    {
        AddFusionCandidateIfNew(psm, BFrag + YFrag);
    }
}

// Adds 'sequence' to the PSM's fusion candidates unless a candidate with that sequence already exists.
private void AddFusionCandidateIfNew(PSM psm, string sequence)
{
    foreach (FusionCandidate oldCandidate in psm.getFusionCandidates())
    {
        if (sequence.Equals(oldCandidate.seq)) // that sequence was already recorded
        {
            return;
        }
    }

    psm.addFusionCandidate(new FusionCandidate(sequence));
}
// Filters the PSM's fusion candidates against the database and classifies the PSM.
//
// Returns true when at least one candidate remains afterwards: either a candidate whose whole
// sequence was found as is (assigned as translated, FusionType.TL, and kept as the only candidate)
// or one or more candidates that passed isViable. Returns false when every candidate was removed,
// i.e. the sequence could not be made with the given database and should be discarded.
//
// Side effects: resets foundSequences and notFoundSequences, mutates the PSM's candidate list,
// and may update psm.fusionType and each candidate's fusionType / translatedParents.
public bool PossibleCandidate(PSM psm)
{
    // Used for finding longer fragments than those previously identified. Also populates ParentInfo.
    foundSequences = new Dictionary<string, List<TheoreticalProtein>>();
    // Don't bother looking for these fragments, since we know they don't exist.
    // Good for multiple homologous putative fusion peptide sequences.
    notFoundSequences = new HashSet<string>();

    var candidates = psm.getFusionCandidates();

    // Initial search of each candidate's full sequence to identify any that are translated.
    for (int i = 0; i < candidates.Count; i++)
    {
        FusionCandidate candidate = candidates[i];
        string novelSeq = candidate.seq;

        // Check really quick to see if the whole thing exists as is. Terminal C was arbitrarily chosen.
        if (!foundParent(novelSeq, ParentInfo.terminal.C, candidate, false))
        {
            continue;
        }

        foreach (ParentInfo info in candidate.parentInfo)
        {
            foreach (TheoreticalProtein protein in info.theoreticalProteins)
            {
                if (protein.seq.Contains(novelSeq)) // translated
                {
                    // Ordinal search so IndexOf agrees with the ordinal Contains check above.
                    int start = protein.seq.IndexOf(candidate.seq, StringComparison.Ordinal);
                    candidate.translatedParents.Add(new TranslatedParent(protein.id, protein.seq, start, candidate.seq.Length));
                }
            }
        }

        candidate.fusionType = FusionCandidate.FusionType.TL;
        psm.fusionType = candidate.fusionType;
        KeepOnlyCandidate(psm, candidate);
        return true;
    }

    for (int i = 0; i < candidates.Count; i++)
    {
        FusionCandidate candidate = candidates[i];

        // Remove this fusion peptide sequence if the parent fragments cannot be found with the given database.
        if (!isViable(candidate))
        {
            candidates.RemoveAt(i);
            i--; // the next candidate has shifted into slot i
            continue;
        }

        DetermineFusionCandidateType(candidate); // cis, trans?

        // If more likely than previous types, change the psm type (golf scoring: lower is better).
        if (psm.fusionType > candidate.fusionType)
        {
            psm.fusionType = candidate.fusionType;
        }

        // If there's a possible sequence that's present in the database, it is likely correct and
        // it is not worth identifying parents of other sequences.
        if (psm.fusionType.Equals(FusionCandidate.FusionType.TL))
        {
            KeepOnlyCandidate(psm, candidate);
            return true;
        }
    }

    // If no candidates are left, we couldn't make the sequence with the database and we'll discard it.
    return candidates.Count != 0;
}

// Reduces the PSM's fusion candidate list to the single candidate 'keep'.
private void KeepOnlyCandidate(PSM psm, FusionCandidate keep)
{
    var candidates = psm.getFusionCandidates();
    candidates.Clear();
    candidates.Add(keep);
}
// Scores every fusion candidate of the PSM by its found ions and keeps only the best-scoring,
// non-repeated candidates. Returns true if the PSM is too messy for confident identification
// (no candidate survives) and false if at least one candidate is left.
//
// A candidate is rejected outright when the consecutive-miss counter exceeds
// maxMissingConsecutivePeaks (too many permutations possible because of an unmapped region).
// Otherwise its score is the number of found ions counted; candidates scoring below the current
// best, or repeating the sequence of a candidate already kept at the best score, are removed.
//
// psm           : scan whose candidate list is filtered in place.
// error_message : concatenation of the messages returned by findIons.
public bool IsTooMessy(PSM psm, out string error_message)
{
    error_message = "";
    var candidates = psm.getFusionCandidates();
    var bestSequences = new HashSet<string>(); // sequences of the candidates kept at the current best score
    int currentBestScore = 0;

    for (int index = 0; index < candidates.Count; index++)
    {
        FusionCandidate fc = candidates[index];
        findIons(fc, psm, out string ionError);
        error_message += ionError;

        bool badID = false;
        int consecutiveMissedCounter = 0;
        int totalHitCounter = 0;
        foreach (bool found in fc.getFoundIons())
        {
            // NOTE: the limit is tested before the current ion is examined, so a run of misses
            // that ends at the final ion is not flagged. Once the limit is exceeded, later ions
            // are no longer counted.
            if (consecutiveMissedCounter > maxMissingConsecutivePeaks)
            {
                badID = true;
            }
            else if (!found)
            {
                consecutiveMissedCounter++;
            }
            else
            {
                totalHitCounter++;
                consecutiveMissedCounter = 0; // only care about consecutive misses
            }
        }

        bool isRepeat = bestSequences.Contains(fc.seq);

        if (totalHitCounter > currentBestScore && !badID)
        {
            // The earlier candidates were worse, so delete them; fc ends up at slot 0.
            while (index > 0)
            {
                candidates.RemoveAt(0);
                index--;
            }

            currentBestScore = totalHitCounter;
            bestSequences = new HashSet<string> { fc.seq };
        }
        else if (totalHitCounter < currentBestScore || badID || isRepeat)
        {
            candidates.RemoveAt(index);
            index--; // the next candidate has shifted into this slot
        }
        else
        {
            // Ties the best score and is not a repeat: keep it, and remember its sequence so a
            // later duplicate of this tied candidate is recognised as a repeat.
            bestSequences.Add(fc.seq);
        }
    }

    // If nothing is left, this might be a fusion peptide, but we won't get any valuable
    // information from this spectrum, so it is reported as too messy.
    return candidates.Count == 0;
}
// Expands the PSM's fusion candidates with alternative sequences for every ambiguous stretch.
// For each candidate and each ion index where an ion was found, the residues between the previous
// found ion and this one are replaced by every fragment in massDict whose mass is within
// productMassToleranceDa of that stretch; each sequence not seen before is added as a new candidate.
//
// Returns false as soon as the number of candidates exceeds maxNumPossibleSequences (junk; not
// searching them all), true once every candidate has been scanned to the end of its found-ion array.
//
// psm           : scan whose candidate list is extended in place.
// error_message : concatenation of the messages returned by findIons and the mass calculator.
public bool GeneratePossibleSequences(PSM psm, out string error_message)
{
    error_message = "";
    var candidates = psm.getFusionCandidates();

    // Every sequence seen so far; a set keeps the duplicate check constant-time.
    var foundSeq = new HashSet<string>();
    foreach (FusionCandidate fusionCandidate in candidates)
    {
        findIons(fusionCandidate, psm, out string ionError); // populate the foundIons array
        error_message += ionError;
        foundSeq.Add(fusionCandidate.seq);
    }

    bool done = false;
    int globalIndex = 0;
    while (!done)
    {
        done = true; // assume we're done and correct it below if we're not

        if (candidates.Count > maxNumPossibleSequences)
        {
            return false;
        }

        // Index loop on purpose: candidates appended during this pass are visited in the same pass.
        for (int fc = 0; fc < candidates.Count; fc++)
        {
            FusionCandidate fusionCandidate = candidates[fc];
            bool[] ionFound = fusionCandidate.getFoundIons();

            // Past the end of this candidate's ions; when this holds for every candidate we are done.
            if (ionFound.Length <= globalIndex)
            {
                continue;
            }

            done = false;

            // Only look for ambiguity if a peak was found to provide the stop point.
            if (!ionFound[globalIndex])
            {
                continue;
            }

            // Most recent ion found prior to this one (start point), or -1 if there is none.
            int mostRecent = -1;
            for (int i = globalIndex - 1; i >= 0; i--)
            {
                if (ionFound[i])
                {
                    mostRecent = i;
                    break;
                }
            }

            string fusionSeq = fusionCandidate.seq;
            string ambiguousFrag = fusionSeq.Substring(mostRecent + 1, globalIndex - mostRecent);
            double key = MassCalculator.MonoIsoptopicMass(ambiguousFrag, out string massError);
            error_message += massError;

            // The flanking sequences do not depend on the replacement, so build them once.
            string nTermSeq = fusionSeq.Substring(0, mostRecent + 1);
            string cTermSeq = fusionSeq.Substring(globalIndex + 1, fusionSeq.Length - globalIndex - 1);

            foreach (string replacement in FindFragmentsNearMass(key))
            {
                FusionCandidate newCandidate = new FusionCandidate(nTermSeq + replacement + cTermSeq);
                if (!foundSeq.Add(newCandidate.seq))
                {
                    continue; // sequence already known
                }

                findIons(newCandidate, psm, out string newIonError);
                error_message += newIonError;
                candidates.Add(newCandidate);
            }
        }

        globalIndex++;
        if (candidates.Count > maxNumPossibleSequences)
        {
            return false;
        }
    }

    return true;
}

// Returns every fragment stored in massDict under a key strictly within productMassToleranceDa of
// 'targetMass'. Keys below the binary-search position are scanned downwards first, then keys at and
// above it upwards; each scan stops at the first key outside the tolerance.
private List<string> FindFragmentsNearMass(double targetMass)
{
    var fragments = new List<string>();
    double lowerBound = targetMass - productMassToleranceDa;
    double upperBound = targetMass + productMassToleranceDa;

    int position = Array.BinarySearch(keys, targetMass);
    if (position < 0)
    {
        position = ~position; // index of the first key larger than targetMass
    }

    for (int down = position - 1; down >= 0; down--)
    {
        double mass = keys[down];
        if (!(mass > lowerBound && mass < upperBound))
        {
            break;
        }

        if (massDict.TryGetValue(mass, out string[] matches))
        {
            fragments.AddRange(matches);
        }
    }

    for (int up = position; up < keys.Length; up++)
    {
        double mass = keys[up];
        if (!(mass > lowerBound && mass < upperBound))
        {
            break;
        }

        if (massDict.TryGetValue(mass, out string[] matches))
        {
            fragments.AddRange(matches);
        }
    }

    return fragments;
}