//Replaces this candidate's foundIons array with an independent copy of the one held by original.
//original: the candidate whose found-ion flags are copied; its array is left untouched.
public void deepCopyFoundIons(FusionCandidate original)
{
    //Snapshot the source BEFORE touching this.foundIons. The previous version reassigned the field first
    //and then re-read original.getFoundIons() for every element, so copying a candidate onto itself
    //read from the freshly allocated (all false) array and silently wiped the flags.
    bool[] source = original.getFoundIons();
    this.foundIons = (bool[])source.Clone();
}
//Looks for database proteins whose sequence contains frag and, on success, records them on candidate.
//frag: the fragment to search for.
//terminal: which terminus of the candidate the fragment came from; stored on the ParentInfo that gets added.
//candidate: receives a new ParentInfo entry when the fragment is found.
//foundFirstSearch: when true, only the proteins already recorded for the one-residue-shorter fragment are searched.
//Returns true if at least one protein contains frag, otherwise false.
public bool foundParent(string frag, ParentInfo.terminal terminal, FusionCandidate candidate, bool foundFirstSearch)
{
    //Negative cache: this fragment was searched before and nothing contained it.
    if (notFoundSequences.Contains(frag))
    {
        return false;
    }

    //Positive cache: this fragment was searched before and its parents are known.
    List<TheoreticalProtein> hits;
    if (foundSequences.TryGetValue(frag, out hits))
    {
        candidate.parentInfo.Add(new ParentInfo(hits, terminal, frag));
        return true;
    }
    //hits is null from here on unless one of the searches below assigns it.

    if (foundFirstSearch)
    {
        //A shorter fragment was already located, so only its parents can contain the longer one.
        string shorterFrag = terminal.Equals(ParentInfo.terminal.N)
            ? frag.Substring(0, frag.Length - 1)
            : frag.Substring(1, frag.Length - 1);

        foreach (ParentInfo info in candidate.parentInfo)
        {
            if (!info.parentType.Equals(terminal) || !info.fragFound.Equals(shorterFrag))
            {
                continue;
            }

            //If several entries qualify, the last one determines the result.
            List<TheoreticalProtein> previousParents = new List<TheoreticalProtein>(info.theoreticalProteins);
            hits = previousParents.AsParallel().Where(x => x.seq.Contains(frag)).ToList();
        }
    }
    else
    {
        //Nothing to narrow the search with: scan the whole database.
        hits = theoreticalProteins.AsParallel().Where(x => x.seq.Contains(frag)).ToList();
    }

    if (hits == null || hits.Count == 0)
    {
        notFoundSequences.Add(frag);
        return false;
    }

    foundSequences.Add(frag, hits);
    candidate.parentInfo.Add(new ParentInfo(hits, terminal, frag));
    return true;
}
//Writes one tab-separated row per PSM to "<path><folder>\<folder>ExportedFusionCandidates.txt".
//psms: the spectrum matches to export; rows are produced in parallel, so row order is not guaranteed.
//path: prefix the output location is built from (concatenated as-is with the folder field).
//error_message: receives every message returned by AlternativeSequences.findIons, plus a note for each
//               PSM that had to be skipped because none of its candidates has the PSM's probable type.
public void ExportCandidates(List<PSM> psms, string path, out string error_message)
{
    //An out parameter cannot be captured by the lambda below, so messages are gathered here.
    System.Text.StringBuilder errors = new System.Text.StringBuilder();
    //Guards the writer, the error buffer and the progress counter, all of which are shared between worker threads.
    object gate = new object();
    //Workaround for Excel: the cell limit is 32767 characters, but the original author found that value unreliable.
    const int maxParentCellLength = 30000;

    using (StreamWriter file = new StreamWriter(path + folder + @"\" + folder + "ExportedFusionCandidates.txt"))
    {
        //The last two column names used to be concatenated without a tab, which merged them into one header cell.
        file.WriteLine("Scan" + '\t' + "ExperimentalMass" + '\t' + "OriginalNSequence" + '\t' + "OriginalNScore" + '\t' + "OriginalCSequence" + '\t' + "OriginalCScore" + '\t' + "SampleSequence" + '\t' + "Ambiguity" + '\t' + "ProbableType" + '\t' + "MostProbableSequenceJunctions" + '\t' + "MostProbableSequence(s)" + '\t' + "MostProbableParents" + '\t' + "AllPossibleSequenceJunctions" + '\t' + "AllPossibleSequence(s)" + '\t' + "AllPossibleParent(s)" + '\t' + "NumberOfPossibleSequences" + '\t' + "PotentialFalsePositives");

        int progress = 0;
        Parallel.ForEach(psms, (psm) =>
        {
            var candidates = psm.getFusionCandidates();
            FusionCandidate.FusionType probableType = psm.fusionType;

            System.Text.StringBuilder allPossibleSequences = new System.Text.StringBuilder();
            System.Text.StringBuilder mostProbableSequences = new System.Text.StringBuilder();
            System.Text.StringBuilder allPossibleParentsBuilder = new System.Text.StringBuilder();
            System.Text.StringBuilder mostProbableParentsBuilder = new System.Text.StringBuilder();
            int indexOfFirstProbableSequence = -1;

            for (int fc = 0; fc < candidates.Count; fc++)
            {
                FusionCandidate fusionCandidate = candidates[fc];
                string seq = fusionCandidate.seq;

                if (fusionCandidate.fusionType.Equals(probableType))
                {
                    //Candidate has the PSM's probable type: goes into both the "most probable" and the "all" columns.
                    if (indexOfFirstProbableSequence < 0)
                    {
                        indexOfFirstProbableSequence = fc;
                    }

                    //Sequence with a '-' after every residue whose index is a junction (one dash per matching entry).
                    System.Text.StringBuilder marked = new System.Text.StringBuilder();
                    for (int i = 0; i < seq.Length; i++)
                    {
                        marked.Append(seq[i]);
                        foreach (int junction in fusionCandidate.getJunctionIndexes())
                        {
                            if (junction == i)
                            {
                                marked.Append('-');
                            }
                        }
                    }
                    marked.Append('|');
                    string markedSeq = marked.ToString();
                    mostProbableSequences.Append(markedSeq);
                    allPossibleSequences.Append(markedSeq);

                    //Only the parent list that corresponds to the probable type is reported.
                    string tempParents;
                    switch (probableType)
                    {
                        case FusionCandidate.FusionType.TL:
                            tempParents = GenerateParentOutput(fusionCandidate.translatedParents, new List<CisParent>(), new List<TransParent>());
                            break;
                        case FusionCandidate.FusionType.NC:
                        case FusionCandidate.FusionType.RC:
                            tempParents = GenerateParentOutput(new List<TranslatedParent>(), fusionCandidate.cisParents, new List<TransParent>());
                            break;
                        default: //trans
                            tempParents = GenerateParentOutput(new List<TranslatedParent>(), new List<CisParent>(), fusionCandidate.transParents);
                            break;
                    }
                    mostProbableParentsBuilder.Append(tempParents);
                    allPossibleParentsBuilder.Append(tempParents);
                }
                else
                {
                    //Other types only contribute to the "all possible" columns.
                    for (int i = 0; i < seq.Length; i++)
                    {
                        allPossibleSequences.Append(seq[i]);
                        if (fusionCandidate.getJunctionIndexes().Contains(i))
                        {
                            allPossibleSequences.Append('-');
                        }
                    }
                    allPossibleSequences.Append('|');
                    allPossibleParentsBuilder.Append(GenerateParentOutput(fusionCandidate.translatedParents, fusionCandidate.cisParents, fusionCandidate.transParents));
                }
            }

            if (indexOfFirstProbableSequence < 0)
            {
                //No candidate carries the probable type (or there are no candidates at all). The row cannot be
                //built; previously this threw from inside Parallel.ForEach and aborted the whole export.
                lock (gate)
                {
                    errors.Append("Scan " + psm.getScan().ToString() + " has no fusion candidate of its probable type and was not exported. ");
                    progress++;
                    this.worker.ReportProgress(progress * 100 / psms.Count);
                }
                return;
            }

            //Drop the trailing "|"; both builders are non-empty because a probable candidate exists.
            allPossibleSequences.Length--;
            mostProbableSequences.Length--;

            FusionCandidate firstProbable = candidates[indexOfFirstProbableSequence];
            //Ion flags are recomputed here rather than carried over from earlier processing.
            AlternativeSequences.findIons(firstProbable, psm, out string e);
            bool[] foundIons = firstProbable.getFoundIons();
            string firstSeq = firstProbable.seq;

            //Ambiguity string: residues without a found ion are wrapped in parentheses together with the
            //next residue that does have one, e.g. "AB(CDE)F".
            System.Text.StringBuilder ambiguity = new System.Text.StringBuilder();
            bool ambiguous = false;
            for (int i = 0; i < foundIons.Length; i++)
            {
                if (foundIons[i])
                {
                    ambiguity.Append(firstSeq[i]);
                    if (ambiguous)
                    {
                        ambiguity.Append(')');
                        ambiguous = false;
                    }
                }
                else
                {
                    if (!ambiguous)
                    {
                        ambiguous = true;
                        ambiguity.Append('(');
                    }
                    ambiguity.Append(firstSeq[i]);
                }
            }

            string potentialFalsePositives = string.Join("|", psm.variants.Select(v => v.id + "_" + v.start + "-" + (v.start + v.peptideLength - 1) + "(" + v.pepSeq + ")" + v.varType));

            string mostProbableParents = mostProbableParentsBuilder.ToString();
            if (mostProbableParents.Length > maxParentCellLength)
            {
                mostProbableParents = mostProbableParents.Substring(0, maxParentCellLength);
            }
            string allPossibleParents = allPossibleParentsBuilder.ToString();
            if (allPossibleParents.Length > maxParentCellLength)
            {
                allPossibleParents = allPossibleParents.Substring(0, maxParentCellLength);
            }

            string mostProbable = mostProbableSequences.ToString();
            string allPossible = allPossibleSequences.ToString();

            lock (gate)
            {
                //Appending from several threads without a lock could lose messages.
                errors.Append(e);

                file.WriteLine(psm.getScan().ToString() + '\t' + psm.getExpMass().ToString() + '\t' + psm.getNInfo().seq + '\t' + psm.getNInfo().score + '\t' + psm.getCInfo().seq + '\t' + psm.getCInfo().score + '\t' + firstSeq + '\t' + ambiguity.ToString() + '\t' + psm.fusionType.ToString() + '\t' + mostProbable + '\t' + mostProbable.Replace("-", "") + '\t' + mostProbableParents + '\t' + allPossible + '\t' + allPossible.Replace("-", "") + '\t' + allPossibleParents + '\t' + candidates.Count.ToString() + '\t' + potentialFalsePositives);

                progress++;
                //Multiply before dividing: "progress / count * 100" is integer division and stayed at 0 until the last row.
                this.worker.ReportProgress(progress * 100 / psms.Count);
            }
        });
    }

    error_message = errors.ToString();
}
//Removes every PSM that can be explained by a contiguous stretch of a database protein.
//For each fusion candidate, the first and last ionsUsedDigFilter residues are looked up in the database;
//each hit is extended one residue at a time and the PSM is removed as soon as an extended product's
//mass matches the PSM's experimental mass (see productMassMatchesPrecursor for the match windows).
//psms: list that is filtered in place.
//database: proteins to search.
//error_message: concatenation of all messages returned by MassCalculator.MonoIsoptopicMass.
//NOTE(review): exceptions thrown by MassCalculator itself are no longer swallowed; the old blanket catch
//hid them together with the expected out-of-range Substring at the end of a protein.
public void removeTranslatedPeptides(List<PSM> psms, List<TheoreticalProtein> database, out string error_message)
{
    error_message = "";
    for (int i = 0; i < psms.Count; i++)
    {
        this.worker.ReportProgress(Convert.ToInt16((Convert.ToDouble(i) / Convert.ToDouble(psms.Count)) * 100));

        PSM psm = psms[i];
        double expMass = psm.getExpMass();
        bool explainedByDatabase = false;

        foreach (FusionCandidate fc in psm.getFusionCandidates())
        {
            string seq = fc.seq;
            if (seq.Length < ionsUsedDigFilter)
            {
                continue; //too short to take terminal fragments from
            }

            string Nterm = seq.Substring(0, ionsUsedDigFilter);
            string Cterm = seq.Substring(seq.Length - ionsUsedDigFilter, ionsUsedDigFilter);

            //N-terminal fragments grow towards the protein's C-terminus, C-terminal fragments towards its N-terminus.
            if (databaseProductMatchesMass(database, Nterm, true, expMass, ref error_message)
                || databaseProductMatchesMass(database, Cterm, false, expMass, ref error_message))
            {
                explainedByDatabase = true;
                break;
            }
        }

        if (explainedByDatabase)
        {
            //The removal happens only after all searching for this PSM has stopped. The old code removed the
            //PSM in the middle of the extension loop and kept iterating with the decremented index, which
            //could test (and remove) the previous PSM or index psms[-1].
            psms.RemoveAt(i);
            i--; //the next PSM has shifted into slot i
        }
    }
}

//Returns true if some database protein contains terminalFrag and extending that occurrence yields a
//product whose mass matches expMass. Extension stops at the protein boundary or once the product is
//200 or more mass units heavier than expMass.
private bool databaseProductMatchesMass(List<TheoreticalProtein> database, string terminalFrag, bool extendTowardCTerminus, double expMass, ref string error_message)
{
    List<TheoreticalProtein> matches = database.AsParallel().Where(x => x.seq.Contains(terminalFrag)).ToList();
    foreach (TheoreticalProtein prot in matches)
    {
        foreach (int occurrence in findFragmentStartsInProtein(prot.seq, terminalFrag))
        {
            //Reset for every occurrence; the old code kept the mass of the previous occurrence, so any
            //occurrence after the first was usually skipped without being evaluated.
            double prodMass = 0;
            for (int numAAused = ionsUsedDigFilter; prodMass < expMass + 200; numAAused++)
            {
                int start = extendTowardCTerminus ? occurrence : occurrence + ionsUsedDigFilter - numAAused;
                if (start < 0 || start + numAAused > prot.seq.Length)
                {
                    break; //hit the end of the protein
                }

                string protProd = prot.seq.Substring(start, numAAused);
                prodMass = MassCalculator.MonoIsoptopicMass(protProd, out string e);
                error_message += e;

                if (productMassMatchesPrecursor(prodMass, expMass))
                {
                    return true;
                }
            }
        }
    }
    return false;
}

//Mass test used by removeTranslatedPeptides.
//With generateDecoys set, a product matches inside the offset windows (expMass-9.5, expMass-4.5) or
//(expMass+5.5, expMass+7.5); otherwise it must lie within precursorMassTolerancePpm of expMass.
private bool productMassMatchesPrecursor(double prodMass, double expMass)
{
    if (generateDecoys)
    {
        return (prodMass > expMass - 9.5 && prodMass < expMass - 4.5)
            || (prodMass > expMass + 5.5 && prodMass < expMass + 7.5);
    }

    //Floating-point divisor so the tolerance cannot truncate to 0 if the ppm setting is an integral type.
    double relativeTolerance = precursorMassTolerancePpm / 1000000.0;
    return prodMass > expMass * (1 - relativeTolerance) && prodMass < expMass * (1 + relativeTolerance);
}

//Returns the start index of every (possibly overlapping) occurrence of fragment in proteinSeq.
private static List<int> findFragmentStartsInProtein(string proteinSeq, string fragment)
{
    List<int> starts = new List<int>();
    if (string.IsNullOrEmpty(fragment))
    {
        return starts;
    }

    int at = proteinSeq.IndexOf(fragment, StringComparison.Ordinal);
    while (at >= 0)
    {
        starts.Add(at);
        at = proteinSeq.IndexOf(fragment, at + 1, StringComparison.Ordinal);
    }
    return starts;
}
//Tests one B/Y splice and, if its mass matches the precursor, records it as a fusion candidate on psm.
//(Originally recursive; large peptides caused stack overflows, so a single call now evaluates a single
//BIndex/YIndex combination.)
//B, Y: the N-terminal and C-terminal sequences to splice.
//psm: supplies the scan number, experimental mass and PTMs; receives the new candidate.
//BIndex, YIndex: passed through to IonCrop for the B and Y sequence respectively.
//error_message: concatenation of the messages returned by IonCrop and MassCalculator.MonoIsoptopicMass.
public void MassMatch(string B, string Y, PSM psm, int BIndex, int YIndex, out string error_message)
{
    error_message = "";
    test = psm.getScan().ToString();
    double ExperimentalMass = psm.getExpMass();

    //Per the original notes: IonCrop returns a B sequence lighter than the experimental mass by cleaving
    //C-terminal residues, and a Y sequence by cleaving N-terminal residues.
    string BFrag = IonCrop(B, ExperimentalMass, BIndex, IonType.b, false, out string e4);
    string YFrag = IonCrop(Y, ExperimentalMass, YIndex, IonType.y, false, out string e3);

    //Each fragment mass includes one water, so one is subtracted for the joined peptide.
    double TheoreticalMass = MassCalculator.MonoIsoptopicMass(BFrag, out string e)
        + MassCalculator.MonoIsoptopicMass(YFrag, out string e2)
        - Constants.WATER_MONOISOTOPIC_MASS
        + fixedModMass;
    error_message += e3 + e4 + e + e2;

    //Add the masses of the PTMs that fall inside the retained part of each fragment.
    foreach (PTM ptm in psm.getNInfo().getPTMs())
    {
        if (ptm.index < BFrag.Length)
        {
            TheoreticalMass += ptm.mass;
        }
    }
    foreach (PTM ptm in psm.getCInfo().getPTMs())
    {
        if (Y.Length - ptm.index < YFrag.Length)
        {
            TheoreticalMass += ptm.mass;
        }
    }

    //Either side contributing fewer residues than required means there is nothing to record.
    if (YFrag.Length < ionsUsedMassVer || BFrag.Length < ionsUsedMassVer)
    {
        return;
    }

    bool massMatches;
    if (FalsePositives.generateDecoys)
    {
        //Decoy mode accepts masses in the offset windows (-9.5, -4.5) and (+5.5, +7.5) around the experimental mass.
        massMatches = (TheoreticalMass > ExperimentalMass - 9.5 && TheoreticalMass < ExperimentalMass - 4.5)
            || (TheoreticalMass > ExperimentalMass + 5.5 && TheoreticalMass < ExperimentalMass + 7.5);
    }
    else
    {
        //Floating-point divisor so the tolerance cannot truncate to 0 if the ppm setting is an integral type.
        double relativeTolerance = FalsePositives.precursorMassTolerancePpm / 1000000.0;
        massMatches = TheoreticalMass > ExperimentalMass * (1 - relativeTolerance)
            && TheoreticalMass < ExperimentalMass * (1 + relativeTolerance);
    }
    if (!massMatches)
    {
        return;
    }

    //Record the spliced sequence unless this PSM already has it.
    string fusionSeq = BFrag + YFrag;
    foreach (FusionCandidate oldCandidate in psm.getFusionCandidates())
    {
        if (fusionSeq.Equals(oldCandidate.seq))
        {
            return;
        }
    }
    psm.addFusionCandidate(new FusionCandidate(fusionSeq));
}
//Appends candidate to this object's list of fusion candidates.
public void addFusionCandidate(FusionCandidate candidate) => candidates.Add(candidate);
//Fills in the parent lists of fusionCandidate and then lowers its fusionType to the smallest cisType found.
//Candidates whose type is not TL:
//    every protein recorded in parentInfo is added as a trans parent; if the same protein also contains
//    the complementary part of the sequence, it is additionally added as a cis parent.
//Candidates whose type is TL:
//    every recorded protein that contains the whole sequence is added as a translated parent.
public void DetermineFusionCandidateType(FusionCandidate fusionCandidate)
{
    string seq = fusionCandidate.seq;

    if (!fusionCandidate.fusionType.Equals(FusionCandidate.FusionType.TL))
    {
        foreach (ParentInfo info in fusionCandidate.parentInfo)
        {
            int foundLength = info.fragFound.Length;
            bool foundAtNTerminus = info.parentType.Equals(ParentInfo.terminal.N);

            //The complementary fragment is whatever remains of the candidate once the found fragment is removed.
            string compFrag = foundAtNTerminus
                ? seq.Substring(foundLength, seq.Length - foundLength)
                : seq.Substring(0, seq.Length - foundLength);

            foreach (TheoreticalProtein protein in info.theoreticalProteins)
            {
                //Where the found fragment occurs in this parent.
                List<int> originalIndexes = locateFragmentStarts(protein.seq, info.fragFound);
                fusionCandidate.transParents.Add(new TransParent(protein.id, protein.seq, originalIndexes, foundLength, info.parentType));

                //Where the complementary fragment occurs in the same parent; any hit makes it a cis parent.
                List<int> complementaryIndexes = locateFragmentStarts(protein.seq, compFrag);
                if (complementaryIndexes.Count == 0)
                {
                    continue;
                }

                //CisParent takes the N-terminal fragment's data first, then the C-terminal fragment's.
                if (foundAtNTerminus)
                {
                    fusionCandidate.cisParents.Add(new CisParent(protein.id, protein.seq, originalIndexes, foundLength, complementaryIndexes, compFrag.Length));
                }
                else
                {
                    fusionCandidate.cisParents.Add(new CisParent(protein.id, protein.seq, complementaryIndexes, compFrag.Length, originalIndexes, foundLength));
                }
            }
        }
    }
    else
    {
        foreach (ParentInfo info in fusionCandidate.parentInfo)
        {
            foreach (TheoreticalProtein protein in info.theoreticalProteins)
            {
                //One ordinal scan replaces the previous Contains + culture-sensitive IndexOf pair.
                int start = protein.seq.IndexOf(seq, System.StringComparison.Ordinal);
                if (start >= 0)
                {
                    fusionCandidate.translatedParents.Add(new TranslatedParent(protein.id, protein.seq, start, seq.Length));
                }
            }
        }
    }

    foreach (CisParent cisParent in fusionCandidate.cisParents)
    {
        if (cisParent.cisType < fusionCandidate.fusionType)
        {
            fusionCandidate.fusionType = cisParent.cisType;
        }
    }
}

//Returns the start index of every (possibly overlapping) occurrence of fragment in proteinSeq.
//An empty fragment yields no occurrences (the previous inline loop threw in that case).
private static List<int> locateFragmentStarts(string proteinSeq, string fragment)
{
    List<int> starts = new List<int>();
    if (string.IsNullOrEmpty(fragment))
    {
        return starts;
    }

    //Searching from an offset avoids allocating a new substring for every hit.
    int at = proteinSeq.IndexOf(fragment, System.StringComparison.Ordinal);
    while (at >= 0)
    {
        starts.Add(at);
        at = proteinSeq.IndexOf(fragment, at + 1, System.StringComparison.Ordinal);
    }
    return starts;
}
//Prunes the PSM's fusion candidates down to the best-scoring, non-duplicated ones.
//A candidate's score is its number of found ions. A candidate is rejected outright when it has a run of
//more than maxMissingConsecutivePeaks consecutive missing ions.
//psm: its candidate list is modified in place.
//error_message: concatenation of the messages returned by findIons.
//Returns true if no candidate survives (too messy for confident identification), otherwise false.
public bool IsTooMessy(PSM psm, out string error_message)
{
    error_message = "";
    var candidates = psm.getFusionCandidates();
    HashSet<string> keptSequences = new HashSet<string>(); //sequences of the candidates currently kept
    int currentBestScore = 0;

    for (int index = 0; index < candidates.Count; index++)
    {
        FusionCandidate fc = candidates[index];
        findIons(fc, psm, out string ionError);
        error_message += ionError;

        bool badID = false;
        int consecutiveMissedCounter = 0;
        int totalHitCounter = 0;
        foreach (bool found in fc.getFoundIons())
        {
            if (consecutiveMissedCounter > maxMissingConsecutivePeaks)
            {
                //Too many permutations are possible in the unmapped region. The score no longer matters
                //because a bad candidate is always removed below.
                badID = true;
                break;
            }
            if (found)
            {
                totalHitCounter++;
                consecutiveMissedCounter = 0; //only consecutive misses count
            }
            else
            {
                consecutiveMissedCounter++;
            }
        }

        bool isRepeat = keptSequences.Contains(fc.seq);

        if (!badID && totalHitCounter > currentBestScore)
        {
            //Strictly better than everything kept so far: drop all earlier candidates.
            while (index > 0)
            {
                candidates.RemoveAt(0);
                index--;
            }
            currentBestScore = totalHitCounter;
            keptSequences = new HashSet<string> { fc.seq };
        }
        else if (badID || isRepeat || totalHitCounter < currentBestScore)
        {
            candidates.RemoveAt(index);
            index--;
        }
        else
        {
            //Ties with the current best are kept. Remember the sequence so that a later duplicate of it is
            //recognised as a repeat; previously only the first best sequence was tracked.
            keptSequences.Add(fc.seq);
        }
    }

    //Nothing left: this might be a fusion peptide, but the spectrum gives no usable information.
    return candidates.Count == 0;
}
//Decides whether the candidate's sequence can be covered by a fragment found from its N-terminus plus a
//fragment found from its C-terminus, using foundParent for every lookup (which also records the parents
//on tempCandidate). When the two fragments together cover the sequence, the junction indexes are added
//to tempCandidate and true is returned; otherwise false.
public bool isViable(FusionCandidate tempCandidate)
{
    string novelSeq = tempCandidate.seq;

    //Searches one terminus and returns the fragment length the search ended on.
    int searchFromTerminus(ParentInfo.terminal side)
    {
        bool fromNTerminus = side.Equals(ParentInfo.terminal.N);

        //Start at length-1 (the whole sequence is never tested here), capped at 6 residues, which the
        //original author chose as an ideal starting point to speed up the search.
        int length = novelSeq.Length - 1;
        if (length > 6)
        {
            length = 6;
        }

        //First pass decides the direction: grow the fragment after a hit, shrink it after a miss.
        string probe = fromNTerminus ? novelSeq.Substring(0, length) : novelSeq.Substring(novelSeq.Length - length, length);
        bool growing = foundParent(probe, side, tempCandidate, false);
        length += growing ? 1 : -1;

        bool finished = false;
        while (!finished && length > 0 && length < novelSeq.Length)
        {
            probe = fromNTerminus ? novelSeq.Substring(0, length) : novelSeq.Substring(novelSeq.Length - length, length);
            if (foundParent(probe, side, tempCandidate, growing))
            {
                if (growing)
                {
                    length++; //still found, try one residue more
                }
                else
                {
                    finished = true; //shrinking search found its first hit; keep this length
                }
            }
            else
            {
                if (growing)
                {
                    finished = true; //growing search failed; fall back to the last length that was found
                }
                length--;
            }
        }
        return length;
    }

    //N-terminus first, then C-terminus (the order in which parent info is recorded on the candidate).
    int nTermParentLength = searchFromTerminus(ParentInfo.terminal.N);
    int cTermParentLength = searchFromTerminus(ParentInfo.terminal.C);

    if (cTermParentLength + nTermParentLength < novelSeq.Length)
    {
        return false; //the two fragments do not meet
    }

    for (int junction = tempCandidate.seq.Length - cTermParentLength - 1; junction < nTermParentLength; junction++)
    {
        tempCandidate.addJunctionIndex(junction);
    }
    return true;
}
//Expands the PSM's fusion candidates with alternative sequences for every stretch between two found ions.
//For each ion position that was found, the residues since the previous found ion are replaced by every
//entry of massDict whose key lies within productMassToleranceDa of that stretch's mass.
//psm: its candidate list grows in place.
//error_message: concatenation of the messages returned by findIons and MassCalculator.MonoIsoptopicMass.
//Returns false as soon as the PSM holds more than maxNumPossibleSequences candidates, otherwise true.
public bool GeneratePossibleSequences(PSM psm, out string error_message)
{
    error_message = "";
    var candidates = psm.getFusionCandidates();

    //Every sequence seen so far; used to keep duplicates out of the candidate list.
    HashSet<string> knownSequences = new HashSet<string>();
    foreach (FusionCandidate existing in candidates)
    {
        findIons(existing, psm, out string initialError); //populate the foundIons array
        error_message += initialError;
        knownSequences.Add(existing.seq);
    }

    int globalIndex = 0;
    bool pastEveryCandidate;
    do
    {
        if (candidates.Count > maxNumPossibleSequences)
        {
            return false; //too many possibilities; not searching them all
        }

        pastEveryCandidate = true; //corrected below if any candidate still reaches globalIndex

        //Count is re-read every iteration: candidates appended during this pass are processed in the same pass.
        for (int fc = 0; fc < candidates.Count; fc++)
        {
            FusionCandidate current = candidates[fc];
            bool[] ionFound = current.getFoundIons();
            if (ionFound.Length <= globalIndex)
            {
                continue;
            }
            pastEveryCandidate = false;

            List<FusionCandidate> alternatives = new List<FusionCandidate>();

            //Only a found ion gives a stop point for an ambiguous stretch.
            if (ionFound[globalIndex])
            {
                //Closest found ion before this one, or -1 if there is none.
                int mostRecent = -1;
                for (int i = globalIndex - 1; i >= 0; i--)
                {
                    if (ionFound[i])
                    {
                        mostRecent = i;
                        break;
                    }
                }

                string fusionSeq = current.seq;
                string ambiguousFrag = fusionSeq.Substring(mostRecent + 1, globalIndex - mostRecent);
                double key = MassCalculator.MonoIsoptopicMass(ambiguousFrag, out string massError);
                error_message += massError;

                List<string> combinations = new List<string>();

                //Adds the fragments stored under peak if peak is within tolerance of key; reports whether it was.
                bool takeIfWithinTolerance(double peak)
                {
                    if (!(peak > key - productMassToleranceDa && peak < key + productMassToleranceDa))
                    {
                        return false;
                    }
                    if (massDict.TryGetValue(peak, out string[] frags))
                    {
                        foreach (string frag in frags)
                        {
                            combinations.Add(frag);
                        }
                    }
                    return true;
                }

                int ipos = Array.BinarySearch(keys, key);
                if (ipos < 0)
                {
                    ipos = ~ipos; //insertion point when there is no exact hit
                }
                //Walk down from just below the insertion point, then up from it, until leaving the tolerance window.
                for (int down = ipos - 1; down >= 0 && takeIfWithinTolerance(keys[down]); down--)
                {
                }
                for (int up = ipos; up < keys.Length && takeIfWithinTolerance(keys[up]); up++)
                {
                }

                string nTermSeq = fusionSeq.Substring(0, mostRecent + 1);
                string cTermSeq = fusionSeq.Substring(globalIndex + 1, fusionSeq.Length - globalIndex - 1);
                foreach (string replacement in combinations)
                {
                    alternatives.Add(new FusionCandidate(nTermSeq + replacement + cTermSeq));
                }
            }

            foreach (FusionCandidate alternative in alternatives)
            {
                if (!knownSequences.Add(alternative.seq))
                {
                    continue; //already known
                }
                findIons(alternative, psm, out string alternativeError);
                error_message += alternativeError;
                candidates.Add(alternative);
            }
        }

        globalIndex++;
        if (candidates.Count > maxNumPossibleSequences)
        {
            return false;
        }
    } while (!pastEveryCandidate);

    return true;
}
//Marks, per residue position of the candidate, whether a matching experimental peak exists.
//fusionCandidate: its foundIons array is (re)created via makeFoundIons and then filled in.
//psm: supplies the N-side and C-side peak hits and PTMs.
//error_message: concatenation of the messages returned by MassCalculator.MonoIsoptopicMass.
//Only the ion types listed in ionsUsed are evaluated. N-terminal ions (b, c) of i+1 residues mark
//position i; C-terminal ions (y, zdot) of i+1 residues mark position length-2-i. The last position is
//always marked as found.
public static void findIons(FusionCandidate fusionCandidate, PSM psm, out string error_message)
{
    error_message = "";
    double[] nPeaks = psm.getNInfo().getPeakHits();
    double[] cPeaks = psm.getCInfo().getPeakHits();
    fusionCandidate.makeFoundIons();
    string candSeq = fusionCandidate.seq;
    bool[] foundIons = fusionCandidate.getFoundIons();
    int lastSlot = foundIons.Length - 1;

    //True if any peak lies strictly within productMassToleranceDa of theoMass.
    bool hasPeakNear(double[] peaks, double theoMass)
    {
        foreach (double expPeak in peaks)
        {
            if (expPeak > theoMass - productMassToleranceDa && expPeak < theoMass + productMassToleranceDa)
            {
                return true;
            }
        }
        return false;
    }

    //Adds the N-side PTMs whose index is at or before position i.
    double withNTermPtms(double mass, int i)
    {
        foreach (PTM ptm in psm.getNInfo().getPTMs())
        {
            if (ptm.index <= i)
            {
                mass += ptm.mass;
            }
        }
        return mass;
    }

    //Adds the C-side PTMs whose index is at or after candSeq.Length - 2 - i.
    double withCTermPtms(double mass, int i)
    {
        foreach (PTM ptm in psm.getCInfo().getPTMs())
        {
            if (ptm.index >= candSeq.Length - 2 - i)
            {
                mass += ptm.mass;
            }
        }
        return mass;
    }

    for (int i = 0; i < lastSlot; i++)
    {
        //B IONS//
        if (ionsUsed.Contains(IonType.b))
        {
            double bTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(0, 1 + i), out string bError) - Constants.WATER_MONOISOTOPIC_MASS;
            error_message += bError;
            bTheoMass = withNTermPtms(bTheoMass, i);
            if (hasPeakNear(nPeaks, bTheoMass))
            {
                foundIons[i] = true;
            }
        }

        //Y IONS//
        if (ionsUsed.Contains(IonType.y))
        {
            double yTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(candSeq.Length - 1 - i, i + 1), out string yError);
            error_message += yError;
            yTheoMass = withCTermPtms(yTheoMass, i);
            if (hasPeakNear(cPeaks, yTheoMass))
            {
                foundIons[lastSlot - 1 - i] = true;
            }
        }

        //C IONS//
        if (ionsUsed.Contains(IonType.c))
        {
            double cTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(0, 1 + i), out string cError) - Constants.WATER_MONOISOTOPIC_MASS + Constants.nitrogenMonoisotopicMass + 3 * Constants.hydrogenMonoisotopicMass;
            error_message += cError;
            cTheoMass = withNTermPtms(cTheoMass, i);
            if (hasPeakNear(nPeaks, cTheoMass))
            {
                foundIons[i] = true;
            }
        }

        //ZDOT IONS//
        if (ionsUsed.Contains(IonType.zdot))
        {
            double zdotTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(candSeq.Length - 1 - i, i + 1), out string zdotError) - Constants.nitrogenMonoisotopicMass - 2 * Constants.hydrogenMonoisotopicMass;
            error_message += zdotError;
            zdotTheoMass = withCTermPtms(zdotTheoMass, i);
            if (hasPeakNear(cPeaks, zdotTheoMass))
            {
                foundIons[lastSlot - 1 - i] = true;
            }
        }
    }

    //A|B|C|D|E|F|K| : the whole-peptide peak is always placed, arbitrarily, at the C-terminus.
    foundIons[lastSlot] = true;
}