/// <summary>
/// Strips parenthesised PTM annotations from <paramref name="seq"/> and records each
/// annotation as a <c>PTM</c> in <c>this.ptms</c>.
/// </summary>
/// <param name="seq">Sequence in which a modification is written as "(name)".</param>
/// <param name="error_message">Concatenation of every message reported by <c>MassCalculator.getPTMMass</c>.</param>
/// <returns>The sequence with every "(…)" annotation removed.</returns>
public string cleanSeq(string seq, out string error_message)
{
    error_message = "";
    var cleaned = new System.Text.StringBuilder(seq.Length);
    var modName = new System.Text.StringBuilder();
    bool insideMod = false;

    foreach (char c in seq)
    {
        if (c == ')') //only occurs at end of mod
        {
            insideMod = false;

            //Take the name and clear the buffer so the next annotation starts from scratch
            //(previously the names of all earlier annotations were carried over).
            string name = modName.ToString();
            modName.Clear();

            double modMass = MassCalculator.getPTMMass(name, out string e);
            error_message += e;

            //0-based index of the residue that precedes the annotation (clamped to 0 for a leading
            //annotation). Previously the index was never advanced and every PTM was recorded at 0.
            //NOTE(review): convention inferred from how ptm.index is compared elsewhere - confirm.
            int aaIndex = cleaned.Length > 0 ? cleaned.Length - 1 : 0;
            this.ptms.Add(new PTM(name, aaIndex, modMass));
        }
        else if (insideMod) //only occurs if "(" already found
        {
            modName.Append(c);
        }
        else if (c == '(') //start collecting PTM name
        {
            insideMod = true;
        }
        else
        {
            cleaned.Append(c);
        }
    }

    return cleaned.ToString();
}
/// <summary>
/// Removes the fixed-modification tag, the "ptmlist:" prefix, parenthesised content
/// (via <c>MassCalculator.RemoveNestedParentheses</c>) and all quote characters from a sequence string.
/// </summary>
/// <param name="fullSeq">Raw sequence text.</param>
/// <returns>The sequence with the above decorations removed.</returns>
public static string cleanSeq(string fullSeq)
{
    string stripped = fullSeq
        .Replace("[Common Fixed:Carbamidomethyl of C]", "")
        .Replace("ptmlist:", "");

    stripped = MassCalculator.RemoveNestedParentheses(stripped, false);

    //drop single and double quotes
    return stripped.Replace("'", "").Replace("\"", "");
}
//Removes FragNumber characters from one end of IonSequence: from the end when ion is IonType.b,
//from the start for any other ion type. A "(...)" PTM annotation left dangling at the cut is removed too
//(for b ions together with the residue that precedes it).
//If checkToRemoveExtraAA is true, one more residue is removed at a time until the mass of the fragment
//(MassCalculator.MonoIsoptopicMass) is below ExperimentalMass.
//error_message receives the concatenated messages of every mass calculation performed.
public string IonCrop(string IonSequence, double ExperimentalMass, int FragNumber, IonType ion, bool checkToRemoveExtraAA, out string error_message)
{
    error_message = "";

    //Each pass corresponds to one call of the former recursive implementation.
    while (true)
    {
        string fragment;

        if (ion == IonType.b)
        {
            fragment = IonSequence.Substring(0, IonSequence.Length - FragNumber);
            if (fragment.Substring(IonSequence.Length - FragNumber - 1, 1) == ")") //cut landed on the end of a PTM annotation
            {
                //walk back until the fragment ends with the opening parenthesis
                do
                {
                    FragNumber++;
                    fragment = IonSequence.Substring(0, IonSequence.Length - FragNumber);
                }
                while (fragment.Substring(IonSequence.Length - FragNumber - 1, 1) != "(");

                FragNumber += 2; //one for "(" and one for the AA the PTM was attached to
                fragment = IonSequence.Substring(0, IonSequence.Length - FragNumber);
            }
        }
        else //y-type: crop from the start
        {
            fragment = IonSequence.Substring(FragNumber, IonSequence.Length - FragNumber);
            if (fragment.Substring(0, 1) == "(") //cut landed on the start of a PTM annotation
            {
                //walk forward until the fragment starts with the closing parenthesis
                do
                {
                    FragNumber++;
                    fragment = IonSequence.Substring(FragNumber, IonSequence.Length - FragNumber);
                }
                while (fragment.Substring(0, 1) != ")");

                FragNumber++; //removes ")"
                fragment = IonSequence.Substring(FragNumber, IonSequence.Length - FragNumber);
            }
        }

        if (!checkToRemoveExtraAA)
        {
            return fragment;
        }

        double fragmentMass = MassCalculator.MonoIsoptopicMass(fragment, out string massErrors);
        error_message += massErrors;
        if (fragmentMass < ExperimentalMass) //light enough, done
        {
            return fragment;
        }

        FragNumber++; //still too heavy: remove another amino acid and try again
    }
}
}; //20 common AA, ordered by mass assuming carbamido

/// <summary>
/// Enumerates sequences built from <c>AANames</c> whose mass stays below a limit derived from
/// <c>maxMissingConsecutivePeaks</c>, stores them in <c>massDict</c> keyed by rounded mass, and
/// stores the sorted new keys in <c>keys</c>.
/// </summary>
/// <returns>Concatenation of every message reported by <c>MassCalculator.MonoIsoptopicMass</c>.</returns>
public string PopulateMassDictionary()
{
    string error_message = "";
    List<double> tempKeys = new List<double>();

    //If one sequence knocks it out of range, the next might not (VVVVW vs VVVVV where the first is out of range but the second is not) W-G prevents this
    //peaks+1 for converting missed peaks into number of ambiguous aa
    double maxMass = (maxMissingConsecutivePeaks + 1) * 186.079313 + (Constants.WATER_MONOISOTOPIC_MASS + 3);

    //maximum length allowed (4>W/G>3)
    int mer = (4) * (maxMissingConsecutivePeaks + 1);

    //indexes[n] is the position in AANames of the residue at position n; new int[] is zero-filled.
    int[] indexes = new int[mer];
    int aaCount = AANames.Count();
    int length = 1;

    while (indexes[0] < aaCount)
    {
        //get new seq
        string seq = "";
        for (int n = 0; n < length; n++)
        {
            seq += AANames[indexes[n]];
        }

        double fragMass = MassCalculator.MonoIsoptopicMass(seq, out string e);
        error_message += e;

        bool stepBack; //true when the current position is exhausted and the previous one must advance
        if (fragMass < maxMass) //new seq is within range
        {
            var rounded = Math.Round(fragMass, decimalDigitsForFragmentMassRounding);
            List<string> value;
            if (massDict.TryGetValue(rounded, out value))
            {
                if (!value.Contains(seq))
                {
                    value.Add(seq);
                }
            }
            else
            {
                massDict.Add(rounded, new List<string> { seq });
                tempKeys.Add(rounded);
            }

            if (mer != length && fragMass + 57.0214 < maxMass) //room for at least one more residue
            {
                length++;
                stepBack = false;
            }
            else if (indexes[length - 1] < aaCount - 1) //not the last aa at this position
            {
                indexes[length - 1]++;
                stepBack = false;
            }
            else
            {
                stepBack = true;
            }
        }
        else //not in range, move back one
        {
            stepBack = true;
        }

        if (stepBack)
        {
            indexes[length - 1] = 0;
            if (length >= 2)
            {
                indexes[length - 2]++;
                length--;
            }
            else
            {
                //Nothing left to carry into: the enumeration is finished.
                //(Previously this path read indexes[-1] and threw IndexOutOfRangeException.)
                indexes[0] = aaCount;
            }
        }

        //important catch to make sure going back doesn't result in an out of index exception
        for (int i = indexes.Length - 1; i >= 0; i--)
        {
            if (indexes[i] == aaCount && i > 0)
            {
                length--;
                indexes[i] = 0;
                indexes[i - 1]++;
            }
        }
    }

    tempKeys.Sort();
    keys = tempKeys.ToArray();

    //Debug dump of the dictionary. The target is a developer-specific absolute path, so it is
    //only written when that folder exists instead of throwing on every other machine.
    string dumpPath = @"C:\Users\Zach Rolfs\Desktop\Chemistry\Smith Research\Fusion Peptides\Dictionary" + "3" + ".txt";
    if (System.IO.Directory.Exists(System.IO.Path.GetDirectoryName(dumpPath)))
    {
        using (System.IO.StreamWriter file = new System.IO.StreamWriter(dumpPath))
        {
            foreach (double key in keys)
            {
                List<string> value;
                if (!massDict.TryGetValue(key, out value))
                {
                    continue;
                }

                //only dump masses that at least one sufficiently short sequence can explain
                bool satisfyRule = false;
                foreach (string s in value)
                {
                    if (s.Length <= maxMissingConsecutivePeaks + 1)
                    {
                        satisfyRule = true;
                        break;
                    }
                }

                if (satisfyRule)
                {
                    file.WriteLine(key.ToString() + '\t' + string.Join(";", value));
                }
            }
        }
    }

    return error_message;
}
//Compares the first and last ionsUsedDigFilter residues of each fusion candidate with the database.
//If a database protein containing one of those termini can be extended to a product whose mass matches
//the PSM's experimental mass, the PSM is removed from the list.
//error_message receives the concatenated messages of every mass calculation performed.
public void removeTranslatedPeptides(List<PSM> psms, List<TheoreticalProtein> database, out string error_message)
{
    error_message = "";
    for (int i = 0; i < psms.Count; i++)
    {
        this.worker.ReportProgress(Convert.ToInt16((Convert.ToDouble(i) / Convert.ToDouble(psms.Count)) * 100));

        PSM psm = psms[i];
        double expMass = psm.getExpMass();
        bool translated = false;

        foreach (FusionCandidate fc in psm.getFusionCandidates())
        {
            string seq = fc.seq;
            if (seq.Length < ionsUsedDigFilter)
            {
                continue;
            }

            string Nterm = seq.Substring(0, ionsUsedDigFilter);
            string Cterm = seq.Substring(seq.Length - ionsUsedDigFilter, ionsUsedDigFilter);

            //N-terminus is extended toward the protein's C-terminus, C-terminus toward its N-terminus
            if (TranslatedProductExists(database, Nterm, true, expMass, ref error_message) ||
                TranslatedProductExists(database, Cterm, false, expMass, ref error_message))
            {
                translated = true;
                break;
            }
        }

        if (translated)
        {
            //Remove only after the search has stopped. Previously the PSM was removed (and i decremented)
            //inside the search loop, which then kept running against psms[i] of the wrong PSM
            //and threw when i had become -1.
            psms.RemoveAt(i);
            i--;
        }
    }
}

//Returns true when some database protein contains terminus and, grown residue by residue from that
//occurrence, yields a product whose mass matches expMass.
private bool TranslatedProductExists(List<TheoreticalProtein> database, string terminus, bool extendTowardCTerminus, double expMass, ref string error_message)
{
    if (terminus.Length == 0)
    {
        return false; //an empty terminus cannot anchor a search
    }

    List<TheoreticalProtein> matches = database.AsParallel().Where(x => x.seq.Contains(terminus)).ToList();
    foreach (TheoreticalProtein prot in matches)
    {
        string protSeq = prot.seq;

        //visit every (possibly overlapping) occurrence of the terminus in the parent protein
        for (int hit = protSeq.IndexOf(terminus, StringComparison.Ordinal);
             hit >= 0;
             hit = protSeq.IndexOf(terminus, hit + 1, StringComparison.Ordinal))
        {
            //reset per occurrence so that an earlier occurrence does not suppress this one
            double prodMass = 0;
            for (int numAAused = terminus.Length; prodMass < expMass + 200; numAAused++)
            {
                int start = extendTowardCTerminus ? hit : hit + terminus.Length - numAAused;
                if (start < 0 || start + numAAused > protSeq.Length)
                {
                    break; //hit the end of the protein
                }

                prodMass = MassCalculator.MonoIsoptopicMass(protSeq.Substring(start, numAAused), out string e);
                error_message += e;

                if (TranslatedProductMassMatches(prodMass, expMass))
                {
                    return true;
                }
            }
        }
    }

    return false;
}

//Mass acceptance test: fixed offset windows when generateDecoys is set, otherwise a ppm window around expMass.
private bool TranslatedProductMassMatches(double prodMass, double expMass)
{
    if (generateDecoys)
    {
        return (prodMass > expMass - 9.5 && prodMass < expMass - 4.5)
            || (prodMass > expMass + 5.5 && prodMass < expMass + 7.5);
    }

    return prodMass > expMass * (1 - precursorMassTolerancePpm / 1000000)
        && prodMass < expMass * (1 + precursorMassTolerancePpm / 1000000);
}
//Variant name suffixes and mass shifts tested, in this order, when generateDecoys is set.
private static readonly string[] DecoyModeVariantSuffixes =
{
    "+Deamidation", "+Oxidation", "+DiOxidation", "+TriOxidation", "+Acetylation", "+Phosphorylation"
};
private static readonly double[] DecoyModeVariantShifts =
{
    0.984016, 15.99491463, 15.99491463 * 2, 15.99491463 * 3, 42.01056, 79.96633
};

//Variant name suffixes and mass shifts tested, in this order, when generateDecoys is not set.
private static readonly string[] TargetModeVariantSuffixes =
{
    "+Deamidation", "+Oxidation", "+Dioxidation", "+Trioxidation", "+Acetyl", "+Phospho",
    "+Zinc", "+Potassium", "+Sodium", "+Carbamidomethyl", "+Sulfonation",
    "+Methyl", "+DiMethyl", "+TriMethyl", "+Ammonia loss"
};
private static readonly double[] TargetModeVariantShifts =
{
    0.984016, 15.99491463, 15.99491463 * 2, 15.99491463 * 3, 42.01056, 79.96633,
    61.913495, 37.955588, 21.981944, 57.021464, 79.956815,
    14.01565, 28.0313, 42.04695,
    -17.026549 //a loss lowers the mass (was applied with the wrong sign)
};

/// <summary>
/// For every fusion candidate of every PSM, grows each parent fragment (6 or more residues) along its
/// parent protein and records, in <c>psm.variants</c>, sequences that explain the experimental mass
/// as an SNP, an unmodified peptide, or a peptide with one of the listed mass shifts.
/// Variants with the same <c>pepSeq</c> and <c>id</c> are then reduced to one per PSM.
/// </summary>
/// <param name="psms">PSMs to annotate.</param>
/// <param name="database">Not used by this method.</param>
/// <param name="error_message">Concatenation of every message reported by the mass calculator.</param>
public void FindCommonFalsePositives(List<PSM> psms, List<TheoreticalProtein> database, out string error_message)
{
    error_message = "";
    int processed = 0;
    foreach (PSM psm in psms)
    {
        this.worker.ReportProgress(Convert.ToInt16((Convert.ToDouble(processed) / Convert.ToDouble(psms.Count)) * 100));
        processed++;

        double expMass = psm.getExpMass();
        double massLimit = expMass + 187.079 - 57.021 + 1; //growing stops once the fragment reaches this mass

        foreach (FusionCandidate fusionCandidate in psm.getFusionCandidates())
        {
            char[] candidateSeq = fusionCandidate.seq.ToCharArray();

            foreach (ParentInfo info in fusionCandidate.parentInfo)
            {
                if (info.fragFound.Length < 6)
                {
                    continue;
                }

                foreach (TheoreticalProtein protein in info.theoreticalProteins)
                {
                    string protSeq = protein.seq;
                    int fragStart = protSeq.IndexOf(info.fragFound, StringComparison.Ordinal);
                    if (fragStart < 0)
                    {
                        continue; //fragment is not part of this protein; nothing to grow
                    }

                    int index = fragStart;
                    int fragLength = info.fragFound.Length;
                    string possibleTranslatedSequence = protSeq.Substring(index, fragLength);
                    if (possibleTranslatedSequence.Equals(fusionCandidate.seq)) //already found as translated
                    {
                        continue;
                    }

                    bool growTowardCTerminus = info.parentType.Equals(ParentInfo.terminal.N);
                    while (true)
                    {
                        double fragMass = MassCalculator.MonoIsoptopicMass(possibleTranslatedSequence, out string e);
                        error_message += e;
                        if (!(fragMass < massLimit))
                        {
                            break;
                        }

                        //Find SNPs
                        if (candidateSeq.Length == possibleTranslatedSequence.Length
                            && IsSNP(candidateSeq, possibleTranslatedSequence.ToCharArray()))
                        {
                            psm.variants.Add(new Variant(possibleTranslatedSequence, Variant.variantType.SNP, protein.id, protSeq, fragStart, info.fragFound.Length));
                            psm.variants.Add(new Variant(fusionCandidate.seq, Variant.variantType.SNP, protein.id, protSeq, fragStart, info.fragFound.Length));
                        }

                        AddMassExplainedVariant(psm, possibleTranslatedSequence, fragMass, expMass, protein, fragStart, info.fragFound.Length);

                        //update the sequence: one residue longer, on the side away from the junction
                        fragLength++;
                        if (growTowardCTerminus)
                        {
                            //"<=": a fragment that ends on the protein's last residue is still valid
                            if (index + fragLength > protSeq.Length)
                            {
                                break;
                            }
                        }
                        else
                        {
                            index--;
                            if (index < 0)
                            {
                                break;
                            }
                        }
                        possibleTranslatedSequence = protSeq.Substring(index, fragLength);
                    }
                }
            }
        }

        //keep only the first variant for each (pepSeq, id) pair
        List<Variant> variants = psm.variants;
        for (int v = variants.Count - 1; v >= 0; v--)
        {
            for (int v2 = 0; v2 < v; v2++)
            {
                if (variants[v].pepSeq.Equals(variants[v2].pepSeq) && variants[v].id.Equals(variants[v2].id))
                {
                    variants.RemoveAt(v);
                    break;
                }
            }
        }
    }
}

//Adds at most one variant for peptide: unmodified (UM) if its mass matches, otherwise the first
//listed mass shift that makes it match (PTM).
private void AddMassExplainedVariant(PSM psm, string peptide, double fragMass, double expMass, TheoreticalProtein protein, int fragStart, int fragFoundLength)
{
    //search for unmodified sequences
    if (CommonFalsePositiveMassMatches(fragMass, expMass))
    {
        psm.variants.Add(new Variant(peptide, Variant.variantType.UM, protein.id, protein.seq, fragStart, fragFoundLength));
        return;
    }

    //search for PTMs
    string[] suffixes = generateDecoys ? DecoyModeVariantSuffixes : TargetModeVariantSuffixes;
    double[] shifts = generateDecoys ? DecoyModeVariantShifts : TargetModeVariantShifts;
    for (int k = 0; k < shifts.Length; k++)
    {
        //the same shift is used for both bounds of the window
        if (CommonFalsePositiveMassMatches(fragMass + shifts[k], expMass))
        {
            psm.variants.Add(new Variant(peptide + suffixes[k], Variant.variantType.PTM, protein.id, protein.seq, fragStart, fragFoundLength));
            return;
        }
    }
}

//Mass acceptance test: fixed offset windows when generateDecoys is set, otherwise a ppm window around expMass.
private bool CommonFalsePositiveMassMatches(double theoreticalMass, double expMass)
{
    if (generateDecoys)
    {
        return (theoreticalMass > expMass - 9.5 && theoreticalMass < expMass - 4.5)
            || (theoreticalMass > expMass + 5.5 && theoreticalMass < expMass + 7.5);
    }

    return theoreticalMass > expMass * (1 - precursorMassTolerancePpm / 1000000)
        && theoreticalMass < expMass * (1 + precursorMassTolerancePpm / 1000000);
}
//Crops B by BIndex and Y by YIndex (IonCrop), sums the masses of the two fragments plus applicable PTMs,
//and, if that mass matches the PSM's experimental mass, records BFrag+YFrag as a fusion candidate of the PSM
//(unless the same sequence was already recorded).
//The method was originally recursive, but large peptides resulted in stack overflow exceptions.
public void MassMatch(string B, string Y, PSM psm, int BIndex, int YIndex, out string error_message) //this is the workhorse of SpliceFragments
{
    error_message = "";
    test = psm.getScan().ToString();
    double ExperimentalMass = psm.getExpMass();

    string BFrag = IonCrop(B, ExperimentalMass, BIndex, IonType.b, false, out string bCropErrors); //B with BIndex characters cropped from its end
    string YFrag = IonCrop(Y, ExperimentalMass, YIndex, IonType.y, false, out string yCropErrors); //Y with YIndex characters cropped from its start

    //water added once in b and once in y
    double TheoreticalMass = MassCalculator.MonoIsoptopicMass(BFrag, out string bMassErrors) + MassCalculator.MonoIsoptopicMass(YFrag, out string yMassErrors) - Constants.WATER_MONOISOTOPIC_MASS + fixedModMass;
    error_message += yCropErrors + bCropErrors + bMassErrors + yMassErrors;

    //add PTM masses
    foreach (PTM ptm in psm.getNInfo().getPTMs())
    {
        if (ptm.index < BFrag.Length)
        {
            TheoreticalMass += ptm.mass;
        }
    }
    foreach (PTM ptm in psm.getCInfo().getPTMs())
    {
        if (Y.Length - ptm.index < YFrag.Length)
        {
            TheoreticalMass += ptm.mass;
        }
    }

    //Too few residues left from either parent: nothing to record.
    if (YFrag.Length < ionsUsedMassVer || BFrag.Length < ionsUsedMassVer)
    {
        return;
    }

    bool massMatches;
    if (FalsePositives.generateDecoys)
    {
        massMatches = ((TheoreticalMass) > (ExperimentalMass - 9.5) && (TheoreticalMass) < (ExperimentalMass - 4.5))
            | ((TheoreticalMass) > (ExperimentalMass + 5.5) && (TheoreticalMass) < (ExperimentalMass + 7.5));
    }
    else
    {
        massMatches = (TheoreticalMass) > (ExperimentalMass * (1 - FalsePositives.precursorMassTolerancePpm / 1000000))
            && (TheoreticalMass) < (ExperimentalMass * (1 + FalsePositives.precursorMassTolerancePpm / 1000000));
    }
    if (!massMatches)
    {
        return;
    }

    //see if that sequence was already recorded for this psm
    string fusedSeq = BFrag + YFrag;
    foreach (FusionCandidate oldCandidate in psm.getFusionCandidates())
    {
        if (fusedSeq.Equals(oldCandidate.seq))
        {
            return;
        }
    }

    psm.addFusionCandidate(new FusionCandidate(BFrag + YFrag));
}
/// <summary>
/// Expands the PSM's fusion candidates: for each position where an ion was found, the stretch since the
/// previous found ion is replaced by every sequence that <c>massDict</c> lists within
/// <c>productMassToleranceDa</c> of that stretch's mass, and each new sequence is added as a candidate.
/// </summary>
/// <param name="psm">PSM whose candidate list is read and extended.</param>
/// <param name="error_message">Concatenation of every message reported by <c>findIons</c> and the mass calculator.</param>
/// <returns>false if more than <c>maxNumPossibleSequences</c> candidates exist or are generated; otherwise true.</returns>
public bool GeneratePossibleSequences(PSM psm, out string error_message)
{
    error_message = "";

    //every sequence seen so far, seeded with the existing candidates
    List<string> knownSequences = new List<string>();
    foreach (FusionCandidate seed in psm.getFusionCandidates())
    {
        findIons(seed, psm, out string seedErrors); //populate the foundIons array
        error_message += seedErrors;
        knownSequences.Add(seed.seq);
    }

    for (int globalIndex = 0; ; globalIndex++)
    {
        //more than the set number of possible sequences: this is junk and we are not searching them all
        if (psm.getFusionCandidates().Count() > maxNumPossibleSequences)
        {
            return false;
        }

        bool anyCandidateReachesIndex = false;

        //the bound is re-read every pass because candidates are appended while iterating
        for (int fc = 0; fc < psm.getFusionCandidates().Count(); fc++)
        {
            FusionCandidate current = psm.getFusionCandidates()[fc];
            bool[] ionFound = current.getFoundIons();
            if (ionFound.Count() <= globalIndex)
            {
                continue; //this candidate is shorter than the current position
            }
            anyCandidateReachesIndex = true;

            if (!ionFound[globalIndex])
            {
                continue; //only look for ambiguity if a peak was found to provide the stop point
            }

            string fusionSeq = current.seq;

            //most recent ion found before this one (start point), -1 if none
            int mostRecent = -1;
            for (int i = globalIndex - 1; i >= 0; i--)
            {
                if (ionFound[i])
                {
                    mostRecent = i;
                    break;
                }
            }

            string ambiguousFrag = fusionSeq.Substring(mostRecent + 1, globalIndex - mostRecent);
            double key = MassCalculator.MonoIsoptopicMass(ambiguousFrag, out string fragErrors);
            error_message += fragErrors;

            List<string> combinations = new List<string>();
            int ipos = Array.BinarySearch(keys, key);
            if (ipos < 0)
            {
                ipos = ~ipos; //insertion point
            }

            //walk down from the insertion point while within tolerance
            for (int lower = ipos - 1; lower >= 0; lower--)
            {
                double peak = keys[lower];
                if (!(peak > key - productMassToleranceDa && peak < key + productMassToleranceDa))
                {
                    break;
                }
                string[] value;
                if (massDict.TryGetValue(peak, out value))
                {
                    foreach (string frag in value)
                    {
                        combinations.Add(frag);
                    }
                }
            }

            //then here and up
            for (int upper = ipos; upper < keys.Length; upper++)
            {
                double peak = keys[upper];
                if (!(peak > key - productMassToleranceDa && peak < key + productMassToleranceDa))
                {
                    break;
                }
                string[] value;
                if (massDict.TryGetValue(peak, out value))
                {
                    foreach (string frag in value)
                    {
                        combinations.Add(frag);
                    }
                }
            }

            //splice every alternative between the unchanged flanks
            string nTermSeq = fusionSeq.Substring(0, mostRecent + 1);
            string cTermSeq = fusionSeq.Substring(globalIndex + 1, fusionSeq.Length - globalIndex - 1);
            List<FusionCandidate> tempCandidates = new List<FusionCandidate>();
            foreach (string alternative in combinations)
            {
                tempCandidates.Add(new FusionCandidate(nTermSeq + alternative + cTermSeq));
            }

            foreach (FusionCandidate newfc in tempCandidates)
            {
                if (knownSequences.Contains(newfc.seq))
                {
                    continue;
                }
                knownSequences.Add(newfc.seq);
                findIons(newfc, psm, out string newErrors);
                error_message += newErrors;
                psm.getFusionCandidates().Add(newfc);
            }
        }

        if (psm.getFusionCandidates().Count() > maxNumPossibleSequences)
        {
            return false;
        }
        if (!anyCandidateReachesIndex)
        {
            return true; //every candidate is shorter than the position just examined
        }
    }
}
//Uses the PSM's peak hits to mark, in the candidate's foundIons array, the positions where a
//theoretical b/c ion (prefix, matched against the N peaks) or y/zdot ion (suffix, matched against the
//C peaks) lies within productMassToleranceDa of an observed peak. Unmarked positions are ambiguous.
//error_message receives the concatenated messages of every mass calculation performed.
public static void findIons(FusionCandidate fusionCandidate, PSM psm, out string error_message)
{
    error_message = "";
    double[] nPeaks = psm.getNInfo().getPeakHits();
    double[] cPeaks = psm.getCInfo().getPeakHits();
    fusionCandidate.makeFoundIons();
    string candSeq = fusionCandidate.seq;
    bool[] foundIons = fusionCandidate.getFoundIons();

    //true when any observed peak lies strictly within the tolerance of the theoretical mass
    bool HasPeakNear(double[] peaks, double theoMass)
    {
        foreach (double expPeak in peaks)
        {
            if (expPeak > theoMass - productMassToleranceDa && expPeak < theoMass + productMassToleranceDa)
            {
                return true;
            }
        }
        return false;
    }

    //adds, one by one, the PTM masses of the N parent whose index is at or before position i
    double WithNTermPtms(double mass, int i)
    {
        foreach (PTM ptm in psm.getNInfo().getPTMs())
        {
            if (ptm.index <= i)
            {
                mass += ptm.mass;
            }
        }
        return mass;
    }

    //adds, one by one, the PTM masses of the C parent whose index is at or after candSeq.Length - 2 - i
    double WithCTermPtms(double mass, int i)
    {
        foreach (PTM ptm in psm.getCInfo().getPTMs())
        {
            if (ptm.index >= candSeq.Length - 2 - i)
            {
                mass += ptm.mass;
            }
        }
        return mass;
    }

    //find which aa have peaks
    for (int i = 0; i < foundIons.Count() - 1; i++)
    {
        int mirrored = foundIons.Count() - 2 - i; //position marked by the C-side ion of length i + 1

        //B IONS//
        if (ionsUsed.Contains(IonType.b))
        {
            double bTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(0, 1 + i), out string bErrors) - Constants.WATER_MONOISOTOPIC_MASS;
            error_message += bErrors;
            bTheoMass = WithNTermPtms(bTheoMass, i);
            if (HasPeakNear(nPeaks, bTheoMass))
            {
                foundIons[i] = true;
            }
        }

        //Y IONS//
        if (ionsUsed.Contains(IonType.y))
        {
            double yTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(candSeq.Length - 1 - i, i + 1), out string yErrors);
            error_message += yErrors;
            yTheoMass = WithCTermPtms(yTheoMass, i);
            if (HasPeakNear(cPeaks, yTheoMass))
            {
                foundIons[mirrored] = true;
            }
        }

        //C IONS//
        if (ionsUsed.Contains(IonType.c))
        {
            double cTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(0, 1 + i), out string cErrors) - Constants.WATER_MONOISOTOPIC_MASS + Constants.nitrogenMonoisotopicMass + 3 * Constants.hydrogenMonoisotopicMass;
            error_message += cErrors;
            cTheoMass = WithNTermPtms(cTheoMass, i);
            if (HasPeakNear(nPeaks, cTheoMass))
            {
                foundIons[i] = true;
            }
        }

        //ZDOT IONS//
        if (ionsUsed.Contains(IonType.zdot))
        {
            double zdotTheoMass = MassCalculator.MonoIsoptopicMass(candSeq.Substring(candSeq.Length - 1 - i, i + 1), out string zdotErrors) - Constants.nitrogenMonoisotopicMass - 2 * Constants.hydrogenMonoisotopicMass;
            error_message += zdotErrors;
            zdotTheoMass = WithCTermPtms(zdotTheoMass, i);
            if (HasPeakNear(cPeaks, zdotTheoMass))
            {
                foundIons[mirrored] = true;
            }
        }
    }

    //A|B|C|D|E|F|K| where the whole peptide peak is always placed arbitrarily at the c term
    foundIons[foundIons.Count() - 1] = true;
}