/// <summary>
/// Fills <c>AA</c> with a '|'-separated list of amino-acid additions and removals
/// (single residues and ordered residue pairs, letters A-Z) whose monoisotopic mass
/// matches <c>MassShift</c> within the given tolerance.
/// </summary>
/// <param name="v">Largest absolute mass difference that still counts as a match.</param>
public void IdentifyAA(double v)
{
    // Resolve the residues for A-Z once, keeping alphabetical order.
    var knownResidues = new List<Residue>();
    for (char letter = 'A'; letter <= 'Z'; letter++)
    {
        if (Residue.TryGetResidue(letter, out Residue found))
        {
            knownResidues.Add(found);
        }
    }

    var matches = new HashSet<string>();
    foreach (Residue first in knownResidues)
    {
        // A positive shift equal to the residue mass reads as an addition,
        // a negative shift of the same size as a removal.
        if (Math.Abs(first.MonoisotopicMass - MassShift) <= v)
        {
            matches.Add("Add " + first.Name);
        }

        if (Math.Abs(first.MonoisotopicMass + MassShift) <= v)
        {
            matches.Add("Remove " + first.Name);
        }

        // Pairs are visited in both orders, so "(X+Y)" and "(Y+X)" can both be reported.
        foreach (Residue second in knownResidues)
        {
            double pairMass = second.MonoisotopicMass + first.MonoisotopicMass;
            string pairName = "(" + first.Name + "+" + second.Name + ")";

            if (Math.Abs(pairMass - MassShift) <= v)
            {
                matches.Add("Add " + pairName);
            }

            if (Math.Abs(pairMass + MassShift) <= v)
            {
                matches.Add("Remove " + pairName);
            }
        }
    }

    AA = string.Join("|", matches);
}
/// <summary>
/// Returns a copy of the sequence in which every match of <c>UnicodeRegex</c> and every
/// character that <c>Residue.TryGetResidue</c> does not recognise is replaced by
/// <paramref name="replacementCharacter"/>.
/// </summary>
/// <param name="originalSequence">Sequence to clean.</param>
/// <param name="replacementCharacter">Character substituted for anything unrecognised.</param>
/// <returns>The cleaned sequence.</returns>
public static string SanitizeAminoAcidSequence(string originalSequence, char replacementCharacter)
{
    // First pass: let the regex swap out whatever it matches.
    string regexCleaned = UnicodeRegex.Replace(originalSequence, replacementCharacter.ToString());

    // Second pass: overwrite, position by position, anything that is not a known residue.
    char[] buffer = regexCleaned.ToCharArray();
    for (int position = 0; position < buffer.Length; position++)
    {
        bool isKnownResidue = Residue.TryGetResidue(buffer[position], out Residue ignored);
        if (!isKnownResidue)
        {
            buffer[position] = replacementCharacter;
        }
    }

    return new string(buffer);
}
/// <summary>
/// Verifies that custom amino acids are read from CustomAminoAcids.txt and that
/// malformed entries (an invalid letter, an empty chemical formula) are rejected with a
/// <c>MetaMorpheusException</c>.
/// </summary>
/// <remarks>
/// The test edits the shared CustomAminoAcids.txt on disk. The file is deleted in a
/// <c>finally</c> block so that a failing assertion cannot leave a corrupt file behind
/// for later runs.
/// </remarks>
public static void TestCustomAminoAcidReading()
{
    string aminoAcidPath = Path.Combine(GlobalVariables.DataDir, @"CustomAminoAcids", @"CustomAminoAcids.txt");

    // Read the current content before touching anything; if this throws, there is nothing to clean up.
    List<string> lines = new List<string>(File.ReadAllLines(aminoAcidPath));

    try
    {
        // Manually add a valid entry and check that it is picked up.
        lines.Add("fake\tf\t60\tC5");
        File.WriteAllLines(aminoAcidPath, lines);
        GlobalVariables.RefreshAminoAcidDictionary(); // read the file

        Assert.IsTrue(Residue.TryGetResidue('f', out Residue r));
        Assert.IsTrue(r.MonoisotopicMass.Equals(60));

        // Now crash it intentionally with an invalid character.
        lines.Add("evenFaker\tX\t72\tC6");
        File.WriteAllLines(aminoAcidPath, lines);
        try
        {
            GlobalVariables.RefreshAminoAcidDictionary(); // read the file
            // We're trying to crash it, so if we didn't, we failed.
            Assert.Fail("Expected a MetaMorpheusException for an invalid amino acid letter.");
        }
        catch (MetaMorpheusException)
        {
            // Expected.
        }

        // Now crash it intentionally with a bad chemical formula.
        lines.RemoveAt(lines.Count - 1); // get rid of that last bad one
        lines.Add("theFakest\ta\t50\t");
        File.WriteAllLines(aminoAcidPath, lines);
        try
        {
            GlobalVariables.RefreshAminoAcidDictionary(); // read the file
            // We're trying to crash it, so if we didn't, we failed.
            Assert.Fail("Expected a MetaMorpheusException for an unparsable chemical formula.");
        }
        catch (MetaMorpheusException)
        {
            // Expected.
        }
    }
    finally
    {
        // Delete so it doesn't crash the next time, even when an assertion above failed.
        File.Delete(aminoAcidPath);
    }
}
/// <summary>
/// Text-changed handler for the amino acid lookup box. Looks up the typed letter and,
/// when it is a known residue, loads its mass and chemical formula and enables the
/// isotope inputs; otherwise disables them or asks the user for a valid letter.
/// </summary>
/// <param name="sender">The <c>TextBox</c> whose text changed.</param>
/// <param name="e">Event data (unused).</param>
private void AminoAcidLookup(object sender, System.Windows.Controls.TextChangedEventArgs e)
{
    var textBox = (System.Windows.Controls.TextBox)sender;

    // Upper-case with the invariant culture: a culture-sensitive ToUpper() turns 'i'
    // into 'İ' under Turkish cultures, which would reject isoleucine.
    char letter = textBox.Text.ToUpperInvariant().FirstOrDefault();

    if (letter == '\0')
    {
        // Nothing typed: no residue selected, so isotope inputs are not usable.
        SetIsotopeInputsEnabled(false);
    }
    else if (textBox.Text.Length != 1) //if too long, don't let them add stuff and don't change anything
    {
        // Trimming the text raises this handler again with a single character.
        textBox.Text = textBox.Text[0].ToString();
    }
    else if (Residue.TryGetResidue(letter, out Residue residue))
    {
        AminoAcid = letter;
        AminoAcidMonoisotopicMass = residue.MonoisotopicMass;
        ChemicalFormula.Text = residue.ThisChemicalFormula.Formula;
        ParseChemicalFormula();
        MassDifference.Text = "0.000";
        SetIsotopeInputsEnabled(true);
    }
    else
    {
        MessageBox.Show("\"" + letter + "\" is not a valid amino acid. Please enter a single amino acid letter.");
        txtBoxAminoAcidLookup.Text = "";
    }

    // Enables or disables every isotope input control together.
    void SetIsotopeInputsEnabled(bool enabled)
    {
        C12.IsEnabled = enabled;
        C13.IsEnabled = enabled;
        N14.IsEnabled = enabled;
        N15.IsEnabled = enabled;
        O16.IsEnabled = enabled;
        O18.IsEnabled = enabled;
        H1.IsEnabled = enabled;
        H2.IsEnabled = enabled;
        S32.IsEnabled = enabled;
        S34.IsEnabled = enabled;
    }
}
/// <summary>
/// Reads CustomAminoAcids.txt from the data directory and registers any residue that is
/// new, or whose chemical formula differs from the one already registered. When the file
/// does not exist it is created via <c>WriteAminoAcidsFile</c> instead.
/// </summary>
/// <exception cref="ProteaseGuruException">
/// A line uses a letter listed in <c>InvalidAminoAcids</c>, has an empty one-letter column,
/// or could not be turned into a residue (e.g. an unparsable chemical formula).
/// </exception>
public static void RefreshAminoAcidDictionary()
{
    //read in all the amino acids (they already exist in mzlib, but there might be synthetic amino acids that need to be included)
    string aminoAcidPath = Path.Combine(DataDir, @"CustomAminoAcids", @"CustomAminoAcids.txt");

    if (!File.Exists(aminoAcidPath))
    {
        // create it so that it can be manipulated
        WriteAminoAcidsFile();
        return;
    }

    string[] aminoAcidLines = File.ReadAllLines(aminoAcidPath);
    List<Residue> residuesToAdd = new List<Residue>();

    // Start at 1 to skip the header row.
    for (int i = 1; i < aminoAcidLines.Length; i++)
    {
        // tsv: Name, one letter, monoisotopic mass, chemical formula
        string[] line = aminoAcidLines[i].Split('\t');
        if (line.Length < 4)
        {
            // blank or truncated line: nothing to read
            continue;
        }

        // An empty one-letter column would otherwise surface as a raw IndexOutOfRangeException.
        if (line[1].Length == 0)
        {
            throw new ProteaseGuruException("Error while reading 'CustomAminoAcids.txt'. Line " + (i + 1).ToString() + " was not in the correct format.");
        }

        char letter = line[1][0];
        if (InvalidAminoAcids.Contains(letter))
        {
            throw new ProteaseGuruException("Error while reading 'CustomAminoAcids.txt'. Line " + (i + 1).ToString() + " contains an invalid amino acid. (Ex: " + string.Join(", ", InvalidAminoAcids.Select(x => x.ToString())) + ")");
        }

        try
        {
            ChemicalFormula formula = ChemicalFormula.ParseFormula(line[3]);

            // if it doesn't already exist, or it does exist but has a different formula, add the entry
            if (!Residue.TryGetResidue(letter, out Residue residue)
                || !formula.Formula.Equals(residue.ThisChemicalFormula.Formula))
            {
                residuesToAdd.Add(new Residue(line[0], letter, line[1], formula, ModificationSites.Any));
            }
        }
        catch
        {
            // Any failure while building the residue is reported as a format problem with the line number.
            throw new ProteaseGuruException("Error while reading 'CustomAminoAcids.txt'. Line " + (i + 1).ToString() + " was not in the correct format.");
        }
    }

    Residue.AddNewResiduesToDictionary(residuesToAdd);
}
/// <summary>
/// Writes CustomAminoAcids.txt into the "CustomAminoAcids" folder of the data directory:
/// a header row followed by one tab-separated row (name, letter, monoisotopic mass,
/// chemical formula) for every residue registered under the letters A-Z.
/// </summary>
public static void WriteAminoAcidsFile()
{
    string directory = Path.Combine(DataDir, @"CustomAminoAcids");

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(directory);

    string aminoAcidPath = Path.Combine(directory, @"CustomAminoAcids.txt");
    List<string> linesToWrite = new List<string> { "Name\tOneLetterAbbr.\tMonoisotopicMass\tChemicalFormula" };

    for (char letter = 'A'; letter <= 'Z'; letter++) //just the basic residues
    {
        if (Residue.TryGetResidue(letter, out Residue residue))
        {
            // The file is machine-readable data, so the mass is written with the invariant
            // culture; the current culture could emit a decimal comma.
            string mass = residue.MonoisotopicMass.ToString(System.Globalization.CultureInfo.InvariantCulture);
            linesToWrite.Add(residue.Name + '\t' + residue.Letter.ToString() + '\t' + mass + '\t' + residue.ThisChemicalFormula.Formula);
        }
    }

    File.WriteAllLines(aminoAcidPath, linesToWrite);
}
/// <summary>
/// Picks the next usable character for a SILAC label, starting at
/// <paramref name="heavyLabel"/>, and assigns it (and the following characters for any
/// additional labels) through <c>AssignValidHeavyCharacter</c>.
/// </summary>
/// <param name="currentLabel">Label to update.</param>
/// <param name="heavyLabel">First candidate character.</param>
/// <returns>The updated label and the first character that has not been handed out yet.</returns>
public static (SilacLabel updatedLabel, char nextHeavyLabel) UpdateAminoAcidLabel(SilacLabel currentLabel, char heavyLabel)
{
    // A candidate cannot be used when it already names a residue with a different formula
    // than this label (we would overwrite it), or when it is on the invalid list
    // (motif/delimiter characters). A residue that already carries this label's formula is fine.
    bool IsUnavailable(char candidate)
    {
        bool clashesWithExistingResidue =
            Residue.TryGetResidue(candidate, out Residue existing)
            && !existing.ThisChemicalFormula.Formula.Equals(currentLabel.LabelChemicalFormula);

        return clashesWithExistingResidue || GlobalVariables.InvalidAminoAcids.Contains(candidate);
    }

    while (IsUnavailable(heavyLabel))
    {
        heavyLabel++;
    }

    SilacLabel relabeled = AssignValidHeavyCharacter(currentLabel, heavyLabel);
    heavyLabel++;

    if (currentLabel.AdditionalLabels == null)
    {
        return (relabeled, heavyLabel);
    }

    // NOTE(review): characters handed to additional labels are taken consecutively and
    // are not run through the availability check above — confirm this is intended.
    foreach (SilacLabel extraLabel in currentLabel.AdditionalLabels)
    {
        relabeled.AddAdditionalSilacLabel(AssignValidHeavyCharacter(extraLabel, heavyLabel));
        heavyLabel++;
    }

    return (relabeled, heavyLabel);
}
/// <summary>
/// Click handler that validates the custom amino acid entered by the user, appends it to
/// CustomAminoAcids.txt, registers it in the residue dictionary and closes the dialog
/// with a positive result. Any validation failure shows a message and leaves the dialog open.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void SaveCustomAminoAcid_Click(object sender, RoutedEventArgs e)
{
    //VALIDATE INPUT
    if (AminoAcidTextBox.Text.Length == 0)
    {
        MessageBox.Show("Please specify the character that represents a synthetic amino acid in the database");
        return;
    }

    // Only the first character is used as the residue letter; the full text is used as its name.
    char aminoAcidLetter = AminoAcidTextBox.Text.First();

    ChemicalFormula formula;
    try
    {
        formula = ChemicalFormula.ParseFormula(ChemicalFormulaTextBox.Text);
    }
    catch
    {
        MessageBox.Show("The chemical formula '" + ChemicalFormulaTextBox.Text + "' could not be parsed. Please try again.");
        return;
    }

    if (GlobalVariables.InvalidAminoAcids.Contains(aminoAcidLetter))
    {
        MessageBox.Show("The amino acid '" + aminoAcidLetter + "' cannot be assigned. "
            + "\nThis character is used for modification motifs or as result delimiters. "
            + "\nThe following amino acids are not allowed: "
            + string.Join(", ", GlobalVariables.InvalidAminoAcids.Select(x => x.ToString())));
        return;
    }

    //check if the specified amino acid already exists
    if (Residue.TryGetResidue(aminoAcidLetter, out Residue residue))
    {
        MessageBox.Show("The amino acid '" + aminoAcidLetter + "' already exists."
            + "\nMonoisotopic Mass: " + residue.MonoisotopicMass
            + "\nChemical Formula: " + residue.ThisChemicalFormula.Formula
            + "\n\nYou may overwrite this amino acid by manually deleting/modifying the current entry. "
            + "\nThis can be done in MetaMorpheus by navigating to 'Data' in the top-left corner, "
            + "selecting 'Open folder with mods/data files' from the drop down menu, "
            + "opening the folder 'CustomAminoAcids', and opening the file 'CustomAminoAcids.txt'."
            + "\nMetaMorpheus will need to be restarted for these changes to take effect."
            + "\n\nAmino acids can be reset to their default values by deleting the file 'CustomAminoAcids.txt' and restarting MetaMorpheus.");
        return;
    }

    //Alright, the entry is valid
    //Append the entry to the CustomAminoAcids.txt file
    string aminoAcidDirectory = Path.Combine(GlobalVariables.DataDir, @"CustomAminoAcids");
    string customAminoAcidPath = Path.Combine(aminoAcidDirectory, @"CustomAminoAcids.txt");

    //check that the file exists and create it if it doesn't
    if (!File.Exists(customAminoAcidPath))
    {
        GlobalVariables.WriteAminoAcidsFile();
    }

    //save it in the amino acid file
    //tsv Name, one letter, monoisotopic, chemical formula
    // The file is machine-readable, so the mass is written with the invariant culture
    // (the current culture could emit a decimal comma).
    string massForFile = formula.MonoisotopicMass.ToString(System.Globalization.CultureInfo.InvariantCulture);
    List<string> customAminoAcidsText = File.ReadAllLines(customAminoAcidPath).ToList();
    customAminoAcidsText.Add(AminoAcidTextBox.Text + '\t' + aminoAcidLetter + '\t' + massForFile + '\t' + formula.Formula);
    File.WriteAllLines(customAminoAcidPath, customAminoAcidsText);

    //add the new residue to the residue dictionary
    Residue.AddNewResiduesToDictionary(new List<Residue>
    {
        new Residue(AminoAcidTextBox.Text, aminoAcidLetter, AminoAcidTextBox.Text, formula, ModificationSites.Any)
    });

    MessageBox.Show("Success! Amino Acid '" + aminoAcidLetter + "' has been added to the dictionary."
        + "\nMonoisotopic Mass: " + formula.MonoisotopicMass.ToString()
        + "\nChemical Formula: " + formula.Formula);

    DialogResult = true;
}
/// <summary>
/// Generates theoretical fragments for given dissociation type for this peptide.
/// The "products" parameter is filled with these fragments.
/// </summary>
/// <param name="dissociationType">Dissociation type used to select product types, mass shifts, neutral losses and diagnostic ions.</param>
/// <param name="fragmentationTerminus">Which terminus to generate fragments from; <c>Both</c> enables the N- and C-terminal branches.</param>
/// <param name="products">Output list. It is cleared first and then filled; no new list is allocated.</param>
public void Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus, List <Product> products)
{
    // This code is specifically written to be memory- and CPU -efficient because it is
    // called millions of times for a typical search (i.e., at least once per peptide).
    // If you modify this code, BE VERY CAREFUL about allocating new memory, especially
    // for new collections. This code also deliberately avoids using "yield return", again
    // for performance reasons. Be sure to benchmark any changes with a parallelized
    // fragmentation of every peptide in a database (i.e., test for speed decreases and
    // memory issues).
    products.Clear();

    // Item1 is indexed alongside nTermProductTypes and Item2 alongside cTermProductTypes below.
    var massCaps = DissociationTypeCollection.GetNAndCTerminalMassShiftsForDissociationType(dissociationType);

    // Running sums: each loop iteration adds one more residue (and its mod) to these.
    double cTermMass = 0;
    double nTermMass = 0;

    List <ProductType> nTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.N);
    List <ProductType> cTermProductTypes = DissociationTypeCollection.GetTerminusSpecificProductTypesFromDissociation(dissociationType, FragmentationTerminus.C);

    bool calculateNTermFragments = fragmentationTerminus == FragmentationTerminus.N
        || fragmentationTerminus == FragmentationTerminus.Both;
    bool calculateCTermFragments = fragmentationTerminus == FragmentationTerminus.C
        || fragmentationTerminus == FragmentationTerminus.Both;

    //From http://www.matrixscience.com/help/fragmentation_help.html
    //Low Energy CID -- In low energy CID(i.e.collision induced dissociation in a triple quadrupole or an ion trap) a peptide carrying a positive charge fragments mainly along its backbone,
    //generating predominantly b and y ions. In addition, for fragments containing RKNQ, peaks are seen for ions that have lost ammonia (-17 Da) denoted a*, b* and y*. For fragments containing
    //STED, loss of water(-18 Da) is denoted a°, b° and y°. Satellite ions from side chain cleavage are not observed.
    // These flags are only set for LowCID and record whether an R/K/N/Q ("star") or
    // S/T/E/D ("degree") residue has been passed so far from the respective terminus.
    bool haveSeenNTermDegreeIon = false;
    bool haveSeenNTermStarIon = false;
    bool haveSeenCTermDegreeIon = false;
    bool haveSeenCTermStarIon = false;

    // these two collections keep track of the neutral losses observed so far on the n-term or c-term.
    // they are apparently necessary, but allocating memory for collections in this function results in
    // inefficient memory usage and thus frequent garbage collection.
    // TODO: If you can think of a way to remove these collections and still maintain correct
    // fragmentation, please do so.
    // They stay null until the first non-zero neutral loss is found, so the common case allocates nothing.
    HashSet <double> nTermNeutralLosses = null;
    HashSet <double> cTermNeutralLosses = null;

    // n-terminus mod (key 1 of AllModsOneIsNterminus)
    if (calculateNTermFragments)
    {
        if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            nTermMass += mod.MonoisotopicMass.Value;
        }
    }

    // c-terminus mod (key BaseSequence.Length + 2)
    if (calculateCTermFragments)
    {
        if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length + 2, out Modification mod))
        {
            cTermMass += mod.MonoisotopicMass.Value;
        }
    }

    // Walk inwards from both ends at once: iteration r extends the N-terminal fragment by
    // residue r and the C-terminal fragment by residue (Length - r - 1). The loop stops one
    // short of the full length.
    for (int r = 0; r < BaseSequence.Length - 1; r++)
    {
        // n-term fragments
        if (calculateNTermFragments)
        {
            char nTermResidue = BaseSequence[r];

            // get n-term residue mass; an unknown residue makes this and all later n-term masses NaN
            if (Residue.TryGetResidue(nTermResidue, out Residue residue))
            {
                nTermMass += residue.MonoisotopicMass;
            }
            else
            {
                nTermMass = double.NaN;
            }

            // add side-chain mod (residue index r is stored under key r + 2)
            if (AllModsOneIsNterminus.TryGetValue(r + 2, out Modification mod))
            {
                nTermMass += mod.MonoisotopicMass.Value;
            }

            // handle star and degree ions for low-res CID
            if (dissociationType == DissociationType.LowCID)
            {
                if (nTermResidue == 'R' || nTermResidue == 'K' || nTermResidue == 'N' || nTermResidue == 'Q')
                {
                    haveSeenNTermStarIon = true;
                }

                if (nTermResidue == 'S' || nTermResidue == 'T' || nTermResidue == 'E' || nTermResidue == 'D')
                {
                    haveSeenNTermDegreeIon = true;
                }
            }

            // skip first N-terminal fragment (b1, aDegree1, ...) for CID
            // The mass and flags above are still updated; only product generation is skipped.
            if (r == 0 && (dissociationType == DissociationType.CID || dissociationType == DissociationType.LowCID))
            {
                goto CTerminusFragments;
            }

            // generate products
            for (int i = 0; i < nTermProductTypes.Count; i++)
            {
                if (dissociationType == DissociationType.LowCID)
                {
                    if (!haveSeenNTermStarIon && (nTermProductTypes[i] == ProductType.aStar || nTermProductTypes[i] == ProductType.bStar))
                    {
                        continue;
                    }

                    if (!haveSeenNTermDegreeIon && (nTermProductTypes[i] == ProductType.aDegree || nTermProductTypes[i] == ProductType.bDegree))
                    {
                        continue;
                    }
                }

                products.Add(new Product(
                    nTermProductTypes[i],
                    FragmentationTerminus.N,
                    nTermMass + massCaps.Item1[i],
                    r + 1,
                    r + 1,
                    0));

                // Remember the non-zero neutral losses of this residue's mod; they also apply
                // to every longer n-term fragment generated afterwards.
                if (mod != null && mod.NeutralLosses != null && mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                {
                    foreach (double neutralLoss in neutralLosses.Where(p => p != 0))
                    {
                        if (nTermNeutralLosses == null)
                        {
                            nTermNeutralLosses = new HashSet <double>();
                        }

                        nTermNeutralLosses.Add(neutralLoss);
                    }
                }

                // One extra product per neutral loss seen so far on the n-term side.
                if (nTermNeutralLosses != null)
                {
                    foreach (double neutralLoss in nTermNeutralLosses)
                    {
                        products.Add(new Product(
                            nTermProductTypes[i],
                            FragmentationTerminus.N,
                            nTermMass + massCaps.Item1[i] - neutralLoss,
                            r + 1,
                            r + 1,
                            neutralLoss));
                    }
                }
            }
        }

        // c-term fragments
        CTerminusFragments:
        if (calculateCTermFragments)
        {
            char cTermResidue = BaseSequence[BaseSequence.Length - r - 1];

            // get c-term residue mass; an unknown residue makes this and all later c-term masses NaN
            if (Residue.TryGetResidue(cTermResidue, out Residue residue))
            {
                cTermMass += residue.MonoisotopicMass;
            }
            else
            {
                cTermMass = double.NaN;
            }

            // add side-chain mod (same "residue index + 2" keying as the n-term branch)
            if (AllModsOneIsNterminus.TryGetValue(BaseSequence.Length - r + 1, out Modification mod))
            {
                cTermMass += mod.MonoisotopicMass.Value;
            }

            // handle star and degree ions for low-res CID
            if (dissociationType == DissociationType.LowCID)
            {
                if (cTermResidue == 'R' || cTermResidue == 'K' || cTermResidue == 'N' || cTermResidue == 'Q')
                {
                    haveSeenCTermStarIon = true;
                }

                if (cTermResidue == 'S' || cTermResidue == 'T' || cTermResidue == 'E' || cTermResidue == 'D')
                {
                    haveSeenCTermDegreeIon = true;
                }
            }

            // generate products
            for (int i = 0; i < cTermProductTypes.Count; i++)
            {
                // skip zDot ions for proline residues for ETD/ECD/EThcD
                if (cTermResidue == 'P'
                    && (dissociationType == DissociationType.ECD || dissociationType == DissociationType.ETD || dissociationType == DissociationType.EThcD)
                    && cTermProductTypes[i] == ProductType.zDot)
                {
                    continue;
                }

                if (dissociationType == DissociationType.LowCID)
                {
                    if (!haveSeenCTermStarIon && cTermProductTypes[i] == ProductType.yStar)
                    {
                        continue;
                    }

                    if (!haveSeenCTermDegreeIon && cTermProductTypes[i] == ProductType.yDegree)
                    {
                        continue;
                    }
                }

                products.Add(new Product(
                    cTermProductTypes[i],
                    FragmentationTerminus.C,
                    cTermMass + massCaps.Item2[i],
                    r + 1,
                    BaseSequence.Length - r,
                    0));

                // Remember the non-zero neutral losses of this residue's mod for this and later c-term fragments.
                if (mod != null && mod.NeutralLosses != null && mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
                {
                    foreach (double neutralLoss in neutralLosses.Where(p => p != 0))
                    {
                        if (cTermNeutralLosses == null)
                        {
                            cTermNeutralLosses = new HashSet <double>();
                        }

                        cTermNeutralLosses.Add(neutralLoss);
                    }
                }

                // One extra product per neutral loss seen so far on the c-term side.
                if (cTermNeutralLosses != null)
                {
                    foreach (double neutralLoss in cTermNeutralLosses)
                    {
                        products.Add(new Product(
                            cTermProductTypes[i],
                            FragmentationTerminus.C,
                            cTermMass + massCaps.Item2[i] - neutralLoss,
                            r + 1,
                            BaseSequence.Length - r,
                            neutralLoss));
                    }
                }
            }
        }
    }

    // zDot generates one more ion...
    // (the full-length fragment that the loop above never reaches, unless the first residue is proline)
    // NOTE(review): this block is not guarded by calculateCTermFragments, so it also runs for
    // N-terminus-only requests, where cTermMass has not been accumulated — confirm intended.
    if (cTermProductTypes.Contains(ProductType.zDot) && BaseSequence[0] != 'P')
    {
        // get c-term residue mass
        if (Residue.TryGetResidue(BaseSequence[0], out Residue residue))
        {
            cTermMass += residue.MonoisotopicMass;
        }
        else
        {
            cTermMass = double.NaN;
        }

        // add side-chain mod
        // NOTE(review): this reads key 1, the same key the "n-terminus mod" step reads, whereas
        // the loops above read the mod of residue index j at key j + 2 — confirm key 1 is intended.
        if (AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            cTermMass += mod.MonoisotopicMass.Value;
        }

        // generate zDot product
        products.Add(new Product(
            ProductType.zDot,
            FragmentationTerminus.C,
            cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot),
            BaseSequence.Length,
            1,
            0));

        // Only the losses of the mod looked up just above are applied here; the accumulated
        // cTermNeutralLosses set is not consulted.
        if (mod != null && mod.NeutralLosses != null && mod.NeutralLosses.TryGetValue(dissociationType, out List <double> neutralLosses))
        {
            foreach (double neutralLoss in neutralLosses.Where(p => p != 0))
            {
                products.Add(new Product(
                    ProductType.zDot,
                    FragmentationTerminus.C,
                    cTermMass + DissociationTypeCollection.GetMassShiftFromProductType(ProductType.zDot) - neutralLoss,
                    BaseSequence.Length,
                    1,
                    neutralLoss));
            }
        }
    }

    foreach (var mod in AllModsOneIsNterminus.Where(p => p.Value.NeutralLosses != null))
    {
        // molecular ion minus neutral losses
        if (mod.Value.NeutralLosses.TryGetValue(dissociationType, out List <double> losses))
        {
            foreach (double neutralLoss in losses.Where(p => p != 0))
            {
                // Redundant with the Where filter above; kept as in the original.
                if (neutralLoss != 0)
                {
                    products.Add(new Product(ProductType.M, FragmentationTerminus.Both, MonoisotopicMass - neutralLoss, 0, 0, neutralLoss));
                }
            }
        }
    }

    // generate diagnostic ions
    // TODO: this code is memory-efficient but sort of CPU inefficient; it can be further optimized.
    // however, diagnostic ions are fairly rare so it's probably OK for now
    foreach (double diagnosticIon in AllModsOneIsNterminus.Where(p => p.Value.DiagnosticIons != null && p.Value.DiagnosticIons.ContainsKey(dissociationType)).SelectMany(p => p.Value.DiagnosticIons[dissociationType]).Distinct())
    {
        // The label is the diagnostic ion's m/z at charge 1, rounded to the nearest integer.
        int diagnosticIonLabel = (int)Math.Round(diagnosticIon.ToMz(1), 0);

        // the diagnostic ion is assumed to be annotated in the mod info as the *neutral mass* of the diagnostic ion, not the ionized species
        products.Add(new Product(ProductType.D, FragmentationTerminus.Both, diagnosticIon, diagnosticIonLabel, 0, 0));
    }
}
/// <summary>
/// Checks that looking up '?' fails through both the string and the char overload of
/// <c>Residue.TryGetResidue</c>.
/// </summary>
public void GetResidueNotInDictionary()
{
    // String overload.
    bool foundByString = Residue.TryGetResidue("?", out Residue residue);
    Assert.IsFalse(foundByString);

    // Char overload.
    bool foundByChar = Residue.TryGetResidue('?', out residue);
    Assert.IsFalse(foundByChar);
}
/// <summary>
/// Runs the search task: prepares SILAC labels (when quantifying), loads modifications and
/// proteins, records the settings prose, searches every spectra file with the configured
/// search type (modern, non-specific or classic) and hands the collected PSMs to
/// <c>PostSearchAnalysisTask</c>.
/// </summary>
/// <param name="OutputFolder">Folder passed on to post-search analysis for output.</param>
/// <param name="dbFilenameList">Protein databases to load.</param>
/// <param name="currentRawFileList">Spectra files to search, in order.</param>
/// <param name="taskId">Identifier used for status and progress reporting.</param>
/// <param name="fileSettingsList">Per-file parameter overrides, indexed like <paramref name="currentRawFileList"/>.</param>
/// <returns>The result of running the post-search analysis task.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    if (SearchParameters.DoQuantification)
    {
        // disable quantification if a .mgf is being used
        if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
        {
            SearchParameters.DoQuantification = false;
        }
        //if we're doing SILAC, add the silac labels to the residue dictionary
        else if (SearchParameters.SilacLabels != null)
        {
            //change the silac residues to lower case amino acids (currently null)
            List<SilacLabel> updatedLabels = new List<SilacLabel>();
            char heavyLabel = 'a';
            for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
            {
                SilacLabel currentLabel = SearchParameters.SilacLabels[i];

                //make sure we're not overwriting something, and that it's a valid residue (not a motif/delimiter)
                while ((Residue.TryGetResidue(heavyLabel, out Residue residue) //Check if the amino acid exists. If it already exists, we don't want to overwrite it
                        && !residue.ThisChemicalFormula.Formula.Equals(currentLabel.LabelChemicalFormula)) //if it exists but it's already the label (so we're not overwriting anything), then we're fine
                    || GlobalVariables.InvalidAminoAcids.Contains(heavyLabel)) //If it didn't already exist, but it's invalid, we need to keep going
                {
                    heavyLabel++;
                }

                SilacLabel updatedLabel = SilacConversions.AssignValidHeavyCharacter(currentLabel, heavyLabel);
                heavyLabel++;
                if (currentLabel.AdditionalLabels != null)
                {
                    foreach (SilacLabel additionalLabel in currentLabel.AdditionalLabels)
                    {
                        updatedLabel.AddAdditionalSilacLabel(SilacConversions.AssignValidHeavyCharacter(additionalLabel, heavyLabel));
                        heavyLabel++;
                    }
                }
                updatedLabels.Add(updatedLabel);
            }
            SearchParameters.SilacLabels = updatedLabels;
        }
    }

    //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
    if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
    {
        SearchParameters.SilacLabels = null;
    }

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following search settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
        + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the search task
    MyTaskResults = new MyTaskResults(this);
    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
    int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast<FdrCategory>().Last() + 1); //+1 because it starts at zero
    List<PeptideSpectralMatch>[] allCategorySpecificPsms = new List<PeptideSpectralMatch>[numFdrCategories];
    for (int i = 0; i < numFdrCategories; i++)
    {
        allCategorySpecificPsms[i] = new List<PeptideSpectralMatch>();
    }

    FlashLfqResults flashLfqResults = null;

    MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

    // Deferred query: it is only enumerated when the post-search parameters are built below.
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

    int completedFiles = 0;
    object indexLock = new object();
    object psmLock = new object();

    Status("Searching files...", taskId);
    Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

    // Per file: [number of MS2 scans in the file, number of MS2 scans actually searched].
    Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];

        // mark the file as in-progress
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);
        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
        numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // modern search
        if (SearchParameters.SearchType == SearchType.Modern)
        {
            for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
            {
                List<PeptideWithSetModifications> peptideIndex = null;

                // Slice of the protein list belonging to this partition.
                List<Protein> proteinListSubset = proteinList.GetRange(
                    currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                    ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                Status("Getting fragment dictionary...", new List<string> { taskId });
                var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels, currentPartition, SearchParameters.DecoyType, combinedParams, SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
                List<int>[] fragmentIndex = null;
                List<int>[] precursorIndex = null;

                lock (indexLock)
                {
                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                }

                Status("Searching files...", taskId);

                new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                if (GlobalVariables.StopLoops)
                {
                    break;
                }
            }
        }
        // nonspecific search
        else if (SearchParameters.SearchType == SearchType.NonSpecific)
        {
            PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
            for (int i = 0; i < numFdrCategories; i++) //only add if we're using for FDR, else ignore it as null.
            {
                fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
            }

            List<CommonParameters> paramsToUse = new List<CommonParameters> { combinedParams };
            if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
            {
                paramsToUse.Clear();
                List<FragmentationTerminus> terminiToUse = new List<FragmentationTerminus> { FragmentationTerminus.N, FragmentationTerminus.C };
                foreach (FragmentationTerminus terminus in terminiToUse) //set both termini
                {
                    paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                }
            }

            foreach (CommonParameters paramToUse in paramsToUse)
            {
                for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                {
                    List<PeptideWithSetModifications> peptideIndex = null;

                    // Slice of the protein list belonging to this partition.
                    List<Protein> proteinListSubset = proteinList.GetRange(
                        currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                        ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                    List<int>[] fragmentIndex = null;
                    List<int>[] precursorIndex = null;

                    Status("Getting fragment dictionary...", new List<string> { taskId });
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels, currentPartition, SearchParameters.DecoyType, paramToUse, SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
                    lock (indexLock)
                    {
                        GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                    }

                    Status("Searching files...", taskId);

                    new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, precursorIndex, currentPartition, paramToUse, variableModifications, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                    if (GlobalVariables.StopLoops)
                    {
                        // Leaves only the partition loop; the outer loops re-check StopLoops themselves.
                        break;
                    }
                }
            }

            lock (psmLock)
            {
                for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                {
                    if (allCategorySpecificPsms[i] != null)
                    {
                        allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                    }
                }
            }
        }
        // classic search
        else
        {
            Status("Starting search...", thisId);
            new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels, proteinList, massDiffAcceptor, combinedParams, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
        }

        lock (psmLock)
        {
            allPsms.AddRange(fileSpecificPsms);
        }

        completedFiles++;
        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        // Report a 0-100 percentage like every other progress call here. The previous
        // completedFiles / Count was integer division and stayed at 0 until the last file.
        ReportProgress(new ProgressEventArgs(100 * completedFiles / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

    //resolve category specific fdrs (for speedy semi and nonspecific)
    if (SearchParameters.SearchType == SearchType.NonSpecific)
    {
        allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters);
    }

    PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
    {
        SearchTaskResults = MyTaskResults,
        SearchTaskId = taskId,
        SearchParameters = SearchParameters,
        ProteinList = proteinList,
        AllPsms = allPsms,
        VariableModifications = variableModifications,
        FixedModifications = fixedModifications,
        ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
        CurrentRawFileList = currentRawFileList,
        MyFileManager = myFileManager,
        NumNotches = numNotches,
        OutputFolder = OutputFolder,
        IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
        FlashLfqResults = flashLfqResults,
        FileSettingsList = fileSettingsList,
        NumMs2SpectraPerFile = numMs2SpectraPerFile,
        DatabaseFilenameList = dbFilenameList
    };
    PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
    {
        Parameters = parameters,
        CommonParameters = CommonParameters
    };
    return postProcessing.Run();
}