/// <summary>
/// Collects the "modified residue" annotations found in a UniProt XML proteome database and
/// returns a <see cref="Modification"/> for each one that can be resolved, keyed by
/// "UniProt: " followed by the annotation description.
/// </summary>
/// <remarks>
/// Descriptions are resolved first against <c>ModificationDictionary.Instance</c>. Any that remain
/// are looked up in <c>ptmlist.txt</c>, located in the directory of the program path
/// (<c>Environment.GetCommandLineArgs()[0]</c>); a refresh of that file from uniprot.org is attempted
/// first on a best-effort basis. PSI-MOD names are read from <c>PSI-MOD.obo.xml</c> in the same directory.
/// </remarks>
/// <param name="uniProtXmlProteomeDatabaseFilepath">Path of the UniProt XML proteome database to scan.</param>
/// <returns>The resolved modifications; empty when the database contains no usable annotations.</returns>
public static Dictionary<string, Modification> ReadUniProtXmlModifications(string uniProtXmlProteomeDatabaseFilepath)
{
    if (proteinExistenceCodes == null)
    {
        InitializeDictionaries();
    }

    List<string> modifications_in_database = ReadModifiedResidueDescriptions(uniProtXmlProteomeDatabaseFilepath);

    Dictionary<string, Modification> modifications = new Dictionary<string, Modification>();
    ModificationDictionary user_modifications = ModificationDictionary.Instance;

    // Descriptions the user's modification dictionary does not know; these are resolved from ptmlist.txt below.
    HashSet<string> unresolved_modifications = new HashSet<string>();
    foreach (string modification_name in modifications_in_database)
    {
        Modification modification;
        if (user_modifications.TryGetValue(modification_name, out modification))
        {
            string uniprot_name = "UniProt: " + modification_name;
            modifications.Add(uniprot_name, new Modification(uniprot_name, modification.Type, modification.AminoAcid, modification.MonoisotopicMassShift, modification.AverageMassShift, modification.MonoisotopicNeutralLossMass, modification.AverageNeutralLossMass, false, true, modification.Database, modification.DatabaseAccessionNumber, modification.DatabaseName, true));
        }
        else
        {
            unresolved_modifications.Add(modification_name);
        }
    }

    if (unresolved_modifications.Count == 0)
    {
        return modifications;
    }

    string application_directory = Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]);
    string old_ptmlist_filepath = Path.Combine(application_directory, "ptmlist.txt");
    TryRefreshPtmList(old_ptmlist_filepath, Path.Combine(application_directory, "ptmlist.new.txt"));

    XmlDocument psi_mod_temp = new XmlDocument();
    psi_mod_temp.Load(Path.Combine(application_directory, "PSI-MOD.obo.xml"));
    XPathNavigator psi_mod = psi_mod_temp.CreateNavigator();

    // ptmlist.txt is machine-readable, so numbers are parsed independently of the user's locale.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (StreamReader uniprot_mods = new StreamReader(old_ptmlist_filepath))
    {
        // State of the entry currently being read; every field is reset at the "//" terminator.
        string description = null;
        string feature_type = null;
        ModificationType modification_type = ModificationType.AminoAcidResidue;
        char amino_acid_residue = '\0';
        double monoisotopic_mass_shift = double.NaN;
        double average_mass_shift = double.NaN;
        string database = null;
        int database_accession_number = -1;
        string database_name = null;

        string line;
        while ((line = uniprot_mods.ReadLine()) != null)
        {
            if (line.Length < 2)
            {
                continue;
            }

            // The two-character line code is followed by padding; the payload starts at column 5.
            string value = line.Length > 5 ? line.Substring(5) : string.Empty;

            switch (line.Substring(0, 2))
            {
                case "ID":
                    description = value;
                    break;
                case "FT":
                    feature_type = value;
                    break;
                case "TG":
                    if (feature_type == "MOD_RES" && value.Length > 0)
                    {
                        aminoAcidCodes.TryGetValue(char.ToUpperInvariant(value[0]) + value.Substring(1).TrimEnd('.'), out amino_acid_residue);
                    }
                    break;
                case "PP":
                    if (feature_type == "MOD_RES")
                    {
                        // Only overwrite the default when the position code is recognised.
                        ModificationType parsed_type;
                        if (modificationTypeCodes.TryGetValue(value, out parsed_type))
                        {
                            modification_type = parsed_type;
                        }
                    }
                    break;
                case "MM":
                    monoisotopic_mass_shift = double.Parse(value, invariant);
                    break;
                case "MA":
                    average_mass_shift = double.Parse(value, invariant);
                    break;
                case "DR":
                    if (line.Contains("PSI-MOD"))
                    {
                        Match match = PSI_MOD_ACCESSION_NUMBER_REGEX.Match(value);
                        if (match.Success)
                        {
                            database = match.Groups[1].Value;
                            database_accession_number = int.Parse(match.Groups[2].Value, invariant);
                            XPathNavigator term = psi_mod.SelectSingleNode(@"/obo/term[id='MOD:" + database_accession_number.ToString("00000", invariant) + "']");
                            // The local PSI-MOD file may lack this term; leave the name empty rather than crash.
                            XPathNavigator term_name = term != null ? term.SelectSingleNode("name") : null;
                            database_name = term_name != null ? term_name.Value : null;
                        }
                    }
                    break;
                case "//":
                    if (feature_type == "MOD_RES" && unresolved_modifications.Contains(description) && (!double.IsNaN(monoisotopic_mass_shift) || !double.IsNaN(average_mass_shift)))
                    {
                        // NOTE(review): the position parsed from the PP line is now passed through; it was previously
                        // parsed but ignored in favour of a hard-coded AminoAcidResidue.
                        Modification modification = new Modification("UniProt: " + description, modification_type, amino_acid_residue, monoisotopic_mass_shift, average_mass_shift, 0.0, 0.0, false, true, database, database_accession_number, database_name, true);
                        if (!modifications.ContainsKey(modification.Description))
                        {
                            modifications.Add(modification.Description, modification);
                        }
                    }
                    description = null;
                    feature_type = null;
                    modification_type = ModificationType.AminoAcidResidue;
                    amino_acid_residue = '\0';
                    monoisotopic_mass_shift = double.NaN;
                    average_mass_shift = double.NaN;
                    // Reset the cross-reference too, so it cannot leak into an entry without a PSI-MOD line.
                    database = null;
                    database_accession_number = -1;
                    database_name = null;
                    break;
            }
        }
    }

    return modifications;
}

/// <summary>
/// Returns the distinct descriptions of the "modified residue" features in a UniProt XML file,
/// in order of first appearance, truncated at the first semicolon.
/// </summary>
private static List<string> ReadModifiedResidueDescriptions(string uniProtXmlProteomeDatabaseFilepath)
{
    List<string> descriptions = new List<string>();
    // The same description occurs on many features; track what has been seen so each is returned once.
    HashSet<string> seen = new HashSet<string>();

    using (XmlReader xml = XmlReader.Create(uniProtXmlProteomeDatabaseFilepath))
    {
        while (xml.ReadToFollowing("feature"))
        {
            if (xml.GetAttribute("type") != "modified residue")
            {
                continue;
            }
            if (ONLY_CONSIDER_MODIFICATIONS_WITH_EVIDENCE && xml.GetAttribute("evidence") == null)
            {
                continue;
            }

            string description = xml.GetAttribute("description");
            if (description == null || description.Contains("variant"))
            {
                continue;
            }

            int semicolon_index = description.IndexOf(';');
            if (semicolon_index >= 0)
            {
                description = description.Substring(0, semicolon_index);
            }

            if (seen.Add(description))
            {
                descriptions.Add(description);
            }
        }
    }

    return descriptions;
}

/// <summary>
/// Best-effort refresh of the local ptmlist.txt from uniprot.org. Any failure is ignored so that
/// an existing local copy can still be used.
/// </summary>
private static void TryRefreshPtmList(string old_ptmlist_filepath, string new_ptmlist_filepath)
{
    try
    {
        using (WebClient client = new WebClient())
        {
            client.DownloadFile("http://www.uniprot.org/docs/ptmlist.txt", new_ptmlist_filepath);
        }

        if (!File.Exists(old_ptmlist_filepath))
        {
            // First run: there is nothing to compare against, so simply install the download.
            File.Move(new_ptmlist_filepath, old_ptmlist_filepath);
        }
        else if (string.Equals(File.ReadAllText(old_ptmlist_filepath), File.ReadAllText(new_ptmlist_filepath)))
        {
            File.Delete(new_ptmlist_filepath);
        }
        else
        {
            File.Delete(old_ptmlist_filepath);
            File.Move(new_ptmlist_filepath, old_ptmlist_filepath);
        }
    }
    catch
    {
        // Deliberately swallowed: being offline or lacking write access must not prevent a search.
    }
}
/// <summary>
/// Command-line entry point: reads the search settings from <paramref name="args"/>, builds a
/// <see cref="DatabaseSearcher"/> with them and runs the search. With no arguments the usage text is printed.
/// </summary>
/// <param name="args">Command-line arguments, interpreted by the <see cref="Arguments"/> class.</param>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine(Program.GetProductNameAndVersion() + " USAGE");
        return;
    }

    Arguments arguments = new Arguments(args);

    // Everything below dereferences these two, so fail with a message instead of a NullReferenceException.
    if (arguments["d"] == null || arguments["db"] == null)
    {
        ReportArgumentError("Both the data files (d) and the proteome database (db) arguments are required.");
        return;
    }

    List<string> data = new List<string>(arguments["d"].Split(','));
    int min_assumed_precursor_charge_state = GetIntArgument(arguments, "minprecz", 2);
    int max_assumed_precursor_charge_state = GetIntArgument(arguments, "maxprecz", 4);
    double abs_threshold = GetDoubleArgument(arguments, "at", -1.0);
    double rel_threshold_percent = GetDoubleArgument(arguments, "rt", -1.0);
    int max_peaks = GetIntArgument(arguments, "mp", 400);
    bool assign_charge_states = GetBoolArgument(arguments, "acs", true);
    bool deisotope = GetBoolArgument(arguments, "di", true);

    string database = arguments["db"];
    bool database_is_xml = Path.GetExtension(database).Equals(".xml", StringComparison.InvariantCultureIgnoreCase);

    // Stays null unless the database is UniProt XML and UniProt modifications were not disabled.
    Dictionary<string, Modification> known_variable_modifications = null;
    HashSet<Modification> variable_mods = new HashSet<Modification>();
    if (database_is_xml && !GetBoolArgument(arguments, "noup", false))
    {
        known_variable_modifications = ProteomeDatabaseReader.ReadUniProtXmlModifications(database);
        variable_mods.UnionWith(known_variable_modifications.Values);
    }

    bool append_decoys;
    if (arguments["ad"] != null)
    {
        append_decoys = bool.Parse(arguments["ad"]);
    }
    else
    {
        append_decoys = database_is_xml || !ProteomeDatabaseReader.HasDecoyProteins(database);
    }

    ProteaseDictionary proteases = ProteaseDictionary.Instance;
    Protease protease = proteases["trypsin (no proline rule)"];
    if (arguments["p"] != null)
    {
        protease = proteases[arguments["p"]];
    }

    int max_missed_cleavages = GetIntArgument(arguments, "mmc", 2);
    InitiatorMethionineBehavior initiator_methionine_behavior = GetEnumArgument(arguments, "imb", InitiatorMethionineBehavior.Variable);

    ModificationDictionary mods = ModificationDictionary.Instance;
    List<Modification> fixed_mods = new List<Modification>();
    if (arguments["fm"] != null)
    {
        foreach (string fixed_mod in arguments["fm"].Split(';'))
        {
            Modification mod;
            if (!mods.TryGetValue(fixed_mod, out mod))
            {
                ReportArgumentError("Unknown fixed modification: " + fixed_mod);
                return;
            }
            fixed_mods.Add(mod);
        }
    }
    if (arguments["vm"] != null)
    {
        foreach (string variable_mod in arguments["vm"].Split(';'))
        {
            Modification mod;
            bool found = mods.TryGetValue(variable_mod, out mod);
            if (!found && known_variable_modifications != null)
            {
                found = known_variable_modifications.TryGetValue(variable_mod, out mod);
            }
            if (!found)
            {
                // Previously this either threw a NullReferenceException or silently added null to the set.
                ReportArgumentError("Unknown variable modification: " + variable_mod);
                return;
            }
            variable_mods.Add(mod);
        }
    }

    int max_variable_mod_isoforms_per_peptide = GetIntArgument(arguments, "mvmi", 1024);

    double precursor_mass_tolerance_value = GetDoubleArgument(arguments, "precmtv", 2.1);
    MassToleranceUnits precursor_mass_tolerance_units = GetEnumArgument(arguments, "precmtu", MassToleranceUnits.Da);
    MassTolerance precursor_mass_tolerance = new MassTolerance(precursor_mass_tolerance_value, precursor_mass_tolerance_units);
    MassType precursor_mass_type = GetEnumArgument(arguments, "precmt", MassType.Monoisotopic);

    List<double> accepted_precursor_mass_errors = new List<double>();
    if (arguments["apme"] != null && arguments["apme"].Length > 0)
    {
        foreach (string accepted_precursor_mass_error in arguments["apme"].Split(';'))
        {
            accepted_precursor_mass_errors.Add(double.Parse(accepted_precursor_mass_error, CultureInfo.InvariantCulture));
        }
    }
    else
    {
        accepted_precursor_mass_errors.Add(0.0);
    }

    double product_mass_tolerance_value = GetDoubleArgument(arguments, "prodmtv", 0.015);
    MassToleranceUnits product_mass_tolerance_units = GetEnumArgument(arguments, "prodmtu", MassToleranceUnits.Da);
    MassTolerance product_mass_tolerance = new MassTolerance(product_mass_tolerance_value, product_mass_tolerance_units);
    MassType product_mass_type = GetEnumArgument(arguments, "prodmt", MassType.Monoisotopic);

    // The fdr argument is given in percent; the searcher receives a fraction.
    double max_fdr = 0.01;
    if (arguments["fdr"] != null)
    {
        max_fdr = double.Parse(arguments["fdr"], CultureInfo.InvariantCulture) / 100.0;
    }

    bool consider_mods_unique = GetBoolArgument(arguments, "cmu", false);
    int max_threads = GetIntArgument(arguments, "mt", Environment.ProcessorCount);
    bool minimize_memory_usage = GetBoolArgument(arguments, "mmu", false);
    string output_folder = arguments["o"] != null ? arguments["o"] : Environment.CurrentDirectory;

    DatabaseSearcher database_searcher = new DatabaseSearcher(data,
        min_assumed_precursor_charge_state, max_assumed_precursor_charge_state,
        abs_threshold, rel_threshold_percent, max_peaks,
        assign_charge_states, deisotope,
        database, append_decoys,
        protease, max_missed_cleavages, initiator_methionine_behavior,
        fixed_mods, variable_mods, max_variable_mod_isoforms_per_peptide,
        precursor_mass_tolerance, precursor_mass_type,
        accepted_precursor_mass_errors,
        product_mass_tolerance, product_mass_type,
        max_fdr, consider_mods_unique,
        max_threads, minimize_memory_usage,
        output_folder);

    database_searcher.Starting += HandleStarting;
    database_searcher.StartingFile += HandleStartingFile;
    database_searcher.UpdateStatus += HandleUpdateStatus;
    database_searcher.UpdateProgress += HandleUpdateProgress;
    database_searcher.ThrowException += HandleThrowException;
    database_searcher.FinishedFile += HandleFinishedFile;
    database_searcher.Finished += HandleFinished;

    database_searcher.Search();
}

/// <summary>Writes an argument error to standard error and marks the process exit code as failed.</summary>
private static void ReportArgumentError(string message)
{
    Console.Error.WriteLine(message);
    Environment.ExitCode = 1;
}

/// <summary>Returns the named argument parsed as an invariant-culture integer, or the default when it is absent.</summary>
private static int GetIntArgument(Arguments arguments, string name, int default_value)
{
    string text = arguments[name];
    return text != null ? int.Parse(text, CultureInfo.InvariantCulture) : default_value;
}

/// <summary>Returns the named argument parsed as an invariant-culture double, or the default when it is absent.</summary>
private static double GetDoubleArgument(Arguments arguments, string name, double default_value)
{
    string text = arguments[name];
    return text != null ? double.Parse(text, CultureInfo.InvariantCulture) : default_value;
}

/// <summary>Returns the named argument parsed as a boolean, or the default when it is absent.</summary>
private static bool GetBoolArgument(Arguments arguments, string name, bool default_value)
{
    string text = arguments[name];
    return text != null ? bool.Parse(text) : default_value;
}

/// <summary>Returns the named argument parsed case-insensitively as an enum member, or the default when it is absent.</summary>
private static T GetEnumArgument<T>(Arguments arguments, string name, T default_value) where T : struct
{
    string text = arguments[name];
    return text != null ? (T)Enum.Parse(typeof(T), text, true) : default_value;
}
/// <summary>
/// Form load handler: fills the protease, modification, mass-type and tolerance-unit controls, then
/// restores the saved settings from settings.tsv in the user application data folder, falling back
/// to the built-in defaults when that file is missing or cannot be read.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void frmMain_Load(object sender, EventArgs e)
{
    Text = Program.GetProductNameAndVersion();
    label1.Text = FORM_LABEL;
    ofdData.Filter = DIALOG_FILTER;
    ofdFasta.Filter = Environment.OSVersion.Platform == PlatformID.Unix
        ? "FASTA proteome database files (" + CASE_INSENSITIVE_FASTA_EXTENSIONS + ")|" + CASE_SENSITIVE_FASTA_EXTENSIONS + "|UniProt XML proteome database files (*.xml)|*.xml;*.XML"
        : "FASTA proteome database files|" + CASE_INSENSITIVE_FASTA_EXTENSIONS + "|UniProt XML proteome database files|*.xml";

    if (Application.ProductName.Contains("Thermo"))
    {
        chkDeisotope.Enabled = false;
        chkDeisotope.Checked = false;
    }

    ProteaseDictionary proteases = null;
    try
    {
        proteases = ProteaseDictionary.Instance;
        foreach (Protease protease in proteases.Values)
        {
            cboProtease.Items.Add(protease);
        }
    }
    catch
    {
        MessageBox.Show("Your proteases file (" + Path.Combine(Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]), "proteases.tsv") + ") is likely corrupt. Please correct it. Program will now exit.");
        Application.Exit();
        // Application.Exit() only requests shutdown; without this return the handler would carry on
        // and dereference the null protease dictionary below.
        return;
    }

    foreach (string behavior_name in Enum.GetNames(typeof(InitiatorMethionineBehavior)))
    {
        cboInitiatorMethionineBehavior.Items.Add(behavior_name.ToLowerInvariant());
    }

    try
    {
        ModificationDictionary modifications = ModificationDictionary.Instance;
        clbFixedModifications.BeginUpdate();
        clbVariableModifications.BeginUpdate();
        try
        {
            foreach (Modification modification in modifications.Values)
            {
                clbFixedModifications.Items.Add(modification, modification.DefaultFixed);
                clbVariableModifications.Items.Add(modification, modification.DefaultVariable);
            }
        }
        finally
        {
            // Always re-enable painting, even if adding an item failed part-way through.
            clbFixedModifications.EndUpdate();
            clbVariableModifications.EndUpdate();
        }
    }
    catch
    {
        MessageBox.Show("Your modifications file (" + Path.Combine(Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]), "modifications.tsv") + ") is likely corrupt. Please correct it.");
    }

    cboPrecursorMassToleranceUnits.Items.AddRange(Enum.GetNames(typeof(MassToleranceUnits)));
    cboProductMassToleranceUnits.Items.AddRange(Enum.GetNames(typeof(MassToleranceUnits)));
    foreach (string mass_type in Enum.GetNames(typeof(MassType)))
    {
        cboPrecursorMassType.Items.Add(mass_type.ToLowerInvariant());
        cboProductMassType.Items.Add(mass_type.ToLowerInvariant());
    }

    numMaxThreads.Maximum = Environment.ProcessorCount;

    string settings_filepath = Path.Combine(Application.UserAppDataPath, "settings.tsv");
    if (File.Exists(settings_filepath))
    {
        try
        {
            LoadSettings(settings_filepath, proteases);
            return;
        }
        catch
        {
            MessageBox.Show("Your settings file (" + settings_filepath + ") is likely corrupt. Defaults will be used.");
        }
    }

    // Defaults, used when there is no settings file or it could not be read.
    cboProtease.SelectedItem = proteases["trypsin (no proline rule)"];
    cboInitiatorMethionineBehavior.SelectedIndex = (int)InitiatorMethionineBehavior.Variable;
    cboPrecursorMassToleranceUnits.SelectedIndex = (int)MassToleranceUnits.Da;
    cboPrecursorMassType.SelectedIndex = (int)MassType.Monoisotopic;
    cboProductMassToleranceUnits.SelectedIndex = (int)MassToleranceUnits.Da;
    cboProductMassType.SelectedIndex = (int)MassType.Monoisotopic;
    numMaxThreads.Value = numMaxThreads.Maximum;
}

/// <summary>
/// Reads tab-separated name/value lines from the settings file and applies each one to the form.
/// Throws if a non-empty line is malformed or a value cannot be parsed.
/// </summary>
private void LoadSettings(string settings_filepath, ProteaseDictionary proteases)
{
    using (StreamReader settings = new StreamReader(settings_filepath))
    {
        string line;
        while ((line = settings.ReadLine()) != null)
        {
            // A blank line (e.g. a trailing newline) is not a reason to reject the whole file.
            if (line.Length == 0)
            {
                continue;
            }

            string[] fields = line.Split('\t');
            ApplySetting(fields[0], fields[1], proteases);
        }
    }
}

/// <summary>Applies one saved setting to its control; unrecognised setting names are ignored.</summary>
private void ApplySetting(string name, string value, ProteaseDictionary proteases)
{
    string[] value_fields;
    switch (name)
    {
        case "Minimum Assumed Precursor Charge State":
            numMinimumAssumedPrecursorChargeState.Value = int.Parse(value, CultureInfo.InvariantCulture);
            break;
        case "Maximum Assumed Precursor Charge State":
            numMaximumAssumedPrecursorChargeState.Value = int.Parse(value, CultureInfo.InvariantCulture);
            break;
        case "Absolute MS/MS Peak Threshold":
            value_fields = value.Split(';');
            chkAbsoluteThreshold.Checked = bool.Parse(value_fields[0]);
            txtAbsoluteThreshold.Text = value_fields[1];
            break;
        case "Relative MS/MS Peak Threshold (%)":
            value_fields = value.Split(';');
            chkRelativeThreshold.Checked = bool.Parse(value_fields[0]);
            txtRelativeThresholdPercent.Text = value_fields[1];
            break;
        case "Maximum Number of MS/MS Peaks":
            value_fields = value.Split(';');
            chkMaxNumPeaks.Checked = bool.Parse(value_fields[0]);
            numMaxPeaks.Value = int.Parse(value_fields[1], CultureInfo.InvariantCulture);
            break;
        case "Assign Charge States":
            chkAssignChargeStates.Checked = bool.Parse(value);
            break;
        case "De-isotope":
            if (!Application.ProductName.Contains("Thermo"))
            {
                chkDeisotope.Checked = bool.Parse(value);
            }
            break;
        case "Protease":
            cboProtease.SelectedItem = proteases[value];
            break;
        case "Maximum Missed Cleavages":
            numMaxMissedCleavages.Value = int.Parse(value, CultureInfo.InvariantCulture);
            break;
        case "Initiator Methionine Behavior":
            cboInitiatorMethionineBehavior.SelectedIndex = (int)Enum.Parse(typeof(InitiatorMethionineBehavior), value, true);
            break;
        case "Maximum Variable Modification Isoforms per Peptide":
            numMaxVariableModIsoforms.Value = int.Parse(value, CultureInfo.InvariantCulture);
            break;
        case "Precursor Mass Tolerance":
            numPrecursorMassTolerance.Value = decimal.Parse(value, CultureInfo.InvariantCulture);
            break;
        case "Precursor Mass Tolerance Units":
            cboPrecursorMassToleranceUnits.SelectedIndex = (int)Enum.Parse(typeof(MassToleranceUnits), value, true);
            break;
        case "Precursor Mass Type":
            cboPrecursorMassType.SelectedIndex = (int)Enum.Parse(typeof(MassType), value, true);
            break;
        case "Accepted Precursor Mass Errors (Da)":
            txtAcceptedPrecursorMassErrors.Text = value;
            break;
        case "Product Mass Tolerance":
            numProductMassTolerance.Value = decimal.Parse(value, CultureInfo.InvariantCulture);
            break;
        case "Product Mass Tolerance Units":
            cboProductMassToleranceUnits.SelectedIndex = (int)Enum.Parse(typeof(MassToleranceUnits), value, true);
            break;
        case "Product Mass Type":
            cboProductMassType.SelectedIndex = (int)Enum.Parse(typeof(MassType), value, true);
            break;
        case "Maximum FDR (%)":
            numMaximumFalseDiscoveryRatePercent.Value = decimal.Parse(value, CultureInfo.InvariantCulture);
            break;
        case "Consider Modified Forms as Unique Peptides":
            chkConsiderModifiedUnique.Checked = bool.Parse(value);
            break;
        case "Maximum Threads":
            // The saved value may come from a machine with a different core count; clamp it into the
            // control's range instead of letting the assignment throw.
            decimal requested_threads = int.Parse(value, CultureInfo.InvariantCulture);
            numMaxThreads.Value = Math.Min(numMaxThreads.Maximum, Math.Max(numMaxThreads.Minimum, requested_threads));
            break;
        case "Minimize Memory Usage":
            chkMinimizeMemoryUsage.Checked = bool.Parse(value);
            break;
    }
}