示例#1
0
        /// <summary>
        /// Builds the set of modifications referenced by "modified residue" features in a UniProt XML
        /// proteome database.
        /// </summary>
        /// <remarks>
        /// Each distinct feature description is first looked up in <see cref="ModificationDictionary"/>.
        /// Names that are not found there are resolved against UniProt's ptmlist.txt (refreshed from the
        /// web on a best-effort basis) with PSI-MOD names taken from PSI-MOD.obo.xml; both files are
        /// expected next to the executable.
        /// </remarks>
        /// <param name="uniProtXmlProteomeDatabaseFilepath">Path of the UniProt XML proteome database to scan.</param>
        /// <returns>
        /// Modifications keyed by "UniProt: " followed by the modification name. Names that cannot be
        /// resolved by either source are omitted.
        /// </returns>
        public static Dictionary<string, Modification> ReadUniProtXmlModifications(string uniProtXmlProteomeDatabaseFilepath)
        {
            if (proteinExistenceCodes == null)
            {
                InitializeDictionaries();
            }

            // The same modification name normally occurs on many features, so collect each name only
            // once (in first-seen order); adding a repeated name to the result dictionary would throw.
            List<string>    distinct_names = new List<string>();
            HashSet<string> seen_names     = new HashSet<string>();

            using (XmlReader xml = XmlReader.Create(uniProtXmlProteomeDatabaseFilepath))
            {
                while (xml.ReadToFollowing("feature"))
                {
                    if (xml.GetAttribute("type") != "modified residue")
                    {
                        continue;
                    }
                    if (ONLY_CONSIDER_MODIFICATIONS_WITH_EVIDENCE && xml.GetAttribute("evidence") == null)
                    {
                        continue;
                    }

                    // GetAttribute returns null when the attribute is absent.
                    string description = xml.GetAttribute("description");
                    if (description == null || description.Contains("variant"))
                    {
                        continue;
                    }

                    // Keep only the text before the first ';'.
                    int semicolon_index = description.IndexOf(';');
                    if (semicolon_index >= 0)
                    {
                        description = description.Substring(0, semicolon_index);
                    }

                    if (seen_names.Add(description))
                    {
                        distinct_names.Add(description);
                    }
                }
            }

            Dictionary<string, Modification> modifications = new Dictionary<string, Modification>();

            ModificationDictionary user_modifications = ModificationDictionary.Instance;
            HashSet<string>        unresolved_names   = new HashSet<string>();

            foreach (string modification_name in distinct_names)
            {
                Modification user_modification;
                if (user_modifications.TryGetValue(modification_name, out user_modification))
                {
                    string uniprot_name = "UniProt: " + modification_name;
                    modifications.Add(uniprot_name, new Modification(uniprot_name, user_modification.Type, user_modification.AminoAcid, user_modification.MonoisotopicMassShift, user_modification.AverageMassShift, user_modification.MonoisotopicNeutralLossMass, user_modification.AverageNeutralLossMass, false, true, user_modification.Database, user_modification.DatabaseAccessionNumber, user_modification.DatabaseName, true));
                }
                else
                {
                    unresolved_names.Add(modification_name);
                }
            }

            if (unresolved_names.Count == 0)
            {
                return modifications;
            }

            string application_directory = Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]);
            string ptmlist_filepath      = Path.Combine(application_directory, "ptmlist.txt");
            TryRefreshUniProtPtmList(ptmlist_filepath, Path.Combine(application_directory, "ptmlist.new.txt"));

            XmlDocument psi_mod_document = new XmlDocument();
            psi_mod_document.Load(Path.Combine(application_directory, "PSI-MOD.obo.xml"));
            XPathNavigator psi_mod = psi_mod_document.CreateNavigator();

            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (StreamReader uniprot_mods = new StreamReader(ptmlist_filepath))
            {
                string           description               = null;
                string           feature_type              = null;
                ModificationType modification_type         = ModificationType.AminoAcidResidue;
                char             amino_acid_residue        = '\0';
                double           monoisotopic_mass_shift   = double.NaN;
                double           average_mass_shift        = double.NaN;
                string           database                  = null;
                int              database_accession_number = -1;
                string           database_name             = null;

                string line;
                while ((line = uniprot_mods.ReadLine()) != null)
                {
                    if (line.Length < 2)
                    {
                        continue;
                    }

                    // Lines are a two-character code followed by the payload starting at column 5;
                    // a line too short to carry a payload yields an empty string instead of throwing.
                    string payload = line.Length > 5 ? line.Substring(5) : string.Empty;

                    switch (line.Substring(0, 2))
                    {
                    case "ID":
                        description = payload;
                        break;

                    case "FT":
                        feature_type = payload;
                        break;

                    case "TG":
                        if (feature_type == "MOD_RES" && payload.Length > 0)
                        {
                            aminoAcidCodes.TryGetValue(char.ToUpperInvariant(payload[0]) + payload.Substring(1).TrimEnd('.'), out amino_acid_residue);
                        }
                        break;

                    case "PP":
                        if (feature_type == "MOD_RES")
                        {
                            modificationTypeCodes.TryGetValue(payload, out modification_type);
                        }
                        break;

                    case "MM":
                        // Machine-readable file: parse independently of the user's regional settings.
                        monoisotopic_mass_shift = double.Parse(payload, invariant);
                        break;

                    case "MA":
                        average_mass_shift = double.Parse(payload, invariant);
                        break;

                    case "DR":
                        if (line.Contains("PSI-MOD"))
                        {
                            Match match = PSI_MOD_ACCESSION_NUMBER_REGEX.Match(payload);
                            if (match.Success)
                            {
                                database = match.Groups[1].Value;
                                database_accession_number = int.Parse(match.Groups[2].Value, invariant);
                                XPathNavigator term = psi_mod.SelectSingleNode(@"/obo/term[id='MOD:" + database_accession_number.ToString("00000", invariant) + "']");
                                // The accession may be missing from the local PSI-MOD file.
                                XPathNavigator term_name = term != null ? term.SelectSingleNode("name") : null;
                                database_name = term_name != null ? term_name.Value : null;
                            }
                        }
                        break;

                    case "//":
                        if (feature_type == "MOD_RES" && unresolved_names.Contains(description) && (!double.IsNaN(monoisotopic_mass_shift) || !double.IsNaN(average_mass_shift)))
                        {
                            // NOTE(review): modification_type is parsed from the PP line but the
                            // constructor is given ModificationType.AminoAcidResidue; kept as in the
                            // original - confirm whether modification_type was intended here.
                            Modification modification = new Modification("UniProt: " + description, ModificationType.AminoAcidResidue, amino_acid_residue, monoisotopic_mass_shift, average_mass_shift, 0.0, 0.0, false, true, database, database_accession_number, database_name, true);
                            if (!modifications.ContainsKey(modification.Description))
                            {
                                modifications.Add(modification.Description, modification);
                            }
                        }

                        // Reset every per-entry field, including the database cross-reference, so
                        // that nothing leaks from one entry into the next.
                        description               = null;
                        feature_type              = null;
                        modification_type         = ModificationType.AminoAcidResidue;
                        amino_acid_residue        = '\0';
                        monoisotopic_mass_shift   = double.NaN;
                        average_mass_shift        = double.NaN;
                        database                  = null;
                        database_accession_number = -1;
                        database_name             = null;
                        break;
                    }
                }
            }

            return modifications;
        }

        /// <summary>
        /// Best-effort refresh of the local UniProt ptmlist.txt: downloads the current list and swaps it
        /// in when it differs from (or there is no) local copy. Any failure is traced and otherwise
        /// ignored so that an existing local copy can still be used.
        /// </summary>
        /// <param name="ptmlist_filepath">Path of the local ptmlist.txt.</param>
        /// <param name="downloaded_filepath">Temporary path that receives the download.</param>
        private static void TryRefreshUniProtPtmList(string ptmlist_filepath, string downloaded_filepath)
        {
            try
            {
                using (WebClient client = new WebClient())
                {
                    client.DownloadFile("http://www.uniprot.org/docs/ptmlist.txt", downloaded_filepath);
                }

                if (File.Exists(ptmlist_filepath) && string.Equals(File.ReadAllText(ptmlist_filepath), File.ReadAllText(downloaded_filepath)))
                {
                    File.Delete(downloaded_filepath);
                }
                else
                {
                    // File.Delete does not throw when the file does not exist, which covers a first
                    // run without a local copy.
                    File.Delete(ptmlist_filepath);
                    File.Move(downloaded_filepath, ptmlist_filepath);
                }
            }
            catch (Exception ex)
            {
                // Deliberately non-fatal (e.g. no network access or a read-only install directory).
                System.Diagnostics.Trace.TraceWarning("Could not refresh " + ptmlist_filepath + " from UniProt: " + ex.Message);
            }
        }
示例#2
0
        /// <summary>
        /// Command-line entry point: reads the search settings from <paramref name="args"/>, applying
        /// the defaults below for anything not supplied, then runs a <see cref="DatabaseSearcher"/>.
        /// </summary>
        /// <param name="args">Command-line arguments; with none, only the usage banner is printed.</param>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine(Program.GetProductNameAndVersion() + " USAGE");
                return;
            }

            Arguments arguments = new Arguments(args);

            // Both of these are dereferenced unconditionally below, so fail with a clear message
            // instead of a NullReferenceException.
            if (arguments["d"] == null || arguments["db"] == null)
            {
                Console.Error.WriteLine("The 'd' (data files) and 'db' (proteome database) arguments are required.");
                Console.WriteLine(Program.GetProductNameAndVersion() + " USAGE");
                Environment.ExitCode = 1;
                return;
            }

            List<string> data = new List<string>(arguments["d"].Split(','));
            int min_assumed_precursor_charge_state = 2;
            if (arguments["minprecz"] != null)
            {
                min_assumed_precursor_charge_state = int.Parse(arguments["minprecz"], CultureInfo.InvariantCulture);
            }
            int max_assumed_precursor_charge_state = 4;
            if (arguments["maxprecz"] != null)
            {
                max_assumed_precursor_charge_state = int.Parse(arguments["maxprecz"], CultureInfo.InvariantCulture);
            }
            double abs_threshold = -1.0;
            if (arguments["at"] != null)
            {
                abs_threshold = double.Parse(arguments["at"], CultureInfo.InvariantCulture);
            }
            double rel_threshold_percent = -1.0;
            if (arguments["rt"] != null)
            {
                rel_threshold_percent = double.Parse(arguments["rt"], CultureInfo.InvariantCulture);
            }
            int max_peaks = 400;
            if (arguments["mp"] != null)
            {
                max_peaks = int.Parse(arguments["mp"], CultureInfo.InvariantCulture);
            }
            bool assign_charge_states = true;
            if (arguments["acs"] != null)
            {
                assign_charge_states = bool.Parse(arguments["acs"]);
            }
            bool deisotope = true;
            if (arguments["di"] != null)
            {
                deisotope = bool.Parse(arguments["di"]);
            }

            string database        = arguments["db"];
            bool   database_is_xml = Path.GetExtension(database).Equals(".xml", StringComparison.InvariantCultureIgnoreCase);

            // Stays null unless the database is UniProt XML and its modifications were not disabled.
            Dictionary<string, Modification> known_variable_modifications = null;
            HashSet<Modification>            variable_mods = new HashSet<Modification>();
            if (database_is_xml)
            {
                bool no_uniprot_mods = false;
                if (arguments["noup"] != null)
                {
                    no_uniprot_mods = bool.Parse(arguments["noup"]);
                }
                if (!no_uniprot_mods)
                {
                    known_variable_modifications = ProteomeDatabaseReader.ReadUniProtXmlModifications(database);
                    variable_mods.UnionWith(known_variable_modifications.Values);
                }
            }

            bool append_decoys;
            if (arguments["ad"] != null)
            {
                append_decoys = bool.Parse(arguments["ad"]);
            }
            else
            {
                append_decoys = database_is_xml || !ProteomeDatabaseReader.HasDecoyProteins(database);
            }

            ProteaseDictionary proteases = ProteaseDictionary.Instance;
            Protease           protease  = proteases["trypsin (no proline rule)"];
            if (arguments["p"] != null)
            {
                protease = proteases[arguments["p"]];
            }
            int max_missed_cleavages = 2;
            if (arguments["mmc"] != null)
            {
                max_missed_cleavages = int.Parse(arguments["mmc"], CultureInfo.InvariantCulture);
            }
            InitiatorMethionineBehavior initiator_methionine_behavior = InitiatorMethionineBehavior.Variable;
            if (arguments["imb"] != null)
            {
                initiator_methionine_behavior = (InitiatorMethionineBehavior)Enum.Parse(typeof(InitiatorMethionineBehavior), arguments["imb"], true);
            }

            ModificationDictionary mods       = ModificationDictionary.Instance;
            List<Modification>     fixed_mods = new List<Modification>();
            if (arguments["fm"] != null)
            {
                foreach (string fixed_mod in arguments["fm"].Split(';'))
                {
                    fixed_mods.Add(mods[fixed_mod]);
                }
            }
            if (arguments["vm"] != null)
            {
                foreach (string variable_mod in arguments["vm"].Split(';'))
                {
                    // Look in the user's modifications first, then in those read from the UniProt
                    // XML database (if any were loaded).
                    Modification mod;
                    bool found = mods.TryGetValue(variable_mod, out mod)
                                 || (known_variable_modifications != null && known_variable_modifications.TryGetValue(variable_mod, out mod));
                    if (!found)
                    {
                        // Searching without a requested modification would silently change the
                        // results, so stop instead of adding a null entry.
                        Console.Error.WriteLine("Unknown variable modification: " + variable_mod);
                        Environment.ExitCode = 1;
                        return;
                    }
                    variable_mods.Add(mod);
                }
            }
            int max_variable_mod_isoforms_per_peptide = 1024;
            if (arguments["mvmi"] != null)
            {
                max_variable_mod_isoforms_per_peptide = int.Parse(arguments["mvmi"], CultureInfo.InvariantCulture);
            }

            double precursor_mass_tolerance_value = 2.1;
            if (arguments["precmtv"] != null)
            {
                precursor_mass_tolerance_value = double.Parse(arguments["precmtv"], CultureInfo.InvariantCulture);
            }
            MassToleranceUnits precursor_mass_tolerance_units = MassToleranceUnits.Da;
            if (arguments["precmtu"] != null)
            {
                precursor_mass_tolerance_units = (MassToleranceUnits)Enum.Parse(typeof(MassToleranceUnits), arguments["precmtu"], true);
            }
            MassTolerance precursor_mass_tolerance = new MassTolerance(precursor_mass_tolerance_value, precursor_mass_tolerance_units);
            MassType      precursor_mass_type      = MassType.Monoisotopic;
            if (arguments["precmt"] != null)
            {
                precursor_mass_type = (MassType)Enum.Parse(typeof(MassType), arguments["precmt"], true);
            }
            List<double> accepted_precursor_mass_errors = new List<double>();
            if (arguments["apme"] != null && arguments["apme"].Length > 0)
            {
                foreach (string accepted_precursor_mass_error in arguments["apme"].Split(';'))
                {
                    accepted_precursor_mass_errors.Add(double.Parse(accepted_precursor_mass_error, CultureInfo.InvariantCulture));
                }
            }
            else
            {
                accepted_precursor_mass_errors.Add(0.0);
            }

            double product_mass_tolerance_value = 0.015;
            if (arguments["prodmtv"] != null)
            {
                product_mass_tolerance_value = double.Parse(arguments["prodmtv"], CultureInfo.InvariantCulture);
            }
            MassToleranceUnits product_mass_tolerance_units = MassToleranceUnits.Da;
            if (arguments["prodmtu"] != null)
            {
                product_mass_tolerance_units = (MassToleranceUnits)Enum.Parse(typeof(MassToleranceUnits), arguments["prodmtu"], true);
            }
            MassTolerance product_mass_tolerance = new MassTolerance(product_mass_tolerance_value, product_mass_tolerance_units);
            MassType      product_mass_type      = MassType.Monoisotopic;
            if (arguments["prodmt"] != null)
            {
                product_mass_type = (MassType)Enum.Parse(typeof(MassType), arguments["prodmt"], true);
            }

            // Supplied as a percentage, passed on as a fraction.
            double max_fdr = 0.01;
            if (arguments["fdr"] != null)
            {
                max_fdr = double.Parse(arguments["fdr"], CultureInfo.InvariantCulture) / 100.0;
            }
            bool consider_mods_unique = false;
            if (arguments["cmu"] != null)
            {
                consider_mods_unique = bool.Parse(arguments["cmu"]);
            }
            int max_threads = Environment.ProcessorCount;
            if (arguments["mt"] != null)
            {
                max_threads = int.Parse(arguments["mt"], CultureInfo.InvariantCulture);
            }
            bool minimize_memory_usage = false;
            if (arguments["mmu"] != null)
            {
                minimize_memory_usage = bool.Parse(arguments["mmu"]);
            }
            string output_folder = Environment.CurrentDirectory;
            if (arguments["o"] != null)
            {
                output_folder = arguments["o"];
            }

            DatabaseSearcher database_searcher = new DatabaseSearcher(data,
                                                                      min_assumed_precursor_charge_state, max_assumed_precursor_charge_state,
                                                                      abs_threshold, rel_threshold_percent, max_peaks,
                                                                      assign_charge_states, deisotope,
                                                                      database, append_decoys,
                                                                      protease, max_missed_cleavages, initiator_methionine_behavior,
                                                                      fixed_mods, variable_mods, max_variable_mod_isoforms_per_peptide,
                                                                      precursor_mass_tolerance, precursor_mass_type,
                                                                      accepted_precursor_mass_errors,
                                                                      product_mass_tolerance, product_mass_type,
                                                                      max_fdr, consider_mods_unique,
                                                                      max_threads, minimize_memory_usage,
                                                                      output_folder);

            database_searcher.Starting       += HandleStarting;
            database_searcher.StartingFile   += HandleStartingFile;
            database_searcher.UpdateStatus   += HandleUpdateStatus;
            database_searcher.UpdateProgress += HandleUpdateProgress;
            database_searcher.ThrowException += HandleThrowException;
            database_searcher.FinishedFile   += HandleFinishedFile;
            database_searcher.Finished       += HandleFinished;

            database_searcher.Search();
        }
示例#3
0
        /// <summary>
        /// Form load handler: fills the selection controls (proteases, modifications, enum-backed
        /// combo boxes) and then restores the saved settings from settings.tsv in the user's
        /// application data folder, falling back to built-in defaults when that file is missing or
        /// cannot be read.
        /// </summary>
        private void frmMain_Load(object sender, EventArgs e)
        {
            Text            = Program.GetProductNameAndVersion();
            label1.Text     = FORM_LABEL;
            ofdData.Filter  = DIALOG_FILTER;
            ofdFasta.Filter = Environment.OSVersion.Platform == PlatformID.Unix
                ? "FASTA proteome database files (" + CASE_INSENSITIVE_FASTA_EXTENSIONS + ")|" + CASE_SENSITIVE_FASTA_EXTENSIONS + "|UniProt XML proteome database files (*.xml)|*.xml;*.XML"
                : "FASTA proteome database files|" + CASE_INSENSITIVE_FASTA_EXTENSIONS + "|UniProt XML proteome database files|*.xml";
            if (Application.ProductName.Contains("Thermo"))
            {
                chkDeisotope.Enabled = false;
                chkDeisotope.Checked = false;
            }

            ProteaseDictionary proteases;

            try
            {
                proteases = ProteaseDictionary.Instance;
                foreach (Protease protease in proteases.Values)
                {
                    cboProtease.Items.Add(protease);
                }
            }
            catch (Exception)
            {
                MessageBox.Show("Your proteases file (" + Path.Combine(Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]), "proteases.tsv") + ") is likely corrupt. Please correct it. Program will now exit.");
                Application.Exit();
                // Stop here: the rest of this handler dereferences the protease dictionary, which
                // would otherwise be null.
                return;
            }

            foreach (string initiator_methionine_behavior in Enum.GetNames(typeof(InitiatorMethionineBehavior)))
            {
                cboInitiatorMethionineBehavior.Items.Add(initiator_methionine_behavior.ToLowerInvariant());
            }

            try
            {
                ModificationDictionary modifications = ModificationDictionary.Instance;
                clbFixedModifications.BeginUpdate();
                clbVariableModifications.BeginUpdate();
                try
                {
                    foreach (Modification modification in modifications.Values)
                    {
                        clbFixedModifications.Items.Add(modification, modification.DefaultFixed);
                        clbVariableModifications.Items.Add(modification, modification.DefaultVariable);
                    }
                }
                finally
                {
                    // Always pair BeginUpdate with EndUpdate, even when filling the lists fails.
                    clbFixedModifications.EndUpdate();
                    clbVariableModifications.EndUpdate();
                }
            }
            catch (Exception)
            {
                MessageBox.Show("Your modifications file (" + Path.Combine(Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]), "modifications.tsv") + ") is likely corrupt. Please correct it.");
            }

            cboPrecursorMassToleranceUnits.Items.AddRange(Enum.GetNames(typeof(MassToleranceUnits)));
            cboProductMassToleranceUnits.Items.AddRange(Enum.GetNames(typeof(MassToleranceUnits)));

            foreach (string mass_type in Enum.GetNames(typeof(MassType)))
            {
                string mass_type_item = mass_type.ToLowerInvariant();
                cboPrecursorMassType.Items.Add(mass_type_item);
                cboProductMassType.Items.Add(mass_type_item);
            }

            numMaxThreads.Maximum = Environment.ProcessorCount;

            string settings_filepath = Path.Combine(Application.UserAppDataPath, "settings.tsv");

            if (File.Exists(settings_filepath))
            {
                try
                {
                    using (StreamReader settings = new StreamReader(settings_filepath))
                    {
                        string line;
                        while ((line = settings.ReadLine()) != null)
                        {
                            // A blank line (e.g. a trailing newline added by an editor) is not corruption.
                            if (line.Length == 0)
                            {
                                continue;
                            }

                            // Each line is "<setting name>\t<value>"; some values are "<bool>;<value>".
                            string[] fields = line.Split('\t');
                            string   name   = fields[0];
                            string   value  = fields[1];
                            string[] value_fields;

                            switch (name)
                            {
                            case "Minimum Assumed Precursor Charge State":
                                numMinimumAssumedPrecursorChargeState.Value = int.Parse(value, CultureInfo.InvariantCulture);
                                break;

                            case "Maximum Assumed Precursor Charge State":
                                numMaximumAssumedPrecursorChargeState.Value = int.Parse(value, CultureInfo.InvariantCulture);
                                break;

                            case "Absolute MS/MS Peak Threshold":
                                value_fields = value.Split(';');
                                chkAbsoluteThreshold.Checked = bool.Parse(value_fields[0]);
                                txtAbsoluteThreshold.Text    = value_fields[1];
                                break;

                            case "Relative MS/MS Peak Threshold (%)":
                                value_fields = value.Split(';');
                                chkRelativeThreshold.Checked     = bool.Parse(value_fields[0]);
                                txtRelativeThresholdPercent.Text = value_fields[1];
                                break;

                            case "Maximum Number of MS/MS Peaks":
                                value_fields           = value.Split(';');
                                chkMaxNumPeaks.Checked = bool.Parse(value_fields[0]);
                                numMaxPeaks.Value      = int.Parse(value_fields[1], CultureInfo.InvariantCulture);
                                break;

                            case "Assign Charge States":
                                chkAssignChargeStates.Checked = bool.Parse(value);
                                break;

                            case "De-isotope":
                                // The checkbox is disabled and cleared above for "Thermo" builds.
                                if (!Application.ProductName.Contains("Thermo"))
                                {
                                    chkDeisotope.Checked = bool.Parse(value);
                                }
                                break;

                            case "Protease":
                                cboProtease.SelectedItem = proteases[value];
                                break;

                            case "Maximum Missed Cleavages":
                                numMaxMissedCleavages.Value = int.Parse(value, CultureInfo.InvariantCulture);
                                break;

                            case "Initiator Methionine Behavior":
                                cboInitiatorMethionineBehavior.SelectedIndex = (int)Enum.Parse(typeof(InitiatorMethionineBehavior), value, true);
                                break;

                            case "Maximum Variable Modification Isoforms per Peptide":
                                numMaxVariableModIsoforms.Value = int.Parse(value, CultureInfo.InvariantCulture);
                                break;

                            case "Precursor Mass Tolerance":
                                numPrecursorMassTolerance.Value = decimal.Parse(value, CultureInfo.InvariantCulture);
                                break;

                            case "Precursor Mass Tolerance Units":
                                cboPrecursorMassToleranceUnits.SelectedIndex = (int)Enum.Parse(typeof(MassToleranceUnits), value, true);
                                break;

                            case "Precursor Mass Type":
                                cboPrecursorMassType.SelectedIndex = (int)Enum.Parse(typeof(MassType), value, true);
                                break;

                            case "Accepted Precursor Mass Errors (Da)":
                                txtAcceptedPrecursorMassErrors.Text = value;
                                break;

                            case "Product Mass Tolerance":
                                numProductMassTolerance.Value = decimal.Parse(value, CultureInfo.InvariantCulture);
                                break;

                            case "Product Mass Tolerance Units":
                                cboProductMassToleranceUnits.SelectedIndex = (int)Enum.Parse(typeof(MassToleranceUnits), value, true);
                                break;

                            case "Product Mass Type":
                                cboProductMassType.SelectedIndex = (int)Enum.Parse(typeof(MassType), value, true);
                                break;

                            case "Maximum FDR (%)":
                                numMaximumFalseDiscoveryRatePercent.Value = decimal.Parse(value, CultureInfo.InvariantCulture);
                                break;

                            case "Consider Modified Forms as Unique Peptides":
                                chkConsiderModifiedUnique.Checked = bool.Parse(value);
                                break;

                            case "Maximum Threads":
                                // Cap a saved value that exceeds this machine's processor count.
                                int saved_max_threads = int.Parse(value, CultureInfo.InvariantCulture);
                                numMaxThreads.Value = Math.Min(saved_max_threads, numMaxThreads.Maximum);
                                break;

                            case "Minimize Memory Usage":
                                chkMinimizeMemoryUsage.Checked = bool.Parse(value);
                                break;
                            }
                        }
                    }

                    // Settings restored; skip the built-in defaults below.
                    return;
                }
                catch (Exception)
                {
                    MessageBox.Show("Your settings file (" + settings_filepath + ") is likely corrupt. Defaults will be used.");
                }
            }

            // Built-in defaults, used when there is no settings file or it could not be read.
            cboProtease.SelectedItem = proteases["trypsin (no proline rule)"];
            cboInitiatorMethionineBehavior.SelectedIndex = (int)InitiatorMethionineBehavior.Variable;
            cboPrecursorMassToleranceUnits.SelectedIndex = (int)MassToleranceUnits.Da;
            cboPrecursorMassType.SelectedIndex           = (int)MassType.Monoisotopic;
            cboProductMassToleranceUnits.SelectedIndex   = (int)MassToleranceUnits.Da;
            cboProductMassType.SelectedIndex             = (int)MassType.Monoisotopic;
            numMaxThreads.Value = numMaxThreads.Maximum;
        }