Exemple #1
0
 internal void GetSpectrum(out double[] mz, out double[] intensity)
 {
     if(this.mz == null || this.intensity == null)
     {
         TandemMassSpectra.ReadDataFromSpectrumNavigator(navigator.Select("mzML:binaryDataArrayList/mzML:binaryDataArray/*", xmlNamespaceManager), out mz, out intensity);
         this.mz = mz;
         this.intensity = intensity;
     }
     else
     {
         mz = this.mz;
         intensity = this.intensity;
     }
 }
Exemple #2
0
        private void DoSearch()
        {
            StreamWriter overall_log = null;
            StreamWriter summary = null;
            StreamWriter log = null;
            FileStream proteome_database = null;

            #if !NO_EXCEPTION_HANDLING
            try
            {
            #endif
                DateTime overall_start = DateTime.Now;

                OnUpdateStatus(new StatusEventArgs("Initializing..."));
                OnReportTaskWithoutProgress(EventArgs.Empty);
                OnUpdateProgress(new ProgressEventArgs(0));

                // convert all paths to absolute for outputs
                for(int i = 0; i < dataFilepaths.Count; i++)
                {
                    dataFilepaths[i] = Path.GetFullPath(dataFilepaths[i]);
                }
                proteomeDatabaseFilepath = Path.GetFullPath(proteomeDatabaseFilepath);
                outputFolder = Path.GetFullPath(outputFolder);

                PeptideSpectrumMatch.SetPrecursorMassType(precursorMassType);
                AminoAcidPolymer.SetProductMassType(productMassType);

                proteome_database = new FileStream(proteomeDatabaseFilepath, FileMode.Open, FileAccess.Read, FileShare.Read);

                int target_proteins;
                int decoy_proteins;
                int on_the_fly_decoy_proteins;
                int total_proteins = ProteomeDatabaseReader.CountProteins(proteome_database, onTheFlyDecoys, out target_proteins, out decoy_proteins, out on_the_fly_decoy_proteins);
                double decoys_over_targets_protein_ratio = (double)(decoy_proteins + on_the_fly_decoy_proteins) / target_proteins;

                int num_target_peptides = 0;
                int num_decoy_peptides = 0;
                double decoys_over_targets_peptide_ratio = double.NaN;

                string fixed_modifications = null;
                foreach(Modification fixed_modification in fixedModifications)
                {
                    fixed_modifications += fixed_modification.ToString() + ", ";
                }
                if(fixed_modifications != null)
                {
                    fixed_modifications = fixed_modifications.Substring(0, fixed_modifications.Length - 2);
                }
                else
                {
                    fixed_modifications = "none";
                }
                string variable_modifications = null;
                foreach(Modification variable_modification in variableModifications)
                {
                    variable_modifications += variable_modification.ToString() + ", ";
                }
                if(variable_modifications != null)
                {
                    variable_modifications = variable_modifications.Substring(0, variable_modifications.Length - 2);
                }
                else
                {
                    variable_modifications = "none";
                }

                int total_spectra = 0;
                List<PeptideSpectrumMatch> aggregate_psms = null;

                SortedList<string, HashSet<string>> parents = null;
                Dictionary<string, int> num_spectra = null;
                Dictionary<string, List<PeptideSpectrumMatch>> grouped_aggregate_psms = null;

                Dictionary<string, Modification> known_variable_modifications = new Dictionary<string, Modification>();
                List<Modification> unknown_variable_modifications = new List<Modification>();
                foreach(Modification variable_modification in variableModifications)
                {
                    if(variable_modification.Known)
                    {
                        known_variable_modifications.Add(variable_modification.Description, variable_modification);
                    }
                    else
                    {
                        unknown_variable_modifications.Add(variable_modification);
                    }
                }

                if(dataFilepaths.Count > 1)
                {
                    overall_log = new StreamWriter(Path.Combine(outputFolder, "log.txt"));
                    overall_log.AutoFlush = true;

                    overall_log.WriteLine(Program.GetProductNameAndVersion() + " LOG");
                    overall_log.WriteLine();

                    overall_log.WriteLine("PARAMETERS");
                    string data_filepaths = null;
                    foreach(string data_filepath in dataFilepaths)
                    {
                        data_filepaths += data_filepath.ToString() + ", ";
                    }
                    data_filepaths = data_filepaths.Substring(0, data_filepaths.Length - 2);
                    overall_log.WriteLine("Input Data Files: " + data_filepaths);
                    overall_log.WriteLine("Unknown Precursor Charge State Range: " + minimumAssumedPrecursorChargeState.ToString("+0;-0;0") + ".." + maximumAssumedPrecursorChargeState.ToString("+0;-0;0"));
                    overall_log.WriteLine("Absolute MS/MS Intensity Threshold: " + (absoluteThreshold >= 0.0 ? absoluteThreshold.ToString(CultureInfo.InvariantCulture) : "disabled"));
                    overall_log.WriteLine("Relative MS/MS Intensity Threshold: " + (relativeThresholdPercent >= 0.0 ? relativeThresholdPercent.ToString(CultureInfo.InvariantCulture) + '%' : "disabled"));
                    overall_log.WriteLine("Maximum Number of MS/MS Peaks: " + (maximumNumberOfPeaks >= 0 ? maximumNumberOfPeaks.ToString() : "disabled"));
                    overall_log.WriteLine("Assign Charge States: " + assignChargeStates.ToString().ToLower());
                    overall_log.WriteLine("De-isotope: " + deisotope.ToString().ToLower());
                    overall_log.WriteLine("Proteome Database: " + proteomeDatabaseFilepath);
                    overall_log.WriteLine("Create Target–Decoy Database On The Fly: " + onTheFlyDecoys.ToString().ToLower());
                    overall_log.WriteLine("Protease: " + protease.ToString());
                    overall_log.WriteLine("Maximum Missed Cleavages: " + maximumMissedCleavages.ToString());
                    overall_log.WriteLine("Initiator Methionine Behavior: " + initiatorMethionineBehavior.ToString().ToLower());
                    overall_log.WriteLine("Fixed Modifications: " + fixed_modifications);
                    overall_log.WriteLine("Variable Modifications: " + variable_modifications);
                    overall_log.WriteLine("Maximum Variable Modification Isoforms Per Peptide: " + maximumVariableModificationIsoforms.ToString());
                    overall_log.WriteLine("Precursor Mass Tolerance: ±" + precursorMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + precursorMassTolerance.Units.ToString() + " (" + precursorMassType.ToString().ToLower() + ')');
                    overall_log.WriteLine("Accepted Precursor Mass Errors: " + string.Join("; ", acceptedPrecursorMassErrors) + " Da");
                    overall_log.WriteLine("Product Mass Tolerance: ±" + productMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + productMassTolerance.Units.ToString() + " (" + productMassType.ToString().ToLower() + ')');
                    overall_log.WriteLine("Maximum False Discovery Rate: " + (maximumFalseDiscoveryRate * 100).ToString(CultureInfo.InvariantCulture) + '%');
                    overall_log.WriteLine("Consider Modified Forms as Unique Peptides: " + considerModifiedFormsAsUniquePeptides.ToString().ToLower());
                    overall_log.WriteLine("Maximum Threads: " + maximumThreads.ToString());
                    overall_log.WriteLine("Minimize Memory Usage: " + minimizeMemoryUsage.ToString().ToLower());
                    overall_log.WriteLine("Output Folder: " + outputFolder.ToString());
                    overall_log.WriteLine();

                    overall_log.WriteLine("RESULTS");

                    overall_log.WriteLine(total_proteins.ToString("N0") + " total (" + target_proteins.ToString("N0") + " target + " + decoy_proteins.ToString("N0") + " decoy + " + on_the_fly_decoy_proteins.ToString("N0") + " on-the-fly decoy) proteins");

                    aggregate_psms = new List<PeptideSpectrumMatch>();

                    parents = DetermineSemiAggregateParentFolders(dataFilepaths);
                    if(parents.Count > 0)
                    {
                        num_spectra = new Dictionary<string, int>(dataFilepaths.Count);
                        grouped_aggregate_psms = new Dictionary<string, List<PeptideSpectrumMatch>>(dataFilepaths.Count);
                    }
                }

                summary = new StreamWriter(Path.Combine(outputFolder, "summary.tsv"));
                summary.AutoFlush = true;

                summary.WriteLine("Dataset\tProteins\tMS/MS Spectra\tPSM Morpheus Score Threshold\tTarget PSMs\tDecoy PSMs\tPSM FDR (%)\tUnique Peptide Morpheus Score Threshold\tUnique Target Peptides\tUnique Decoy Peptides\tUnique Peptide FDR (%)\tProtein Group Summed Morpheus Score Threshold\tTarget Protein Groups\tDecoy Protein Groups\tProtein Group FDR (%)");

                foreach(string data_filepath in dataFilepaths)
                {
                    DateTime start = DateTime.Now;

                    OnStartingFile(new FilepathEventArgs(data_filepath));

                    OnUpdateProgress(new ProgressEventArgs(0));

                    log = new StreamWriter(Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".log.txt"));
                    log.AutoFlush = true;

                    log.WriteLine(Program.GetProductNameAndVersion() + " LOG");
                    log.WriteLine();

                    log.WriteLine("PARAMETERS");
                    log.WriteLine("Input Data File: " + data_filepath);
                    log.WriteLine("Unknown Precursor Charge State Range: " + minimumAssumedPrecursorChargeState.ToString("+0;-0;0") + ".." + maximumAssumedPrecursorChargeState.ToString("+0;-0;0"));
                    log.WriteLine("Absolute MS/MS Intensity Threshold: " + (absoluteThreshold >= 0.0 ? absoluteThreshold.ToString(CultureInfo.InvariantCulture) : "disabled"));
                    log.WriteLine("Relative MS/MS Intensity Threshold: " + (relativeThresholdPercent >= 0.0 ? relativeThresholdPercent.ToString(CultureInfo.InvariantCulture) + '%' : "disabled"));
                    log.WriteLine("Maximum Number of MS/MS Peaks: " + (maximumNumberOfPeaks >= 0 ? maximumNumberOfPeaks.ToString() : "disabled"));
                    log.WriteLine("Assign Charge States: " + assignChargeStates.ToString().ToLower());
                    log.WriteLine("De-isotope: " + deisotope.ToString().ToLower());
                    log.WriteLine("Proteome Database: " + proteomeDatabaseFilepath);
                    log.WriteLine("Create Target–Decoy Database On The Fly: " + onTheFlyDecoys.ToString().ToLower());
                    log.WriteLine("Protease: " + protease.ToString());
                    log.WriteLine("Maximum Missed Cleavages: " + maximumMissedCleavages.ToString());
                    log.WriteLine("Initiator Methionine Behavior: " + initiatorMethionineBehavior.ToString().ToLower());
                    log.WriteLine("Fixed Modifications: " + fixed_modifications);
                    log.WriteLine("Variable Modifications: " + variable_modifications);
                    log.WriteLine("Maximum Variable Modification Isoforms Per Peptide: " + maximumVariableModificationIsoforms.ToString());
                    log.WriteLine("Precursor Mass Tolerance: ±" + precursorMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + precursorMassTolerance.Units.ToString() + " (" + precursorMassType.ToString().ToLower() + ')');
                    log.WriteLine("Accepted Precursor Mass Errors: " + string.Join("; ", acceptedPrecursorMassErrors) + " Da");
                    log.WriteLine("Product Mass Tolerance: ±" + productMassTolerance.Value.ToString(CultureInfo.InvariantCulture) + ' ' + productMassTolerance.Units.ToString() + " (" + productMassType.ToString().ToLower() + ')');
                    log.WriteLine("Maximum False Discovery Rate: " + (maximumFalseDiscoveryRate * 100).ToString(CultureInfo.InvariantCulture) + '%');
                    log.WriteLine("Consider Modified Forms as Unique Peptides: " + considerModifiedFormsAsUniquePeptides.ToString().ToLower());
                    log.WriteLine("Maximum Threads: " + maximumThreads.ToString());
                    log.WriteLine("Minimize Memory Usage: " + minimizeMemoryUsage.ToString().ToLower());
                    log.WriteLine("Output Folder: " + outputFolder.ToString());
                    log.WriteLine();

                    log.WriteLine("RESULTS");

                    log.WriteLine(total_proteins.ToString("N0") + " total (" + target_proteins.ToString("N0") + " target + " + decoy_proteins.ToString("N0") + " decoy + " + on_the_fly_decoy_proteins.ToString("N0") + " on-the-fly decoy) proteins");

                    OnUpdateStatus(new StatusEventArgs("Extracting and preprocessing MS/MS spectra..."));
                    OnReportTaskWithProgress(EventArgs.Empty);
                    OnUpdateProgress(new ProgressEventArgs(0));

                    TandemMassSpectra spectra = new TandemMassSpectra();

                    spectra.ReportTaskWithoutProgress += HandleReportTaskWithoutProgress;
                    spectra.ReportTaskWithProgress += HandleReportTaskWithProgress;
                    spectra.UpdateProgress += HandleUpdateProgress;

                    spectra.Load(data_filepath, minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
                         absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
                         assignChargeStates, deisotope, productMassTolerance, maximumThreads);

                    spectra.ReportTaskWithoutProgress -= HandleReportTaskWithoutProgress;
                    spectra.ReportTaskWithProgress -= HandleReportTaskWithProgress;
                    spectra.UpdateProgress -= HandleUpdateProgress;

                    if(dataFilepaths.Count > 1)
                    {
                        total_spectra += spectra.Count;
                        if(parents.Count > 0)
                        {
                            num_spectra.Add(data_filepath, spectra.Count);
                        }
                    }

                    OnUpdateStatus(new StatusEventArgs("Searching MS/MS spectra..."));
                    OnReportTaskWithProgress(EventArgs.Empty);
                    OnUpdateProgress(new ProgressEventArgs(0));

                    PeptideSpectrumMatch[] psms = null;
                    if(spectra.Count > 0)
                    {
                        int max_spectrum_number = 0;
                        foreach(TandemMassSpectrum spectrum in spectra)
                        {
                            if(spectrum.SpectrumNumber > max_spectrum_number)
                            {
                                max_spectrum_number = spectrum.SpectrumNumber;
                            }
                        }

                        psms = new PeptideSpectrumMatch[max_spectrum_number];

                        spectra.Sort(TandemMassSpectrum.AscendingPrecursorMassComparison);
                    }

                    Dictionary<string, bool> peptides_observed = null;
                    if(!minimizeMemoryUsage)
                    {
                        peptides_observed = new Dictionary<string, bool>();
                    }

                    num_target_peptides = 0;
                    num_decoy_peptides = 0;

            #if NON_MULTITHREADED
                    int proteins = 0;
                    int old_progress = 0;
                    foreach(Protein protein in ProteomeDatabaseReader.ReadProteins(proteome_database, onTheFlyDecoys, known_variable_modifications))
                    {
                        foreach(Peptide peptide in protein.Digest(protease, maximumMissedCleavages, initiatorMethionineBehavior, null, null))
                        {
                            if(peptide.Target)
                            {
                                num_target_peptides++;
                            }
                            else
                            {
                                num_decoy_peptides++;
                            }

                            if(!minimizeMemoryUsage)
                            {
                                // This block of code is to ensure that (1) we don't re-search the same base leucine peptide sequence more than we need to,
                                // and (2) that we are maximally conservative by calling PSMs decoy whenever possible.
                                // If we haven't already seen this base leucine peptide sequence, add it to the dictionary with a value indicating whether it was decoy or not.
                                // Then perform the search as usual.
                                // If we have already seen it and it was decoy or this time it is target, we don't need to search it again, skip the peptide.
                                // Otherwise, update the dictionary to reflect that we have now seen it as a decoy and perform the search.
                                bool observed_as_decoy = false;
                                if(!peptides_observed.TryGetValue(peptide.BaseLeucineSequence, out observed_as_decoy))
                                {
                                    peptides_observed.Add(peptide.BaseLeucineSequence, peptide.Decoy);
                                }
                                else
                                {
                                    if(observed_as_decoy || peptide.Target)
                                    {
                                        // if the peptide has no known mods we have already searched all its isoforms, skip it
                                        if(peptide.KnownModifications == null || peptide.KnownModifications.Count == 0)
                                        {
                                            continue;
                                        }
                                    }
                                    else
                                    {
                                        peptides_observed[peptide.BaseLeucineSequence] = true;
                                    }
                                }
                            }

                            peptide.SetFixedModifications(fixedModifications);
                            foreach(Peptide modified_peptide in peptide.GetVariablyModifiedPeptides(unknown_variable_modifications, maximumVariableModificationIsoforms))
                            {
                                foreach(TandemMassSpectrum spectrum in precursorMonoisotopicPeakCorrection ?
                                    spectra.GetTandemMassSpectraInMassRange(precursorMassType == MassType.Average ? modified_peptide.AverageMass : modified_peptide.MonoisotopicMass, precursorMassTolerance, minimumPrecursorMonoisotopicPeakOffset, maximumPrecursorMonoisotopicPeakOffset) :
                                    spectra.GetTandemMassSpectraInMassRange(precursorMassType == MassType.Average ? modified_peptide.AverageMass : modified_peptide.MonoisotopicMass, precursorMassTolerance))
                                {
                                    PeptideSpectrumMatch psm = new PeptideSpectrumMatch(spectrum, modified_peptide, productMassTolerance);
                                    PeptideSpectrumMatch current_best_psm = psms[spectrum.SpectrumNumber - 1];
                                    if(current_best_psm == null || PeptideSpectrumMatch.DescendingMorpheusScoreComparison(psm, current_best_psm) < 0)
                                    {
                                        psms[spectrum.SpectrumNumber - 1] = psm;
                                    }
                                }
                            }
                        }

                        proteins++;
                        int new_progress = (int)((double)proteins / total_proteins * 100);
                        if(new_progress > old_progress)
                        {
                            OnUpdateProgress(new ProgressEventArgs(new_progress));
                            old_progress = new_progress;
                        }
                    }
            #else
                    object progress_lock = new object();
                    int proteins = 0;
                    int old_progress = 0;
                    ParallelOptions parallel_options = new ParallelOptions();
                    parallel_options.MaxDegreeOfParallelism = maximumThreads;
                    Parallel.ForEach(ProteomeDatabaseReader.ReadProteins(proteome_database, onTheFlyDecoys, known_variable_modifications), parallel_options, protein =>
                    {
                        foreach(Peptide peptide in protein.Digest(protease, maximumMissedCleavages, initiatorMethionineBehavior, null, null))
                        {
                            if(peptide.Target)
                            {
                                Interlocked.Increment(ref num_target_peptides);
                            }
                            else
                            {
                                Interlocked.Increment(ref num_decoy_peptides);
                            }

                            if(!minimizeMemoryUsage)
                            {
                                // This block of code is to ensure that (1) we don't re-search the same base leucine peptide sequence more than we need to,
                                // and (2) that we are maximally conservative by calling PSMs decoy whenever possible.
                                // If we haven't already seen this base leucine peptide sequence, add it to the dictionary with a value indicating whether it was decoy or not.
                                // Then perform the search as usual.
                                // If we have already seen it and it was decoy or this time it is target, we don't need to search it again, skip the peptide.
                                // Otherwise, update the dictionary to reflect that we have now seen it as a decoy and perform the search.
                                lock(peptides_observed)
                                {
                                    bool observed_as_decoy = false;
                                    if(!peptides_observed.TryGetValue(peptide.BaseLeucineSequence, out observed_as_decoy))
                                    {
                                        peptides_observed.Add(peptide.BaseLeucineSequence, peptide.Decoy);
                                    }
                                    else
                                    {
                                        if(observed_as_decoy || peptide.Target)
                                        {
                                            // if the peptide has no known mods we have already searched all its isoforms, skip it
                                            if(peptide.KnownModifications == null || peptide.KnownModifications.Count == 0)
                                            {
                                                continue;
                                            }
                                        }
                                        else
                                        {
                                            peptides_observed[peptide.BaseLeucineSequence] = true;
                                        }
                                    }
                                }
                            }

                            peptide.SetFixedModifications(fixedModifications);
                            foreach(Peptide modified_peptide in peptide.GetVariablyModifiedPeptides(unknown_variable_modifications, maximumVariableModificationIsoforms))
                            {
                                foreach (TandemMassSpectrum spectrum in spectra.GetTandemMassSpectraInMassRanges(precursorMassType == MassType.Average ? modified_peptide.AverageMass : modified_peptide.MonoisotopicMass, acceptedPrecursorMassErrors, precursorMassTolerance))
                                {
                                    PeptideSpectrumMatch psm = new PeptideSpectrumMatch(spectrum, modified_peptide, productMassTolerance);
                                    lock(psms)
                                    {
                                        PeptideSpectrumMatch current_best_psm = psms[spectrum.SpectrumNumber - 1];
                                        if(current_best_psm == null || PeptideSpectrumMatch.DescendingMorpheusScoreComparison(psm, current_best_psm) < 0)
                                        {
                                            psms[spectrum.SpectrumNumber - 1] = psm;
                                        }
                                    }
                                }
                            }
                        }

                        lock(progress_lock)
                        {
                            proteins++;
                            int new_progress = (int)((double)proteins / total_proteins * 100);
                            if(new_progress > old_progress)
                            {
                                OnUpdateProgress(new ProgressEventArgs(new_progress));
                                old_progress = new_progress;
                            }
                        }
                    });
            #endif

                    OnUpdateStatus(new StatusEventArgs("Performing post-search analyses..."));
                    OnReportTaskWithoutProgress(EventArgs.Empty);
                    OnUpdateProgress(new ProgressEventArgs(0));

                    log.WriteLine((num_target_peptides + num_decoy_peptides).ToString("N0") + " total (" + num_target_peptides.ToString("N0") + " target + " + num_decoy_peptides.ToString("N0") + " decoy) non-unique peptides");
                    decoys_over_targets_peptide_ratio = (double)num_decoy_peptides / num_target_peptides;

                    log.WriteLine(spectra.Count.ToString("N0") + " MS/MS spectra");

                    List<PeptideSpectrumMatch> psms_no_nulls;
                    if(psms != null)
                    {
                        psms_no_nulls = new List<PeptideSpectrumMatch>(psms.Length);
                        foreach(PeptideSpectrumMatch psm in psms)
                        {
                            if(psm != null)
                            {
                                psms_no_nulls.Add(psm);
                            }
                        }

                        if(dataFilepaths.Count > 1)
                        {
                            aggregate_psms.AddRange(psms_no_nulls);
                            if(parents.Count > 0)
                            {
                                grouped_aggregate_psms.Add(data_filepath, psms_no_nulls);
                            }
                        }
                    }
                    else
                    {
                        psms_no_nulls = new List<PeptideSpectrumMatch>(0);
                    }

                    List<PeptideSpectrumMatch> sorted_psms = new List<PeptideSpectrumMatch>(psms_no_nulls);
                    sorted_psms.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);

                    IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> psms_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(sorted_psms, decoys_over_targets_peptide_ratio);
                    Exporters.WriteToTabDelimitedTextFile(psms_with_fdr, Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".PSMs.tsv"));
                    double psm_score_threshold = double.NegativeInfinity;
                    int target_psms = sorted_psms.Count;
                    int decoy_psms = 0;
                    double psm_fdr = double.NaN;
                    if(decoys_over_targets_peptide_ratio == 0.0)
                    {
                        log.WriteLine(sorted_psms.Count.ToString("N0") + " PSMs (unknown FDR)");
                    }
                    else
                    {
                        FalseDiscoveryRate.DetermineMaximumIdentifications(psms_with_fdr, false, maximumFalseDiscoveryRate, out psm_score_threshold, out target_psms, out decoy_psms, out psm_fdr);
                        log.WriteLine(target_psms.ToString("N0") + " target (" + decoy_psms.ToString("N0") + " decoy) PSMs at " + psm_fdr.ToString("0.000%") + " PSM FDR (" + psm_score_threshold.ToString("0.000") + " Morpheus score threshold)");
                    }

                    Exporters.WritePsmsToPepXmlFile(Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".pep.xml"),
                        data_filepath,
                        minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
                        absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
                        assignChargeStates, deisotope,
                        proteomeDatabaseFilepath, onTheFlyDecoys, target_proteins,
                        protease, maximumMissedCleavages, initiatorMethionineBehavior,
                        fixedModifications, fixed_modifications, variableModifications, variable_modifications, maximumVariableModificationIsoforms,
                        precursorMassTolerance, precursorMassType,
                        acceptedPrecursorMassErrors,
                        productMassTolerance, productMassType,
                        maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
                        maximumThreads, minimizeMemoryUsage,
                        outputFolder,
                        psms_with_fdr);

                    Dictionary<string, PeptideSpectrumMatch> peptides = new Dictionary<string, PeptideSpectrumMatch>();

                    foreach(PeptideSpectrumMatch psm in sorted_psms)
                    {
                        if(!peptides.ContainsKey(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence))
                        {
                            peptides.Add(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence, psm);
                        }
                    }

                    List<PeptideSpectrumMatch> sorted_peptides = new List<PeptideSpectrumMatch>(peptides.Values);
                    sorted_peptides.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);

                    IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> peptides_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(sorted_peptides, decoys_over_targets_peptide_ratio);
                    Exporters.WriteToTabDelimitedTextFile(peptides_with_fdr, Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".unique_peptides.tsv"));
                    double peptide_score_threshold = double.NegativeInfinity;
                    int target_peptides = sorted_peptides.Count;
                    int decoy_peptides = 0;
                    double peptide_fdr = double.NaN;
                    if(decoys_over_targets_peptide_ratio == 0.0)
                    {
                        log.WriteLine(sorted_peptides.Count.ToString("N0") + " unique peptides (unknown FDR)");
                    }
                    else
                    {
                        FalseDiscoveryRate.DetermineMaximumIdentifications(peptides_with_fdr, false, maximumFalseDiscoveryRate, out peptide_score_threshold, out target_peptides, out decoy_peptides, out peptide_fdr);
                        log.WriteLine(target_peptides.ToString("N0") + " unique target (" + decoy_peptides.ToString("N0") + " decoy) peptides at " + peptide_fdr.ToString("0.000%") + " unique peptide FDR (" + peptide_score_threshold.ToString("0.000") + " Morpheus score threshold)");
                    }

                    List<ProteinGroup> protein_groups = ProteinGroup.ApplyProteinParsimony(sorted_psms, peptide_score_threshold, proteome_database, onTheFlyDecoys, known_variable_modifications, protease, maximumMissedCleavages, initiatorMethionineBehavior, maximumThreads);

                    IEnumerable<IdentificationWithFalseDiscoveryRate<ProteinGroup>> protein_groups_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(protein_groups, decoys_over_targets_protein_ratio);
                    Exporters.WriteToTabDelimitedTextFile(protein_groups_with_fdr, Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".protein_groups.tsv"));
                    double protein_group_score_threshold = double.NegativeInfinity;
                    int target_protein_groups = protein_groups.Count;
                    int decoy_protein_groups = 0;
                    double protein_group_fdr = double.NaN;
                    if(decoys_over_targets_protein_ratio == 0.0)
                    {
                        log.WriteLine(protein_groups.Count.ToString("N0") + " protein groups (unknown FDR)");
                    }
                    else
                    {
                        FalseDiscoveryRate.DetermineMaximumIdentifications(protein_groups_with_fdr, false, maximumFalseDiscoveryRate, out protein_group_score_threshold, out target_protein_groups, out decoy_protein_groups, out protein_group_fdr);
                        log.WriteLine(target_protein_groups.ToString("N0") + " target (" + decoy_protein_groups.ToString("N0") + " decoy) protein groups at " + protein_group_fdr.ToString("0.000%") + " protein group FDR (" + protein_group_score_threshold.ToString("0.000") + " summed Morpheus score threshold)");
                    }

                    Exporters.WriteMZIdentMLFile(Path.Combine(outputFolder, Path.GetFileNameWithoutExtension(data_filepath) + ".mzid"),
                        new string[] { data_filepath },
                        minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
                        absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
                        assignChargeStates, deisotope,
                        proteomeDatabaseFilepath, proteome_database, onTheFlyDecoys, target_proteins,
                        protease, maximumMissedCleavages, initiatorMethionineBehavior,
                        fixedModifications, variableModifications, maximumVariableModificationIsoforms,
                        precursorMassTolerance, precursorMassType,
                        acceptedPrecursorMassErrors,
                        productMassTolerance, productMassType,
                        maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
                        maximumThreads, minimizeMemoryUsage,
                        outputFolder,
                        psms_with_fdr,
                        protein_groups_with_fdr);

                    DateTime stop = DateTime.Now;
                    log.WriteLine((stop - start).TotalMinutes.ToString("0.00") + " minutes to analyze");

                    log.Close();

                    summary.Write(data_filepath + '\t');
                    summary.Write(proteins.ToString() + '\t');
                    summary.Write(spectra.Count.ToString() + '\t');
                    summary.Write(psm_score_threshold.ToString("0.000") + '\t');
                    summary.Write(target_psms.ToString() + '\t');
                    summary.Write(decoy_psms.ToString() + '\t');
                    summary.Write(psm_fdr.ToString("0.000%") + '\t');
                    summary.Write(peptide_score_threshold.ToString("0.000") + '\t');
                    summary.Write(target_peptides.ToString() + '\t');
                    summary.Write(decoy_peptides.ToString() + '\t');
                    summary.Write(peptide_fdr.ToString("0.000%") + '\t');
                    summary.Write(protein_group_score_threshold.ToString("0.000") + '\t');
                    summary.Write(target_protein_groups.ToString() + '\t');
                    summary.Write(decoy_protein_groups.ToString() + '\t');
                    summary.Write(protein_group_fdr.ToString("0.000%") + '\t');
                    summary.WriteLine();

                    OnFinishedFile(new FilepathEventArgs(data_filepath));
                }

                if(dataFilepaths.Count > 1)
                {
                    OnUpdateStatus(new StatusEventArgs("Performing aggregate post-search analyses..."));
                    OnReportTaskWithoutProgress(EventArgs.Empty);
                    OnUpdateProgress(new ProgressEventArgs(0));

                    overall_log.WriteLine((num_target_peptides + num_decoy_peptides).ToString("N0") + " total (" + num_target_peptides.ToString("N0") + " target + " + num_decoy_peptides.ToString("N0") + " decoy) non-unique peptides");

                    HashSet<string> prefixes = new HashSet<string>();
                    prefixes.Add("aggregate");
                    foreach(KeyValuePair<string, HashSet<string>> kvp in parents)
                    {
                        DirectoryInfo directory_info = new DirectoryInfo(kvp.Key.Replace("*", null));
                        string prefix = directory_info.Name.Replace(@":\", null);
                        int id = 1;
                        while(prefixes.Contains(prefix))
                        {
                            prefix = directory_info.Name + '#' + id.ToString();
                            id++;
                        }

                        int semi_aggregate_spectra = 0;
                        List<PeptideSpectrumMatch> semi_aggregate_psms = new List<PeptideSpectrumMatch>();
                        foreach(string data_filepath in kvp.Value)
                        {
                            semi_aggregate_spectra += num_spectra[data_filepath];
                            semi_aggregate_psms.AddRange(grouped_aggregate_psms[data_filepath]);
                        }

                        overall_log.WriteLine(semi_aggregate_spectra.ToString("N0") + " MS/MS spectra in " + kvp.Key);

                        semi_aggregate_psms.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);

                        IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> semi_aggregate_psms_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(semi_aggregate_psms, decoys_over_targets_peptide_ratio);
                        Exporters.WriteToTabDelimitedTextFile(semi_aggregate_psms_with_fdr, Path.Combine(outputFolder, prefix + ".PSMs.tsv"));
                        double semi_aggregate_psm_score_threshold;
                        int semi_aggregate_target_psms;
                        int semi_aggregate_decoy_psms;
                        double semi_aggregate_psm_fdr;
                        FalseDiscoveryRate.DetermineMaximumIdentifications(semi_aggregate_psms_with_fdr, false, maximumFalseDiscoveryRate, out semi_aggregate_psm_score_threshold, out semi_aggregate_target_psms, out semi_aggregate_decoy_psms, out semi_aggregate_psm_fdr);
                        overall_log.WriteLine(semi_aggregate_target_psms.ToString("N0") + " target (" + semi_aggregate_decoy_psms.ToString("N0") + " decoy) PSMs at " + semi_aggregate_psm_fdr.ToString("0.000%") + " PSM FDR (" + semi_aggregate_psm_score_threshold.ToString("0.000") + " Morpheus score threshold) in " + kvp.Key);

                        Dictionary<string, PeptideSpectrumMatch> semi_aggregate_peptides = new Dictionary<string, PeptideSpectrumMatch>();

                        foreach(PeptideSpectrumMatch psm in semi_aggregate_psms)
                        {
                            if(!semi_aggregate_peptides.ContainsKey(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence))
                            {
                                semi_aggregate_peptides.Add(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence, psm);
                            }
                        }

                        List<PeptideSpectrumMatch> semi_aggregate_sorted_peptides = new List<PeptideSpectrumMatch>(semi_aggregate_peptides.Values);
                        semi_aggregate_sorted_peptides.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);

                        IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> semi_aggregate_peptides_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(semi_aggregate_sorted_peptides, decoys_over_targets_peptide_ratio);
                        Exporters.WriteToTabDelimitedTextFile(semi_aggregate_peptides_with_fdr, Path.Combine(outputFolder, prefix + ".unique_peptides.tsv"));
                        double semi_aggregate_peptide_score_threshold;
                        int semi_aggregate_target_peptides;
                        int semi_aggregate_decoy_peptides;
                        double semi_aggregate_peptide_fdr;
                        FalseDiscoveryRate.DetermineMaximumIdentifications(semi_aggregate_peptides_with_fdr, false, maximumFalseDiscoveryRate, out semi_aggregate_peptide_score_threshold, out semi_aggregate_target_peptides, out semi_aggregate_decoy_peptides, out semi_aggregate_peptide_fdr);
                        overall_log.WriteLine(semi_aggregate_target_peptides.ToString("N0") + " unique target (" + semi_aggregate_decoy_peptides.ToString("N0") + " decoy) peptides at " + semi_aggregate_peptide_fdr.ToString("0.000%") + " unique peptide FDR (" + semi_aggregate_peptide_score_threshold.ToString("0.000") + " Morpheus score threshold) in " + kvp.Key);

                        List<ProteinGroup> semi_aggregate_protein_groups = ProteinGroup.ApplyProteinParsimony(semi_aggregate_psms, semi_aggregate_peptide_score_threshold, proteome_database, onTheFlyDecoys, known_variable_modifications, protease, maximumMissedCleavages, initiatorMethionineBehavior, maximumThreads);

                        IEnumerable<IdentificationWithFalseDiscoveryRate<ProteinGroup>> semi_aggregate_protein_groups_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(semi_aggregate_protein_groups, decoys_over_targets_protein_ratio);
                        Exporters.WriteToTabDelimitedTextFile(semi_aggregate_protein_groups_with_fdr, Path.Combine(outputFolder, prefix + ".protein_groups.tsv"));
                        double semi_aggregate_protein_group_score_threshold;
                        int semi_aggregate_target_protein_groups;
                        int semi_aggregate_decoy_protein_groups;
                        double semi_aggregate_protein_group_fdr;
                        FalseDiscoveryRate.DetermineMaximumIdentifications(semi_aggregate_protein_groups_with_fdr, false, maximumFalseDiscoveryRate, out semi_aggregate_protein_group_score_threshold, out semi_aggregate_target_protein_groups, out semi_aggregate_decoy_protein_groups, out semi_aggregate_protein_group_fdr);
                        overall_log.WriteLine(semi_aggregate_target_protein_groups.ToString("N0") + " target (" + semi_aggregate_decoy_protein_groups.ToString("N0") + " decoy) protein groups at " + semi_aggregate_protein_group_fdr.ToString("0.000%") + " protein group FDR (" + semi_aggregate_protein_group_score_threshold.ToString("0.000") + " summed Morpheus score threshold) in " + kvp.Key);

                        Exporters.WriteMZIdentMLFile(Path.Combine(outputFolder, prefix + ".mzid"),
                            kvp.Value,
                            minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
                            absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
                            assignChargeStates, deisotope,
                            proteomeDatabaseFilepath, proteome_database, onTheFlyDecoys, target_proteins,
                            protease, maximumMissedCleavages, initiatorMethionineBehavior,
                            fixedModifications, variableModifications, maximumVariableModificationIsoforms,
                            precursorMassTolerance, precursorMassType,
                            acceptedPrecursorMassErrors,
                            productMassTolerance, productMassType,
                            maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
                            maximumThreads, minimizeMemoryUsage,
                            outputFolder,
                            semi_aggregate_psms_with_fdr,
                            semi_aggregate_protein_groups_with_fdr);

                        summary.Write(kvp.Key + '\t');
                        summary.Write(total_proteins.ToString() + '\t');
                        summary.Write(semi_aggregate_spectra.ToString() + '\t');
                        summary.Write(semi_aggregate_psm_score_threshold.ToString("0.000") + '\t');
                        summary.Write(semi_aggregate_target_psms.ToString() + '\t');
                        summary.Write(semi_aggregate_decoy_psms.ToString() + '\t');
                        summary.Write(semi_aggregate_psm_fdr.ToString("0.000%") + '\t');
                        summary.Write(semi_aggregate_peptide_score_threshold.ToString("0.000") + '\t');
                        summary.Write(semi_aggregate_target_peptides.ToString() + '\t');
                        summary.Write(semi_aggregate_decoy_peptides.ToString() + '\t');
                        summary.Write(semi_aggregate_peptide_fdr.ToString("0.000%") + '\t');
                        summary.Write(semi_aggregate_protein_group_score_threshold.ToString("0.000") + '\t');
                        summary.Write(semi_aggregate_target_protein_groups.ToString() + '\t');
                        summary.Write(semi_aggregate_decoy_protein_groups.ToString() + '\t');
                        summary.Write(semi_aggregate_protein_group_fdr.ToString("0.000%") + '\t');
                        summary.WriteLine();
                    }

                    overall_log.WriteLine(total_spectra.ToString("N0") + " MS/MS spectra");

                    aggregate_psms.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);

                    IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> aggregate_psms_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(aggregate_psms, decoys_over_targets_peptide_ratio);
                    Exporters.WriteToTabDelimitedTextFile(aggregate_psms_with_fdr, Path.Combine(outputFolder, "aggregate.PSMs.tsv"));
                    double aggregate_psm_score_threshold;
                    int aggregate_target_psms;
                    int aggregate_decoy_psms;
                    double aggregate_psm_fdr;
                    FalseDiscoveryRate.DetermineMaximumIdentifications(aggregate_psms_with_fdr, false, maximumFalseDiscoveryRate, out aggregate_psm_score_threshold, out aggregate_target_psms, out aggregate_decoy_psms, out aggregate_psm_fdr);
                    overall_log.WriteLine(aggregate_target_psms.ToString("N0") + " target (" + aggregate_decoy_psms.ToString("N0") + " decoy) aggregate PSMs at " + aggregate_psm_fdr.ToString("0.000%") + " PSM FDR (" + aggregate_psm_score_threshold.ToString("0.000") + " Morpheus score threshold)");

                    Dictionary<string, PeptideSpectrumMatch> aggregate_peptides = new Dictionary<string, PeptideSpectrumMatch>();

                    foreach(PeptideSpectrumMatch psm in aggregate_psms)
                    {
                        if(!aggregate_peptides.ContainsKey(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence))
                        {
                            aggregate_peptides.Add(considerModifiedFormsAsUniquePeptides ? psm.Peptide.LeucineSequence : psm.Peptide.BaseLeucineSequence, psm);
                        }
                    }

                    List<PeptideSpectrumMatch> aggregate_sorted_peptides = new List<PeptideSpectrumMatch>(aggregate_peptides.Values);
                    aggregate_sorted_peptides.Sort(PeptideSpectrumMatch.DescendingMorpheusScoreComparison);

                    IEnumerable<IdentificationWithFalseDiscoveryRate<PeptideSpectrumMatch>> aggregate_peptides_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(aggregate_sorted_peptides, decoys_over_targets_peptide_ratio);
                    Exporters.WriteToTabDelimitedTextFile(aggregate_peptides_with_fdr, Path.Combine(outputFolder, "aggregate.unique_peptides.tsv"));
                    double aggregate_peptide_score_threshold;
                    int aggregate_target_peptides;
                    int aggregate_decoy_peptides;
                    double aggregate_peptide_fdr;
                    FalseDiscoveryRate.DetermineMaximumIdentifications(aggregate_peptides_with_fdr, false, maximumFalseDiscoveryRate, out aggregate_peptide_score_threshold, out aggregate_target_peptides, out aggregate_decoy_peptides, out aggregate_peptide_fdr);
                    overall_log.WriteLine(aggregate_target_peptides.ToString("N0") + " unique target (" + aggregate_decoy_peptides.ToString("N0") + " decoy) aggregate peptides at " + aggregate_peptide_fdr.ToString("0.000%") + " unique peptide FDR (" + aggregate_peptide_score_threshold.ToString("0.000") + " Morpheus score threshold)");

                    List<ProteinGroup> aggregate_protein_groups = ProteinGroup.ApplyProteinParsimony(aggregate_psms, aggregate_peptide_score_threshold, proteome_database, onTheFlyDecoys, known_variable_modifications, protease, maximumMissedCleavages, initiatorMethionineBehavior, maximumThreads);

                    IEnumerable<IdentificationWithFalseDiscoveryRate<ProteinGroup>> aggregate_protein_groups_with_fdr = FalseDiscoveryRate.DoFalseDiscoveryRateAnalysis(aggregate_protein_groups, decoys_over_targets_protein_ratio);
                    Exporters.WriteToTabDelimitedTextFile(aggregate_protein_groups_with_fdr, Path.Combine(outputFolder, "aggregate.protein_groups.tsv"));
                    double aggregate_protein_group_score_threshold;
                    int aggregate_target_protein_groups;
                    int aggregate_decoy_protein_groups;
                    double aggregate_protein_group_fdr;
                    FalseDiscoveryRate.DetermineMaximumIdentifications(aggregate_protein_groups_with_fdr, false, maximumFalseDiscoveryRate, out aggregate_protein_group_score_threshold, out aggregate_target_protein_groups, out aggregate_decoy_protein_groups, out aggregate_protein_group_fdr);
                    overall_log.WriteLine(aggregate_target_protein_groups.ToString("N0") + " target (" + aggregate_decoy_protein_groups.ToString("N0") + " decoy) aggregate protein groups at " + aggregate_protein_group_fdr.ToString("0.000%") + " protein group FDR (" + aggregate_protein_group_score_threshold.ToString("0.000") + " summed Morpheus score threshold)");

                    Exporters.WriteMZIdentMLFile(Path.Combine(outputFolder, "aggregate.mzid"),
                        dataFilepaths,
                        minimumAssumedPrecursorChargeState, maximumAssumedPrecursorChargeState,
                        absoluteThreshold, relativeThresholdPercent, maximumNumberOfPeaks,
                        assignChargeStates, deisotope,
                        proteomeDatabaseFilepath, proteome_database, onTheFlyDecoys, target_proteins,
                        protease, maximumMissedCleavages, initiatorMethionineBehavior,
                        fixedModifications, variableModifications, maximumVariableModificationIsoforms,
                        precursorMassTolerance, precursorMassType,
                        acceptedPrecursorMassErrors,
                        productMassTolerance, productMassType,
                        maximumFalseDiscoveryRate, considerModifiedFormsAsUniquePeptides,
                        maximumThreads, minimizeMemoryUsage,
                        outputFolder,
                        aggregate_psms_with_fdr,
                        aggregate_protein_groups_with_fdr);

                    DateTime overall_stop = DateTime.Now;
                    overall_log.WriteLine((overall_stop - overall_start).TotalMinutes.ToString("0.00") + " minutes to analyze");

                    overall_log.Close();

                    summary.Write("AGGREGATE" + '\t');
                    summary.Write(total_proteins.ToString() + '\t');
                    summary.Write(total_spectra.ToString() + '\t');
                    summary.Write(aggregate_psm_score_threshold.ToString("0.000") + '\t');
                    summary.Write(aggregate_target_psms.ToString() + '\t');
                    summary.Write(aggregate_decoy_psms.ToString() + '\t');
                    summary.Write(aggregate_psm_fdr.ToString("0.000%") + '\t');
                    summary.Write(aggregate_peptide_score_threshold.ToString("0.000") + '\t');
                    summary.Write(aggregate_target_peptides.ToString() + '\t');
                    summary.Write(aggregate_decoy_peptides.ToString() + '\t');
                    summary.Write(aggregate_peptide_fdr.ToString("0.000%") + '\t');
                    summary.Write(aggregate_protein_group_score_threshold.ToString("0.000") + '\t');
                    summary.Write(aggregate_target_protein_groups.ToString() + '\t');
                    summary.Write(aggregate_decoy_protein_groups.ToString() + '\t');
                    summary.Write(aggregate_protein_group_fdr.ToString("0.000%") + '\t');
                    summary.WriteLine();
                }

                proteome_database.Close();

                summary.Close();
            #if !NO_EXCEPTION_HANDLING
            }
            catch(Exception ex)
            {
                if(overall_log != null && overall_log.BaseStream != null && overall_log.BaseStream.CanWrite)
                {
                    overall_log.WriteLine(ex.ToString());
                }
                if(log != null && log.BaseStream != null && log.BaseStream.CanWrite)
                {
                    log.WriteLine(ex.ToString());
                }
                OnThrowException(new ExceptionEventArgs(ex));
            }
            finally
            {
            #endif
                if(overall_log != null)
                {
                    overall_log.Close();
                }
                if(summary != null)
                {
                    summary.Close();
                }
                if(log != null)
                {
                    log.Close();
                }
                if(proteome_database != null)
                {
                    proteome_database.Close();
                }
            #if !NO_EXCEPTION_HANDLING
            }
            #endif
        }