public static void TestModernSearchEngineTopDown()
        {
            CommonParameters CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams(protease: "top-down"),
                scoreCutoff: 1,
                assumeOrphanPeaksAreZ1Fragments: false);

            var variableModifications = new List <Modification>();
            var fixedModifications    = new List <Modification>();
            var proteinList           = new List <Protein>
            {
                new Protein("MPKVYSYQEVAEHNGPENFWIIIDDKVYDVSQFKDEHPGGDEIIMDLGGQDATESFVDIGHSDEALRLLKGLYIGDVDKTSERVSVEKVSTSENQSKGSGTLVVILAILMLGVAYYLLNE", "P40312")
            };

            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData\slicedTDYeast.mzML"));

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            Tolerance DeconvolutionMassTolerance = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            var indexEngine  = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.Reverse, CommonParameters, 30000, false, new List <FileInfo>(), new List <string>());
            var indexResults = (IndexingResults)indexEngine.Run();

            new ModernSearchEngine(allPsmsArray, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, CommonParameters, searchMode, 0, new List <string>()).Run();

            var psm = allPsmsArray.Where(p => p != null).FirstOrDefault();

            Assert.That(psm.MatchedFragmentIons.Count > 50);
        }
Example #2
0
        public static void TestClassicSearchEngineTopDown()
        {
            CommonParameters CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams(protease: "top-down"),
                scoreCutoff: 1,
                assumeOrphanPeaksAreZ1Fragments: false);

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            // test output file name (should be proteoform and not peptide)
            Assert.That(GlobalVariables.AnalyteType == "Proteoform");

            var variableModifications = new List <Modification>();
            var fixedModifications    = new List <Modification>();
            var proteinList           = new List <Protein>
            {
                new Protein("MPKVYSYQEVAEHNGPENFWIIIDDKVYDVSQFKDEHPGGDEIIMDLGGQDATESFVDIGHSDEALRLLKGLYIGDVDKTSERVSVEKVSTSENQSKGSGTLVVILAILMLGVAYYLLNE", "P40312")
            };

            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData\slicedTDYeast.mzML"));

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            Tolerance DeconvolutionMassTolerance = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List <string>()).Run();

            var psm = allPsmsArray.Where(p => p != null).FirstOrDefault();

            Assert.That(psm.MatchedFragmentIons.Count == 47);
        }
Example #3
0
        public static void ReplaceBadStdevTwo()
        {
            //here we are adding a really hydrophobic psm at the same time as a regular peptide so that there is a big difference in their computed hydrophobicities. The stdev of these hydrophobicities is out of whach the the collective and so it needs to get replaced by the global average

            var              variableModifications = new List <Modification>();
            var              fixedModifications    = new List <Modification>();
            var              origDataFile          = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
            MyFileManager    myFileManager         = new MyFileManager(true);
            CommonParameters CommonParameters      = new CommonParameters(digestionParams: new DigestionParams());
            var              myMsDataFile          = myFileManager.LoadFile(origDataFile, CommonParameters);
            var              searchModes           = new SinglePpmAroundZeroSearchMode(5);
            List <Protein>   proteinList           = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                                      ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);
            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", CommonParameters).OrderBy(b => b.PrecursorMass).ToArray();

            //adding a new scan that creates a psm at an isolated retention time. This will ultimately cause PEP to replace its retention time standard deviation "Z-score" with the global average.
            Ms2ScanWithSpecificMass topMs2Scan = listOfSortedms2Scans[395];
            int        newOneBasedScanNumber   = 1000;
            MzRange    range             = new MzRange(topMs2Scan.TheScan.MassSpectrum.XArray.Min(), topMs2Scan.TheScan.MassSpectrum.XArray.Max());
            MzSpectrum mzs               = new MzSpectrum(topMs2Scan.TheScan.MassSpectrum.XArray, topMs2Scan.TheScan.MassSpectrum.YArray, true);
            double     newRetentionTime  = topMs2Scan.TheScan.RetentionTime - 25;
            MsDataScan msd               = new MsDataScan(mzs, newOneBasedScanNumber, 2, topMs2Scan.TheScan.IsCentroid, Polarity.Positive, newRetentionTime, range, "", MZAnalyzerType.Orbitrap, topMs2Scan.TheScan.TotalIonCurrent, topMs2Scan.TheScan.InjectionTime, topMs2Scan.TheScan.NoiseData, "", topMs2Scan.TheScan.SelectedIonMZ, topMs2Scan.TheScan.SelectedIonChargeStateGuess, topMs2Scan.TheScan.SelectedIonIntensity, topMs2Scan.TheScan.IsolationMz, topMs2Scan.TheScan.IsolationWidth, DissociationType.HCD, topMs2Scan.TheScan.OneBasedPrecursorScanNumber, topMs2Scan.TheScan.SelectedIonMonoisotopicGuessMz);
            Ms2ScanWithSpecificMass mwsm = new Ms2ScanWithSpecificMass(msd, topMs2Scan.PrecursorMonoisotopicPeakMz, topMs2Scan.PrecursorCharge, topMs2Scan.FullFilePath, new CommonParameters(), topMs2Scan.ExperimentalFragments);

            Ms2ScanWithSpecificMass[] extendedArray = new Ms2ScanWithSpecificMass[listOfSortedms2Scans.Length + 1];
            for (int i = 0; i < listOfSortedms2Scans.Length; i++)
            {
                extendedArray[i] = listOfSortedms2Scans[i];
            }
            extendedArray[listOfSortedms2Scans.Length] = mwsm;

            extendedArray = extendedArray.OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[extendedArray.Length];
            new ClassicSearchEngine(allPsmsArray, extendedArray, variableModifications, fixedModifications, null, null, null, proteinList, searchModes, CommonParameters, new List <string>()).Run();

            List <PeptideSpectralMatch> nonNullPsms = allPsmsArray.Where(p => p != null).ToList();

            nonNullPsms = nonNullPsms.OrderByDescending(p => p.Score).ToList();
            List <PeptideSpectralMatch> psmBloated = new List <PeptideSpectralMatch>();

            psmBloated.AddRange(nonNullPsms);
            int arrayMax = nonNullPsms.Count;

            psmBloated.AddRange(nonNullPsms.GetRange(2, arrayMax - 2));
            psmBloated.AddRange(nonNullPsms.GetRange(2, arrayMax - 2));


            PeptideSpectralMatch pp = psmBloated.Where(p => p.ScanRetentionTime < (newRetentionTime + 1)).First();

            PeptideWithSetModifications newPwsmTwo = new PeptideWithSetModifications(new Protein("WAGVLPWFPWAAVVWGFWF", "ACCESSION", "ORGANISM"), new DigestionParams(), 1, 2, CleavageSpecificity.Full, "", 0, new Dictionary <int, Modification>(), 0);
            PeptideSpectralMatch        newPsmTwo  = new PeptideSpectralMatch(newPwsmTwo, pp.BestMatchingPeptides.First().Notch, pp.Score, pp.ScanIndex, mwsm, new DigestionParams(), pp.MatchedFragmentIons);

            psmBloated.Add(newPsmTwo);

            FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(psmBloated.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run());
        }
Example #4
0
        private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List <Modification> variableModifications, List <Modification> fixedModifications, List <Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
            MassDiffAcceptor searchMode  = initPrecTol is PpmTolerance ?
                                           (MassDiffAcceptor) new SinglePpmAroundZeroSearchMode(initPrecTol.Value) :
                                           new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);

            var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            Log("Searching with searchMode: " + searchMode, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });
            Log("Searching with productMassTolerance: " + initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, combinedParameters, this.FileSpecificParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();
            List <PeptideSpectralMatch> allPsms = allPsmsArray.Where(b => b != null).ToList();

            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, this.FileSpecificParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.001 && !b.IsDecoy && b.FullSequence != null).ToList();

            if (!goodIdentifications.Any())
            {
                return(new DataPointAquisitionResults(null, new List <PeptideSpectralMatch>(), new List <LabeledDataPoint>(), new List <LabeledDataPoint>(), 0, 0, 0, 0));
            }

            DataPointAquisitionResults currentResult = (DataPointAquisitionResults) new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CommonParameters,
                this.FileSpecificParameters,
                new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            return(currentResult);
        }
Example #5
0
        public static void RemoveAmbiguousPeptides()
        {
            var              variableModifications = new List <Modification>();
            var              fixedModifications    = new List <Modification>();
            var              origDataFile          = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
            MyFileManager    myFileManager         = new MyFileManager(true);
            CommonParameters CommonParameters      = new CommonParameters(digestionParams: new DigestionParams());
            var              myMsDataFile          = myFileManager.LoadFile(origDataFile, CommonParameters);
            var              searchModes           = new SinglePpmAroundZeroSearchMode(5);
            List <Protein>   proteinList           = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                                      ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);
            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", CommonParameters).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchModes, CommonParameters, new List <string>()).Run();

            var nonNullPsms = allPsmsArray.Where(p => p != null).ToList();

            nonNullPsms.OrderByDescending(p => p.Score);
            var maxScore = nonNullPsms.Select(n => n.Score).Max();
            PeptideSpectralMatch maxScorePsm = nonNullPsms.Where(n => n.Score == maxScore).First();

            Protein newProteinToRemove = new Protein("RREMVE", "BUBBA", isDecoy: false);
            PeptideWithSetModifications pwsmToRemove = new PeptideWithSetModifications(newProteinToRemove, new DigestionParams(), 1, 6, CleavageSpecificity.Full, "peptideDescription", 2, new Dictionary <int, Modification>(), 1, "RREMVE");

            maxScorePsm.AddOrReplace(pwsmToRemove, maxScore, 1, true, maxScorePsm.MatchedFragmentIons, maxScore);
            maxScorePsm.ResolveAllAmbiguities();

            List <PeptideSpectralMatch> psmBloated = new List <PeptideSpectralMatch>();

            psmBloated.AddRange(nonNullPsms);
            psmBloated.AddRange(nonNullPsms.GetRange(0, nonNullPsms.Count - 2));
            foreach (PeptideSpectralMatch psm in nonNullPsms.GetRange(0, nonNullPsms.Count - 2))
            {
                Protein newDecoyProtein = new Protein(psm.BestMatchingPeptides.First().Peptide.BaseSequence + "K", "DECOY_" + psm.BestMatchingPeptides.First().Peptide.Protein.Accession, isDecoy: true);
                PeptideWithSetModifications pwsmDecoy = new PeptideWithSetModifications(newDecoyProtein, new DigestionParams(), 1, psm.BestMatchingPeptides.First().Peptide.BaseSequence.Length + 1, CleavageSpecificity.Full, "peptideDescription", 2, new Dictionary <int, Modification>(), 1, psm.BestMatchingPeptides.First().Peptide.BaseSequence + "K");
                PeptideSpectralMatch        decoyPsm  = new PeptideSpectralMatch(pwsmDecoy, 1, psm.Score, psm.ScanIndex, listOfSortedms2Scans[psm.ScanIndex], new DigestionParams(), psm.MatchedFragmentIons);
                decoyPsm.ResolveAllAmbiguities();
                psmBloated.Add(decoyPsm);
            }

            PeptideSpectralMatch oldBloatedMaxScorePsm = psmBloated.Where(n => n.Score == maxScore).First();
            int countOfBestPeptidesBloatedMax          = oldBloatedMaxScorePsm.BestMatchingPeptides.Count();

            FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(psmBloated.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run());

            PeptideSpectralMatch newMaxScorePsm = psmBloated.Where(n => n.Score == maxScore).First();

            Assert.AreEqual(countOfBestPeptidesBloatedMax - 1, newMaxScorePsm.BestMatchingPeptides.Count());
        }
Example #6
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // TODO: print error messages loading GPTMD mods
            List <Modification> gptmdModifications       = GlobalVariables.AllModsKnown.OfType <Modification>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif))).ToList();
            IEnumerable <Tuple <double, double> > combos = LoadCombos(gptmdModifications).ToList();

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            var numRawFiles = currentRawFileList.Count;

            // write prose settings
            ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: "); ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

            // temporary search type for writing prose
            // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this
            MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);

            ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

            // start the G-PTM-D task
            Status("Running G-PTM-D...", new List <string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewDatabases = new List <DbForTask>()
            };
            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet <DigestionParams> ListOfDigestionParams = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            MyFileManager myFileManager = new MyFileManager(true);

            object lock1 = new object();
            object lock2 = new object();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                // Stop if canceled
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
                MassDiffAcceptor searchMode     = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

                NewCollection(Path.GetFileName(origDataFile), new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                myFileManager.DoneWithFile(origDataFile);
                PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, combinedParams, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }).Run();
                allPsms.AddRange(allPsmsArray.Where(p => p != null));
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }));
            }
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
                      .Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List <string> {
                taskId
            }).Run();

            var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");

            WritePsmsToTsv(allPsms, writtenFile, new Dictionary <string, int>());
            FinishedWritingFile(writtenFile, new List <string> {
                taskId
            });

            // get file-specific precursor mass tolerances for the GPTMD engine
            var filePathToPrecursorMassTolerance = new Dictionary <string, Tolerance>();

            for (int i = 0; i < currentRawFileList.Count; i++)
            {
                string    filePath      = currentRawFileList[i];
                Tolerance fileTolerance = CommonParameters.PrecursorMassTolerance;
                if (fileSettingsList[i] != null && fileSettingsList[i].PrecursorMassTolerance != null)
                {
                    fileTolerance = fileSettingsList[i].PrecursorMassTolerance;
                }
                filePathToPrecursorMassTolerance.Add(filePath, fileTolerance);
            }

            // run GPTMD engine
            var gptmdResults = (GptmdResults) new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List <string> {
                taskId
            }).Run();

            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return(MyTaskResults);
            }

            // write GPTMD databases
            if (dbFilenameList.Any(b => !b.IsContaminant))
            {
                List <string> databaseNames = new List <string>();
                foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
                {
                    var  dbName       = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath);
                    var  theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant();
                    bool compressed   = theExtension.EndsWith("gz");
                    databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName);
                }
                string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

                FinishedWritingFile(outputXMLdbFullName, new List <string> {
                    taskId
                });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false));
                MyTaskResults.AddTaskSummaryText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddTaskSummaryText("Mods types and counts:");
                MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            if (dbFilenameList.Any(b => b.IsContaminant))
            {
                // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written
                //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");
                List <string> databaseNames = new List <string>();
                foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
                {
                    var dbName          = Path.GetFileName(contaminantDb.FilePath);
                    int indexOfFirstDot = dbName.IndexOf(".");
                    databaseNames.Add(dbName.Substring(0, indexOfFirstDot));
                }
                string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

                FinishedWritingFile(outputXMLdbFullNameContaminants, new List <string> {
                    taskId
                });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
                MyTaskResults.AddTaskSummaryText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddTaskSummaryText("Mods types and counts:");
                MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            return(MyTaskResults);
        }
Example #7
0
        public static void TestComputePEPValue()
        {
            var              variableModifications = new List <Modification>();
            var              fixedModifications    = new List <Modification>();
            var              origDataFile          = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
            MyFileManager    myFileManager         = new MyFileManager(true);
            CommonParameters CommonParameters      = new CommonParameters(digestionParams: new DigestionParams());
            var              myMsDataFile          = myFileManager.LoadFile(origDataFile, CommonParameters);
            var              searchModes           = new SinglePpmAroundZeroSearchMode(5);
            List <Protein>   proteinList           = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                                      ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);
            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, CommonParameters).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, CommonParameters, new List <string>()).Run();
            FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run());

            var nonNullPsms             = allPsmsArray.Where(p => p != null).ToList();
            var nonNullPsmsOriginalCopy = allPsmsArray.Where(p => p != null).ToList();
            var accessionCounts         = PEP_Analysis.GetAccessionCounts(nonNullPsms);

            var maxScore    = nonNullPsms.Select(n => n.Score).Max();
            var maxScorePsm = nonNullPsms.Where(n => n.Score == maxScore).First();

            Dictionary <string, int> sequenceToPsmCount = new Dictionary <string, int>();

            List <string> sequences = new List <string>();

            foreach (PeptideSpectralMatch psm in nonNullPsms)
            {
                var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList();
                sequences.Add(String.Join("|", ss));
            }

            var s = sequences.GroupBy(i => i);

            foreach (var grp in s)
            {
                sequenceToPsmCount.Add(grp.Key, grp.Count());
            }

            var maxPsmData = PEP_Analysis.CreateOnePsmDataFromPsm(maxScorePsm, accessionCounts, sequenceToPsmCount);

            Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count, Is.EqualTo(maxPsmData.Ambiguity));
            Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05));
            Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05));

            Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount));
            Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount));
            Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch));
            Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount));
            Assert.That(maxScorePsm.ScanPrecursorCharge, Is.EqualTo(maxPsmData.ScanPrecursorCharge));

            PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms);

            int trueCount = 0;

            foreach (var item in allPsmsArray.Where(p => p != null))
            {
                var b = item.FdrInfo.PEP;
                if (b >= 0.5)
                {
                    trueCount++;
                }
            }

            Assert.GreaterOrEqual(32, trueCount);
        }
        public static void TestReverseDecoyGenerationDuringSearch()
        {
            CommonParameters CommonParameters = new CommonParameters();

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            var variableModifications = new List <Modification>();
            var fixedModifications    = new List <Modification>();

            var proteinList = new List <Protein>
            {
                new Protein("KKAEDGINK", ""), new Protein("AVNSISLK", ""), new Protein("EKAEAEAEK", ""), new Protein("DITANLR", ""), new Protein("QNAIGTAK", ""),
                new Protein("FHKSQLNK", ""), new Protein("KQVAQWNK", ""), new Protein("NTRIEELK", ""), new Protein("RQPAQPR", ""),
            };
            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SmallCalibratible_Yeast.mzML"));


            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            Tolerance DeconvolutionMassTolerance = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();


            var path = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\myPrositLib.msp");

            var testLibrary = new SpectralLibrary(new List <string> {
                path
            });



            //test when doing spectral library search without generating library
            PeptideSpectralMatch[] allPsmsArray1 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray1, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, testLibrary, new List <string>(), false).Run();
            var psm1 = allPsmsArray1.Where(p => p != null).ToList();

            Assert.That(psm1[0].IsDecoy == false && psm1[0].FullSequence == "DITANLR");
            Assert.That(psm1[1].IsDecoy == true && psm1[1].FullSequence == "LSISNVAK");
            Assert.That(psm1[2].IsDecoy == true && psm1[2].FullSequence == "LSISNVAK");
            Assert.That(psm1[3].IsDecoy == false && psm1[3].FullSequence == "RQPAQPR");
            Assert.That(psm1[4].IsDecoy == false && psm1[4].FullSequence == "KKAEDGINK");
            Assert.That(psm1[5].IsDecoy == false && psm1[5].FullSequence == "EKAEAEAEK");
            Assert.That(psm1[6].IsDecoy == false && psm1[6].FullSequence == "EKAEAEAEK");


            proteinList.Add(new Protein("LSISNVAK", "", isDecoy: true));
            //test when doing spectral library search with generating library; non spectral search won't generate decoy by "decoy on the fly" , so proteinlist used by non spectral library search would contain decoys
            PeptideSpectralMatch[] allPsmsArray2 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray2, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, testLibrary, new List <string>(), true).Run();
            var psm2 = allPsmsArray2.Where(p => p != null).ToList();

            Assert.That(psm2[0].IsDecoy == false && psm2[0].FullSequence == "DITANLR");
            Assert.That(psm2[1].IsDecoy == true && psm2[1].FullSequence == "LSISNVAK");
            Assert.That(psm2[2].IsDecoy == true && psm2[2].FullSequence == "LSISNVAK");
            Assert.That(psm2[3].IsDecoy == false && psm2[3].FullSequence == "RQPAQPR");
            Assert.That(psm2[4].IsDecoy == false && psm2[4].FullSequence == "KKAEDGINK");
            Assert.That(psm2[5].IsDecoy == false && psm2[5].FullSequence == "EKAEAEAEK");
            Assert.That(psm2[6].IsDecoy == false && psm2[6].FullSequence == "EKAEAEAEK");

            //test when doing non spectral library search without generating library
            PeptideSpectralMatch[] allPsmsArray3 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray3, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List <string>(), false).Run();
            var psm3 = allPsmsArray3.Where(p => p != null).ToList();

            Assert.That(psm3[0].IsDecoy == false && psm3[0].FullSequence == "DITANLR");
            Assert.That(psm3[1].IsDecoy == true && psm3[1].FullSequence == "LSISNVAK");
            Assert.That(psm3[2].IsDecoy == true && psm3[2].FullSequence == "LSISNVAK");
            Assert.That(psm3[3].IsDecoy == false && psm3[3].FullSequence == "RQPAQPR");
            Assert.That(psm3[4].IsDecoy == false && psm3[4].FullSequence == "KKAEDGINK");
            Assert.That(psm3[5].IsDecoy == false && psm3[5].FullSequence == "EKAEAEAEK");
            Assert.That(psm3[6].IsDecoy == false && psm3[6].FullSequence == "EKAEAEAEK");


            //test when doing non spectral library search with generating library
            PeptideSpectralMatch[] allPsmsArray4 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray4, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List <string>(), true).Run();
            var psm4 = allPsmsArray4.Where(p => p != null).ToList();

            Assert.That(psm4[0].IsDecoy == false && psm4[0].FullSequence == "DITANLR");
            Assert.That(psm4[1].IsDecoy == true && psm4[1].FullSequence == "LSISNVAK");
            Assert.That(psm4[2].IsDecoy == true && psm4[2].FullSequence == "LSISNVAK");
            Assert.That(psm4[3].IsDecoy == false && psm4[3].FullSequence == "RQPAQPR");
            Assert.That(psm4[4].IsDecoy == false && psm4[4].FullSequence == "KKAEDGINK");
            Assert.That(psm4[5].IsDecoy == false && psm4[5].FullSequence == "EKAEAEAEK");
            Assert.That(psm4[6].IsDecoy == false && psm4[6].FullSequence == "EKAEAEAEK");


            //compare psm's target/decoy results in 4 conditions. they should be same as new decoy methods shouldn't change the t/d results
            for (int i = 0; i < psm1.Count; i++)
            {
                Assert.That(psm1[i].FullSequence == psm2[i].FullSequence && psm3[i].FullSequence == psm3[i].FullSequence && psm2[i].FullSequence == psm3[i].FullSequence);
                Assert.That(psm1[i].IsDecoy == psm2[i].IsDecoy && psm3[i].IsDecoy == psm3[i].IsDecoy && psm2[i].IsDecoy == psm3[i].IsDecoy);
            }

            //compare MetaMorpheus scores in 4 conditions; for some psms, they should have a little higher score when "generating library" as they switch to all charges ions matching function
            for (int j = 0; j < psm1.Count; j++)
            {
                if (psm1[j].FullSequence == psm2[j].FullSequence && psm1[j].MatchedFragmentIons.Count != psm2[j].MatchedFragmentIons.Count)
                {
                    Assert.That(psm1[j].Score < psm2[j].Score);
                }
            }
        }
        public static void TestMatchIonsOfAllChargesBottomUp()
        {
            CommonParameters CommonParameters = new CommonParameters();

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            var variableModifications = new List <Modification>();
            var fixedModifications    = new List <Modification>();

            var proteinList = new List <Protein>
            {
                new Protein("AAAHSSLK", ""), new Protein("RQPAQPR", ""), new Protein("EKAEAEAEK", "")
            };
            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SmallCalibratible_Yeast.mzML"));


            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            Tolerance DeconvolutionMassTolerance = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            //search by new method of looking for all charges
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List <string>(), true).Run();
            var psm = allPsmsArray.Where(p => p != null).ToList();

            Assert.That(psm[1].MatchedFragmentIons.Count == 14);
            //there are ions with same product type and same fragment number but different charges
            Assert.That(psm[1].MatchedFragmentIons[8].NeutralTheoreticalProduct.ProductType == psm[1].MatchedFragmentIons[9].NeutralTheoreticalProduct.ProductType &&
                        psm[1].MatchedFragmentIons[8].NeutralTheoreticalProduct.FragmentNumber == psm[1].MatchedFragmentIons[9].NeutralTheoreticalProduct.FragmentNumber &&
                        psm[1].MatchedFragmentIons[8].Charge != psm[1].MatchedFragmentIons[9].Charge);
            Assert.That(psm[2].MatchedFragmentIons.Count == 14);
            Assert.That(psm[4].MatchedFragmentIons.Count == 16);

            //search by old method of looking for only one charge
            PeptideSpectralMatch[] allPsmsArray_oneCharge = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray_oneCharge, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List <string>(), false).Run();
            var psm_oneCharge = allPsmsArray_oneCharge.Where(p => p != null).ToList();

            //compare 2 scores , they should have same integer part but new search has a little higher score than old search
            Assert.That(psm[1].Score > psm_oneCharge[1].Score);
            Assert.AreEqual(Math.Truncate(psm[1].Score), 12);
            Assert.AreEqual(Math.Truncate(psm_oneCharge[1].Score), 12);

            //compare 2 results and evaluate the different matched ions
            var peptideTheorProducts = new List <Product>();

            Assert.That(psm_oneCharge[1].MatchedFragmentIons.Count == 12);
            var differences = psm[1].MatchedFragmentIons.Except(psm_oneCharge[1].MatchedFragmentIons);

            psm[1].BestMatchingPeptides.First().Peptide.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus, peptideTheorProducts);
            foreach (var ion in differences)
            {
                foreach (var product in peptideTheorProducts)
                {
                    if (product.Annotation.ToString().Equals(ion.NeutralTheoreticalProduct.Annotation.ToString()))
                    {
                        //to see if the different matched ions are qualified
                        Assert.That(CommonParameters.ProductMassTolerance.Within(ion.Mz.ToMass(ion.Charge), product.NeutralMass));
                    }
                }
            }

            //test specific condition: unknown fragment mass; this only happens rarely for sequences with unknown amino acids
            var myMsDataFile1          = new TestDataFile();
            var variableModifications1 = new List <Modification>();
            var fixedModifications1    = new List <Modification>();
            var proteinList1           = new List <Protein> {
                new Protein("QXQ", null)
            };
            var productMassTolerance = new AbsoluteTolerance(0.01);
            var searchModes          = new OpenSearchMode();

            Tolerance DeconvolutionMassTolerance1 = new PpmTolerance(5);

            var listOfSortedms2Scans1 = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            List <DigestionMotif> motifs = new List <DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            };
            Protease protease = new Protease("Custom Protease3", CleavageSpecificity.Full, null, null, motifs);

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            CommonParameters CommonParameters1 = new CommonParameters(
                digestionParams: new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1),
                scoreCutoff: 1,
                addCompIons: false);
            var fsp = new List <(string fileName, CommonParameters fileSpecificParameters)>();

            fsp.Add(("", CommonParameters));
            PeptideSpectralMatch[] allPsmsArray1 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            bool writeSpectralLibrary = true;

            new ClassicSearchEngine(allPsmsArray1, listOfSortedms2Scans1, variableModifications1, fixedModifications1, null, null, null,
                                    proteinList1, searchModes, CommonParameters1, fsp, null, new List <string>(), writeSpectralLibrary).Run();

            var psm1 = allPsmsArray1.Where(p => p != null).ToList();

            Assert.AreEqual(psm1.Count, 222);
        }
        public static void TestMatchIonsOfAllChargesTopDown()
        {
            CommonParameters CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams(protease: "top-down"),
                scoreCutoff: 1,
                assumeOrphanPeaksAreZ1Fragments: false);

            MetaMorpheusTask.DetermineAnalyteType(CommonParameters);

            // test output file name (should be proteoform and not peptide)
            Assert.That(GlobalVariables.AnalyteType == "Proteoform");

            var variableModifications = new List <Modification>();
            var fixedModifications    = new List <Modification>();
            var proteinList           = new List <Protein>
            {
                new Protein("MPKVYSYQEVAEHNGPENFWIIIDDKVYDVSQFKDEHPGGDEIIMDLGGQDATESFVDIGHSDEALRLLKGLYIGDVDKTSERVSVEKVSTSENQSKGSGTLVVILAILMLGVAYYLLNE", "P40312")
            };

            var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData\slicedTDYeast.mzML"));

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            Tolerance DeconvolutionMassTolerance = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            //search by new method of looking for all charges
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List <string>(), true).Run();

            var psm = allPsmsArray.Where(p => p != null).FirstOrDefault();

            Assert.That(psm.MatchedFragmentIons.Count == 62);


            //search by old method of looking for only one charge
            PeptideSpectralMatch[] allPsmsArray_oneCharge = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray_oneCharge, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
                                    proteinList, searchMode, CommonParameters, null, null, new List <string>(), false).Run();

            var psm_oneCharge = allPsmsArray_oneCharge.Where(p => p != null).FirstOrDefault();

            Assert.That(psm_oneCharge.MatchedFragmentIons.Count == 47);

            //compare 2 scores , they should have same integer but new search has a little higher score than old search
            Assert.That(psm.Score > psm_oneCharge.Score);
            Assert.AreEqual(Math.Truncate(psm.Score), 47);
            Assert.AreEqual(Math.Truncate(psm_oneCharge.Score), 47);

            //compare 2 results and evaluate the different matched ions
            var peptideTheorProducts = new List <Product>();
            var differences          = psm.MatchedFragmentIons.Except(psm_oneCharge.MatchedFragmentIons);

            psm.BestMatchingPeptides.First().Peptide.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus, peptideTheorProducts);
            foreach (var ion in differences)
            {
                foreach (var product in peptideTheorProducts)
                {
                    if (product.Annotation.ToString().Equals(ion.NeutralTheoreticalProduct.Annotation.ToString()))
                    {
                        //to see if the different matched ions are qualified
                        Assert.That(CommonParameters.ProductMassTolerance.Within(ion.Mz.ToMass(ion.Charge), product.NeutralMass));
                    }
                }
            }
        }
Example #11
0
        private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
            MassDiffAcceptor searchMode  = initPrecTol is PpmTolerance ?
                                           (MassDiffAcceptor) new SinglePpmAroundZeroSearchMode(initPrecTol.Value) :
                                           new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);

            FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;

            if (combinedParameters.BIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.b;
            }
            if (combinedParameters.YIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.y;
            }
            if (combinedParameters.CIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.c;
            }
            if (combinedParameters.ZdotIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.zdot;
            }

            var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters.DoPrecursorDeconvolution, combinedParameters.UseProvidedPrecursorInfo, combinedParameters.DeconvolutionIntensityRatio, combinedParameters.DeconvolutionMaxAssumedChargeState, combinedParameters.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            List <ProductType> productTypes = new List <ProductType>();

            if (combinedParameters.BIons)
            {
                productTypes.Add(ProductType.B);
            }
            if (combinedParameters.YIons)
            {
                productTypes.Add(ProductType.Y);
            }
            if (combinedParameters.CIons)
            {
                productTypes.Add(ProductType.C);
            }
            if (combinedParameters.ZdotIons)
            {
                productTypes.Add(ProductType.Zdot);
            }

            Log("Searching with searchMode: " + searchMode, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });
            Log("Searching with productMassTolerance: " + initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, productTypes, searchMode, combinedParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> allPsms = allPsmsArray.Where(p => p != null).ToList();

            var compactPeptideToProteinPeptideMatching = ((SequencesToActualProteinPeptidesEngineResults) new SequencesToActualProteinPeptidesEngine
                                                              (allPsms, proteinList, fixedModifications, variableModifications, productTypes, new List <DigestionParams> {
                combinedParameters.DigestionParams
            },
                                                              combinedParameters.ReportAllAmbiguity, combinedParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run()).CompactPeptideToProteinPeptideMatching;

            foreach (var huh in allPsms)
            {
                huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            }

            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.01 && !b.IsDecoy && b.FullSequence != null).ToList();

            if (!goodIdentifications.Any())
            {
                return(new DataPointAquisitionResults(null, new List <PeptideSpectralMatch>(), new List <LabeledDataPoint>(), new List <LabeledDataPoint>(), 0, 0, 0, 0));
            }

            DataPointAquisitionResults currentResult = (DataPointAquisitionResults) new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                initProdTol,
                CalibrationParameters.NumFragmentsNeededForEveryIdentification,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
                fragmentTypesForCalibration,
                CommonParameters,
                new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            return(currentResult);
        }
Example #12
0
        public static void TestComputePEPValueTopDown()
        {
            //just making sure that topdown data goes through the pep calculator without crashing.
            CommonParameters CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams(protease: "top-down"),
                scoreCutoff: 1,
                assumeOrphanPeaksAreZ1Fragments: false);

            var            variableModifications = new List <Modification>();
            var            fixedModifications    = new List <Modification>();
            List <Protein> proteinList           = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\HeLaFakeTopDown.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                                    ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);

            var           origDataFile  = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
            MyFileManager myFileManager = new MyFileManager(true);
            var           myMsDataFile  = myFileManager.LoadFile(origDataFile, CommonParameters);

            var searchMode = new SinglePpmAroundZeroSearchMode(5);

            Tolerance DeconvolutionMassTolerance = new PpmTolerance(5);

            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, CommonParameters, new List <string>()).Run();
            var nonNullPsms = allPsmsArray.Where(p => p != null).ToList();
            List <PeptideSpectralMatch> moreNonNullPSMs = new List <PeptideSpectralMatch>();

            int reps = 3;

            for (int i = 0; i < reps; i++)
            {
                foreach (PeptideSpectralMatch psm in nonNullPsms)
                {
                    moreNonNullPSMs.Add(psm);
                }
            }

            FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(moreNonNullPSMs.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run());

            var maxScore = nonNullPsms.Select(n => n.Score).Max();
            PeptideSpectralMatch     maxScorePsm        = nonNullPsms.Where(n => n.Score == maxScore).First();
            Dictionary <string, int> sequenceToPsmCount = new Dictionary <string, int>();
            List <string>            sequences          = new List <string>();

            foreach (PeptideSpectralMatch psm in nonNullPsms)
            {
                var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList();
                sequences.Add(String.Join("|", ss));
            }
            var s = sequences.GroupBy(i => i);

            foreach (var grp in s)
            {
                sequenceToPsmCount.Add(grp.Key, grp.Count());
            }

            Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTimeHI_behavior = new Dictionary <string, Dictionary <int, Tuple <double, double> > >();
            Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTemHI_behaviorModifiedPeptides = new Dictionary <string, Dictionary <int, Tuple <double, double> > >();

            string[] trainingVariables = PsmData.trainingInfos["topDown"];

            int chargeStateMode = 4;

            var(notch, pwsm) = maxScorePsm.BestMatchingPeptides.First();
            var maxPsmData = PEP_Analysis.CreateOnePsmDataEntry(maxScorePsm, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, pwsm, trainingVariables, notch, !pwsm.Protein.IsDecoy);

            Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count - 1, Is.EqualTo(maxPsmData.Ambiguity));
            Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05));
            Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05));
            Assert.AreEqual(maxPsmData.HydrophobicityZScore, float.NaN);
            Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount));
            Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount));
            Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch));
            Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount * reps));
            Assert.That(-Math.Abs(chargeStateMode - maxScorePsm.ScanPrecursorCharge), Is.EqualTo(maxPsmData.PrecursorChargeDiffToMode));
            Assert.AreEqual((float)0, maxPsmData.IsVariantPeptide);
        }
Example #13
0
        public static void TestComputePEPValue()
        {
            var              variableModifications = new List <Modification>();
            var              fixedModifications    = new List <Modification>();
            var              origDataFile          = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
            MyFileManager    myFileManager         = new MyFileManager(true);
            CommonParameters CommonParameters      = new CommonParameters(digestionParams: new DigestionParams());
            var              myMsDataFile          = myFileManager.LoadFile(origDataFile, CommonParameters);
            var              searchModes           = new SinglePpmAroundZeroSearchMode(5);
            List <Protein>   proteinList           = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\hela_snip_for_unitTest.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                                      ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);
            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", CommonParameters).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchModes, CommonParameters, new List <string>()).Run();
            FdrAnalysisResults fdrResultsClassicDelta = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, CommonParameters, new List <string>()).Run());

            var nonNullPsms             = allPsmsArray.Where(p => p != null).ToList();
            var nonNullPsmsOriginalCopy = allPsmsArray.Where(p => p != null).ToList();

            var maxScore = nonNullPsms.Select(n => n.Score).Max();
            PeptideSpectralMatch maxScorePsm = nonNullPsms.Where(n => n.Score == maxScore).First();

            Dictionary <string, int> sequenceToPsmCount = new Dictionary <string, int>();

            List <string> sequences = new List <string>();

            foreach (PeptideSpectralMatch psm in nonNullPsms)
            {
                var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList();
                sequences.Add(String.Join("|", ss));
            }

            var s = sequences.GroupBy(i => i);

            foreach (var grp in s)
            {
                sequenceToPsmCount.Add(grp.Key, grp.Count());
            }

            Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTimeHI_behavior = new Dictionary <string, Dictionary <int, Tuple <double, double> > >();
            Dictionary <string, Dictionary <int, Tuple <double, double> > > fileSpecificRetTemHI_behaviorModifiedPeptides = new Dictionary <string, Dictionary <int, Tuple <double, double> > >();

            //average hydrophobicity, standard deviation hydrophobicity
            Tuple <double, double> at = new Tuple <double, double>(33.0, 1.0);

            Dictionary <int, Tuple <double, double> > HI_Time_avg_dev = new Dictionary <int, Tuple <double, double> >
            {
                { 154, at }
            };

            fileSpecificRetTimeHI_behavior.Add(@"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML", HI_Time_avg_dev);

            string[] trainingVariables = new[] { "HydrophobicityZScore", "Intensity", "ScanPrecursorCharge", "DeltaScore", "Notch", "PsmCount", "ModsCount", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "IsVariantPeptide" };

            int chargeStateMode = 4;

            var(notch, pwsm) = maxScorePsm.BestMatchingPeptides.First();
            var maxPsmData = PEP_Analysis.CreateOnePsmDataEntry(maxScorePsm, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, pwsm, trainingVariables, notch, !pwsm.Protein.IsDecoy);

            Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count - 1, Is.EqualTo(maxPsmData.Ambiguity));
            Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05));
            Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05));
            Assert.That(maxPsmData.HydrophobicityZScore, Is.EqualTo(5.170955).Within(0.05));
            Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount));
            Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount));
            Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch));
            Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount));
            Assert.That(-Math.Abs(chargeStateMode - maxScorePsm.ScanPrecursorCharge), Is.EqualTo(maxPsmData.PrecursorChargeDiffToMode));
            Assert.AreEqual((float)0, maxPsmData.IsVariantPeptide);

            PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms);

            int trueCount = 0;

            foreach (var item in allPsmsArray.Where(p => p != null))
            {
                var b = item.FdrInfo.PEP;
                if (b >= 0.5)
                {
                    trueCount++;
                }
            }

            List <PeptideSpectralMatch> moreNonNullPSMs = new List <PeptideSpectralMatch>();

            for (int i = 0; i < 3; i++)
            {
                foreach (PeptideSpectralMatch psm in nonNullPsms)
                {
                    moreNonNullPSMs.Add(psm);
                }
            }

            string expectedMetrics = "************************************************************\r\n*       Metrics for Determination of PEP Using Binary Classification      \r\n" +
                                     "*-----------------------------------------------------------\r\n*       Accuracy:  1\r\n*       Area Under Curve:  1\r\n*       Area under Precision recall Curve:  1\r\n*       F1Score:  1\r\n" +
                                     "*       LogLoss:  2.60551851621861E-10\r\n*       LogLossReduction:  0.999999999599165\r\n*       PositivePrecision:  1\r\n*       PositiveRecall:  1\r\n*       NegativePrecision:  1\r\n" +
                                     "*       NegativeRecall:  1\r\n*       Count of Ambiguous Peptides Removed:  0\r\n************************************************************\r\n";

            string metrics = PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(moreNonNullPSMs);

            Assert.AreEqual(expectedMetrics, metrics);
            Assert.GreaterOrEqual(32, trueCount);

            //Test Variant Peptide as Input is identified as such as part of PEP calculation input much of the next several lines simply necessry to create a psm.

            var anMzSpectrum             = new MzSpectrum(new double[] { 1, 1 }, new double[] { 2, 2 }, true);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(new MsDataScan(anMzSpectrum, 1, 1, true, Polarity.Negative, 2, null, "", MZAnalyzerType.Orbitrap, 2, null, null, null), 1, 1, "path", new CommonParameters());
            Protein variantProtein       = new Protein("MPEPPPTIDE", "protein3", sequenceVariations: new List <SequenceVariation> {
                new SequenceVariation(4, 6, "PPP", "P", @"1\t50000000\t.\tA\tG\t.\tPASS\tANN=G||||||||||||||||\tGT:AD:DP\t1/1:30,30:30", null)
            });
            PeptideWithSetModifications varPep     = variantProtein.GetVariantProteins().SelectMany(p => p.Digest(CommonParameters.DigestionParams, null, null)).FirstOrDefault();
            PeptideSpectralMatch        variantPSM = new PeptideSpectralMatch(varPep, 0, maxScorePsm.Score, maxScorePsm.ScanIndex, scan, new DigestionParams(), null);

            sequenceToPsmCount = new Dictionary <string, int>();
            sequences          = new List <string>();
            nonNullPsms.Add(variantPSM);
            foreach (PeptideSpectralMatch psm in nonNullPsms)
            {
                var ss = psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList();
                sequences.Add(String.Join("|", ss));
            }

            s = sequences.GroupBy(i => i);

            foreach (var grp in s)
            {
                sequenceToPsmCount.Add(grp.Key, grp.Count());
            }
            var(vnotch, vpwsm) = variantPSM.BestMatchingPeptides.First();
            PsmData variantPsmData = PEP_Analysis.CreateOnePsmDataEntry(variantPSM, sequenceToPsmCount, fileSpecificRetTimeHI_behavior, fileSpecificRetTemHI_behaviorModifiedPeptides, chargeStateMode, vpwsm, trainingVariables, vnotch, !maxScorePsm.IsDecoy);

            Assert.AreEqual((float)1, variantPsmData.IsVariantPeptide);
        }
Example #14
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            // disable quantification if a .mgf is being used
            if (SearchParameters.DoQuantification && currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
            {
                SearchParameters.DoQuantification = false;
            }

            // load modifications
            Status("Loading modifications...", taskId);
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = GlobalVariables.AllModTypesKnown.ToList();

            // what types of fragment ions to search for
            List <ProductType> ionTypes = new List <ProductType>();

            if (CommonParameters.BIons && CommonParameters.AddCompIons)
            {
                ionTypes.Add(ProductType.B);
            }
            else if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.BnoB1ions);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms         = new List <PeptideSpectralMatch>();
            FlashLFQResults             flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet <DigestionParams> ListOfDigestionParams = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"
            });

            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
                myFileManager.DoneWithFile(origDataFile);

                var fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                {
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                    {
                        List <CompactPeptide> peptideIndex      = null;
                        List <Protein>        proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                            taskId
                        });
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List <string> {
                            taskId
                        });
                        List <int>[] fragmentIndex = null;
                        lock (indexLock)
                        {
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);
                        }

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                    }
                }
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                {
                    List <List <ProductType> > terminusSeparatedIons = ProductTypeMethods.SeparateIonsByTerminus(ionTypes);
                    foreach (List <ProductType> terminusSpecificIons in terminusSeparatedIons)
                    {
                        for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                        {
                            List <CompactPeptide> peptideIndex      = null;
                            List <Protein>        proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                            List <int>[] fragmentIndex = new List <int> [1];

                            Status("Getting fragment dictionary...", new List <string> {
                                taskId
                            });
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);

                            Status("Getting precursor dictionary...", new List <string> {
                                taskId
                            });
                            List <CompactPeptide> peptideIndexPrecursor      = null;
                            List <Protein>        proteinListSubsetPrecursor = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));
                            List <int>[]          fragmentIndexPrecursor     = new List <int> [1];
                            var indexEnginePrecursor = new PrecursorIndexingEngine(proteinListSubsetPrecursor, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, 0, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                                GenerateIndexes(indexEnginePrecursor, dbFilenameList, ref peptideIndexPrecursor, ref fragmentIndexPrecursor, taskId);

                            if (peptideIndex.Count != peptideIndexPrecursor.Count)
                            {
                                throw new MetaMorpheusException("peptideIndex not identical between indexing engines");
                            }

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, fragmentIndexPrecursor, terminusSpecificIons, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        }
                    }
                }
                // classic search
                else
                {
                    Status("Starting search...", thisId);
                    new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, massDiffAcceptor, combinedParams, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
                }
                lock (psmLock)
                {
                    allPsms.AddRange(fileSpecificPsms.Where(p => p != null));
                }

                completedFiles++;
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters();

            parameters.SearchTaskResults             = MyTaskResults;
            parameters.SearchTaskId                  = taskId;
            parameters.SearchParameters              = SearchParameters;
            parameters.ProteinList                   = proteinList;
            parameters.IonTypes                      = ionTypes;
            parameters.AllPsms                       = allPsms;
            parameters.FixedModifications            = fixedModifications;
            parameters.VariableModifications         = variableModifications;
            parameters.ListOfDigestionParams         = ListOfDigestionParams;
            parameters.CurrentRawFileList            = currentRawFileList;
            parameters.MyFileManager                 = myFileManager;
            parameters.NumNotches                    = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);
            parameters.OutputFolder                  = OutputFolder;
            parameters.IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results");
            parameters.FlashLfqResults               = flashLfqResults;
            parameters.FileSettingsList              = fileSettingsList;
            parameters.NumMs2SpectraPerFile          = numMs2SpectraPerFile;
            parameters.DatabaseFilenameList          = dbFilenameList;
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask();

            postProcessing.Parameters       = parameters;
            postProcessing.CommonParameters = CommonParameters;
            return(postProcessing.Run());
        }