Beispiel #1
0
        public static void TestRunSpecificPostSearchAnalysis()
        {
            //code coverage unit test for an unused abstract method in post search analysis
            var task = new PostSearchAnalysisTask();

            task.RunTask(TestContext.CurrentContext.TestDirectory, new List <DbForTask>(), new List <string>(), "");
        }
        public static void TestProteinPrunedWithModSelectionAndVariants()
        {
            var modToWrite    = GlobalVariables.AllModsKnown.Where(p => p.ModificationType == "UniProt" && p.Target.ToString() == "T").First();
            var modToNotWrite = GlobalVariables.AllModsKnown.Where(p => p.ModificationType == "Common Artifact" && p.Target.ToString() == "X").First();
            Dictionary <int, List <Modification> > variantMods = new Dictionary <int, List <Modification> >();

            variantMods.Add(1, new List <Modification>()
            {
                modToNotWrite
            });

            List <SequenceVariation> variants = new List <SequenceVariation> {
                new SequenceVariation(4, 4, "V", "T", @"20\t41168825\t.\tT\tC\t14290.77\t.\tANN=C|missense_variant|MODERATE|PLCG1|ENSG00000124181|transcript|ENST00000244007.7|protein_coding|22/33|c.2438T>C|p.Ile813Thr|2635/5285|2438/3876|813/1291||\tGT:AD:DP:GQ:PL\t1/1:1,392:393:99:14319,1142,0", variantMods)
            };

            var protein1 = new Protein("PEPVIDEKPEPT", "1", oneBasedModifications: new Dictionary <int, List <Modification> > {
                { 1, new List <Modification> {
                      modToNotWrite
                  } }, { 12, new List <Modification> {
                             modToWrite
                         } }
            }, sequenceVariations: variants);
            var protein2 = new Protein("PEPIDPEPT", "2", oneBasedModifications: new Dictionary <int, List <Modification> > {
                { 1, new List <Modification> {
                      modToNotWrite
                  } }, { 9, new List <Modification> {
                             modToWrite
                         } }
            });
            var protein1Variants = protein1.GetVariantProteins(1, 0);

            string path = @"temp";

            var proteinList = new List <Protein> {
                protein1, protein2
            };

            proteinList.AddRange(protein1Variants);


            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), proteinList, path);

            Directory.CreateDirectory(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTestVariant"));

            Dictionary <string, HashSet <Tuple <int, Modification> > > modList = new Dictionary <string, HashSet <Tuple <int, Modification> > >();
            var Hash = new HashSet <Tuple <int, Modification> >
            {
                new Tuple <int, Modification>(1, modToWrite),
                new Tuple <int, Modification>(2, modToNotWrite),
            };

            var db = ProteinDbWriter.WriteXmlDatabase(modList, proteinList, Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTestVariant/fakeDb.xml"));

            var peptideObserved = protein1Variants.First().Digest(new DigestionParams(minPeptideLength: 1), new List <Modification>(), new List <Modification>())
                                  .Where(p => p.BaseSequence == "PEPT").First();
            PostSearchAnalysisParameters testPostTaskParameters = new PostSearchAnalysisParameters();
            CommonParameters             commonParam            = new CommonParameters(useDeltaScore: false);

            double[,] noiseData = new double[10000, 10000];
            noiseData[0, 0]     = 1.0;
            List <Proteomics.Fragmentation.MatchedFragmentIon> matchedFragmentIons = new List <Proteomics.Fragmentation.MatchedFragmentIon>()
            {
            };
            MzSpectrum spectrum = new MzSpectrum(noiseData);
            MsDataScan scan     = new MsDataScan(spectrum, 1, 1, true, Polarity.Unknown, 2, new MzLibUtil.MzRange(10, 1000), "", MZAnalyzerType.Orbitrap, 10000, null, noiseData, "");

            testPostTaskParameters.ProteinList = proteinList;
            testPostTaskParameters.AllPsms     = new List <PeptideSpectralMatch> {
                new PeptideSpectralMatch(peptideObserved, 0, 20, 1, new Ms2ScanWithSpecificMass(scan, 100, 1, @"", commonParam), commonParam, matchedFragmentIons)
            };
            testPostTaskParameters.SearchParameters = new SearchParameters();
            testPostTaskParameters.SearchParameters.WritePrunedDatabase = true;
            testPostTaskParameters.SearchParameters.DoQuantification    = false;
            testPostTaskParameters.SearchParameters.WriteMzId           = false;
            testPostTaskParameters.DatabaseFilenameList = new List <DbForTask>()
            {
                new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDb.xml"), false)
            };
            testPostTaskParameters.OutputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest");
            Directory.CreateDirectory(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/individual"));
            testPostTaskParameters.IndividualResultsOutputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/individual");
            int[] stuffForSpectraFile = new int[2];
            stuffForSpectraFile[0] = 10;
            stuffForSpectraFile[1] = 10;
            Dictionary <string, int[]> numSpectraPerFile = new Dictionary <string, int[]>();

            numSpectraPerFile.Add("", stuffForSpectraFile);
            testPostTaskParameters.NumMs2SpectraPerFile = numSpectraPerFile;

            MsDataFile myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications>
            {
                peptideObserved
            });
            string mzmlName = @"newMzml.mzML";

            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

            modList.Add("test", Hash);

            testPostTaskParameters.CurrentRawFileList = new List <string>()
            {
                mzmlName
            };

            SearchTask task5 = new SearchTask
            {
                SearchParameters = new SearchParameters
                {
                    WritePrunedDatabase  = true,
                    SearchTarget         = true,
                    MassDiffAcceptorType = MassDiffAcceptorType.Exact,
                },
                CommonParameters = new CommonParameters()
            };

            var test = task5.RunTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest"), new List <DbForTask>()
            {
                new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDb.xml"), false)
            }, new List <string>()
            {
                mzmlName
            }, "name");

            testPostTaskParameters.SearchTaskResults = test;

            PostSearchAnalysisTask testPostTask = new PostSearchAnalysisTask();

            testPostTask.Parameters             = testPostTaskParameters;
            testPostTask.CommonParameters       = commonParam;
            testPostTask.FileSpecificParameters = new List <(string FileName, CommonParameters Parameters)> {
                ("newMzMl.mzml", commonParam)
            };
            testPostTask.Run();

            var proteinsLoaded = ProteinDbLoader.LoadProteinXML(path, true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods);

            // assert that mods on proteins are the same before/after task is run
            Assert.AreEqual(protein1Variants.First().Accession, proteinsLoaded.First().Accession);
            Assert.AreEqual(protein1Variants.First().OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.First().OneBasedPossibleLocalizedModifications.Count());
            Assert.AreEqual(protein2.OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.ElementAt(1).OneBasedPossibleLocalizedModifications.Count());

            // assert that protein pruned DB has correct proteins mods
            var proteinPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDbproteinPruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods1);

            Assert.That(proteinPruned.Count().Equals(1));
            Assert.That(proteinPruned.FirstOrDefault().OneBasedPossibleLocalizedModifications.Count().Equals(1));
            // assert that mod-pruned DB has correct proteins and mods
            var modPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDbpruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods2);

            Assert.That(modPruned.Count().Equals(2));
            Assert.That(modPruned.ElementAt(0).OneBasedPossibleLocalizedModifications.Count().Equals(1));
            Assert.That(modPruned.ElementAt(1).OneBasedPossibleLocalizedModifications.Count().Equals(1));
        }
Beispiel #3
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            if (SearchParameters.DoQuantification)
            {
                // disable quantification if a .mgf is being used
                if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
                {
                    SearchParameters.DoQuantification = false;
                }
                //if we're doing SILAC, assign and add the silac labels to the residue dictionary
                else if (SearchParameters.SilacLabels != null || SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                {
                    char heavyLabel = 'a'; //char to assign
                    //add the Turnoverlabels to the silacLabels list. They weren't there before just to prevent duplication in the tomls
                    if (SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                    {
                        //original silacLabels object is null, so we need to initialize it
                        SearchParameters.SilacLabels = new List <SilacLabel>();
                        if (SearchParameters.StartTurnoverLabel != null)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.StartTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.StartTurnoverLabel = updatedLabel.updatedLabel;
                            SearchParameters.SilacLabels.Add(SearchParameters.StartTurnoverLabel);
                        }
                        if (SearchParameters.EndTurnoverLabel != null)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.EndTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.EndTurnoverLabel = updatedLabel.updatedLabel;
                            SearchParameters.SilacLabels.Add(SearchParameters.EndTurnoverLabel);
                        }
                    }
                    else
                    {
                        //change the silac residues to lower case amino acids (currently null)
                        List <SilacLabel> updatedLabels = new List <SilacLabel>();
                        for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.SilacLabels[i], heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            updatedLabels.Add(updatedLabel.updatedLabel);
                        }
                        SearchParameters.SilacLabels = updatedLabels;
                    }
                }
            }
            //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
            if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
            {
                SearchParameters.SilacLabels = null;
            }

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            SanitizeProteinDatabase(proteinList, SearchParameters.TCAmbiguity);

            // load spectral libraries
            var spectralLibrary = LoadSpectralLibraries(taskId, dbFilenameList);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
                                            + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
            int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast <FdrCategory>().Last() + 1); //+1 because it starts at zero

            List <PeptideSpectralMatch>[] allCategorySpecificPsms = new List <PeptideSpectralMatch> [numFdrCategories];
            for (int i = 0; i < numFdrCategories; i++)
            {
                allCategorySpecificPsms[i] = new List <PeptideSpectralMatch>();
            }

            FlashLfqResults flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"
            });

            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
                myFileManager.DoneWithFile(origDataFile);

                PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                {
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                    {
                        List <PeptideWithSetModifications> peptideIndex = null;
                        List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                                                                                ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                            taskId
                        });
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                             SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, combinedParams, FileSpecificParameters,
                                                             SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List <string> {
                            taskId
                        });
                        List <int>[] fragmentIndex  = null;
                        List <int>[] precursorIndex = null;

                        lock (indexLock)
                        {
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                        }

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition,
                                               combinedParams, this.FileSpecificParameters, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        if (GlobalVariables.StopLoops)
                        {
                            break;
                        }
                    }
                }
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                {
                    PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
                    for (int i = 0; i < numFdrCategories; i++)                                                                      //only add if we're using for FDR, else ignore it as null.
                    {
                        fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                    }

                    //create params for N, C, or both if semi
                    List <CommonParameters> paramsToUse = new List <CommonParameters> {
                        combinedParams
                    };
                    if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
                    {
                        paramsToUse.Clear();
                        List <FragmentationTerminus> terminiToUse = new List <FragmentationTerminus> {
                            FragmentationTerminus.N, FragmentationTerminus.C
                        };
                        foreach (FragmentationTerminus terminus in terminiToUse) //set both termini
                        {
                            paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                        }
                    }

                    //Compress array of deconvoluted ms2 scans to avoid searching the same ms2 multiple times while still identifying coisolated peptides
                    List <int>[] coisolationIndex = new List <int>[] { new List <int>() };
                    if (arrayOfMs2ScansSortedByMass.Length != 0)
                    {
                        int maxScanNumber = arrayOfMs2ScansSortedByMass.Max(x => x.OneBasedScanNumber);
                        coisolationIndex = new List <int> [maxScanNumber + 1];
                        for (int i = 0; i < arrayOfMs2ScansSortedByMass.Length; i++)
                        {
                            int scanNumber = arrayOfMs2ScansSortedByMass[i].OneBasedScanNumber;
                            if (coisolationIndex[scanNumber] == null)
                            {
                                coisolationIndex[scanNumber] = new List <int> {
                                    i
                                };
                            }
                            else
                            {
                                coisolationIndex[scanNumber].Add(i);
                            }
                        }
                        coisolationIndex = coisolationIndex.Where(x => x != null).ToArray();
                    }

                    //foreach terminus we're going to look at
                    foreach (CommonParameters paramToUse in paramsToUse)
                    {
                        //foreach database partition
                        for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                        {
                            List <PeptideWithSetModifications> peptideIndex = null;

                            List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                                                                                    ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                            List <int>[] fragmentIndex  = null;
                            List <int>[] precursorIndex = null;

                            Status("Getting fragment dictionary...", new List <string> {
                                taskId
                            });
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                                 SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, paramToUse, FileSpecificParameters,
                                                                 SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                            {
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                            }

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, coisolationIndex, peptideIndex, fragmentIndex,
                                                              precursorIndex, currentPartition, paramToUse, this.FileSpecificParameters, variableModifications, massDiffAcceptor,
                                                              SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                            if (GlobalVariables.StopLoops)
                            {
                                break;
                            }
                        }
                    }
                    lock (psmLock)
                    {
                        for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                        {
                            if (allCategorySpecificPsms[i] != null)
                            {
                                allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                            }
                        }
                    }
                }
                // classic search
                else
                {
                    Status("Starting search...", thisId);
                    var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                                         SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId, SearchParameters.WriteSpectralLibrary);
                    newClassicSearchEngine.Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
                }

                //look for internal fragments
                if (SearchParameters.MinAllowedInternalFragmentLength != 0)
                {
                    MatchInternalFragmentIons(fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams, SearchParameters.MinAllowedInternalFragmentLength);
                }

                // calculate/set spectral angles if there is a spectral library being used
                if (spectralLibrary != null)
                {
                    Status("Calculating spectral library similarity...", thisId);
                }
                SpectralLibrarySearchFunction.CalculateSpectralAngles(spectralLibrary, fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams);

                lock (psmLock)
                {
                    allPsms.AddRange(fileSpecificPsms);
                }

                completedFiles++;
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            if (spectralLibrary != null)
            {
                spectralLibrary.CloseConnections();
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            //resolve category specific fdrs (for speedy semi and nonspecific
            if (SearchParameters.SearchType == SearchType.NonSpecific)
            {
                allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters, FileSpecificParameters);
            }

            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
            {
                SearchTaskResults             = MyTaskResults,
                SearchTaskId                  = taskId,
                SearchParameters              = SearchParameters,
                ProteinList                   = proteinList,
                AllPsms                       = allPsms,
                VariableModifications         = variableModifications,
                FixedModifications            = fixedModifications,
                ListOfDigestionParams         = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
                CurrentRawFileList            = currentRawFileList,
                MyFileManager                 = myFileManager,
                NumNotches                    = numNotches,
                OutputFolder                  = OutputFolder,
                IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
                FlashLfqResults               = flashLfqResults,
                FileSettingsList              = fileSettingsList,
                NumMs2SpectraPerFile          = numMs2SpectraPerFile,
                DatabaseFilenameList          = dbFilenameList
            };
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
            {
                Parameters             = parameters,
                FileSpecificParameters = this.FileSpecificParameters,
                CommonParameters       = CommonParameters
            };

            return(postProcessing.Run());
        }
Beispiel #4
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            // disable quantification if a .mgf is being used
            if (SearchParameters.DoQuantification && currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
            {
                SearchParameters.DoQuantification = false;
            }

            // load modifications
            Status("Loading modifications...", taskId);
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = GlobalVariables.AllModTypesKnown.ToList();

            // what types of fragment ions to search for
            List <ProductType> ionTypes = new List <ProductType>();

            if (CommonParameters.BIons && CommonParameters.AddCompIons)
            {
                ionTypes.Add(ProductType.B);
            }
            else if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.BnoB1ions);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms         = new List <PeptideSpectralMatch>();
            FlashLFQResults             flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet <DigestionParams> ListOfDigestionParams = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"
            });

            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
                myFileManager.DoneWithFile(origDataFile);

                var fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                {
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                    {
                        List <CompactPeptide> peptideIndex      = null;
                        List <Protein>        proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                            taskId
                        });
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List <string> {
                            taskId
                        });
                        List <int>[] fragmentIndex = null;
                        lock (indexLock)
                        {
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);
                        }

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                    }
                }
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                {
                    List <List <ProductType> > terminusSeparatedIons = ProductTypeMethods.SeparateIonsByTerminus(ionTypes);
                    foreach (List <ProductType> terminusSpecificIons in terminusSeparatedIons)
                    {
                        for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                        {
                            List <CompactPeptide> peptideIndex      = null;
                            List <Protein>        proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                            List <int>[] fragmentIndex = new List <int> [1];

                            Status("Getting fragment dictionary...", new List <string> {
                                taskId
                            });
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);

                            Status("Getting precursor dictionary...", new List <string> {
                                taskId
                            });
                            List <CompactPeptide> peptideIndexPrecursor      = null;
                            List <Protein>        proteinListSubsetPrecursor = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));
                            List <int>[]          fragmentIndexPrecursor     = new List <int> [1];
                            var indexEnginePrecursor = new PrecursorIndexingEngine(proteinListSubsetPrecursor, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, 0, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                                GenerateIndexes(indexEnginePrecursor, dbFilenameList, ref peptideIndexPrecursor, ref fragmentIndexPrecursor, taskId);

                            if (peptideIndex.Count != peptideIndexPrecursor.Count)
                            {
                                throw new MetaMorpheusException("peptideIndex not identical between indexing engines");
                            }

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, fragmentIndexPrecursor, terminusSpecificIons, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        }
                    }
                }
                // classic search
                else
                {
                    Status("Starting search...", thisId);
                    new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, massDiffAcceptor, combinedParams, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
                }
                lock (psmLock)
                {
                    allPsms.AddRange(fileSpecificPsms.Where(p => p != null));
                }

                completedFiles++;
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters();

            parameters.SearchTaskResults             = MyTaskResults;
            parameters.SearchTaskId                  = taskId;
            parameters.SearchParameters              = SearchParameters;
            parameters.ProteinList                   = proteinList;
            parameters.IonTypes                      = ionTypes;
            parameters.AllPsms                       = allPsms;
            parameters.FixedModifications            = fixedModifications;
            parameters.VariableModifications         = variableModifications;
            parameters.ListOfDigestionParams         = ListOfDigestionParams;
            parameters.CurrentRawFileList            = currentRawFileList;
            parameters.MyFileManager                 = myFileManager;
            parameters.NumNotches                    = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);
            parameters.OutputFolder                  = OutputFolder;
            parameters.IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results");
            parameters.FlashLfqResults               = flashLfqResults;
            parameters.FileSettingsList              = fileSettingsList;
            parameters.NumMs2SpectraPerFile          = numMs2SpectraPerFile;
            parameters.DatabaseFilenameList          = dbFilenameList;
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask();

            postProcessing.Parameters       = parameters;
            postProcessing.CommonParameters = CommonParameters;
            return(postProcessing.Run());
        }