Ejemplo n.º 1
0
        /// <summary>
        /// Performs the setup steps that run before an experiment starts.
        /// </summary>
        /// <remarks>
        /// Calls ConstructDecoyFasta and ConstructIDX. Only when GlobalVar.IsSimulation is set, it
        /// also loads the simulation MS2 spectra and registers the baseline ProteinProphet result
        /// with PerformanceEvaluator. Finally it loads the exclusion database and initializes Comet.
        /// </remarks>
        static void PreExperimentSetUp()
        {
            ConstructDecoyFasta();
            ConstructIDX();

            if (GlobalVar.IsSimulation)
            {
                // DDA number under which the baseline comparison is registered.
                const int baselineDdaNum = 12;

                ms2SpectraList = Loader.parseMS2File(InputFileOrganizer.MS2SimulationTestFile).getSpectraArray();
                GlobalVar.ExperimentTotalScans = ms2SpectraList.Count;
                FullPepXMLAndProteinProphetSetup();
                baseLinePpr = ProteinProphetEvaluator.getProteinProphetResult(InputFileOrganizer.OriginalProtXMLFile);

                // In Alex's original code, "original experiment" refers to the experiment without any
                // exclusion or manipulation by this program, while "baseline comparison" refers to the
                // results of a "NoExclusion" run (a top 6 or top 12 DDA run), which is not implemented
                // in this program. The two are therefore the same in this program.
                int totalScans = (int)GlobalVar.ExperimentTotalScans;
                PerformanceEvaluator.setBaselineComparison(baseLinePpr, totalScans, baselineDdaNum);
                PerformanceEvaluator.setOriginalExperiment(baseLinePpr.getNum_proteins_identified());
            }

            log.Debug("Setting up Database");
            database = databaseSetUp(InputFileOrganizer.ExclusionDBFasta);
            log.Debug("Done setting up database.");

            CometSingleSearch.InitializeComet(InputFileOrganizer.IDXDataBase, InputFileOrganizer.CometParamsFile);
            //CometSingleSearch.InitializeComet_NonRealTime("C:\\Coding\\2019LavalleeLab\\temp2\\ModifiedDBSearchFiles\\NoExclusionRealTimeCometSearch.tsv");
            //CometSingleSearch.QualityCheck();
            Console.WriteLine("pre-experimental setup finished");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Writes a partial pepXML file for the spectra used by the given exclusion profile and
        /// runs ProteinProphet on it.
        /// </summary>
        /// <param name="exclusionProfile">Profile whose getSpectraUsed() result selects the spectra written.</param>
        /// <param name="experimentName">Prefix of the partial pepXML file name.</param>
        /// <param name="keepResults">Forwarded unchanged to RunProteinProphet.</param>
        /// <returns>The result returned by RunProteinProphet for the partial file.</returns>
        public static ProteinProphetResult postProcessing(ExclusionProfile exclusionProfile, String experimentName,
                                                          Boolean keepResults)
        {
            // Partial files live in a "PartialCometFile" sub-folder of the run's output folder.
            String partialFolder = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "PartialCometFile");
            if (!Directory.Exists(partialFolder))
            {
                Directory.CreateDirectory(partialFolder);
            }

            String partialFileName = experimentName + "_partial" + InputFileOrganizer.PepXMLSuffix;
            String outputCometFile = Path.Combine(partialFolder, partialFileName);

            // TODO: this call was using the MZML file instead of the MS2 file.
            PartialPepXMLWriter.writePartialPepXMLFile(
                InputFileOrganizer.OriginalCometOutput,
                exclusionProfile.getSpectraUsed(),
                outputCometFile,
                InputFileOrganizer.MS2SimulationTestFile,
                InputFileOrganizer.FASTA_FILE,
                outputCometFile);

            // NOTE: the partial file is currently kept on disk regardless of keepResults.
            return RunProteinProphet(outputCometFile, InputFileOrganizer.OutputFolderOfTheRun, keepResults);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Captures the baseline ("no exclusion") figures that later runs are compared against.
 /// </summary>
 /// <param name="ppr">ProteinProphet result supplying the identified-protein list.</param>
 /// <param name="numMS2">Stored as the total resources of the naive experiment.</param>
 /// <param name="ddaNum">DDA number this baseline belongs to.</param>
 public BaselineComparison(ProteinProphetResult ppr, int numMS2, int ddaNum)
 {
     var identifiedProteins = ppr.getProteinsIdentified();

     proteinsIdentifiedByNoExclusion      = identifiedProteins;
     numProteinsIdentifiedNaiveExperiment = identifiedProteins.Count;
     totalResourcesNaiveExperiment        = numMS2;
     this.ddaNum = ddaNum;
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Finalizes the performance evaluator with the supplied run information and returns its
 /// output.
 /// </summary>
 /// <returns>The value returned by performanceEvaluator.outputPerformance().</returns>
 public String getPerformanceVector(String experimentName, String experimentType, double analysisTime,
                                    double totalRunTime, ProteinProphetResult ppr, int ddaNum, ExclusionProfile exclusionProfile)
 {
     // The exclusion list comes from this instance; everything else is passed through.
     performanceEvaluator.finalizePerformanceEvaluator(
         experimentName, experimentType, analysisTime, totalRunTime,
         exclusionList, ppr, ddaNum, exclusionProfile);

     String performanceVector = performanceEvaluator.outputPerformance();
     return performanceVector;
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Registers a baseline comparison for the given DDA number unless one is already registered.
 /// </summary>
 /// <param name="ppr">ProteinProphet result passed to the BaselineComparison constructor.</param>
 /// <param name="numMS2">MS2 count passed to the BaselineComparison constructor.</param>
 /// <param name="ddaNum">Key under which the baseline is stored.</param>
 public static void setBaselineComparison(ProteinProphetResult ppr, int numMS2, int ddaNum)
 {
     // The first registration for a DDA number wins; later calls are ignored.
     if (baselineComparisonSet.ContainsKey(ddaNum))
     {
         return;
     }

     baselineComparisonSet.Add(ddaNum, new BaselineComparison(ppr, numMS2, ddaNum));
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs ProteinProphet on a fixed pep.xml test file and prints the result to the console.
        /// </summary>
        public static void DoJob()
        {
            // Hard-coded, machine-specific test data locations.
            String cometPepXmlPath = "C:\\Users\\LavalleeLab\\Documents\\JoshTemp\\RealTimeMS\\TestData\\MS_QC_120min.pep.xml";
            String outputFolder    = "C:\\Users\\LavalleeLab\\Documents\\JoshTemp\\RealTimeMS\\TestData\\";

            ProteinProphetResult result = PostProcessingScripts.RunProteinProphet(cometPepXmlPath, outputFolder, true);
            Console.WriteLine(result.ToString());
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Records how many proteins were identified and how many of them also appear in the
 /// no-exclusion list. Does nothing when <paramref name="ppr"/> is null.
 /// </summary>
 /// <param name="ppr">ProteinProphet result of the run; may be null.</param>
 /// <param name="proteinsIdentifiedByNoExclusion">Proteins identified by the no-exclusion run.</param>
 private void setProteinsIdentified(ProteinProphetResult ppr, List <String> proteinsIdentifiedByNoExclusion)
 {
     if (ppr == null)
     {
         return;
     }

     List<String> identified = ppr.getProteinsIdentified();
     int sharedWithNoExclusion = compareProteins(proteinsIdentifiedByNoExclusion, identified);

     ChangeValue(Header.NumProteinsIdentified, identified.Count);
     ChangeValue(Header.ProteinsIdentifiedInLimitedDDA, sharedWithNoExclusion);
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates a ProteinProphet file description and builds its ProteinProphetResult from the
        /// given thresholds and the names returned by getProteinNames().
        /// </summary>
        /// <param name="protXMLFileName">Name of the prot.xml file this object describes.</param>
        /// <param name="proteinsToPeptides">Map stored on the instance; must not be null.</param>
        /// <param name="fdr_threshold">Forwarded to the ProteinProphetResult constructor.</param>
        /// <param name="protein_probablity_threshold">Forwarded to the ProteinProphetResult constructor.</param>
        /// <exception cref="ArgumentNullException"><paramref name="proteinsToPeptides"/> is null.</exception>
        public ProteinProphetFile(String protXMLFileName, Dictionary <String, List <String> > proteinsToPeptides,
                                  double fdr_threshold, double protein_probablity_threshold)
        {
            // The previous code rejected a null map only by accident, through an unused
            // "proteinsToPeptides.Keys.Count" local. Make the requirement explicit instead.
            if (proteinsToPeptides == null)
            {
                throw new ArgumentNullException("proteinsToPeptides");
            }

            this.protXMLFileName    = protXMLFileName;
            this.proteinsToPeptides = proteinsToPeptides;

            // getProteinNames() is called only after the fields above have been assigned.
            proteinProphetResult = new ProteinProphetResult(fdr_threshold, protein_probablity_threshold,
                                                            getProteinNames());
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs a ProteinProphet search on a Comet output file and returns the evaluated result
        /// with its protein groups attached.
        /// </summary>
        /// <param name="cometFilePath">Path of the Comet file handed to ProteinProphetSearch.</param>
        /// <param name="outputFolder">Output folder handed to ProteinProphetSearch.</param>
        /// <param name="keepResults">Forwarded unchanged to ProteinProphetSearch.</param>
        /// <returns>The evaluated ProteinProphet result for the search output.</returns>
        public static ProteinProphetResult RunProteinProphet(String cometFilePath, String outputFolder, Boolean keepResults)
        {
            // The message previously ended at the colon; include the file actually being processed.
            Logger.debug("Post processing comet file: " + cometFilePath);
            Console.WriteLine("Runnign protein prophet, if program doesn't respond for a long time, try pressing a typing a few keys into the command line");

            String proteinProphetOutput = ProteinProphetSearch(cometFilePath, outputFolder, keepResults);
            ProteinProphetResult ppr    = ProteinProphetEvaluator.getProteinProphetResult(proteinProphetOutput);

            // Protein groups are extracted from the same output file and attached to the result.
            ppr.SetProteinGroup(ProteinProphetEvaluator.ExtractPositiveProteinGroups(proteinProphetOutput));

            return(ppr);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Runs ProteinProphet on a fixed partial pep.xml file and prints the result together with
        /// the number of filtered protein groups.
        /// </summary>
        /// <remarks>
        /// Overwrites InputFileOrganizer.FASTA_FILE with a hard-coded, machine-specific path.
        /// </remarks>
        public static void DoJob()
        {
            InputFileOrganizer.FASTA_FILE = "C:\\Coding\\2019LavalleeLab\\temp2\\ExampleDataSet\\uniprot_SwissProt_Human_1_11_2017.fasta";

            // Alternative input used previously:
            //"C:\\Coding\\2019LavalleeLab\\temp2\\ModifiedDBSearchFiles\\Result\\ModdedSearch_MLGEGolden_nonCheat_peptideSearchResultIncluded\\PartialCometFile\\1ModdedSearch_MLGE_nonCheat_peptideSearchResultIncluded_partial.pep.xml"
            String partialPepXmlPath = "C:\\Coding\\2019LavalleeLab\\temp2\\ModifiedDBSearchFiles\\Result\\Modded_MLGEGolden_rtCheatpeptideSearchResultIncluded\\PartialCometFile\\1Modded_MLGEGolden_rtCheat_partial.pep.xml";

            ProteinProphetResult result = PostProcessingScripts.RunProteinProphet(
                partialPepXmlPath, InputFileOrganizer.OutputFolderOfTheRun, true);

            // A pre-computed prot.xml can be evaluated directly instead:
            //String proteinProphetFile = "C:\\Coding\\2019LavalleeLab\\temp2\\Output\\ProteinGroup_Combined_rtCheat\\protein_prophet_output\\ProteinGroup_Combined_rtCheat_partial_interact.prot.xml";
            //ProteinProphetResult ppr = ProteinProphetEvaluator.getProteinProphetResult(proteinProphetFile);
            Console.WriteLine(result.ToString());
            Console.WriteLine("Protein groups " + result.getFilteredProteinGroups().Count);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Derives the end-of-run metrics (exclusion correctness, resources saved, protein
        /// identification ratios, protein overlaps) and stores each one through ChangeValue.
        /// </summary>
        /// <param name="ddaNum">Key of the baseline comparison to compare against.</param>
        /// <param name="ppr">
        /// ProteinProphet result of the run. May be null, in which case the metrics written
        /// directly from it (protein counts and protein groups) are left untouched.
        /// </param>
        /// <param name="exclusionProfile">Profile whose database supplies the in-program excluded proteins.</param>
        private void postProcessingCalculations(int ddaNum, ProteinProphetResult ppr, ExclusionProfile exclusionProfile)
        {
            // NOTE(review): presumably throws if no baseline was registered for ddaNum - confirm
            // against the type of baselineComparisonSet.
            BaselineComparison bc = baselineComparisonSet[ddaNum];
            List <String>      proteinsIdentifiedByNoExclusion = bc.getProteinsIdentifiedByNoExclusion();
            int totalResourcesNaiveExperiment        = bc.getTotalResourcesNaiveExperiment();
            int numProteinsIdentifiedNaiveExperiment = bc.getNumProteinsIdentifiedNaiveExperiment();

            // Set proteins identified first: the ratios below read those values back from data.
            setProteinsIdentified(ppr, proteinsIdentifiedByNoExclusion);

            int correctlyExcluded = (int)data[Header.EvaluateExclusion_FoundOnCurrentExclusionList]
                                    + (int)data[Header.EvaluateExclusion_FoundOnCurrentObservedExclusionList];
            int incorrectlyExcluded = (int)data[Header.EvaluateExclusion_NotFoundOnExclusionList];

            /*
             * found on past observed, found on past exclusion list, and found on future
             * exclusion list are not incorrect exclusions, they are retention time being
             * predicted incorrectly...
             */
            double ratioIncorrectlyExcludedOverCorrectlyExcluded = takeRatio(incorrectlyExcluded, correctlyExcluded);

            ChangeValue(Header.CorrectlyExcluded, correctlyExcluded);
            ChangeValue(Header.IncorrectlyExcluded, incorrectlyExcluded);
            ChangeValue(Header.RatioIncorrectlyExcludedOverCorrectlyExcluded, ratioIncorrectlyExcludedOverCorrectlyExcluded);

            // Resources saved = total # available MS2 - # MS2 used for analysis
            int    resourcesSaved        = totalResourcesNaiveExperiment - (int)data[Header.NumMS2Analyzed];
            double percentResourcesSaved = takeRatio(resourcesSaved, totalResourcesNaiveExperiment);
            double percentResourcesUsed  = 1 - percentResourcesSaved;

            ChangeValue(Header.PercentResourcesSaved, percentResourcesSaved);
            ChangeValue(Header.PercentResourcesUsed, percentResourcesUsed);

            /*-
             * Protein Identification Sensitivity = # proteins identified / # proteins identified in whole experiment
             * Protein Identification Fold Change = # proteins identified / # proteins identified by naive approach
             * Protein Identification Sensitivity Limited DDA = # proteins identified also identified in naive approach / proteins identified by naive approach
             */
            ChangeValue(Header.ProteinIdentificationSensitivity,
                        takeRatio((int)data[Header.NumProteinsIdentified], numProteinsIdentifiedOriginalExperiment));
            ChangeValue(Header.ProteinIdentificationFoldChange,
                        takeRatio((int)data[Header.NumProteinsIdentified], numProteinsIdentifiedNaiveExperiment));
            ChangeValue(Header.ProteinIdentificationSensitivityLimitedDDA,
                        takeRatio((int)data[Header.ProteinsIdentifiedInLimitedDDA], numProteinsIdentifiedNaiveExperiment));

            List <String> inProgramExcludedProteins = exclusionProfile.getDatabase().getExcludedProteins();
            int           proteinOverlap_inProgramExcluded_vs_NoExclusion = compareProteins(inProgramExcludedProteins, proteinsIdentifiedByNoExclusion);

            ChangeValue(Header.NumProteinOverlap_ExcludedProteinsAgainstNoExclusionProteins, proteinOverlap_inProgramExcluded_vs_NoExclusion);

            // setProteinsIdentified tolerates a null result, so do the same here instead of
            // failing with a NullReferenceException on the last metric.
            if (ppr != null)
            {
                ChangeValue(Header.ProteinGroupsIdentified, ppr.getFilteredProteinGroups().Count);
            }
        }
        /// <summary>
        /// Writes a tab-separated report with one row per identified protein: its accession, the
        /// number of distinct peptide sequences among its peptide scores, and the number of
        /// peptide scores.
        /// </summary>
        /// <param name="ppr">ProteinProphet result supplying the identified protein accessions.</param>
        /// <param name="exclusionProfile">Profile whose database is used to look up each protein.</param>
        /// <param name="experimentNum">Number appended to the report file name.</param>
        /// <returns>Full path of the report file written to the run's output folder.</returns>
        public static String DoJob(ProteinProphetResult ppr, ExclusionProfile exclusionProfile, int experimentNum)
        {
            String outputFile = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "NumberOfPeptidesPerIdentifiedProtein_" + experimentNum + ".txt");

            // "using" closes the file even when a protein lookup below throws.
            using (StreamWriter sw = new StreamWriter(outputFile))
            {
                sw.WriteLine("Accession\tNumberOfPeptides\tSpectralCount");

                List <String> confidentlyIdentifiedProts = ppr.getProteinsIdentified();

                foreach (String accession in confidentlyIdentifiedProts)
                {
                    Protein prot          = exclusionProfile.getDatabase().getProtein(accession);
                    var     peptideScores = prot.getPeptideScore();

                    // Distinct sequences only; the raw score count is reported separately.
                    HashSet <String> peptidesObserved = new HashSet <String>();
                    foreach (PeptideScore pepEvidence in peptideScores)
                    {
                        peptidesObserved.Add(pepEvidence.getPeptideSequence());
                    }
                    sw.WriteLine("{0}\t{1}\t{2}", accession, peptidesObserved.Count, peptideScores.Count);
                }
            }

            return(outputFile);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Compares the proteins listed in a fixed excluded-protein file with the proteins
        /// identified in a fixed ProteinProphet file, and writes both counts and the size of their
        /// intersection to "ExcludedProteinComparison.txt" in the run's output folder.
        /// </summary>
        public static void DoJob()
        {
            // Hard-coded, machine-specific input locations.
            String ExcludedProteinFile = "C:\\Coding\\2019LavalleeLab\\temp2\\Output\\Gold_MLGE_nonCheat.txt_output\\ExcludedProteinList.txt";
            String proteinProphetFile  = "C:\\Coding\\2019LavalleeLab\\GitProjectRealTimeMS\\TestData\\PreComputedFiles\\MS_QC_120min_interact.prot.xml";

            // "using" releases both files even if reading or evaluating fails; the reader was
            // previously never closed at all.
            using (StreamWriter sw = new StreamWriter(Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "ExcludedProteinComparison.txt")))
            {
                List <String> inProgramConfidentlyIdentified = new List <String>();

                using (StreamReader sr = new StreamReader(ExcludedProteinFile))
                {
                    // One entry per line of the excluded-protein file.
                    String line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        inProgramConfidentlyIdentified.Add(line);
                    }
                }

                ProteinProphetResult ppr = ProteinProphetEvaluator.getProteinProphetResult(proteinProphetFile);
                List <String>        realConfidentIdentified = ppr.getProteinsIdentified();

                List <String> intersection = ListUtil.FindIntersection(inProgramConfidentlyIdentified, realConfidentIdentified);

                sw.WriteLine("In-Program excluded: {0}", inProgramConfidentlyIdentified.Count);
                sw.WriteLine("Real confidently identified: {0}", realConfidentIdentified.Count);
                sw.WriteLine("Intersection: {0}", intersection.Count);
            }
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Records the final experiment information on this evaluator and then runs the
 /// post-processing calculations.
 /// </summary>
 /// <param name="experimentName">Passed to setExperimentName.</param>
 /// <param name="experimentType">Passed to setExperimentName.</param>
 /// <param name="analysisTime">Passed to setExperimentDuration.</param>
 /// <param name="totalRunTime">Passed to setExperimentDuration.</param>
 /// <param name="exclusionList">Passed to setExclusionList.</param>
 /// <param name="ppr">ProteinProphet result passed to postProcessingCalculations.</param>
 /// <param name="ddaNum">DDA number passed to postProcessingCalculations.</param>
 /// <param name="exclusionProfile">Exclusion profile passed to postProcessingCalculations.</param>
 public void finalizePerformanceEvaluator(String experimentName, String experimentType, double analysisTime,
                                          double totalRunTime, ExclusionList exclusionList, ProteinProphetResult ppr, int ddaNum, ExclusionProfile exclusionProfile)
 {
     // Descriptive fields first.
     setExperimentName(experimentName, experimentType);
     setExperimentDuration(analysisTime, totalRunTime);
     setExperimentParams();
     setExclusionList(exclusionList);

     // Derived metrics last, after everything above has been recorded.
     postProcessingCalculations(ddaNum, ppr, exclusionProfile);
 }
Ejemplo n.º 15
0
        static String mzml = "C:\\Coding\\2019LavalleeLab\\RealTest_Results_20200219\\MSQC_QE_200ng_HEK_2hr_to_run_200219172225.mzML";        //"C:\\Coding\\2019LavalleeLab\\GoldStandardData\\MZML_Files\\MS_QC_120min.mzml";

        /// <summary>
        /// Compares a full run against a run restricted to the non-excluded spectra.
        /// </summary>
        /// <remarks>
        /// Runs Comet and ProteinProphet on the full MS2 file, writes a partial pepXML containing
        /// only the scans not listed in the excluded-spectra file, runs ProteinProphet on that
        /// partial file, and reports the percentage of scans used and of proteins identified to
        /// the console and to WriterClass.
        /// </remarks>
        public static void DoJob()
        {
            // Comet search on the full data set.
            Console.WriteLine("Performing Comet search on full ms2 data");
            String fullCometFile = PostProcessingScripts.CometStandardSearch(ms2File, InputFileOrganizer.OutputFolderOfTheRun, true);

            InputFileOrganizer.OriginalCometOutput = fullCometFile;

            // ProteinProphet on the full pepXML gives the baseline result.
            Console.WriteLine("Perform a protein prophet search on full pepxml");
            String fullProteinProphetFile = PostProcessingScripts.ProteinProphetSearch(fullCometFile, InputFileOrganizer.OutputFolderOfTheRun, true);

            InputFileOrganizer.OriginalProtXMLFile = fullProteinProphetFile;

            ProteinProphetResult baseLinePpr = ProteinProphetEvaluator.getProteinProphetResult(InputFileOrganizer.OriginalProtXMLFile);

            // Load spectra.
            Console.WriteLine("loading spectra array");
            List <Spectra> ls = Loader.parseMS2File(ms2File).getSpectraArray();

            // Excluded scan numbers, one integer per line. A set keeps the membership test in
            // the loop below O(1), and "using" closes the reader, which was previously leaked.
            HashSet <int> excludedSpectra = new HashSet <int>();
            using (StreamReader sr = new StreamReader(excludedSpectraFile))
            {
                String line;
                while ((line = sr.ReadLine()) != null)
                {
                    excludedSpectra.Add(int.Parse(line));
                }
            }

            // Every scan that is not excluded is included, in spectra order.
            List <int> includedSpectra = new List <int>();
            foreach (Spectra sp in ls)
            {
                int scanNum = sp.getScanNum();
                if (!excludedSpectra.Contains(scanNum))
                {
                    includedSpectra.Add(scanNum);
                }
            }

            String outputCometFile = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "realTestpartialOut.pep.xml"); //"C:\\Coding\\2019LavalleeLab\\GoldStandardData\\pepxml\\MS_QC_120min_partial.pep.xml";
            String fastaFile       = InputFileOrganizer.FASTA_FILE;                                                       //"C:\\Coding\\2019LavalleeLab\\GoldStandardData\\Database\\uniprot_SwissProt_Human_1_11_2017.fasta";

            PartialPepXMLWriter.writePartialPepXMLFile(fullCometFile, includedSpectra,
                                                       outputCometFile, mzml, fastaFile, outputCometFile);

            String partialProt = PostProcessingScripts.ProteinProphetSearch(outputCometFile, InputFileOrganizer.OutputFolderOfTheRun, true);
            ProteinProphetResult partialPpr = ProteinProphetEvaluator.getProteinProphetResult(partialProt);

            // Doubles keep the percentage divisions below in floating point.
            double partialNum = partialPpr.getNum_proteins_identified();
            double totalNum   = baseLinePpr.getNum_proteins_identified();
            double idSens     = partialNum / totalNum * 100.0;

            double includedScanNum = includedSpectra.Count;
            double totalScanNum    = ls.Count;
            double usedResource    = includedScanNum / totalScanNum * 100;
            String line1           = String.Format("includedScans {0} \t totalScanNum {1} \tUsedResources {2}", includedScanNum, totalScanNum, usedResource);
            String line2           = String.Format("partialNum {0} \t totalNum {1} \tidsens {2}", partialNum, totalNum, idSens);

            Console.WriteLine(line1);
            Console.WriteLine(line2);
            WriterClass.writeln(line1);
            WriterClass.writeln(line2);
            WriterClass.CloseWriter();
        }