/// <summary>
/// Filters "ProteinTotalSpectraAgainstExcludedSpectra.txt" down to the rows whose first
/// tab-separated field is one of the names returned by
/// ProteinProphetEvaluator.extractIdentifiedProteinNames for
/// InputFileOrganizer.OriginalProtXMLFile. The first input line is always copied through.
/// The result is written to "FilteredProteinWithExcludedSpectraCount.txt" in
/// InputFileOrganizer.OutputFolderOfTheRun and the number of kept rows is printed to the console.
/// </summary>
public static void FilterForConfidentlyIdentifiedProteinOnly()
        {
            // A set gives constant-time membership checks for the per-row lookup below.
            HashSet <String> identifiedNames = new HashSet <String>(
                ProteinProphetEvaluator.extractIdentifiedProteinNames(InputFileOrganizer.OriginalProtXMLFile));

            // NOTE(review): single-argument Path.Combine returns the name unchanged, so the input file
            // is resolved against the current working directory - confirm that this is intended.
            String excludedSpectraPerProteinAll = Path.Combine("ProteinTotalSpectraAgainstExcludedSpectra.txt");
            String outputFile = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "FilteredProteinWithExcludedSpectraCount.txt");
            int    count      = 0;

            // using guarantees that both streams are closed, even if reading or writing throws.
            using (StreamReader sr = new StreamReader(excludedSpectraPerProteinAll))
            using (StreamWriter sw = new StreamWriter(outputFile))
            {
                // The first line is copied without filtering (presumably a header row).
                String line = sr.ReadLine();
                sw.WriteLine(line);

                while ((line = sr.ReadLine()) != null)
                {
                    String protName = line.Split('\t')[0];
                    if (identifiedNames.Contains(protName))
                    {
                        sw.WriteLine(line);
                        count++;
                    }
                }
            }
            Console.WriteLine(count);
        }
        /// <summary>
        /// Calls ProteinProphetEvaluator.ExtractPositiveProteinGroups on a fixed prot.xml path
        /// and prints the Count of the returned collection to the console.
        /// </summary>
        public static void DoJob()
        {
            // Hard-coded location of the pre-computed ProteinProphet output used by this job.
            const String protXmlPath = "C:\\Coding\\2019LavalleeLab\\GitProjectRealTimeMS\\TestData\\PreComputedFiles\\MS_QC_120min_interact.prot.xml";

            Console.WriteLine(ProteinProphetEvaluator.ExtractPositiveProteinGroups(protXmlPath).Count);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Flushes WriterClass and reports the outcome of an experiment.
        /// Outside simulation mode only the performance evaluator's output is written.
        /// In simulation mode a ProteinProphetResult is obtained, the total run time is recorded on
        /// the experiment, the performance vector is printed and written, and the result is stored
        /// in <c>e.ppr</c>.
        /// </summary>
        /// <param name="e">The experiment to report on; its totalRunTime and ppr members are updated in simulation mode.</param>
        private static void PostExperimentProcessing(Experiment e)
        {
            //WriterClass.writeln(exclusionProfile.ReportFailedCometSearchStatistics());
            WriterClass.Flush();

            // Non-simulation runs have nothing more to do than log the evaluator output.
            if (!GlobalVar.IsSimulation)
            {
                WriterClass.writeln(e.exclusionProfile.GetPerformanceEvaluator().outputPerformance());
                return;
            }

            ProteinProphetResult prophetResult;
            if (!GlobalVar.isSimulationForFeatureExtraction)
            {
                // Regular simulation: run post-processing under a name built from the experiment number and name.
                String resultFileName = e.experimentNumber + GlobalVar.experimentName;
                prophetResult = PostProcessingScripts.postProcessing(e.exclusionProfile, resultFileName, true);
            }
            else
            {
                // Feature-extraction simulation: reuse the result of the original prot.xml file.
                prophetResult = ProteinProphetEvaluator.getProteinProphetResult(InputFileOrganizer.OriginalProtXMLFile);
            }

            e.totalRunTime = getCurrentTime() - e.experimentStartTime;

            String performanceLine = e.exclusionProfile.getPerformanceVector(
                e.experimentName,
                e.exclusionProfile.getAnalysisType().getDescription(),
                e.analysisTime,
                e.totalRunTime,
                prophetResult,
                12,
                e.exclusionProfile);

            Console.WriteLine(performanceLine);
            Console.WriteLine("Protein groups: " + prophetResult.getFilteredProteinGroups().Count);
            WriterClass.writeln(performanceLine);
            //WriterClass.writeln("Protein groups: "+ ppr.getFilteredProteinGroups().Count) ;

            e.ppr = prophetResult;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Performs the setup that precedes the experiments: calls ConstructDecoyFasta and
        /// ConstructIDX, prepares the simulation baseline when GlobalVar.IsSimulation is set,
        /// sets up the exclusion database and initializes Comet.
        /// </summary>
        static void PreExperimentSetUp()
        {
            ConstructDecoyFasta();
            ConstructIDX();

            if (GlobalVar.IsSimulation)
            {
                // Load the MS2 spectra used for the simulation and record how many scans there are.
                ms2SpectraList = Loader.parseMS2File(InputFileOrganizer.MS2SimulationTestFile).getSpectraArray();
                GlobalVar.ExperimentTotalScans = ms2SpectraList.Count;

                FullPepXMLAndProteinProphetSetup();
                baseLinePpr = ProteinProphetEvaluator.getProteinProphetResult(InputFileOrganizer.OriginalProtXMLFile);

                // In Alex's original code, "original experiment" means the experiment without any
                // exclusion or manipulation by this program, while "baseline comparison" means the
                // results of a "NoExclusion" run (a top 6 or top 12 DDA run), which this program does
                // not implement. The two are therefore the same in this program, so the same result
                // is used for both.
                int analyzedMs2Count = (int)GlobalVar.ExperimentTotalScans;
                PerformanceEvaluator.setBaselineComparison(baseLinePpr, analyzedMs2Count, 12);
                PerformanceEvaluator.setOriginalExperiment(baseLinePpr.getNum_proteins_identified());
            }

            log.Debug("Setting up Database");
            database = databaseSetUp(InputFileOrganizer.ExclusionDBFasta);
            log.Debug("Done setting up database.");

            CometSingleSearch.InitializeComet(InputFileOrganizer.IDXDataBase, InputFileOrganizer.CometParamsFile);
            //CometSingleSearch.InitializeComet_NonRealTime("C:\\Coding\\2019LavalleeLab\\temp2\\ModifiedDBSearchFiles\\NoExclusionRealTimeCometSearch.tsv");
            //CometSingleSearch.QualityCheck();

            Console.WriteLine("pre-experimental setup finished");
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the lines of a fixed "ExcludedProteinList.txt" file, obtains the proteins reported
        /// by <c>getProteinsIdentified()</c> for a fixed prot.xml file, and writes the size of each
        /// list and of their intersection (via ListUtil.FindIntersection) to
        /// "ExcludedProteinComparison.txt" in InputFileOrganizer.OutputFolderOfTheRun.
        /// </summary>
        public static void DoJob()
        {
            String ExcludedProteinFile = "C:\\Coding\\2019LavalleeLab\\temp2\\Output\\Gold_MLGE_nonCheat.txt_output\\ExcludedProteinList.txt";
            String proteinProphetFile  = "C:\\Coding\\2019LavalleeLab\\GitProjectRealTimeMS\\TestData\\PreComputedFiles\\MS_QC_120min_interact.prot.xml";

            // File.ReadAllLines opens, reads and closes the file, so no reader handle is left open.
            List <String> inProgramConfidentlyIdentified = new List <String>(File.ReadAllLines(ExcludedProteinFile));

            ProteinProphetResult ppr = ProteinProphetEvaluator.getProteinProphetResult(proteinProphetFile);
            List <String>        realConfidentIdentified = ppr.getProteinsIdentified();

            List <String> intersection = ListUtil.FindIntersection(inProgramConfidentlyIdentified, realConfidentIdentified);

            // The report is opened only once all inputs are ready, and using closes it even if a
            // write fails, so a failed run leaves neither an open handle nor a truncated file.
            using (StreamWriter sw = new StreamWriter(Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "ExcludedProteinComparison.txt")))
            {
                sw.WriteLine("In-Program excluded: {0}", inProgramConfidentlyIdentified.Count);
                sw.WriteLine("Real confidently identified: {0}", realConfidentIdentified.Count);
                sw.WriteLine("Intersection: {0}", intersection.Count);
            }
        }
Exemplo n.º 6
0
        static String mzml = "C:\\Coding\\2019LavalleeLab\\RealTest_Results_20200219\\MSQC_QE_200ng_HEK_2hr_to_run_200219172225.mzML";        //"C:\\Coding\\2019LavalleeLab\\GoldStandardData\\MZML_Files\\MS_QC_120min.mzml";
        /// <summary>
        /// Compares protein identification on the full MS2 data with identification on the subset
        /// of scans that were not excluded.
        /// Runs a Comet search and a ProteinProphet search on <c>ms2File</c>, stores the resulting
        /// paths in InputFileOrganizer, reads the excluded scan numbers (one integer per line) from
        /// <c>excludedSpectraFile</c>, writes a partial pep.xml containing only the remaining scans,
        /// runs ProteinProphet on it, and reports the share of scans used and of proteins identified
        /// to the console and to WriterClass, which is closed at the end.
        /// </summary>
        public static void DoJob()
        {
            //comet
            Console.WriteLine("Performing Comet search on full ms2 data");
            String fullCometFile = PostProcessingScripts.CometStandardSearch(ms2File, InputFileOrganizer.OutputFolderOfTheRun, true);

            InputFileOrganizer.OriginalCometOutput = fullCometFile;

            //protein prophet
            Console.WriteLine("Perform a protein prophet search on full pepxml");
            String fullProteinProphetFile = PostProcessingScripts.ProteinProphetSearch(fullCometFile, InputFileOrganizer.OutputFolderOfTheRun, true);

            InputFileOrganizer.OriginalProtXMLFile = fullProteinProphetFile;

            ProteinProphetResult baseLinePpr = ProteinProphetEvaluator.getProteinProphetResult(InputFileOrganizer.OriginalProtXMLFile);

            //load spectra
            Console.WriteLine("loading spectra array");
            List <Spectra> ls = Loader.parseMS2File(ms2File).getSpectraArray();

            // The excluded scan numbers are only used for membership tests, so a set is used:
            // each lookup is constant-time instead of a scan over the whole list.
            HashSet <int> excludedSpectra = new HashSet <int>();

            // using closes the reader even when a line fails to parse.
            using (StreamReader sr = new StreamReader(excludedSpectraFile))
            {
                String line;
                while ((line = sr.ReadLine()) != null)
                {
                    excludedSpectra.Add(int.Parse(line));
                }
            }

            // Every scan that is not listed as excluded counts as included, in file order.
            List <int> includedSpectra = new List <int>();
            foreach (Spectra sp in ls)
            {
                int scanNum = sp.getScanNum();
                if (!excludedSpectra.Contains(scanNum))
                {
                    includedSpectra.Add(scanNum);
                }
            }
            String outputCometFile = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "realTestpartialOut.pep.xml"); //"C:\\Coding\\2019LavalleeLab\\GoldStandardData\\pepxml\\MS_QC_120min_partial.pep.xml";
            String fastaFile       = InputFileOrganizer.FASTA_FILE;                                                       //"C:\\Coding\\2019LavalleeLab\\GoldStandardData\\Database\\uniprot_SwissProt_Human_1_11_2017.fasta";

            // NOTE(review): outputCometFile is passed as both the third and the last argument -
            // confirm against the writePartialPepXMLFile signature that this is intended.
            PartialPepXMLWriter.writePartialPepXMLFile(fullCometFile, includedSpectra,
                                                       outputCometFile, mzml, fastaFile, outputCometFile);

            String partialProt = PostProcessingScripts.ProteinProphetSearch(outputCometFile, InputFileOrganizer.OutputFolderOfTheRun, true);
            ProteinProphetResult partialPpr = ProteinProphetEvaluator.getProteinProphetResult(partialProt);

            // Percentage of proteins identified from the partial data relative to the full data.
            double partialNum = partialPpr.getNum_proteins_identified();
            double totalNum   = baseLinePpr.getNum_proteins_identified();
            double idSens     = partialNum / totalNum * 100.0;

            // Percentage of scans that were kept.
            double includedScanNum = includedSpectra.Count;
            double totalScanNum    = ls.Count;
            double usedResource    = includedScanNum / totalScanNum * 100;
            String line1           = String.Format("includedScans {0} \t totalScanNum {1} \tUsedResources {2}", includedScanNum, totalScanNum, usedResource);
            String line2           = String.Format("partialNum {0} \t totalNum {1} \tidsens {2}", partialNum, totalNum, idSens);

            Console.WriteLine(line1);
            Console.WriteLine(line2);
            WriterClass.writeln(line1);
            WriterClass.writeln(line2);
            WriterClass.CloseWriter();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Splits identification features into training sets and writes three feature files:
        /// positive vs. negative, positive vs. non-positive, and positive vs. non-positive with
        /// every accession containing GlobalVar.DecoyPrefix removed.
        /// Features whose cardinality is not greater than zero are ignored.
        /// </summary>
        /// <param name="idf">The identification features to classify.</param>
        private static void writeFeatures(List <IdentificationFeatures> idf)
        {
            log.Info("Classifying positive and negative sets");

            // Names extracted from the original prot.xml file. Both collections are only used for
            // membership tests inside the loop, so they are held in sets for constant-time lookups.
            HashSet <String> identifiedProteins = new HashSet <String>(
                ProteinProphetEvaluator.extractIdentifiedProteinNames(InputFileOrganizer.OriginalProtXMLFile));
            // 2019-05-23: this is where the negative training set is filtered by an FDR threshold.
            // NOTE(review): the previous comment said "above 20% FDR" but the value passed is 0.25 -
            // confirm which threshold is intended.
            HashSet <String> negativeTrainingSetProteins = new HashSet <String>(
                ProteinProphetEvaluator.extractNegativeTrainingSetProteinNames(InputFileOrganizer.OriginalProtXMLFile, 0.25));

            // Non-decoy features whose accession is in identifiedProteins.
            List <IdentificationFeatures> positiveTrainingSet = new List <IdentificationFeatures>();
            // Decoy features plus non-decoy features whose accession is in negativeTrainingSetProteins.
            List <IdentificationFeatures> negativeTrainingSet = new List <IdentificationFeatures>();
            // Decoy features plus non-decoy features whose accession is not in identifiedProteins.
            List <IdentificationFeatures> nonPositiveTrainingSet = new List <IdentificationFeatures>();

            foreach (IdentificationFeatures i in idf)
            {
                if (i.getCardinality() <= 0)
                {
                    continue;
                }

                String accession = i.getAccession();
                if (accession.StartsWith(GlobalVar.DecoyPrefix))
                {
                    // Decoy proteins belong to both the negative and the non-positive set.
                    negativeTrainingSet.Add(i);
                    nonPositiveTrainingSet.Add(i);
                    continue;
                }

                // Real protein: it is either positive or non-positive ...
                if (identifiedProteins.Contains(accession))
                {
                    positiveTrainingSet.Add(i);
                }
                else
                {
                    nonPositiveTrainingSet.Add(i);
                }

                // ... and, independently of that, negative when it is in the negative name set.
                if (negativeTrainingSetProteins.Contains(accession))
                {
                    negativeTrainingSet.Add(i);
                }
            }
            WriteIdentificationFeaturesFile(OutputFile_PositiveAndNegative, positiveTrainingSet, negativeTrainingSet);
            WriteIdentificationFeaturesFile(OutputFile_PositiveAndNonPositive, positiveTrainingSet, nonPositiveTrainingSet);

            // NOTE(review): decoys are recognised with StartsWith above but with Contains here, so
            // this filter also drops accessions that merely contain the prefix - confirm intent.
            List <IdentificationFeatures> positiveSetNoDecoy =
                positiveTrainingSet.FindAll(f => !f.getAccession().Contains(GlobalVar.DecoyPrefix));
            List <IdentificationFeatures> nonPositiveSetNoDecoy =
                nonPositiveTrainingSet.FindAll(f => !f.getAccession().Contains(GlobalVar.DecoyPrefix));

            WriteIdentificationFeaturesFile(OutputFile_PositiveAndNonPositive_NoDecoy, positiveSetNoDecoy, nonPositiveSetNoDecoy);
        }