/// <summary>
/// Writes a partial pep.xml restricted to the spectra used by the exclusion profile and runs
/// ProteinProphet on it.
/// </summary>
/// <param name="exclusionProfile">Profile whose used-spectra collection selects what goes into the partial file.</param>
/// <param name="experimentName">Prefix of the partial pep.xml file name.</param>
/// <param name="keepResults">Forwarded unchanged to <c>RunProteinProphet</c>.</param>
/// <returns>The result returned by <c>RunProteinProphet</c>.</returns>
public static ProteinProphetResult postProcessing(ExclusionProfile exclusionProfile, String experimentName,
                                                          Boolean keepResults)
        {
            String runOutputFolder = InputFileOrganizer.OutputFolderOfTheRun;
            String partialFolder   = Path.Combine(runOutputFolder, "PartialCometFile");

            // Make sure the target folder is there before anything is written into it.
            Boolean partialFolderMissing = !Directory.Exists(partialFolder);
            if (partialFolderMissing)
            {
                Directory.CreateDirectory(partialFolder);
            }

            String partialFileName = String.Concat(experimentName, "_partial", InputFileOrganizer.PepXMLSuffix);
            String partialPepXml   = Path.Combine(partialFolder, partialFileName);

            // TODO: this call used to receive the MZML file instead of the MS2 file.
            // The partial pep.xml path is deliberately passed twice, as the third and the last argument.
            PartialPepXMLWriter.writePartialPepXMLFile(
                InputFileOrganizer.OriginalCometOutput,
                exclusionProfile.getSpectraUsed(),
                partialPepXml,
                InputFileOrganizer.MS2SimulationTestFile,
                InputFileOrganizer.FASTA_FILE,
                partialPepXml);

            // The partial pep.xml is not deleted here, whatever the value of keepResults.
            return RunProteinProphet(partialPepXml, InputFileOrganizer.OutputFolderOfTheRun, keepResults);
        }
Beispiel #2
0
        /// <summary>
        /// Consumer loop: dequeues parsed spectra and evaluates each one against the exclusion profile
        /// until processing has been stopped and the outstanding task counter has dropped to zero.
        /// </summary>
        /// <param name="exclusionProfile">Profile that evaluates every dequeued spectra.</param>
        public static void StartProcessing(ExclusionProfile exclusionProfile)
        {
            log.Debug("Initiating up DataProcessor Variables");
            reset();

            // Tells whoever is polling this flag that the processor is ready to receive scans.
            preExperimentSetupFinished = true;

            // The loop polls the queue continuously; there is no wait between attempts.
            while (running || taskCounter > 0)
            {
                Spectra nextSpectra;
                if (!parsedSpectra.TryDequeue(out nextSpectra))
                {
                    continue;
                }

                exclusionProfile.evaluate(nextSpectra);

                // Record { scan number, arrival time, time processing finished } for this scan.
                double scanNumber    = nextSpectra.getScanNum();
                double arrivalTime   = nextSpectra.getArrivalTime();
                double processedTime = getCurrentMiliTime();
                scanArrivalAndProcessedTimeList.Add(new double[] { scanNumber, arrivalTime, processedTime });

                Interlocked.Decrement(ref taskCounter);
            }

            log.Info("DataProcessor finished, processed {0} scans", scanIDCounter);
        }
Beispiel #3
0
        /// <summary>
        /// Connects to the first available instrument, starts the data-processing thread, and listens
        /// for MS scans until the listening duration elapses or <c>ExclusionExplorer.IsListening()</c>
        /// returns false. Blocks until the data-processing thread has finished.
        /// </summary>
        /// <param name="exclusionProfile">Profile handed to <c>DataProcessor.StartProcessing</c> on the worker thread.</param>
        internal void DoJob(ExclusionProfile exclusionProfile)
        {
            using (IExactiveInstrumentAccess instrument = Connection.GetFirstInstrument())
            {
                // Without an instrument there is nothing to listen to; report and bail out.
                if (instrument == null)
                {
                    Console.WriteLine("Failed to create Instrument, program will now exit");
                    return;
                }

                // Scan container at index 0 is the one all event handlers below are attached to.
                IMsScanContainer orbitrap = instrument.GetMsScanContainer(0);
                //if (GlobalVar.SeeExclusionFormat)
                //{
                //    m_methods = instrument.Control.Methods;
                //    m_replacementTable = CreateReplacementTable();
                //}

                // NOTE(review): the message mentions 60 seconds, but no 60-second wait is visible in this method.
                Console.WriteLine("Waiting 60 seconds for scans on detector " + orbitrap.DetectorClass + "...");
                DataProcessor.reset();
                // Run the scan-processing loop on its own thread so this thread can handle instrument events.
                Thread DataProcessingThread = new Thread(() => DataProcessor.StartProcessing(exclusionProfile));
                //Thread InputHandling = new Thread(() => InputHandler.ReadConsoleInput());
                DataProcessingThread.Start();
                //InputHandling.Start();
                // Poll every 500 ms until the processor reports that its setup is finished.
                while (!DataProcessor.SetupFinished())
                {
                    Thread.Sleep(500);
                }
                orbitrap.AcquisitionStreamOpening += Orbitrap_AcquisitionStreamOpening;
                orbitrap.AcquisitionStreamClosing += Orbitrap_AcquisitionStreamClosing;
                Console.WriteLine("Waiting on acquisition stream to open");
                // Leaves the loop once the stream is flagged open or the user presses 'y'.
                // Console.ReadKey() blocks, so the flag is only re-checked after a key press.
                while (!acquisitionStreamOpened && !Console.ReadKey().KeyChar.ToString().Equals("y"))
                {
                    Console.WriteLine("Waiting on acquisition stream to open");
                    Thread.CurrentThread.Join(1000);
                }
                Console.WriteLine("MSScan Arrive event listener added");
                orbitrap.MsScanArrived += Orbitrap_MsScanArrived;


                // Listen in roughly one-second steps, up to GlobalVar.listeningDuration steps,
                // stopping early once ExclusionExplorer.IsListening() returns false.
                int durationCounter = 0;
                while (durationCounter < GlobalVar.listeningDuration && (ExclusionExplorer.IsListening()))
                {
                    Thread.CurrentThread.Join(1000); //does the same thing as Thread.Sleep() but Join
                                                     //allows standard sendmessage pumping and COM to continue
                    durationCounter++;
                }
                // Detach all handlers before telling the processor to finish.
                orbitrap.MsScanArrived            -= Orbitrap_MsScanArrived;
                orbitrap.AcquisitionStreamClosing -= Orbitrap_AcquisitionStreamClosing;
                orbitrap.AcquisitionStreamOpening -= Orbitrap_AcquisitionStreamOpening;

                DataProcessor.EndProcessing();
                DataProcessingThread.Join(); //wait until dataProcessor finishes processing/outputing the scan
                                             //queue then returns to Main thread;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Writes the proteins that were added to the exclusion list, one per line, to
        /// "ExcludedProteinList.txt" in the output folder of the run.
        /// </summary>
        /// <param name="exclusionProfile">Profile whose database supplies the excluded proteins.</param>
        private static void WriteExcludedProteinList(ExclusionProfile exclusionProfile)
        {
            List <string> excludedProteins = exclusionProfile.getDatabase().getExcludedProteins();
            String        outputFile       = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "ExcludedProteinList.txt");

            // 'using' flushes and closes the file even if a write throws part-way through.
            using (StreamWriter sw = new StreamWriter(outputFile))
            {
                foreach (String prot in excludedProteins)
                {
                    sw.WriteLine(prot);
                }
            }
        }
        /// <summary>
        /// Writes a tab-separated table with, for every protein identified by ProteinProphet, the number
        /// of distinct peptide sequences and the total number of peptide scores stored for it.
        /// </summary>
        /// <param name="ppr">ProteinProphet result supplying the identified accessions.</param>
        /// <param name="exclusionProfile">Profile whose database is used to look up each protein.</param>
        /// <param name="experimentNum">Number appended to the output file name.</param>
        /// <returns>Full path of the file that was written.</returns>
        public static String DoJob(ProteinProphetResult ppr, ExclusionProfile exclusionProfile, int experimentNum)
        {
            String outputFile = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "NumberOfPeptidesPerIdentifiedProtein_" + experimentNum + ".txt");

            List <String> confidentlyIdentifiedProts = ppr.getProteinsIdentified();

            // 'using' flushes and closes the file even if a lookup or write throws part-way through.
            using (StreamWriter sw = new StreamWriter(outputFile))
            {
                sw.WriteLine("Accession\tNumberOfPeptides\tSpectralCount");

                foreach (String accession in confidentlyIdentifiedProts)
                {
                    Protein prot          = exclusionProfile.getDatabase().getProtein(accession);
                    var     peptideScores = prot.getPeptideScore();

                    // Distinct sequences only; the spectral count below keeps duplicates.
                    HashSet <String> peptidesObserved = new HashSet <String>();
                    foreach (PeptideScore pepEvidence in peptideScores)
                    {
                        peptidesObserved.Add(pepEvidence.getPeptideSequence());
                    }
                    sw.WriteLine("{0}\t{1}\t{2}", accession, peptidesObserved.Count, peptideScores.Count);
                }
            }

            return(outputFile);
        }
Beispiel #6
0
        //static string mzmlFileBaseName="MS_QC_240min";
        /// <summary>
        /// Runs a no-exclusion simulation on the given MS2 file, recalibrates the standard deviation of
        /// the resulting identification features, and writes them out.
        /// </summary>
        /// <param name="ms2File">MS2 file to simulate; also stored in <c>InputFileOrganizer.MS2SimulationTestFile</c>.</param>
        /// <param name="extractedFeatureSavedFile_posAndNeg">Receives the positive-and-negative feature file path.</param>
        /// <param name="extractedFeatureSavedFile_posAndNonPos">Receives the positive-and-non-positive feature file path.</param>
        public static void ExtractFeatures(String ms2File, out String extractedFeatureSavedFile_posAndNeg, out String extractedFeatureSavedFile_posAndNonPos)
        {
            Console.WriteLine("Extracting features from {0}", ms2File);

            InputFileOrganizer.MS2SimulationTestFile = ms2File;

            // All three output files share the run's output folder and the MS2 file's base name.
            String baseName     = Path.GetFileNameWithoutExtension(ms2File);
            String outputFolder = InputFileOrganizer.OutputFolderOfTheRun;

            OutputFile_PositiveAndNegative = Path.Combine(
                outputFolder, baseName + "_extractedFeatures_PositiveAndNegative.tsv");
            OutputFile_PositiveAndNonPositive = Path.Combine(
                outputFolder, baseName + "_extractedFeatures_positiveAndNonPositive.tsv");
            OutputFile_PositiveAndNonPositive_NoDecoy = Path.Combine(
                outputFolder, baseName + "_extractedFeatures_positiveAndNonPositive_NoDecoy.tsv");

            // Feature extraction currently includes decoy proteins in the database and the testing set.
            SimulationWithDecoyParamsSetUp();

            // Placeholder values only; they do not matter for this run.
            GlobalVar.ppmTolerance = 1;
            GlobalVar.retentionTimeWindowSize = 1;
            GlobalVar.AccordThreshold = 1;
            GlobalVar.XCorr_Threshold = 1;
            GlobalVar.NumDBThreshold = 1;

            log.Info("Running No Exclusion Simulation");
            ExclusionProfile noExclusionRun = ExclusionExplorer.SingleSimulationRun(ExclusionProfileEnum.NO_EXCLUSION_PROFILE);

            log.Info("Extracting identification feature from exclusion profile");
            List <IdentificationFeatures> rawFeatures = noExclusionRun.getFeatures();

            log.Info("Recalibrating stDev");
            List <IdentificationFeatures> recalibratedFeatures = IdentificationFeatureExtractionUtil.recalibrateStDev(rawFeatures);

            writeFeatures(recalibratedFeatures);

            extractedFeatureSavedFile_posAndNeg    = OutputFile_PositiveAndNegative;
            extractedFeatureSavedFile_posAndNonPos = OutputFile_PositiveAndNonPositive;
            Console.WriteLine("Extracted Feature written to {0} and {1}", OutputFile_PositiveAndNegative, OutputFile_PositiveAndNonPositive);
        }
Beispiel #7
0
        /// <summary>
        /// Runs one simulated experiment with the requested exclusion strategy and returns the
        /// exclusion profile that was used.
        /// </summary>
        /// <param name="expType">Selects which exclusion profile implementation is built.</param>
        /// <returns>The exclusion profile after the simulated run and post-experiment processing.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="expType"/> is not one of the profile types handled here.
        /// </exception>
        public static ExclusionProfile SingleSimulationRun(ExclusionProfileEnum expType)
        {
            PreExperimentSetUp();
            int    experimentNumber = 1;
            double startTime        = getCurrentTime();

            // Outside feature-extraction mode, use the first entry of every parameter list.
            // In feature-extraction mode the globals are left exactly as the caller set them.
            if (GlobalVar.isSimulationForFeatureExtraction == false)
            {
                GlobalVar.ppmTolerance            = GlobalVar.PPM_TOLERANCE_LIST[0];
                GlobalVar.retentionTimeWindowSize = GlobalVar.RETENTION_TIME_WINDOW_LIST[0];
                GlobalVar.AccordThreshold         = GlobalVar.LR_PROBABILITY_THRESHOLD_LIST[0];
                GlobalVar.XCorr_Threshold         = GlobalVar.XCORR_THRESHOLD_LIST[0];
                GlobalVar.NumDBThreshold          = GlobalVar.NUM_DB_THRESHOLD_LIST[0];
            }

            // Hard-coded counts, used only by the random exclusion profile.
            int numExcluded = 14826;
            int numAnalyzed = 22681;

            ExclusionProfile exclusionProfile;

            switch (expType)
            {
            case ExclusionProfileEnum.NORA_EXCLUSION_PROFILE:
                exclusionProfile = new NoraExclusion(database, GlobalVar.XCorr_Threshold, GlobalVar.ppmTolerance, GlobalVar.NumDBThreshold, GlobalVar.retentionTimeWindowSize);
                break;

            case ExclusionProfileEnum.MACHINE_LEARNING_GUIDED_EXCLUSION_PROFILE:
                exclusionProfile = new MachineLearningGuidedExclusion(InputFileOrganizer.AccordNet_LogisticRegressionClassifier_WeightAndInterceptSavedFile, database, GlobalVar.ppmTolerance, GlobalVar.retentionTimeWindowSize);
                break;

            case ExclusionProfileEnum.RANDOM_EXCLUSION_PROFILE:
                exclusionProfile = new RandomExclusion_Fast(database, ms2SpectraList, numExcluded, numAnalyzed, 12);
                break;

            case ExclusionProfileEnum.NO_EXCLUSION_PROFILE:
                exclusionProfile = new NoExclusion(database, GlobalVar.retentionTimeWindowSize);
                break;

            case ExclusionProfileEnum.MLGE_SEQUENCE_EXCLUSION_PROFILE:
                exclusionProfile = new MLGESequenceExclusion(InputFileOrganizer.AccordNet_LogisticRegressionClassifier_WeightAndInterceptSavedFile, database, GlobalVar.ppmTolerance, GlobalVar.retentionTimeWindowSize);
                break;

            case ExclusionProfileEnum.NORA_SEQUENCE_EXCLUSION_PROFILE:
                exclusionProfile = new NoraSequenceExclusion(database, GlobalVar.XCorr_Threshold, GlobalVar.ppmTolerance, GlobalVar.NumDBThreshold, GlobalVar.retentionTimeWindowSize);
                break;

            case ExclusionProfileEnum.SVMEXCLUSION:
                exclusionProfile = new SVMExclusion(InputFileOrganizer.SVMSavedFile, database, GlobalVar.ppmTolerance, GlobalVar.retentionTimeWindowSize);
                break;

            default:
                // Fail with a clear message instead of a NullReferenceException on the first use below.
                throw new ArgumentOutOfRangeException("expType", expType, "Unsupported exclusion profile type.");
            }

            WriterClass.writeln(exclusionProfile.GetPerformanceEvaluator().getHeader());
            String     experimentName = "EXP_" + experimentNumber + GlobalVar.experimentName;
            Experiment experiment     = new Experiment(exclusionProfile, experimentName, experimentNumber, expType, startTime);

            new DataReceiverSimulation().DoJob(exclusionProfile, ms2SpectraList);

#if IGNORE
            WriteScanArrivalProcessedTime(DataProcessor.spectraNotAdded);

            foreach (double[] ignoredSpectra in DataProcessor.spectraNotAdded)
            {
                int scanNum = ms2SpectraList[(int)ignoredSpectra[0] - 1].getScanNum();
                exclusionProfile.getSpectraUsed().Add(scanNum);
            }
#endif

#if TRACKEXCLUDEDPROTEINFEATURE
            if (expType == ExclusionProfileEnum.MACHINE_LEARNING_GUIDED_EXCLUSION_PROFILE)
            {
                List <object[]> excludedProteinFeatures = ((MachineLearningGuidedExclusion)exclusionProfile).excludedProteinFeatureList;
                WriterClass.writeln("Accession\tCardinality\tHighestXCorr\tMeanXCorr\tMedianXCorr\tStDev", writerClassOutputFile.ExcludedSpectraScanNum);
                foreach (object[] feature in excludedProteinFeatures)
                {
                    String featureStr = "";
                    foreach (object o in feature)
                    {
                        featureStr = featureStr + o.ToString() + "\t";
                    }
                    featureStr = featureStr.Trim();
                    WriterClass.writeln(featureStr, writerClassOutputFile.ExcludedSpectraScanNum);
                }
            }
#endif
            PostExperimentProcessing(experiment);

            return(exclusionProfile);
        }
        /// <summary>
        /// Derives the summary metrics of a finished run (exclusion accuracy, resources saved, protein
        /// identification ratios and overlaps) and stores each one through <c>ChangeValue</c>.
        /// </summary>
        /// <param name="ddaNum">Index into <c>baselineComparisonSet</c> selecting the baseline to compare against.</param>
        /// <param name="ppr">ProteinProphet result of the run.</param>
        /// <param name="exclusionProfile">Profile whose database supplies the in-program excluded proteins.</param>
        private void postProcessingCalculations(int ddaNum, ProteinProphetResult ppr, ExclusionProfile exclusionProfile)
        {
            BaselineComparison baseline = baselineComparisonSet[ddaNum];
            List <String> baselineProteins    = baseline.getProteinsIdentifiedByNoExclusion();
            int           naiveTotalResources = baseline.getTotalResourcesNaiveExperiment();
            int           naiveProteinCount   = baseline.getNumProteinsIdentifiedNaiveExperiment();

            // Must run first: the identification ratios further down read values out of 'data'.
            setProteinsIdentified(ppr, baselineProteins);

            int foundOnExclusionList         = (int)data[Header.EvaluateExclusion_FoundOnCurrentExclusionList];
            int foundOnObservedExclusionList = (int)data[Header.EvaluateExclusion_FoundOnCurrentObservedExclusionList];
            int correctlyExcluded            = foundOnExclusionList + foundOnObservedExclusionList;
            int incorrectlyExcluded          = (int)data[Header.EvaluateExclusion_NotFoundOnExclusionList];

            // Hits on past observed, past exclusion and future exclusion lists are not counted as
            // incorrect exclusions; they reflect a mispredicted retention time instead.
            double incorrectOverCorrect = takeRatio(incorrectlyExcluded, correctlyExcluded);

            ChangeValue(Header.CorrectlyExcluded, correctlyExcluded);
            ChangeValue(Header.IncorrectlyExcluded, incorrectlyExcluded);
            ChangeValue(Header.RatioIncorrectlyExcludedOverCorrectlyExcluded, incorrectOverCorrect);

            // Resources saved = MS2 available to the naive experiment minus MS2 analyzed in this run.
            int    ms2Analyzed    = (int)data[Header.NumMS2Analyzed];
            int    resourcesSaved = naiveTotalResources - ms2Analyzed;
            double savedFraction  = takeRatio(resourcesSaved, naiveTotalResources);
            double usedFraction   = 1 - savedFraction;

            ChangeValue(Header.PercentResourcesSaved, savedFraction);
            ChangeValue(Header.PercentResourcesUsed, usedFraction);

            // Sensitivity             = proteins identified / proteins identified in the whole experiment
            // Fold change             = proteins identified / proteins identified by the naive approach
            // Sensitivity limited DDA = proteins also identified by the naive approach / proteins identified by the naive approach
            ChangeValue(
                Header.ProteinIdentificationSensitivity,
                takeRatio((int)data[Header.NumProteinsIdentified], numProteinsIdentifiedOriginalExperiment));
            ChangeValue(
                Header.ProteinIdentificationFoldChange,
                takeRatio((int)data[Header.NumProteinsIdentified], naiveProteinCount));
            ChangeValue(
                Header.ProteinIdentificationSensitivityLimitedDDA,
                takeRatio((int)data[Header.ProteinsIdentifiedInLimitedDDA], naiveProteinCount));

            List <String> excludedInProgram = exclusionProfile.getDatabase().getExcludedProteins();
            int           overlapWithBaseline = compareProteins(excludedInProgram, baselineProteins);
            ChangeValue(Header.NumProteinOverlap_ExcludedProteinsAgainstNoExclusionProteins, overlapWithBaseline);

            int proteinGroupCount = ppr.getFilteredProteinGroups().Count;
            ChangeValue(Header.ProteinGroupsIdentified, proteinGroupCount);
        }
 /// <summary>
 /// Records the identity, timing, parameters and exclusion list of a finished experiment, then
 /// runs the post-processing calculations.
 /// </summary>
 /// <param name="experimentName">Passed to <c>setExperimentName</c>.</param>
 /// <param name="experimentType">Passed to <c>setExperimentName</c>.</param>
 /// <param name="analysisTime">Passed to <c>setExperimentDuration</c>.</param>
 /// <param name="totalRunTime">Passed to <c>setExperimentDuration</c>.</param>
 /// <param name="exclusionList">Passed to <c>setExclusionList</c>.</param>
 /// <param name="ppr">ProteinProphet result forwarded to <c>postProcessingCalculations</c>.</param>
 /// <param name="ddaNum">Baseline index forwarded to <c>postProcessingCalculations</c>.</param>
 /// <param name="exclusionProfile">Profile forwarded to <c>postProcessingCalculations</c>.</param>
 public void finalizePerformanceEvaluator(
     String experimentName,
     String experimentType,
     double analysisTime,
     double totalRunTime,
     ExclusionList exclusionList,
     ProteinProphetResult ppr,
     int ddaNum,
     ExclusionProfile exclusionProfile)
 {
     // Descriptive fields come first.
     setExperimentName(experimentName, experimentType);
     setExperimentDuration(analysisTime, totalRunTime);
     setExperimentParams();
     setExclusionList(exclusionList);

     // The derived metrics are computed last.
     postProcessingCalculations(ddaNum, ppr, exclusionProfile);
 }
Beispiel #10
0
 /// <summary>
 /// Creates an experiment for the given exclusion profile and records when it started.
 /// </summary>
 /// <param name="_exclusionProfile">Forwarded to the four-argument constructor.</param>
 /// <param name="_experimentName">Forwarded to the four-argument constructor.</param>
 /// <param name="_experimentNum">Forwarded to the four-argument constructor.</param>
 /// <param name="_exType">Forwarded to the four-argument constructor.</param>
 /// <param name="startTime">Stored as the experiment start time.</param>
 public Experiment(
     ExclusionProfile _exclusionProfile,
     String _experimentName,
     int _experimentNum,
     ExclusionProfileEnum _exType,
     double startTime)
     : this(_exclusionProfile, _experimentName, _experimentNum, _exType)
 {
     // Only the start time is set here; everything else is handled by the chained constructor.
     experimentStartTime = startTime;
 }
Beispiel #11
0
 /// <summary>
 /// Creates an experiment and attaches the exclusion profile it runs with.
 /// </summary>
 /// <param name="_exclusionProfile">Stored as the experiment's exclusion profile.</param>
 /// <param name="_experimentName">Forwarded to the three-argument constructor.</param>
 /// <param name="_experimentNum">Forwarded to the three-argument constructor.</param>
 /// <param name="_exType">Forwarded to the three-argument constructor.</param>
 public Experiment(
     ExclusionProfile _exclusionProfile,
     String _experimentName,
     int _experimentNum,
     ExclusionProfileEnum _exType)
     : this(_experimentName, _experimentNum, _exType)
 {
     // Name, number and type are handled by the chained constructor.
     exclusionProfile = _exclusionProfile;
 }