Ejemplo n.º 1
0
        /// <summary>
        /// Loads the data table found at <paramref name="path"/> and converts it into an
        /// <see cref="IDataView"/> through <c>IdentificationFeatureExtractionUtil.transformFeatures</c>.
        /// </summary>
        /// <param name="mlContext">Not referenced anywhere in the body of this method.</param>
        /// <param name="path">Value forwarded to <c>IdentificationFeatureExtractionUtil.loadDataTable</c>.</param>
        /// <returns>
        /// The data view built from the loaded table. No train/test split is applied here;
        /// the complete view is returned.
        /// </returns>
        public static IDataView LoadData(MLContext mlContext, String path)
        {
            DataTable loadedTable = IdentificationFeatureExtractionUtil.loadDataTable(path);

            // transformFeatures is invoked with its second argument set to true.
            return IdentificationFeatureExtractionUtil.transformFeatures(loadedTable, true);
        }
Ejemplo n.º 2
0
        //static string mzmlFileBaseName="MS_QC_240min";
        /// <summary>
        /// Runs a single simulation with the no-exclusion profile for <paramref name="ms2File"/>,
        /// takes the identification features from the resulting exclusion profile, passes them
        /// through <c>IdentificationFeatureExtractionUtil.recalibrateStDev</c> and hands the result
        /// to <c>writeFeatures</c>.
        /// </summary>
        /// <param name="ms2File">
        /// MS2 file registered as the simulation test file; its base name (without extension)
        /// prefixes the three output file names.
        /// </param>
        /// <param name="extractedFeatureSavedFile_posAndNeg">
        /// Receives the value of <c>OutputFile_PositiveAndNegative</c>.
        /// </param>
        /// <param name="extractedFeatureSavedFile_posAndNonPos">
        /// Receives the value of <c>OutputFile_PositiveAndNonPositive</c>.
        /// </param>
        public static void ExtractFeatures(String ms2File, out String extractedFeatureSavedFile_posAndNeg, out String extractedFeatureSavedFile_posAndNonPos)
        {
            Console.WriteLine("Extracting features from {0}", ms2File);

            InputFileOrganizer.MS2SimulationTestFile = ms2File;

            // All three output files live in the run's output folder and share the MS2 base name.
            String outputPrefix = Path.GetFileNameWithoutExtension(ms2File);
            String runFolder    = InputFileOrganizer.OutputFolderOfTheRun;

            OutputFile_PositiveAndNegative            = Path.Combine(runFolder, outputPrefix + "_extractedFeatures_PositiveAndNegative.tsv");
            OutputFile_PositiveAndNonPositive         = Path.Combine(runFolder, outputPrefix + "_extractedFeatures_positiveAndNonPositive.tsv");
            OutputFile_PositiveAndNonPositive_NoDecoy = Path.Combine(runFolder, outputPrefix + "_extractedFeatures_positiveAndNonPositive_NoDecoy.tsv");

            // Feature extraction currently keeps decoy proteins in the database and testing set.
            SimulationWithDecoyParamsSetUp();

            // Placeholder values; per the original author they do not matter for this run.
            GlobalVar.ppmTolerance            = 1;
            GlobalVar.retentionTimeWindowSize = 1;
            GlobalVar.AccordThreshold         = 1;
            GlobalVar.XCorr_Threshold         = 1;
            GlobalVar.NumDBThreshold          = 1;

            log.Info("Running No Exclusion Simulation");
            ExclusionProfile noExclusionProfile = ExclusionExplorer.SingleSimulationRun(ExclusionProfileEnum.NO_EXCLUSION_PROFILE);

            log.Info("Extracting identification feature from exclusion profile");
            List<IdentificationFeatures> extractedFeatures = noExclusionProfile.getFeatures();

            log.Info("Recalibrating stDev");
            List<IdentificationFeatures> recalibratedFeatures = IdentificationFeatureExtractionUtil.recalibrateStDev(extractedFeatures);

            writeFeatures(recalibratedFeatures);

            // The out parameters are filled only after writeFeatures has returned.
            extractedFeatureSavedFile_posAndNeg    = OutputFile_PositiveAndNegative;
            extractedFeatureSavedFile_posAndNonPos = OutputFile_PositiveAndNonPositive;
            Console.WriteLine("Extracted Feature written to {0} and {1}", OutputFile_PositiveAndNegative, OutputFile_PositiveAndNonPositive);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Handles one identification result: records its scores on the matching peptide, adds the
        /// peptide to the exclusion list when its xCorr is above the threshold, and marks as
        /// excluded every not-yet-excluded parent protein that
        /// <c>assessProteinIdentificationConfidence</c> reports as confidently identified.
        /// </summary>
        /// <param name="id">
        /// Identification to evaluate. When null, only the unidentified-MS2 counter is updated.
        /// </param>
        protected void evaluateIdentification(IDs id)
        {
            if (id == null)
            {
                // No identification for this spectrum: count it and stop.
                performanceEvaluator.countMS2UnidentifiedAnalyzed();
                return;
            }

            // id is guaranteed non-null from here on.
            Peptide peptide = getPeptideFromIdentification(id);

            double xCorrScore = id.getXCorr();
            double deltaCN    = id.getDeltaCN();
            peptide.addScore(xCorrScore, deltaCN);

#if (!DONTEVALUATE)
            performanceEvaluator.evaluateAnalysis(exclusionList, peptide);
#endif

            // Peptides scoring above this xCorr value are added to the exclusion list.
            const double xCorrCutoff = 2.5;
            if (xCorrScore > xCorrCutoff)
            {
                performanceEvaluator.countPeptidesExcluded();
                log.Debug("xCorrThreshold passed. Peptide added to the exclusion list.");
                exclusionList.addPeptide(peptide);

                // Retention time calibration is only triggered for peptides that pass the cutoff.
                calibrateRetentionTime(peptide);
            }

            // Per-accession verdicts on whether each parent protein is confidently identified.
            Dictionary<string, bool> confidenceByAccession =
                IdentificationFeatureExtractionUtil.assessProteinIdentificationConfidence(peptide.getProteins(), lrAccord);

            List<Protein> newlyExcluded = new List<Protein>();
            foreach (Protein candidate in peptide.getProteins())
            {
                // A protein that is already excluded must not be excluded (and counted) again.
                if (candidate.IsExcluded())
                {
                    continue;
                }

                if (!confidenceByAccession[candidate.getAccession()])
                {
                    continue;
                }

                candidate.setExcluded(true);
                log.Debug("Parent protein " + candidate.getAccession() + " is identified confidently "
                          + candidate.getNumDB() + " times!");
                performanceEvaluator.countProteinsExcluded();
                newlyExcluded.Add(candidate);
            }

            // Hand all proteins excluded in this call to the exclusion list in one batch.
            exclusionList.addProteins(newlyExcluded);
        }
        /// <summary>
        /// Evaluates a single identification: stores its xCorr and deltaCN on the corresponding
        /// peptide, puts the peptide on the exclusion list if the xCorr exceeds the threshold, and
        /// excludes each parent protein that is not yet excluded and that
        /// <c>assessProteinIdentificationConfidence</c> flags as confidently identified.
        /// When <c>TRACKEXCLUDEDPROTEINFEATURE</c> is defined, the feature vector of every newly
        /// excluded protein is also appended to <c>excludedProteinFeatureList</c>.
        /// </summary>
        /// <param name="id">
        /// Identification to evaluate. A null value only increments the unidentified-MS2 counter.
        /// </param>
        protected void evaluateIdentification(IDs id)
        {
            bool nothingIdentified = id == null;
            if (nothingIdentified)
            {
                performanceEvaluator.countMS2UnidentifiedAnalyzed();
                return;
            }

            // Past the guard above, id cannot be null.
            Peptide matchedPeptide = getPeptideFromIdentification(id);

            double xCorrValue   = id.getXCorr();
            double deltaCnValue = id.getDeltaCN();
            matchedPeptide.addScore(xCorrValue, deltaCnValue);

#if (!DONTEVALUATE)
            performanceEvaluator.evaluateAnalysis(exclusionList, matchedPeptide);
#endif

            // xCorr value a peptide has to exceed before it is put on the exclusion list.
            const double minimumXCorrForExclusion = 2.5;
            if (xCorrValue > minimumXCorrForExclusion)
            {
                performanceEvaluator.countPeptidesExcluded();
                log.Debug("xCorrThreshold passed. Peptide added to the exclusion list.");
                exclusionList.addPeptide(matchedPeptide);

                // Only peptides above the threshold are used to calibrate the retention time.
                calibrateRetentionTime(matchedPeptide);
            }

            // Lookup from protein accession to the "confidently identified" verdict.
            Dictionary<string, bool> verdicts =
                IdentificationFeatureExtractionUtil.assessProteinIdentificationConfidence(matchedPeptide.getProteins(), lrAccord);

            List<Protein> excludedNow = new List<Protein>();
            foreach (Protein protein in matchedPeptide.getProteins())
            {
                // Skip proteins excluded earlier so they are not excluded a second time.
                if (protein.IsExcluded())
                {
                    continue;
                }

                bool confident = verdicts[protein.getAccession()];
                if (!confident)
                {
                    continue;
                }

#if TRACKEXCLUDEDPROTEINFEATURE
                // Capture the protein's feature values before it is flagged as excluded.
                excludedProteinFeatureList.Add(protein.vectorize().ItemArray);
#endif
                protein.setExcluded(true);
                log.Debug("Parent protein " + protein.getAccession() + " is identified confidently "
                          + protein.getNumDB() + " times!");
                performanceEvaluator.countProteinsExcluded();
                excludedNow.Add(protein);
            }

            // All proteins excluded during this call are passed to the exclusion list together.
            exclusionList.addProteins(excludedNow);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Produces the feature vector row for this instance by passing its <c>accession</c> and
 /// <c>peptideScores</c> members to <c>IdentificationFeatureExtractionUtil.extractFeatureVector</c>.
 /// </summary>
 /// <returns>The <see cref="DataRow"/> returned by <c>extractFeatureVector</c>.</returns>
 public DataRow vectorize()
 {
     DataRow featureRow = IdentificationFeatureExtractionUtil.extractFeatureVector(accession, peptideScores);
     return featureRow;
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Produces the identification features for this instance by passing its <c>accession</c> and
 /// <c>peptideScores</c> members to <c>IdentificationFeatureExtractionUtil.extractFeatures</c>.
 /// </summary>
 /// <returns>The <c>IdentificationFeatures</c> object returned by the utility call.</returns>
 public IdentificationFeatures extractFeatures()
 {
     IdentificationFeatures features = IdentificationFeatureExtractionUtil.extractFeatures(accession, peptideScores);
     return features;
 }