示例#1
0
        /// <summary>
        /// Computes the identification features for an accession via
        /// <c>extractFeatures</c> and packs them into a row via <c>CreateRow</c>.
        /// </summary>
        /// <param name="accession">Accession passed through to the feature extraction and the row.</param>
        /// <param name="peptideScores">Peptide scores the features are computed from.</param>
        /// <returns>The row produced by <c>CreateRow</c> for the extracted features.</returns>
        public static DataRow extractFeatureVector(String accession, List <PeptideScore> peptideScores)
        {
            IdentificationFeatures features = extractFeatures(accession, peptideScores);

            // Pull every feature out of the container up front.
            int    scoreCount  = features.getCardinality();
            Double topScore    = features.getHighestConfidenceScore();
            Double meanScore   = features.getMeanConfidenceScore();
            Double medianScore = features.getMedianConfidenceScore();
            Double topDCN      = features.getHighestDCN();
            Double meanDCN     = features.getMeanDCN();
            Double medianDCN   = features.getMedianDCN();

            DataRow row;

#if STDEVINCLUDED
            Double scoreStdev = features.getStdevConfidenceScore();

            // With zero or one score the extracted standard deviation is not used;
            // DEFAULT_STDEV_MAX is substituted instead.
            bool tooFewScores = (scoreCount == 0 || scoreCount == 1);
            if (tooFewScores)
            {
                scoreStdev = DEFAULT_STDEV_MAX;
            }

            row = CreateRow(accession, scoreCount, topScore, meanScore,
                            medianScore, topDCN, meanDCN, medianDCN,
                            scoreStdev);
#else
            // Standard deviation is compiled out: the row carries the remaining features only.
            row = CreateRow(accession, scoreCount, topScore, meanScore,
                            medianScore, topDCN, meanDCN, medianDCN);
#endif

            return row;
        }
示例#2
0
        /*
         * Write the identification features used for training the logistic regression
         * classifier.
         *
         * Output layout: a header line ("label," followed by
         * IdentificationFeatures.getHeader()), then one line per feature set.
         * The first column is the label: 1 for entries of positiveTrainingSet,
         * 0 for entries of negativeTrainingSet. Lines are separated by "\n" and
         * the file does not end with a trailing newline.
         *
         * file_path           - path of the file to create or overwrite.
         * positiveTrainingSet - feature sets written with label 1.
         * negativeTrainingSet - feature sets written with label 0.
         *
         * On any exception the error is printed to the console and logged, the
         * method waits for a key press, and the process exits.
         */
        public static void WriteIdentificationFeaturesFile(String file_path,
                                                           List <IdentificationFeatures> positiveTrainingSet,
                                                           List <IdentificationFeatures> negativeTrainingSet)
        {
            log.Debug("Writing Identification Features to a file...");
            try
            {
                // Logged before opening so the path is recorded even if the open fails.
                log.Debug("File name: " + file_path);

                // The using block guarantees the writer is flushed and the file handle
                // released even when a write throws part-way through.
                using (StreamWriter writer = new StreamWriter(file_path))
                {
                    // Write header TODO remove
                    String header = "label," + IdentificationFeatures.getHeader();
                    writer.Write(header);

                    // in the first column, 1 indicates positive training set
                    foreach (IdentificationFeatures i in positiveTrainingSet)
                    {
                        writer.Write("\n" + "1," + i.writeToFile());
                        // Flushed per record so rows already written reach the file promptly.
                        writer.Flush();
                    }
                    // in the first column, 0 indicates negative training set
                    foreach (IdentificationFeatures i in negativeTrainingSet)
                    {
                        writer.Write("\n" + "0," + i.writeToFile());
                        writer.Flush();
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
                log.Error("Writing file unsuccessful!!!");
                Console.ReadKey();
                // NOTE(review): exit code 0 is kept for backward compatibility, although
                // it reports success to the calling process on a failure path - confirm.
                Environment.Exit(0);
            }
            log.Debug("Writing file successful.");
        }
示例#3
0
        /// <summary>
        /// Summarises a list of peptide scores into an <c>IdentificationFeatures</c>:
        /// the count, the highest/mean/median XCorr, the highest/mean/median dCN and
        /// the standard deviation of the XCorr values.
        /// </summary>
        /// <param name="accession">Accession stored in the returned features.</param>
        /// <param name="peptideScores">Scores to summarise; an empty list yields all-zero statistics.</param>
        /// <returns>The features computed for <paramref name="accession"/>.</returns>
        public static IdentificationFeatures extractFeatures(String accession, List <PeptideScore> peptideScores)
        {
            //TODO should this be number of unique peptides or number of peptide scores??
            int scoreCount = peptideScores.Count;

            // Nothing to summarise: report every statistic as zero.
            if (scoreCount < 1)
            {
                return new IdentificationFeatures(accession, scoreCount, 0, 0, 0, 0, 0, 0, 0);
            }

            double[] xcorrValues = new double[scoreCount];
            double[] dcnValues   = new double[scoreCount];

            Double bestXCorr = Double.MinValue;
            Double bestDCN   = Double.MinValue;

            // Single pass: collect the raw values and track both maxima.
            int slot = 0;
            foreach (PeptideScore entry in peptideScores)
            {
                Double xcorr = entry.getXCorr();
                Double dcn   = entry.getdCN();

                xcorrValues[slot] = xcorr;
                dcnValues[slot]   = dcn;
                slot++;

                bestXCorr = (xcorr > bestXCorr) ? xcorr : bestXCorr;
                bestDCN   = (dcn > bestDCN) ? dcn : bestDCN;
            }

            // Means first, then medians, then the standard deviation; the order is
            // kept fixed because the helper implementations are not visible here.
            Double xcorrMean = xcorrValues.Average();
            Double dcnMean   = dcnValues.Average();

            Double xcorrMedian = CalculateMedian(xcorrValues);
            Double dcnMedian   = CalculateMedian(dcnValues);

            // NOTE(review): an earlier note here says the standard deviation is
            // bias-corrected (N-1 denominator), which estimates the variance more
            // accurately for a small N - confirm against CalculateStdDev.
            Double xcorrStdev = CalculateStdDev(xcorrValues);

            return new IdentificationFeatures(accession, scoreCount, bestXCorr, xcorrMean, xcorrMedian,
                                              bestDCN, dcnMean, dcnMedian, xcorrStdev);
        }