コード例 #1
0
        //Scores every file selected in the dialog independently.
        //FileLinks : dialog whose FileNames are processed one by one.
        //Returns   : one scored result list per file, in FileNames order.
        public List<ResultsGroup>[] run(OpenFileDialog FileLinks)
        {
            //Each coefficient used for scoring is the plain 50/50 mix of two stored values.
            Func<Double, Double, Double> mixHalf = (loaded, shipped) => loaded * 0.5 + shipped * 0.5;

            List<ResultsGroup>[] perFileResults = new List<ResultsGroup>[FileLinks.FileNames.Count()];
            int slot = 0;

            foreach (String filename in FileLinks.FileNames)
            {
                //Parameters are fetched anew for every file.
                ParametersForm parameterForm = new ParametersForm();
                ParametersForm.ParameterSettings settings = parameterForm.GetParameters();

                //Grouping step: produces the rows (with their feature values) to be scored.
                List<ResultsGroup> grouped = Groupings(filename, settings);

                //Two coefficient sets: whatever readFeature() returns without a path,
                //and the set stored in FeatureDefault.fea next to the executable.
                Features featureStore = new Features();
                Feature loadedSet = featureStore.readFeature();
                Feature shippedSet = featureStore.readFeature(Application.StartupPath + "\\FeatureDefault.fea");

                //Blend the two sets field by field.
                Feature blended = new Feature();
                blended.Initial = mixHalf(loadedSet.Initial, shippedSet.Initial);
                blended.numChargeStates = mixHalf(loadedSet.numChargeStates, shippedSet.numChargeStates);
                blended.ScanDensity = mixHalf(loadedSet.ScanDensity, shippedSet.ScanDensity);
                blended.numModiStates = mixHalf(loadedSet.numModiStates, shippedSet.numModiStates);
                blended.totalVolume = mixHalf(loadedSet.totalVolume, shippedSet.totalVolume);
                blended.ExpectedA = mixHalf(loadedSet.ExpectedA, shippedSet.ExpectedA);
                blended.CentroidScan = mixHalf(loadedSet.CentroidScan, shippedSet.CentroidScan);
                blended.numOfScan = mixHalf(loadedSet.numOfScan, shippedSet.numOfScan);
                blended.avgSigNoise = mixHalf(loadedSet.avgSigNoise, shippedSet.avgSigNoise);

                //Score the grouped rows with the blended coefficients.
                SupervisedLearner scorer = new SupervisedLearner();
                perFileResults[slot] = scorer.Scorings(grouped, blended, settings);
                slot++;
            }

            return perFileResults;
        }
コード例 #2
0
        //Cuts the sequence derived from the peptides in the given file into random-length
        //pieces and returns a randomly chosen subset of those pieces as new Peptide entries.
        //path    : file handed to Features.readtablim.
        //Returns : the kept pieces; never empty unless the sequence itself is empty.
        private List<Peptide> genFalsePPMSD(String path)
        {
            Features featureReader = new Features();
            List<Peptide> sourcePeptides = featureReader.readtablim(path);
            CompositionHypothesisTabbedForm hypothesisForm = new CompositionHypothesisTabbedForm();
            String sequence = hypothesisForm.GetSequenceFromCleavedPeptides(sourcePeptides);
            Int32 sequenceLength = sequence.Count();

            //After a shuffle, element 0 of this list is the random draw (0, 1 or 2).
            List<int> draw = new List<int> { 0, 1, 2 };
            //Candidate piece lengths; element 0 after a shuffle is the length used.
            List<int> lengthPool = new List<int> { 4, 5, 6, 7, 8, 9, 10, 11 };

            List<Peptide> pieces = new List<Peptide>();
            Int32 cutStart = 0;
            Int32 cutEnd = 0;
            while (cutEnd != sequenceLength)
            {
                //A piece is kept only when the draw comes up 1; otherwise it is skipped.
                draw.Shuffle();
                bool keepPiece = draw[0] == 1;
                Peptide piece = keepPiece ? new Peptide() : null;

                //Both paths consume one length draw and clamp the end to the sequence.
                lengthPool.Shuffle();
                Int32 pieceLength = lengthPool[0];
                cutEnd = Math.Min(cutStart + pieceLength, sequenceLength);

                if (keepPiece)
                {
                    piece.StartAA = cutStart + 1;
                    piece.EndAA = cutEnd + 1;
                    piece.Selected = true;
                    piece.PeptideIndex = 1;
                    piece.Mass = getFragmentMass(sequence.Substring(cutStart, cutEnd - cutStart));
                    piece.Charge = 0;
                    piece.Modifications = "";
                    piece.MissedCleavages = 0;
                    piece.PreviousAA = "";
                    piece.NextAA = "";
                    //A second draw (0, 1 or 2) becomes the glycosylation count.
                    draw.Shuffle();
                    piece.NumGlycosylations = draw[0];
                    pieces.Add(piece);
                }

                //The start advances by the drawn length even when the end was clamped.
                cutStart += pieceLength;

                //Reached the end without keeping anything: start over from the beginning.
                if (cutEnd == sequenceLength && pieces.Count == 0)
                {
                    cutEnd = 0;
                    cutStart = 0;
                }
            }
            return pieces;
        }
コード例 #3
0
        //Fits a logistic regression to the grouping results and returns the fitted
        //coefficients packed into a Feature.
        //LRLR    : grouping results; eight numeric columns are the inputs, Match is the label.
        //Returns : the fitted coefficients, or the result of Features.readFeature() (after a
        //          warning dialog) when fewer than 10 rows are marked as matches.
        private Feature FitLogisticRegression(List<ResultsGroup> LRLR)
        {
            //Lay the rows out as jagged input / label arrays for the regression library.
            int rowCount = LRLR.Count;
            Double[][] inputs = new Double[rowCount][];
            Double[] output = new Double[rowCount];
            int matchCount = 0;
            for (int row = 0; row < rowCount; row++)
            {
                ResultsGroup entry = LRLR[row];
                inputs[row] = new Double[]
                {
                    Convert.ToDouble(entry.NumChargeStates),
                    Convert.ToDouble(entry.ScanDensity),
                    Convert.ToDouble(entry.NumModiStates),
                    Convert.ToDouble(entry.TotalVolume),
                    Convert.ToDouble(entry.ExpectedA),
                    Convert.ToDouble(entry.CentroidScan),
                    Convert.ToDouble(entry.NumOfScan),
                    Convert.ToDouble(entry.AvgSigNoise)
                };
                output[row] = Convert.ToDouble(entry.Match);
                if (entry.Match == true)
                {
                    matchCount++;
                }
            }

            //Too few positive rows: skip the fit and hand back the stored feature set.
            if (matchCount < 10)
            {
                Features featureStore = new Features();
                Feature stored = featureStore.readFeature();
                MessageBox.Show("Warning: there are less than 10 matches. Currently Loaded Features will be used instead.");
                return stored;
            }

            //Iteratively reweighted least squares; keep iterating while the value
            //returned by Run stays above 0.001. At least one iteration always runs.
            LogisticRegression regression = new LogisticRegression(inputs: 8);
            var trainer = new IterativeReweightedLeastSquares(regression);
            while (trainer.Run(inputs, output) > 0.001)
            {
            }

            //Coefficient 0 goes to Initial (presumably the intercept - confirm against the
            //library); 1..8 follow the column order used when building the inputs.
            Feature answer = new Feature();
            answer.Initial = regression.Coefficients[0];
            answer.numChargeStates = regression.Coefficients[1];
            answer.ScanDensity = regression.Coefficients[2];
            answer.numModiStates = regression.Coefficients[3];
            answer.totalVolume = regression.Coefficients[4];
            answer.ExpectedA = regression.Coefficients[5];
            answer.CentroidScan = regression.Coefficients[6];
            answer.numOfScan = regression.Coefficients[7];
            answer.avgSigNoise = regression.Coefficients[8];
            return answer;
        }
コード例 #4
0
        //Scores every grouping result with a logistic function of its features and drops
        //the results that fall below the minimum score.
        //LRLR        : grouping results to score; each element's Score is overwritten.
        //featureData : coefficient set supplied by the caller.
        //paradata    : settings; only MinScoreThreshold is read here.
        //Returns     : a new list ordered by first-pass score (descending) and cut at the
        //              first entry whose final score is below paradata.MinScoreThreshold.
        //              If anything throws while scoring, every Score is set to 0 and no
        //              threshold is applied (best-effort fallback kept from the original).
        public List<ResultsGroup> Scorings(List<ResultsGroup> LRLR, Feature featureData, ParametersForm.ParameterSettings paradata)
        {
            //Two stored coefficient sets are mixed into the supplied one: whatever
            //readFeature() returns without a path, and the set in FeatureDefault.fea.
            Features fea = new Features();
            Feature dfeatureData = fea.readFeature();
            String defaultpath = Application.StartupPath + "\\FeatureDefault.fea";
            Feature defaultData = fea.readFeature(defaultpath);

            //Mixing weights (supplied / loaded / default). When the loaded set differs
            //from the default set (judged by Initial only) it gets a larger share.
            Double weightSupplied = 0.9;
            Double weightLoaded = 0.05;
            Double weightDefault = 0.05;
            if (dfeatureData.Initial != defaultData.Initial)
            {
                weightSupplied = 0.7;
                weightLoaded = 0.2;
                weightDefault = 0.1;
            }
            Func<Double, Double, Double, Double> blend = (supplied, loaded, shipped) =>
                supplied * weightSupplied + loaded * weightLoaded + shipped * weightDefault;

            Double initial = blend(featureData.Initial, dfeatureData.Initial, defaultData.Initial);
            Double bnumChargeStates = blend(featureData.numChargeStates, dfeatureData.numChargeStates, defaultData.numChargeStates);
            Double bScanDensity = blend(featureData.ScanDensity, dfeatureData.ScanDensity, defaultData.ScanDensity);
            Double bnumModiStates = blend(featureData.numModiStates, dfeatureData.numModiStates, defaultData.numModiStates);
            Double btotalVolume = blend(featureData.totalVolume, dfeatureData.totalVolume, defaultData.totalVolume);
            //Fixed: this coefficient used to be blended with totalVolume from the two
            //stored sets instead of their ExpectedA values.
            Double bExpectedA = blend(featureData.ExpectedA, dfeatureData.ExpectedA, defaultData.ExpectedA);
            Double bCentroid = blend(featureData.CentroidScan, dfeatureData.CentroidScan, defaultData.CentroidScan);
            Double bnumOfScan = blend(featureData.numOfScan, dfeatureData.numOfScan, defaultData.numOfScan);
            Double bavgSigNoise = blend(featureData.avgSigNoise, dfeatureData.avgSigNoise, defaultData.avgSigNoise);

            //Linear predictor of the logistic model for one row.
            Func<ResultsGroup, Double> linearPredictor = row =>
                initial
                + bnumChargeStates * Convert.ToDouble(row.NumChargeStates)
                + bScanDensity * Convert.ToDouble(row.ScanDensity)
                + bnumModiStates * Convert.ToDouble(row.NumModiStates)
                + btotalVolume * Convert.ToDouble(row.TotalVolume)
                + bExpectedA * Convert.ToDouble(row.ExpectedA)
                + bCentroid * Convert.ToDouble(row.CentroidScan)
                + bnumOfScan * Convert.ToDouble(row.NumOfScan)
                + bavgSigNoise * Convert.ToDouble(row.AvgSigNoise);

            try
            {
                //First pass: the plain logistic value only decides which of the two
                //flattened curves produces the stored score.
                foreach (ResultsGroup row in LRLR)
                {
                    Double scoreInput = linearPredictor(row);
                    Double plainScore = 1 / (1 + Math.Pow(Math.E, -1 * scoreInput));
                    if (plainScore >= 0.5)
                    {
                        row.Score = (0.8512 / (1 + Math.Pow(Math.E, -0.6 * scoreInput))) + 0.1488;
                    }
                    else
                    {
                        row.Score = 1 / (1 + Math.Pow(Math.E, -0.6 * scoreInput - 0.3));
                    }
                }

                LRLR = LRLR.OrderByDescending(a => a.Score).ToList();

                //Second pass, only when the three best scores are all close to 1: rescale the
                //upper curve so that the mean predictor of the top three maps to
                //exp(-2.944...) = 1/19 inside the logistic term.
                //Fixed: the top-three lookup is now guarded. With one or two results it
                //used to throw, and the catch below then zeroed every score.
                if (LRLR.Count >= 3 && LRLR[0].Score + LRLR[1].Score + LRLR[2].Score > 2.94)
                {
                    Double meanTopInput = (linearPredictor(LRLR[0]) + linearPredictor(LRLR[1]) + linearPredictor(LRLR[2])) / 3;
                    Double n = -2.9444389791664404600090274318879 / meanTopInput;
                    foreach (ResultsGroup row in LRLR)
                    {
                        if (row.Score >= 0.57444251681)
                        {
                            row.Score = (0.8512 / (1 + Math.Pow(Math.E, n * linearPredictor(row)))) + 0.1488;
                        }
                    }
                }

                //Cut the list at the first entry below the threshold. The list keeps its
                //first-pass order, so this is a positional cut, not a filter.
                Int32 scoreCutOff = LRLR.FindIndex(r => r.Score < paradata.MinScoreThreshold);
                if (scoreCutOff >= 0)
                {
                    LRLR.RemoveRange(scoreCutOff, LRLR.Count - scoreCutOff);
                }
            }
            catch (Exception)
            {
                //Best-effort fallback: a failure while scoring yields all-zero scores
                //instead of propagating to the caller.
                for (int o = 0; o < LRLR.Count; o++)
                {
                    LRLR[o].Score = 0;
                }
            }

            return LRLR;
        }
コード例 #5
0
        //Draws the graph for AllFinalResults (per the original comment, a ROC curve built
        //from score-based TP/FP rates), then fills comboBox2 and dataGridView2 from TF.
        private void scoreBasedGraph()
        {
            //NOTE(review): this form instance and the feature set loaded below are never
            //read in this method. They are kept because their construction / file read
            //may have side effects that are not visible from here - confirm and remove.
            CompositionHypothesisTabbedForm comp = new CompositionHypothesisTabbedForm();
            Features featureReader = new Features();
            Feature currentFeatures = featureReader.readFeature(Application.StartupPath + "\\FeatureCurrent.fea");

            drawGraph(AllFinalResults, "");

            //The controls are updated through Invoke, i.e. on the thread that owns them.
            //List every table name in comboBox2 and select the first entry.
            comboBox2.Invoke((MethodInvoker)(() =>
            {
                for (int tableIndex = 0; tableIndex < TF.Count; tableIndex++)
                {
                    comboBox2.Items.Add(TF[tableIndex].TableName);
                }
                comboBox2.SelectedIndex = 0;
            }));

            //Show the first table in the grid.
            dataGridView2.Invoke((MethodInvoker)(() => { dataGridView2.DataSource = TF[0]; }));
        }
コード例 #6
0
        //Draws the graph for ResultGroups (per the original comment, a ROC curve built from
        //score-based TP/FP rates), optionally adds the default-feature and
        //unsupervised-learning graphs depending on checkBox1 / checkBox2, then fills
        //comboBox2 and dataGridView2 from TF.
        private void scoreBasedGraph()
        {
            //NOTE(review): this form instance is never read in this method. It is kept
            //because its constructor may have side effects not visible from here.
            CompositionHypothesisTabbedForm comp = new CompositionHypothesisTabbedForm();

            Features featureReader = new Features();
            Feature currentFeatures = featureReader.readFeature(Application.StartupPath + "\\FeatureCurrent.fea");
            drawGraph(ResultGroups, " ");

            //checkBox1: results evaluated with the features stored in FeatureDefault.fea.
            //NOTE(review): this array is allocated but never filled or read.
            List<ResultsGroup>[] DefaultFeature = new List<ResultsGroup>[DeconData.FileNames.Count()];
            Feature shippedFeatures = featureReader.readFeature(Application.StartupPath + "\\FeatureDefault.fea");
            if (checkBox1.Checked)
            {
                SupervisedLearner supervised = new SupervisedLearner();
                List<ResultsGroup>[] defaultScored = supervised.EvaluateFeature(DeconData, CompositionHypothesisList, shippedFeatures);
                drawGraph(defaultScored, " Default Features");
            }

            //checkBox2: unsupervised learning. Its results carry no match flags of their own,
            //so the flags are copied over from ResultGroups by monoisotopic mass weight.
            UnsupervisedLearner unsupervisedLearner = new UnsupervisedLearner();
            if (checkBox2.Checked)
            {
                List<ResultsGroup>[] unsupervised = unsupervisedLearner.evaluate(DeconData, currentFeatures);
                for (int fileIndex = 0; fileIndex < DeconData.FileNames.Count(); fileIndex++)
                {
                    //Both lists are sorted by descending mass so they can be walked in step.
                    //Note that this re-orders the ResultGroups entry in place.
                    ResultGroups[fileIndex] = ResultGroups[fileIndex].OrderByDescending(r => r.DeconRow.MonoisotopicMassWeight).ToList();
                    unsupervised[fileIndex] = unsupervised[fileIndex].OrderByDescending(r => r.DeconRow.MonoisotopicMassWeight).ToList();

                    List<ResultsGroup> candidates = unsupervised[fileIndex];
                    //Position in candidates where the scan for the next matched row resumes.
                    int resumeAt = 0;
                    foreach (ResultsGroup reference in ResultGroups[fileIndex])
                    {
                        if (reference.Match != true)
                        {
                            continue;
                        }

                        for (int k = resumeAt; k < candidates.Count; k++)
                        {
                            ResultsGroup candidate = candidates[k];
                            if (candidate.DeconRow.MonoisotopicMassWeight < reference.DeconRow.MonoisotopicMassWeight)
                            {
                                //Walked past the reference mass: no counterpart exists.
                                resumeAt = k;
                                break;
                            }
                            else if (candidate.DeconRow.MonoisotopicMassWeight == reference.DeconRow.MonoisotopicMassWeight)
                            {
                                //Same mass: copy the match flag and the predicted composition.
                                candidate.Match = true;
                                candidate.PredictedComposition = reference.PredictedComposition;
                                resumeAt = k + 1;
                                break;
                            }
                            else if (candidate.DeconRow.MonoisotopicMassWeight > reference.DeconRow.MonoisotopicMassWeight)
                            {
                                //Heavier than the reference: flagged as not matched, keep scanning.
                                candidate.Match = false;
                            }
                        }
                    }
                }

                drawGraph(unsupervised, " Unsupervised Learning");
            }

            //The controls are updated through Invoke, i.e. on the thread that owns them.
            //List every table name in comboBox2 and select the first entry.
            comboBox2.Invoke((MethodInvoker)(() =>
            {
                for (int tableIndex = 0; tableIndex < TF.Count; tableIndex++)
                {
                    comboBox2.Items.Add(TF[tableIndex].TableName);
                }
                comboBox2.SelectedIndex = 0;
            }));

            //Show the first table in the grid.
            dataGridView2.Invoke((MethodInvoker)(() => { dataGridView2.DataSource = TF[0]; }));
        }