public List<ResultsGroup>[] run(OpenFileDialog FileLinks)
{
    List<ResultsGroup>[] AllFinalResults = new List<ResultsGroup>[FileLinks.FileNames.Count()];
    Int32 Count = 0;
    //Each data file is treated separately, hence the loop.
    foreach (String filename in FileLinks.FileNames)
    {
        //Get the parameters.
        ParametersForm parameter = new ParametersForm();
        ParametersForm.ParameterSettings paradata = parameter.GetParameters();

        //Perform the first and second grouping and collect the feature data via the Groupings function.
        List<ResultsGroup> LRLR = Groupings(filename, paradata);

        //##############Logistic Regression####################
        Features fe = new Features();
        //Current features.
        Feature featureData = fe.readFeature();
        //Default features.
        String defaultpath = Application.StartupPath + "\\FeatureDefault.fea";
        Feature defaultData = fe.readFeature(defaultpath);

        //Features that will be used: the beta values for the logistic regression,
        //blended 50/50 between the current and the default features.
        Feature finalfeatureData = new Feature();
        finalfeatureData.Initial = featureData.Initial * 0.5 + defaultData.Initial * 0.5;
        finalfeatureData.numChargeStates = featureData.numChargeStates * 0.5 + defaultData.numChargeStates * 0.5;
        finalfeatureData.ScanDensity = featureData.ScanDensity * 0.5 + defaultData.ScanDensity * 0.5;
        finalfeatureData.numModiStates = featureData.numModiStates * 0.5 + defaultData.numModiStates * 0.5;
        finalfeatureData.totalVolume = featureData.totalVolume * 0.5 + defaultData.totalVolume * 0.5;
        finalfeatureData.ExpectedA = featureData.ExpectedA * 0.5 + defaultData.ExpectedA * 0.5;
        finalfeatureData.CentroidScan = featureData.CentroidScan * 0.5 + defaultData.CentroidScan * 0.5;
        finalfeatureData.numOfScan = featureData.numOfScan * 0.5 + defaultData.numOfScan * 0.5;
        finalfeatureData.avgSigNoise = featureData.avgSigNoise * 0.5 + defaultData.avgSigNoise * 0.5;

        //Generate scores.
        SupervisedLearner sl = new SupervisedLearner();
        AllFinalResults[Count] = sl.Scorings(LRLR, finalfeatureData, paradata);
        Count++;
    }
    return AllFinalResults;
}
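//A minimal, hypothetical usage sketch for run() above. The wrapper method name and
//the dialog setup are assumptions, not part of the source; the only requirement is
//Multiselect = true so FileNames can carry several data files.
private void runAllFilesExample()
{
    using (OpenFileDialog dlg = new OpenFileDialog { Multiselect = true })
    {
        if (dlg.ShowDialog() == DialogResult.OK)
        {
            //One List<ResultsGroup> per selected file, in selection order.
            List<ResultsGroup>[] allResults = run(dlg);
        }
    }
}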
private List<Peptide> genFalsePPMSD(String path)
{
    Features fea = new Features();
    List<Peptide> PP = fea.readtablim(path);
    CompositionHypothesisTabbedForm comp = new CompositionHypothesisTabbedForm();
    String sequence = comp.GetSequenceFromCleavedPeptides(PP);

    //Pools that are shuffled to draw random choices: forRandom decides keep/skip
    //(and later the number of glycosylations), forlength picks the fragment length.
    List<int> forRandom = new List<int> { 0, 1, 2 };
    List<int> forlength = new List<int> { 4, 5, 6, 7, 8, 9, 10, 11 };

    List<Peptide> finalAns = new List<Peptide>();
    Int32 StartAA = 0;
    Int32 EndAA = 0;

    //Random walk over the sequence: at each step either emit a decoy fragment of
    //random length (4-11 residues) or skip ahead by that length.
    while (EndAA != sequence.Count())
    {
        forRandom.Shuffle();
        if (forRandom[0] == 1)
        {
            //Add this fragment as a decoy peptide.
            Peptide Ans = new Peptide();
            forlength.Shuffle();
            Int32 length = forlength[0];
            EndAA = StartAA + length;
            if (EndAA > sequence.Count())
                EndAA = sequence.Count();
            String Fra = "";
            //Convert to the Peptide's 1-based residue numbering.
            Ans.StartAA = StartAA + 1;
            Ans.EndAA = EndAA + 1;
            for (int i = StartAA; i < EndAA; i++)
            {
                Fra = Fra + sequence[i];
            }
            StartAA = StartAA + length;
            Ans.Selected = true;
            Ans.PeptideIndex = 1;
            Ans.Mass = getFragmentMass(Fra);
            Ans.Charge = 0;
            Ans.Modifications = "";
            Ans.MissedCleavages = 0;
            Ans.PreviousAA = "";
            Ans.NextAA = "";
            forRandom.Shuffle();
            Ans.NumGlycosylations = forRandom[0];
            finalAns.Add(Ans);
        }
        else
        {
            //Skip this fragment.
            forlength.Shuffle();
            Int32 length = forlength[0];
            EndAA = StartAA + length;
            if (EndAA > sequence.Count())
                EndAA = sequence.Count();
            StartAA = StartAA + length;
        }
        //If the walk reached the end without emitting anything, start over so the
        //function never returns an empty decoy list.
        if (EndAA == sequence.Count() && finalAns.Count() == 0)
        {
            EndAA = 0;
            StartAA = 0;
        }
    }
    return finalAns;
}
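//The Shuffle() extension used above is not defined in this file. A minimal sketch of
//what it presumably does, as an in-place Fisher-Yates shuffle; the class name and the
//shared Random instance are assumptions.
public static class ListExtensions
{
    private static readonly Random rng = new Random();

    public static void Shuffle<T>(this IList<T> list)
    {
        //Walk backwards, swapping each slot with a uniformly chosen slot at or before it.
        for (int n = list.Count - 1; n > 0; n--)
        {
            int k = rng.Next(n + 1);
            T tmp = list[k];
            list[k] = list[n];
            list[n] = tmp;
        }
    }
}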
//Fits a logistic regression to the data obtained from the Groupings function.
private Feature FitLogisticRegression(List<ResultsGroup> LRLR)
{
    int numofMatches = 0;

    //Put LRLR into parallel arrays so the regression routine can read it:
    //eight predictors per row, with the match flag (0/1) as the response.
    Double[][] inputs = new Double[LRLR.Count][];
    Double[] output = new Double[LRLR.Count];
    for (int i = 0; i < LRLR.Count; i++)
    {
        inputs[i] = new Double[]
        {
            Convert.ToDouble(LRLR[i].NumChargeStates),
            Convert.ToDouble(LRLR[i].ScanDensity),
            Convert.ToDouble(LRLR[i].NumModiStates),
            Convert.ToDouble(LRLR[i].TotalVolume),
            Convert.ToDouble(LRLR[i].ExpectedA),
            Convert.ToDouble(LRLR[i].CentroidScan),
            Convert.ToDouble(LRLR[i].NumOfScan),
            Convert.ToDouble(LRLR[i].AvgSigNoise)
        };
        output[i] = Convert.ToDouble(LRLR[i].Match);
        if (LRLR[i].Match == true)
            numofMatches++;
    }

    //With fewer than 10 positive examples the fit would be unreliable,
    //so fall back to the currently loaded features.
    if (numofMatches < 10)
    {
        Features FeaturesMenu = new Features();
        Feature defaultFeatures = FeaturesMenu.readFeature();
        MessageBox.Show("Warning: there are fewer than 10 matches. The currently loaded features will be used instead.");
        return defaultFeatures;
    }

    //Perform the logistic regression, iterating reweighted least squares until
    //the largest coefficient change drops below 0.001.
    LogisticRegression regression = new LogisticRegression(inputs: 8);
    var results = new IterativeReweightedLeastSquares(regression);
    double delta = 0;
    do
    {
        //Perform one IRLS iteration; delta is the largest change in the coefficients.
        delta = results.Run(inputs, output);
    } while (delta > 0.001);

    //Copy the fitted beta values out: Coefficients[0] is the intercept, followed
    //by one coefficient per predictor in the order the inputs were laid out.
    Feature answer = new Feature();
    answer.Initial = regression.Coefficients[0];
    answer.numChargeStates = regression.Coefficients[1];
    answer.ScanDensity = regression.Coefficients[2];
    answer.numModiStates = regression.Coefficients[3];
    answer.totalVolume = regression.Coefficients[4];
    answer.ExpectedA = regression.Coefficients[5];
    answer.CentroidScan = regression.Coefficients[6];
    answer.numOfScan = regression.Coefficients[7];
    answer.avgSigNoise = regression.Coefficients[8];
    return answer;
}
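//The IRLS loop above stops only on convergence, so a degenerate (e.g. linearly
//separable) data set could keep it running for a long time. A hedged drop-in
//variant of that loop with a bound; the cap of 100 is an assumption, not a value
//from the source:
double delta;
int iterations = 0;
do
{
    delta = results.Run(inputs, output);
    iterations++;
} while (delta > 0.001 && iterations < 100);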
//This runs the logistic regression scoring and generates a score for each grouping result.
public List<ResultsGroup> Scorings(List<ResultsGroup> LRLR, Feature featureData, ParametersForm.ParameterSettings paradata)
{
    //Load the current features from the software; if they do not exist, the default features are used.
    Features fea = new Features();
    Feature dfeatureData = fea.readFeature();
    String defaultpath = Application.StartupPath + "\\FeatureDefault.fea";
    Feature defaultData = fea.readFeature(defaultpath);

    //Beta values for the logistic regression: 90% from the supplied features,
    //5% each from the current and the default features.
    Double initial = featureData.Initial * 0.9 + dfeatureData.Initial * 0.05 + defaultData.Initial * 0.05;
    Double bnumChargeStates = featureData.numChargeStates * 0.9 + dfeatureData.numChargeStates * 0.05 + defaultData.numChargeStates * 0.05;
    Double bScanDensity = featureData.ScanDensity * 0.9 + dfeatureData.ScanDensity * 0.05 + defaultData.ScanDensity * 0.05;
    Double bnumModiStates = featureData.numModiStates * 0.9 + dfeatureData.numModiStates * 0.05 + defaultData.numModiStates * 0.05;
    Double btotalVolume = featureData.totalVolume * 0.9 + dfeatureData.totalVolume * 0.05 + defaultData.totalVolume * 0.05;
    Double bExpectedA = featureData.ExpectedA * 0.9 + dfeatureData.ExpectedA * 0.05 + defaultData.ExpectedA * 0.05;
    Double bCentroid = featureData.CentroidScan * 0.9 + dfeatureData.CentroidScan * 0.05 + defaultData.CentroidScan * 0.05;
    Double bnumOfScan = featureData.numOfScan * 0.9 + dfeatureData.numOfScan * 0.05 + defaultData.numOfScan * 0.05;
    Double bavgSigNoise = featureData.avgSigNoise * 0.9 + dfeatureData.avgSigNoise * 0.05 + defaultData.avgSigNoise * 0.05;

    if (dfeatureData.Initial != defaultData.Initial)
    {
        //The current features differ from the defaults, so reweight the beta values:
        //70% supplied, 20% current, 10% default.
        initial = featureData.Initial * 0.7 + dfeatureData.Initial * 0.2 + defaultData.Initial * 0.1;
        bnumChargeStates = featureData.numChargeStates * 0.7 + dfeatureData.numChargeStates * 0.2 + defaultData.numChargeStates * 0.1;
        bScanDensity = featureData.ScanDensity * 0.7 + dfeatureData.ScanDensity * 0.2 + defaultData.ScanDensity * 0.1;
        bnumModiStates = featureData.numModiStates * 0.7 + dfeatureData.numModiStates * 0.2 + defaultData.numModiStates * 0.1;
        btotalVolume = featureData.totalVolume * 0.7 + dfeatureData.totalVolume * 0.2 + defaultData.totalVolume * 0.1;
        bExpectedA = featureData.ExpectedA * 0.7 + dfeatureData.ExpectedA * 0.2 + defaultData.ExpectedA * 0.1;
        bCentroid = featureData.CentroidScan * 0.7 + dfeatureData.CentroidScan * 0.2 + defaultData.CentroidScan * 0.1;
        bnumOfScan = featureData.numOfScan * 0.7 + dfeatureData.numOfScan * 0.2 + defaultData.numOfScan * 0.1;
        bavgSigNoise = featureData.avgSigNoise * 0.7 + dfeatureData.avgSigNoise * 0.2 + defaultData.avgSigNoise * 0.1;
    }

    Double e = Math.E;
    try
    {
        //Now calculate the score for each grouping result.
        Double scoreInput = 0;
        Double Score = 0;
        for (int o = 0; o < LRLR.Count; o++)
        {
            scoreInput = initial
                + bnumChargeStates * Convert.ToDouble(LRLR[o].NumChargeStates)
                + bScanDensity * Convert.ToDouble(LRLR[o].ScanDensity)
                + bnumModiStates * Convert.ToDouble(LRLR[o].NumModiStates)
                + btotalVolume * Convert.ToDouble(LRLR[o].TotalVolume)
                + bExpectedA * Convert.ToDouble(LRLR[o].ExpectedA)
                + bCentroid * Convert.ToDouble(LRLR[o].CentroidScan)
                + bnumOfScan * Convert.ToDouble(LRLR[o].NumOfScan)
                + bavgSigNoise * Convert.ToDouble(LRLR[o].AvgSigNoise);
            Double store = Math.Pow(e, -1 * scoreInput);
            Score = 1 / (1 + store);
            //Recalibrate with a gentler slope (0.6). The -0.3 shift in the low branch
            //makes the two branches meet at scoreInput = 0, keeping the mapping continuous.
            if (Score >= 0.5)
            {
                store = Math.Pow(e, -0.6 * scoreInput);
                Score = (0.8512 / (1 + store)) + 0.1488;
            }
            else
            {
                store = Math.Pow(e, -0.6 * scoreInput - 0.3);
                Score = 1 / (1 + store);
            }
            LRLR[o].Score = Score;
        }

        //Implement the score threshold. If the top three scores average above 0.98
        //(sum > 2.94), the sigmoid is saturated at the top of the list, so rescale it.
        LRLR = LRLR.OrderByDescending(a => a.Score).ToList();
        if (LRLR[0].Score + LRLR[1].Score + LRLR[2].Score > 2.94)
        {
            //Average the linear predictors of the top three rows.
            scoreInput = 0;
            for (int t = 0; t < 3; t++)
            {
                scoreInput = scoreInput + initial
                    + bnumChargeStates * Convert.ToDouble(LRLR[t].NumChargeStates)
                    + bScanDensity * Convert.ToDouble(LRLR[t].ScanDensity)
                    + bnumModiStates * Convert.ToDouble(LRLR[t].NumModiStates)
                    + btotalVolume * Convert.ToDouble(LRLR[t].TotalVolume)
                    + bExpectedA * Convert.ToDouble(LRLR[t].ExpectedA)
                    + bCentroid * Convert.ToDouble(LRLR[t].CentroidScan)
                    + bnumOfScan * Convert.ToDouble(LRLR[t].NumOfScan)
                    + bavgSigNoise * Convert.ToDouble(LRLR[t].AvgSigNoise);
            }
            scoreInput = scoreInput / 3;

            //-2.9444... equals -ln(19), the logit of 0.05. Dividing by the average
            //top-3 predictor flattens the sigmoid so a row sitting at that average
            //gets store = 1/19 and a score of about 0.957 instead of saturating at 1.
            Double n = -2.9444389791664404600090274318879 / scoreInput;
            for (int o = 0; o < LRLR.Count; o++)
            {
                //0.57444251681 = 1/(1 + e^(-0.3)), the recalibrated score at a zero
                //linear predictor; only rows above it (positive predictor) are rescaled.
                if (LRLR[o].Score >= 0.57444251681)
                {
                    scoreInput = initial
                        + bnumChargeStates * Convert.ToDouble(LRLR[o].NumChargeStates)
                        + bScanDensity * Convert.ToDouble(LRLR[o].ScanDensity)
                        + bnumModiStates * Convert.ToDouble(LRLR[o].NumModiStates)
                        + btotalVolume * Convert.ToDouble(LRLR[o].TotalVolume)
                        + bExpectedA * Convert.ToDouble(LRLR[o].ExpectedA)
                        + bCentroid * Convert.ToDouble(LRLR[o].CentroidScan)
                        + bnumOfScan * Convert.ToDouble(LRLR[o].NumOfScan)
                        + bavgSigNoise * Convert.ToDouble(LRLR[o].AvgSigNoise);
                    Double store = Math.Pow(e, n * scoreInput);
                    Score = (0.8512 / (1 + store)) + 0.1488;
                    LRLR[o].Score = Score;
                }
            }
        }

        //Drop every row below the configured minimum score. The list is already
        //sorted descending, so cut at the first row under the threshold.
        Int32 scoreCutOff = LRLR.Count + 1;
        for (int t = 0; t < LRLR.Count; t++)
        {
            if (LRLR[t].Score < paradata.MinScoreThreshold)
            {
                scoreCutOff = t;
                break;
            }
        }
        if (scoreCutOff != LRLR.Count + 1)
        {
            LRLR.RemoveRange(scoreCutOff, LRLR.Count - scoreCutOff);
        }
    }
    catch
    {
        //On any failure (e.g. fewer than three rows for the threshold check),
        //zero out all scores rather than crash.
        for (int o = 0; o < LRLR.Count; o++)
        {
            LRLR[o].Score = 0;
        }
    }
    return LRLR;
}
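//The nine-term weighted sum above is written out four times in Scorings. A hedged
//refactor sketch: the helper name and signature are assumptions, not the code as
//shipped, but it computes exactly the inline expression; 1/(1 + e^(-x)) then turns
//the result into the raw score.
private static double LinearPredictor(ResultsGroup r, double initial,
    double bNumChargeStates, double bScanDensity, double bNumModiStates,
    double bTotalVolume, double bExpectedA, double bCentroid,
    double bNumOfScan, double bAvgSigNoise)
{
    //Intercept plus the weighted sum of the eight predictors.
    return initial
        + bNumChargeStates * Convert.ToDouble(r.NumChargeStates)
        + bScanDensity * Convert.ToDouble(r.ScanDensity)
        + bNumModiStates * Convert.ToDouble(r.NumModiStates)
        + bTotalVolume * Convert.ToDouble(r.TotalVolume)
        + bExpectedA * Convert.ToDouble(r.ExpectedA)
        + bCentroid * Convert.ToDouble(r.CentroidScan)
        + bNumOfScan * Convert.ToDouble(r.NumOfScan)
        + bAvgSigNoise * Convert.ToDouble(r.AvgSigNoise);
}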
//This function draws the ROC curve from the TP and FP rates calculated from the scores.
private void scoreBasedGraph()
{
    CompositionHypothesisTabbedForm comp = new CompositionHypothesisTabbedForm();
    Features FT = new Features();
    String currentpath = Application.StartupPath + "\\FeatureCurrent.fea";
    Feature Fea = FT.readFeature(currentpath);

    //Create the list of random composition hypotheses for testing FDR.
    //Obtain the true-positive data and draw the baseline curve.
    this.drawGraph(ResultGroups, " ");

    //checkBox1 adds a curve for the default features.####################################
    List<ResultsGroup>[] DefaultFeature = new List<ResultsGroup>[DeconData.FileNames.Count()];
    String path = Application.StartupPath + "\\FeatureDefault.fea";
    Feature DeFea = FT.readFeature(path);
    if (checkBox1.Checked == true)
    {
        SupervisedLearner SL = new SupervisedLearner();
        List<ResultsGroup>[] TrueDATADefault = SL.EvaluateFeature(DeconData, CompositionHypothesisList, DeFea);
        this.drawGraph(TrueDATADefault, " Default Features");
    }
    //################################################

    //checkBox2 adds a curve for unsupervised learning. It differs enough from
    //supervised learning that it is hard-coded here.
    UnsupervisedLearner unsupervisedLearner = new UnsupervisedLearner();
    if (checkBox2.Checked == true)
    {
        List<ResultsGroup>[] USLTrueDATA = unsupervisedLearner.evaluate(DeconData, Fea);
        //The ROC curve needs match flags, so take the match list from supervised
        //learning and apply it to the unsupervised results by aligning both lists
        //on monoisotopic mass. Both are sorted descending, and USllasttruematch
        //remembers where the previous scan stopped so the merge stays linear.
        for (int i = 0; i < DeconData.FileNames.Count(); i++)
        {
            ResultGroups[i] = ResultGroups[i].OrderByDescending(a => a.DeconRow.MonoisotopicMassWeight).ToList();
            USLTrueDATA[i] = USLTrueDATA[i].OrderByDescending(b => b.DeconRow.MonoisotopicMassWeight).ToList();
            int USllasttruematch = 0;
            for (int j = 0; j < ResultGroups[i].Count; j++)
            {
                if (ResultGroups[i][j].Match == true)
                {
                    for (int k = USllasttruematch; k < USLTrueDATA[i].Count; k++)
                    {
                        if (USLTrueDATA[i][k].DeconRow.MonoisotopicMassWeight < ResultGroups[i][j].DeconRow.MonoisotopicMassWeight)
                        {
                            USllasttruematch = k;
                            break;
                        }
                        if (USLTrueDATA[i][k].DeconRow.MonoisotopicMassWeight == ResultGroups[i][j].DeconRow.MonoisotopicMassWeight)
                        {
                            USLTrueDATA[i][k].Match = true;
                            USLTrueDATA[i][k].PredictedComposition = ResultGroups[i][j].PredictedComposition;
                            USllasttruematch = k + 1;
                            break;
                        }
                        if (USLTrueDATA[i][k].DeconRow.MonoisotopicMassWeight > ResultGroups[i][j].DeconRow.MonoisotopicMassWeight)
                        {
                            USLTrueDATA[i][k].Match = false;
                        }
                    }
                }
            }
        }
        //Now that both data sets have their matches, draw the graph.
        this.drawGraph(USLTrueDATA, " Unsupervised Learning");
    }
    //#############################unsupervised learning part ends#################

    //Finally, populate the results DataGridView and comboBox2.
    comboBox2.Invoke(new MethodInvoker(delegate
    {
        for (int i = 0; i < TF.Count; i++)
        {
            comboBox2.Items.Add(TF[i].TableName);
        }
        comboBox2.SelectedIndex = 0;
    }));
    dataGridView2.Invoke(new MethodInvoker(delegate
    {
        dataGridView2.DataSource = TF[0];
    }));
}
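//The linear merge above depends on both lists staying aligned by descending mass. A
//hedged alternative sketch for the body of the per-file loop, joining on the mass
//value with a dictionary instead; the local names are assumptions, and exact double
//keys are safe here only because both lists derive from the same DeconRow values.
var matchesByMass = new Dictionary<double, ResultsGroup>();
foreach (ResultsGroup rg in ResultGroups[i])
{
    //Index the supervised matches by their monoisotopic mass.
    if (rg.Match == true)
        matchesByMass[rg.DeconRow.MonoisotopicMassWeight] = rg;
}
foreach (ResultsGroup usl in USLTrueDATA[i])
{
    //Copy the match flag and predicted composition onto the unsupervised row.
    ResultsGroup hit;
    if (matchesByMass.TryGetValue(usl.DeconRow.MonoisotopicMassWeight, out hit))
    {
        usl.Match = true;
        usl.PredictedComposition = hit.PredictedComposition;
    }
}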