//This is used by the Feature Class to generate Features.
//Runs grouping + logistic regression on every selected data file and returns a
//Feature whose fields are the per-file averages of the fitted values.
public Feature obtainFeatures(OpenFileDialog FileLinks, List<CompositionHypothesisEntry> comhyp)
{
    //Per-file accumulators, one list per feature dimension.
    List<Double> Ini = new List<Double>();
    List<Double> nCS = new List<Double>();
    List<Double> SD = new List<Double>();
    List<Double> nMS = new List<Double>();
    List<Double> tV = new List<Double>();
    List<Double> EA = new List<Double>();
    List<Double> CS = new List<Double>();
    List<Double> NS = new List<Double>();
    List<Double> SN = new List<Double>();

    //Each data file is treated separately, hence the for loop.
    foreach (String filename in FileLinks.FileNames)
    {
        //Get the Parameters.
        ParametersForm parameter = new ParametersForm();
        ParametersForm.ParameterSettings paradata = parameter.GetParameters();

        //Perform the first and second grouping, matching and getting data for
        //the features by the Groupings function.
        Double Mas = adductMass(comhyp);
        List<ResultsGroup> LRLR = Groupings(filename, paradata, Mas, comhyp);

        //Error prevention: a single-entry list appears to be Groupings'
        //"no match" sentinel — TODO confirm against Groupings' implementation.
        if (LRLR.Count == 1)
        {
            MessageBox.Show("There is no match between the hypothesis and the data. Unable to generate results from the file:" + filename);
            continue;
        }

        //##############Logistic Regression####################
        //Perform logistic regression to get the fitted feature values.
        Feature featureData = FitLogisticRegression(LRLR);
        Ini.Add(featureData.Initial);
        nCS.Add(featureData.numChargeStates);
        SD.Add(featureData.ScanDensity);
        nMS.Add(featureData.numModiStates);
        tV.Add(featureData.totalVolume);
        EA.Add(featureData.ExpectedA);
        CS.Add(featureData.CentroidScan);
        NS.Add(featureData.numOfScan);
        SN.Add(featureData.avgSigNoise);
    }

    //BUGFIX: if every file hit the "no match" branch above, the accumulator
    //lists are empty and Enumerable.Average() throws InvalidOperationException.
    //Warn and return a default Feature instead of crashing.
    if (Ini.Count == 0)
    {
        MessageBox.Show("No file produced a match between the hypothesis and the data; returning default feature values.");
        return new Feature();
    }

    //Get the average of all features.
    Feature finalans = new Feature();
    finalans.Initial = Ini.Average();
    finalans.numChargeStates = nCS.Average();
    finalans.ScanDensity = SD.Average();
    finalans.numModiStates = nMS.Average();
    finalans.totalVolume = tV.Average();
    finalans.ExpectedA = EA.Average();
    finalans.CentroidScan = CS.Average();
    finalans.numOfScan = NS.Average();
    finalans.avgSigNoise = SN.Average();
    return finalans;
}
//Scores every selected data file. The scoring features are an equal-weight
//blend of the features trained from the current data (Features.readFeature())
//and the shipped defaults in FeatureDefault.fea. Returns one scored result
//list per file, in selection order.
public List<ResultsGroup>[] run(OpenFileDialog FileLinks)
{
    //One result slot per selected file (FileNames is a string[]; the original
    //Convert.ToInt32(...Count()) round-trip was redundant).
    List<ResultsGroup>[] AllFinalResults = new List<ResultsGroup>[FileLinks.FileNames.Length];
    Int32 Count = 0;
    //Blend weights between the current features and the defaults; these are
    //the beta values used in the logistic regression scoring.
    const Double currentWeight = 0.5;
    const Double defaultWeight = 1.0 - currentWeight;
    //Each data file is treated separately, hence the for loop.
    foreach (String filename in FileLinks.FileNames)
    {
        //Get the Parameters.
        ParametersForm parameter = new ParametersForm();
        ParametersForm.ParameterSettings paradata = parameter.GetParameters();

        //Perform the first and second grouping and getting data for the
        //features by the Groupings function.
        List<ResultsGroup> LRLR = Groupings(filename, paradata);

        //##############Logistic Regression####################
        Features fe = new Features();
        //Current features.
        Feature featureData = fe.readFeature();
        //Default features. BUGFIX: build the path with Path.Combine instead of
        //hand-rolled "\\" concatenation.
        String defaultpath = Path.Combine(Application.StartupPath, "FeatureDefault.fea");
        Feature defaultData = fe.readFeature(defaultpath);

        //Features that will actually be used for scoring.
        Feature finalfeatureData = new Feature();
        finalfeatureData.Initial = featureData.Initial * currentWeight + defaultData.Initial * defaultWeight;
        finalfeatureData.numChargeStates = featureData.numChargeStates * currentWeight + defaultData.numChargeStates * defaultWeight;
        finalfeatureData.ScanDensity = featureData.ScanDensity * currentWeight + defaultData.ScanDensity * defaultWeight;
        finalfeatureData.numModiStates = featureData.numModiStates * currentWeight + defaultData.numModiStates * defaultWeight;
        finalfeatureData.totalVolume = featureData.totalVolume * currentWeight + defaultData.totalVolume * defaultWeight;
        finalfeatureData.ExpectedA = featureData.ExpectedA * currentWeight + defaultData.ExpectedA * defaultWeight;
        finalfeatureData.CentroidScan = featureData.CentroidScan * currentWeight + defaultData.CentroidScan * defaultWeight;
        finalfeatureData.numOfScan = featureData.numOfScan * currentWeight + defaultData.numOfScan * defaultWeight;
        finalfeatureData.avgSigNoise = featureData.avgSigNoise * currentWeight + defaultData.avgSigNoise * defaultWeight;

        //Generate scores.
        SupervisedLearner sl = new SupervisedLearner();
        AllFinalResults[Count] = sl.Scorings(LRLR, finalfeatureData, paradata);
        Count++;
    }
    return AllFinalResults;
}
//Scores every selected data file with a caller-supplied set of feature
//weights. Returns one scored result list per file, in selection order.
public List<ResultsGroup>[] evaluate(OpenFileDialog FileLinks, Feature featureData)
{
    String[] dataFiles = FileLinks.FileNames;
    List<ResultsGroup>[] AllFinalResults = new List<ResultsGroup>[Convert.ToInt32(dataFiles.Count())];

    //Each data file is processed independently.
    for (Int32 fileIndex = 0; fileIndex < dataFiles.Count(); fileIndex++)
    {
        //Read the current parameter settings.
        ParametersForm settingsForm = new ParametersForm();
        ParametersForm.ParameterSettings settings = settingsForm.GetParameters();

        //First and second grouping: collect the feature data for this file.
        List<ResultsGroup> grouped = Groupings(dataFiles[fileIndex], settings);

        //Generate scores for the grouped rows with the supplied features.
        SupervisedLearner learner = new SupervisedLearner();
        AllFinalResults[fileIndex] = learner.Scorings(grouped, featureData, settings);
    }
    return AllFinalResults;
}
//This is used by the Features class to evaluate the features.
//NOTE(review): the dfeatureData parameter is never read here — each file is
//scored with its own freshly fitted features; confirm whether that is intended.
public List<ResultsGroup>[] EvaluateFeature(OpenFileDialog FileLinks, List<CompositionHypothesisEntry> comhyp, Feature dfeatureData)
{
    //One result slot per selected file.
    List<ResultsGroup>[] AllFinalResults = new List<ResultsGroup>[Convert.ToInt32(FileLinks.FileNames.Count())];
    Int32 slot = 0;

    //Each data file is evaluated independently.
    foreach (String dataFile in FileLinks.FileNames)
    {
        //Read the current parameter settings.
        ParametersForm settingsForm = new ParametersForm();
        ParametersForm.ParameterSettings settings = settingsForm.GetParameters();

        //First and second grouping plus hypothesis matching for this file.
        Double adduct = adductMass(comhyp);
        List<ResultsGroup> grouped = Groupings(dataFile, settings, adduct, comhyp);

        //Error prevention: a single-entry list is the "no match" outcome from
        //Groupings — warn, store it as this file's result, and move on.
        if (grouped.Count == 1)
        {
            MessageBox.Show("There is no match between the hypothesis and the data. Unable to generate results from the file:" + dataFile);
            AllFinalResults[slot] = grouped;
            slot++;
            continue;
        }

        //##############Logistic Regression####################
        //Fit the logistic regression, then score the grouped rows with it.
        Feature fitted = FitLogisticRegression(grouped);
        AllFinalResults[slot] = Scorings(grouped, fitted, settings);
        slot++;
    }
    return AllFinalResults;
}
//Writes the scored results as CSV to the supplied stream: a header row first,
//then one line per result. Matched results (hypothesis.MassWeight != 0) emit
//the full hypothesis columns; unmatched results emit zero/blank placeholders.
//Returns the same writer so calls can be chained.
public static StreamWriter WriteResultsToStream(StreamWriter writer, List<ResultsGroup> results, List<String> elementNames, List<String> moleculeNames)
{
    //BUGFIX: the header was built with repeated string concatenation (O(n^2)
    //allocations) and every column name was echoed with Console.WriteLine —
    //leftover debug output. Both fixed; the header text written is unchanged.
    System.Text.StringBuilder header = new System.Text.StringBuilder(
        "Score,MassSpec MW,Compound Key,PeptideSequence,PPM Error,#ofAdduct,#ofCharges,#ofScans,ScanDensity,Avg A:A+2 Error,A:A+2 Ratio,Total Volume,Signal to Noise Ratio,Centroid Scan Error,Centroid Scan,MaxScanNumber,MinScanNumber");
    foreach (var element in elementNames)
    {
        header.Append(',').Append(element);
    }
    header.Append(",Hypothesis MW");
    foreach (var name in moleculeNames)
    {
        header.Append(',').Append(name);
    }
    header.Append(",Adduct/Replacement,Adduct Amount,PeptideModification,PeptideMissedCleavage#,#ofGlycanAttachmentToPeptide,StartAA,EndAA,ProteinID");
    //NOTE(review): the original also constructed a ParametersForm and fetched
    //its ParameterSettings here but never used them — dead code, removed.
    writer.WriteLine(header.ToString());

    for (int i = 0; i < results.Count; i++)
    {
        ResultsGroup result = results[i];
        DeconRow observed = result.DeconRow;
        CompositionHypothesisEntry hypothesis = result.PredictedComposition;

        //If this result was matched to a hypothesis, emit the full row.
        if (hypothesis.MassWeight != 0)
        {
            //Mass error in parts per million, relative to the observed mass.
            double ppmError = ((observed.MonoisotopicMassWeight - hypothesis.MassWeight) / observed.MonoisotopicMassWeight) * 1000000;
            writer.Write(result.Score
                + "," + observed.MonoisotopicMassWeight
                + "," + hypothesis.CompoundComposition
                + "," + hypothesis.PepSequence
                + "," + ppmError
                + "," + result.NumModiStates
                + "," + result.NumChargeStates
                + "," + result.NumOfScan
                + "," + result.ScanDensity
                + "," + result.ExpectedA
                + "," + (observed.MonoisotopicAbundance / (observed.MonoisotopicPlus2Abundance + 1))
                + "," + result.TotalVolume
                + "," + observed.SignalNoiseRatio
                + "," + result.CentroidScan
                + "," + observed.ScanNum
                + "," + result.MaxScanNum
                + "," + result.MinScanNum);
            //Per-element amounts from the hypothesis, in header order.
            for (int j = 0; j < elementNames.Count; j++)
            {
                writer.Write("," + hypothesis.ElementAmount[j]);
            }
            writer.Write("," + hypothesis.MassWeight);
            //Per-molecule counts from the hypothesis, in header order.
            for (int j = 0; j < moleculeNames.Count; j++)
            {
                writer.Write("," + hypothesis.eqCounts[j]);
            }
            writer.WriteLine("," + hypothesis.AddRep + "," + hypothesis.AdductNum + "," + hypothesis.PepModification + "," + hypothesis.MissedCleavages + "," + hypothesis.NumGlycosylations + "," + hypothesis.StartAA + "," + hypothesis.EndAA + "," + hypothesis.ProteinID);
        }
        else
        {
            //Unmatched result: placeholder hypothesis columns (zeros / blanks).
            writer.Write(result.Score + "," + result.DeconRow.MonoisotopicMassWeight + "," + 0 + "" + "," + "," + 0 + "," + result.NumModiStates + "," + result.NumChargeStates + "," + result.NumOfScan + "," + result.ScanDensity + "," + result.ExpectedA + "," + (result.DeconRow.MonoisotopicAbundance / (result.DeconRow.MonoisotopicPlus2Abundance + 1)) + "," + result.TotalVolume + "," + result.DeconRow.SignalNoiseRatio + "," + result.CentroidScan + "," + result.DeconRow.ScanNum + "," + result.MaxScanNum + "," + result.MinScanNum);
            for (int s = 0; s < elementNames.Count; s++)
            {
                writer.Write("," + 0);
            }
            writer.Write("," + 0);
            for (int s = 0; s < moleculeNames.Count; s++)
            {
                writer.Write("," + 0);
            }
            //BUGFIX: this branch emitted only 7 trailing placeholder columns
            //while the header and the matched branch have 8 (Adduct/Replacement
            //through ProteinID); append the missing ProteinID placeholder so
            //unmatched rows align with the header.
            writer.WriteLine("," + "N/A" + "," + 0 + "," + "" + "," + 0 + "," + 0 + "," + 0 + "," + 0 + "," + 0);
        }
    }
    writer.Flush();
    return writer;
}
//Reads a DeconTools CSV file and appends every row that passes the configured
//noise and molecular-weight bounds to DeconDATA, which is returned.
//NOTE(review): DeconDATA appears to be a field, so repeated calls accumulate
//rows across files — confirm callers expect that.
public List<DeconRow> getdata(String DeconData)
{
    ParametersForm parad = new ParametersForm();
    ParametersForm.ParameterSettings paradata = parad.GetParameters();
    //BUGFIX: the original only called fileinput.Close() on the success path and
    //never disposed the StreamReader, leaking both handles when parsing threw.
    //The using blocks guarantee disposal. Exceptions still propagate to the
    //caller, matching the original (whose try/catch was commented out).
    using (FileStream fileinput = new FileStream(DeconData, FileMode.Open, FileAccess.Read))
    using (StreamReader readdata = new StreamReader(fileinput))
    {
        //The first line in the file contains the column names; skip it.
        readdata.ReadLine();
        while (readdata.Peek() >= 0)
        {
            DeconRow Row = new DeconRow();
            String[] column = readdata.ReadLine().Split(',');
            Row.ScanNum = Convert.ToInt32(column[0]);
            Row.charge = Convert.ToInt32(column[1]);
            Row.abundance = Convert.ToInt32(column[2]);
            Row.mz = Convert.ToDouble(column[3]);
            Row.fit = Convert.ToDouble(column[4]);
            Row.average_mw = Convert.ToDouble(column[5]);
            Row.MonoisotopicMassWeight = Convert.ToDouble(column[6]);
            Row.mostabundant_mw = Convert.ToDouble(column[7]);
            Row.fwhm = Convert.ToDouble(column[8]);
            Row.SignalNoiseRatio = Convert.ToDouble(column[9]);
            Row.MonoisotopicAbundance = Convert.ToInt32(column[10]);
            //Parsed as double first — presumably the file can hold a decimal
            //value in this column; TODO confirm against the file format.
            Double mp2a = Convert.ToDouble(column[11]);
            Row.MonoisotopicPlus2Abundance = Convert.ToInt32(mp2a);
            //Flag may be empty, so, special treatment.
            if (column[12] == "")
            {
                Row.flag = 0;
            }
            else
            {
                Row.flag = Convert.ToInt32(column[12]);
            }
            //Optional 14th column: interference score (column is already an
            //array, so use Length instead of the LINQ Count() round-trip).
            Row.interference_sore = column.Length == 14 ? Convert.ToDouble(column[13]) : 0;
            //Keep only rows within the noise threshold and molecular-weight
            //bounds from the Parameters.
            if (Row.abundance >= paradata.DataNoiseTheshold
                && Row.MonoisotopicMassWeight <= paradata.MolecularWeightUpperBound
                && Row.MonoisotopicMassWeight >= paradata.MolecularWeightLowerBound)
            {
                DeconDATA.Add(Row);
            }
        }
    }
    return DeconDATA;
}