/// <summary>
///   Fits a logarithmic model by running a simple linear regression on the
///   log-transformed inputs, then checks the error, slope, intercept and the
///   formatted model string.
/// </summary>
public void LogarithmRegressionRegressTest()
{
    // This is the same data from the example available at
    // http://mathbits.com/MathBits/TISection/Statistics2/logarithmic.htm

    // Declare your inputs and output data
    double[] inputs = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
    double[] outputs = { 6, 9.5, 13, 15, 16.5, 17.5, 18.5, 19, 19.5, 19.7, 19.8 };

    // Transform inputs to logarithms
    double[] logx = Matrix.Log(inputs);

    // Compute a simple linear regression with the log-transformed data
    var lr = new SimpleLinearRegression();
    double error = lr.Regress(logx, outputs);

    // Get an expression representing the learned regression model.
    // We just have to remember that 'x' will actually mean 'log(x)'.
    string result = lr.ToString("N4", CultureInfo.InvariantCulture);

    // Floating-point results are compared with a small tolerance instead of
    // exact equality so the test does not depend on the last bit of rounding.
    const double tolerance = 1e-10;

    Assert.AreEqual(2.8760006026675797, error, tolerance);
    Assert.AreEqual(6.1081800414945704, lr.Slope, tolerance);
    Assert.AreEqual(6.0993411396126653, lr.Intercept, tolerance);

    // Result will be "y(x) = 6.1082x + 6.0993"
    Assert.AreEqual("y(x) = 6.1082x + 6.0993", result);
}
/// <summary>
///   Console sample. Fits a multiple linear regression to the data read from
///   two Excel workbooks and prints its three coefficients, then fits a simple
///   linear regression to a small inline data set and prints the line.
///   Waits for a key press after each printout.
/// </summary>
static void Main(string[] args)
{
    // Inputs: worksheet "Sheet1" of HsAttHp.xlsx as a jagged array of rows.
    DataTable attHpTable = new ExcelReader("HsAttHp.xlsx").GetWorksheet("Sheet1");
    double[][] attHpRows = attHpTable.ToArray<double>();

    // Outputs: first column of worksheet "Sheet1" of HsCost.xlsx.
    DataTable costTable = new ExcelReader("HsCost.xlsx").GetWorksheet("Sheet1");
    double[] costs = costTable.Columns[0].ToArray<double>();

    // Multiple linear regression constructed with (2, true); three coefficients are read back.
    var target = new MultipleLinearRegression(2, true);
    double error = target.Regress(attHpRows, costs);

    double a = target.Coefficients[0];
    double b = target.Coefficients[1];
    double c = target.Coefficients[2];

    Console.WriteLine(a + " " + b + " " + c);
    Console.ReadKey();

    // Simple linear regression over a small inline series.
    double[] inputs = { 2005, 2006, 2007, 2008, 2009, 2010, 2011 };
    double[] outputs = { 12, 19, 29, 37, 45, 23, 33 };

    SimpleLinearRegression regression = new SimpleLinearRegression();
    regression.Regress(inputs, outputs);

    // Evaluate the fitted line at a sample input.
    double y = regression.Compute(85);

    // Print the fitted line as "<slope>x+<intercept>".
    double s = regression.Slope;
    double cut = regression.Intercept;

    Console.WriteLine(s + "x+" + cut);
    Console.ReadKey();
}
/// <summary>
///   Trains the regression model by running the ordinary-least-squares learner
///   on <c>Inputs</c> and <c>Outputs</c>, and marks the algorithm as trained.
/// </summary>
/// <returns><c>true</c> when learning completes; any failure is reported by an exception.</returns>
/// <exception cref="Exception">
///   Learning failed. The original failure is available through
///   <see cref="Exception.InnerException"/>.
/// </exception>
public bool Learn()
{
    try
    {
        regression = this.ols.Learn(Inputs, Outputs);
        IsTrained = true;
        return true;
    }
    catch (Exception e)
    {
        // Pass the caught exception as InnerException so its type and stack
        // trace are preserved; the message text is unchanged for callers.
        throw new Exception(
            "Failed to learn using specified training data." + Environment.NewLine +
            "Inner exception : " + e.Message,
            e);
    }
}
/// <summary>
///   Creates an untrained Simple Linear Regression algorithm with no training
///   data, no test value and no result.
/// </summary>
public SimpleLinearRegression()
{
    // Algorithm metadata
    Name = "Simple Linear Regression";
    Type = AlgorithmType.Regression;
    IsTrained = false;
    PredictionType = typeof(double);
    ResultType = typeof(double);

    // No data has been supplied yet
    Inputs = null;
    Outputs = null;
    TestValue = null;
    Result = null;

    // Seed the Accord framework's generator with a fresh random value
    var seedSource = new Random();
    Generator.Seed = seedSource.Next();

    // Linear regression model and its OrdinaryLeastSquares learner
    Regression = new Accord.Statistics.Models.Regression.Linear.SimpleLinearRegression();
    ols = new OrdinaryLeastSquares();
}
/// <summary>
///   Plots the selected input and output fields of every document in the Mongo
///   collection as a scatter plot, and overlays the line fitted by a simple
///   linear regression drawn from x = 0 to the largest input value.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void btnOK_Click(object sender, EventArgs e)
{
    // Put the input field on the X axis and the output field on the Y axis
    var pane = zedGraph.GraphPane;
    pane.CurveList.Clear();
    pane.XAxis.Title.Text = cmbInputField.Text;
    pane.YAxis.Title.Text = cmbOutputField.Text;

    // Query the document count once so both arrays are sized from the same
    // value and the collection is not counted twice.
    var documentCount = mongoCol.Count();
    if (documentCount == 0)
    {
        // Nothing to plot or fit; Max() on an empty array would throw.
        // Refresh so the cleared curve list and new axis titles are shown.
        zedGraph.AxisChange();
        zedGraph.Invalidate();
        return;
    }

    // Collect the input and output values
    double[] inliersX = new double[documentCount];
    double[] inliersY = new double[documentCount];
    int Cnt = 0;
    foreach (var item in mongoCol.FindAllAs<BsonDocument>())
    {
        inliersX[Cnt] = item[cmbInputField.Text].AsInt32;
        inliersY[Cnt] = item[cmbOutputField.Text].AsInt32;
        Cnt++;
    }

    // Scatter plot of the raw points (symbols only, no connecting line)
    var myCurve = pane.AddCurve("Point",
        new PointPairList(inliersX, inliersY), Color.Blue, SymbolType.Default);
    myCurve.Line.IsVisible = false;
    myCurve.Symbol.Fill = new Fill(Color.Blue);

    // Linear regression: create a new simple linear regression and fit it
    var regression = new SimpleLinearRegression();
    regression.Regress(inliersX, inliersY);

    // The fitted line is drawn through two points: x = 0 and x = max(input).
    // The maximum is computed once and reused.
    double maxX = inliersX.Max();

    double[] InputX = new double[2];
    double[] OutputY = new double[2];
    InputX[0] = 0;
    InputX[1] = maxX;
    OutputY[0] = regression.Compute(0);
    OutputY[1] = regression.Compute(maxX);

    myCurve = pane.AddCurve("Regression:" + regression.ToString(),
        new PointPairList(InputX, OutputY), Color.Blue, SymbolType.Default);
    myCurve.Line.IsVisible = true;
    myCurve.Line.Color = Color.Red;

    // Update the axes and redraw the chart
    zedGraph.AxisChange();
    zedGraph.Invalidate();
}
/// <summary>
/// Constructs a new Simple Linear Regression algorithm and loads the specified training data.
/// </summary>
/// <param name="inputList">Training input values; forwarded to <c>LoadTrainingData</c>.</param>
/// <param name="outputList">Training output values, one per input; forwarded to <c>LoadTrainingData</c>.</param>
public SimpleLinearRegression(List<double> inputList, List<double> outputList)
{
    // Algorithm metadata
    this.Name = "Simple Linear Regression";
    this.Type = AlgorithmType.Regression;
    this.IsTrained = false;
    this.PredictionType = typeof(double);
    this.ResultType = typeof(double);

    // Start from an empty state before the training data is loaded
    this.Inputs = null;
    this.Outputs = null;
    this.TestValue = null;
    this.Result = null;

    // Seed the Accord framework's generator with a fresh random value
    var seedSource = new Random();
    Generator.Seed = seedSource.Next();

    // Process the training data
    this.LoadTrainingData(inputList, outputList);

    // Linear regression model and its OrdinaryLeastSquares learner
    this.regression = new Accord.Statistics.Models.Regression.Linear.SimpleLinearRegression();
    this.ols = new OrdinaryLeastSquares();
}
/// <summary>
///   Fits a line to five (input, output) points and checks the prediction at
///   x = 85 together with the fitted slope and intercept.
/// </summary>
public void RegressTest()
{
    // A simple linear regression relates one continuous input variable to one
    // continuous output variable by the line that minimizes the squared error
    // between the line and the observed output points.

    // Sample data
    double[] inputs = { 80, 60, 10, 20, 30 };
    double[] outputs = { 20, 40, 30, 50, 60 };

    // Fit the regression line
    var line = new SimpleLinearRegression();
    line.Regress(inputs, outputs);

    // Predict the output for a given input; the answer will be 28.088
    double predicted = line.Compute(85);

    // The slope and intercept of the line will be about -0.26 and 50.5
    double slope = line.Slope;
    double intercept = line.Intercept;

    Assert.AreEqual(28.088235294117649, predicted, 1e-10);
    Assert.AreEqual(-0.264706, slope, 1e-5);
    Assert.AreEqual(50.588235, intercept, 1e-5);
    Assert.IsFalse(double.IsNaN(predicted));
}
/// <summary>
/// Constructs a new LinearRegression machine from the given training lists.
/// </summary>
/// <param name="inputList">Training input values; copied into an array.</param>
/// <param name="outputList">Training output values; copied into an array.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="inputList"/> or <paramref name="outputList"/> is <c>null</c>.
/// </exception>
public AISimpleLinearRegression(List<double> inputList, List<double> outputList)
{
    // Validation. The single-string ArgumentNullException constructor takes the
    // parameter NAME, not a message, so the two-argument overload is used to
    // report both the offending parameter and the explanatory text.
    if (inputList == null)
    {
        throw new ArgumentNullException("inputList",
            "Neither the input list nor the output list can be NULL");
    }

    if (outputList == null)
    {
        throw new ArgumentNullException("outputList",
            "Neither the input list nor the output list can be NULL");
    }

    // initialise seed value
    Generator.Seed = new Random().Next();

    // process input and output lists into arrays
    inputs = inputList.ToArray();
    outputs = outputList.ToArray();

    // set up linear regression using OLS
    regression = new Accord.Statistics.Models.Regression.Linear.SimpleLinearRegression();
    ols = new OrdinaryLeastSquares();

    // no test value or result yet: both start at zero (new double() is 0.0)
    testValue = new double();
    result = new double();

    this.learned = false;
}
/// <summary>
///   Checks the textual form of a fitted regression for the default format and
///   for explicit format strings under the en-US and pt-BR cultures.
/// </summary>
public void ToStringTest()
{
    // Issue 51:
    SimpleLinearRegression regression = new SimpleLinearRegression();

    var x = new double[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    var y = new double[] { 1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321 };
    regression.Regress(x, y);

    // Default formatting under the current culture
    string currentSeparator =
        System.Globalization.CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator;
    string expectedDefault = "y(x) = 32x + -44".Replace(".", currentSeparator);
    string actualDefault = regression.ToString();
    Assert.AreEqual(expectedDefault, actualDefault);

    // Null format string with an explicit en-US culture
    string actualNullFormat = regression.ToString(null,
        System.Globalization.CultureInfo.GetCultureInfo("en-US"));
    Assert.AreEqual("y(x) = 32x + -44", actualNullFormat);

    // One decimal place, en-US
    string actualN1 = regression.ToString("N1",
        System.Globalization.CultureInfo.GetCultureInfo("en-US"));
    Assert.AreEqual("y(x) = 32.0x + -44.0", actualN1);

    // Two decimal places, pt-BR (comma as the decimal separator)
    string actualN2 = regression.ToString("N2",
        System.Globalization.CultureInfo.GetCultureInfo("pt-BR"));
    Assert.AreEqual("y(x) = 32,00x + -44,00", actualN2);
}
/// <summary>
///   Fits a plain simple linear regression and a RANSAC-based one to the first
///   two columns of the grid's data table, then plots both for comparison.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void btnCompute_Click(object sender, EventArgs e)
{
    DataTable sourceTable = dgvAnalysisSource.DataSource as DataTable;
    if (sourceTable == null)
        return;

    // Gather the available data
    double[][] data = sourceTable.ToArray();

    // Column 0 holds the independent variable, column 1 the dependent one
    double[] x = data.GetColumn(0);
    double[] y = data.GetColumn(1);

    // Baseline: an ordinary simple linear regression through all (x, y) points
    var plainRegression = new SimpleLinearRegression();
    double sumOfSquaredErrors = plainRegression.Regress(x, y);

    // Values predicted by the baseline for the original input points
    double[] commonOutput = plainRegression.Compute(x);

    // RANSAC hyperparameters, read from the form
    int maxTrials = (int)numMaxTrials.Value;
    int minSamples = (int)numSamples.Value;
    double probability = (double)numProbability.Value;
    double errorThreshold = (double)numThreshold.Value;

    // Create a RANSAC algorithm to fit a simple linear regression
    var ransac = new RANSAC<SimpleLinearRegression>(minSamples);
    ransac.Probability = probability;
    ransac.Threshold = errorThreshold;
    ransac.MaxEvaluations = maxTrials;

    // Fitting function: build a regression from the sampled points only
    ransac.Fitting = sample =>
    {
        double[] sampledInputs = x.Submatrix(sample);
        double[] sampledOutputs = y.Submatrix(sample);

        var candidate = new SimpleLinearRegression();
        candidate.Regress(sampledInputs, sampledOutputs);
        return candidate;
    };

    // Degeneracy check: no such checks are performed here
    ransac.Degenerate = sample => false;

    // Inlier detector: a point is an inlier when its squared error under the
    // candidate model is below the given threshold
    ransac.Distances = (model, threshold) =>
    {
        List<int> inliers = new List<int>();
        for (int index = 0; index < x.Length; index++)
        {
            double residual = model.Compute(x[index]) - y[index];
            if (residual * residual < threshold)
                inliers.Add(index);
        }
        return inliers.ToArray();
    };

    // With the hyperparameters in place, compute the robust regression model
    int[] inlierIndices;
    SimpleLinearRegression robustRegression = ransac.Compute(data.Length, out inlierIndices);

    if (robustRegression == null)
    {
        // RANSAC did not find any inliers and no model was created
        lbStatus.Text = "RANSAC failed. Please try again after adjusting its parameters.";
        return;
    }

    // Output of the model fitted by RANSAC
    double[] ransacOutput = robustRegression.Compute(x);

    // Scatter plot comparing the standard regression with the RANSAC-fitted one
    CreateScatterplot(graphInput, x, y, commonOutput, ransacOutput,
        x.Submatrix(inlierIndices), y.Submatrix(inlierIndices));

    lbStatus.Text = "Regression created! Please compare the RANSAC "
        + "regression (blue) with the simple regression (in red).";
}
/// <summary>
///   Builds a simple linear regression and fits it to the given data points.
/// </summary>
///
/// <param name="x">The input values <c>x</c>.</param>
/// <param name="y">The output values <c>y</c>.</param>
///
/// <returns>The regression f(x) fitted to approximate y.</returns>
///
public static SimpleLinearRegression FromData(double[] x, double[] y)
{
    var fitted = new SimpleLinearRegression();
    fitted.Regress(x, y);
    return fitted;
}
/// <summary>
///   Fits a plain simple linear regression and a RANSAC-based one to the first
///   two columns of the grid's data table and plots both. Does nothing when
///   there is no data table or when RANSAC produces no model.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void btnSampleRunAnalysis_Click(object sender, EventArgs e)
{
    DataTable sourceTable = dgvAnalysisSource.DataSource as DataTable;
    if (sourceTable == null)
        return;

    // Gather the available data
    double[][] data = sourceTable.ToArray();

    // Column 0 holds the independent variable, column 1 the dependent one
    double[] x = data.GetColumn(0);
    double[] y = data.GetColumn(1);

    // Baseline simple linear regression and its output on the original inputs
    var baseline = new SimpleLinearRegression();
    baseline.Regress(x, y);
    double[] slrY = baseline.Compute(x);

    // RANSAC hyperparameters, read from the form
    int maxTrials = (int)numMaxTrials.Value;
    int minSamples = (int)numSamples.Value;
    double probability = (double)numProbability.Value;
    double errorThreshold = (double)numThreshold.Value;

    // RANSAC algorithm fitting a simple linear regression
    var ransac = new RANSAC<SimpleLinearRegression>(minSamples)
    {
        Probability = probability,
        Threshold = errorThreshold,
        MaxEvaluations = maxTrials,

        // Fitting function: build a regression from the sampled points only
        Fitting = sample =>
        {
            double[] sampledInputs = x.Submatrix(sample);
            double[] sampledOutputs = y.Submatrix(sample);

            var candidate = new SimpleLinearRegression();
            candidate.Regress(sampledInputs, sampledOutputs);
            return candidate;
        },

        // Degeneracy check: no such checks are performed here
        Degenerate = sample => false,

        // Inlier detector: a point is an inlier when its squared error under
        // the candidate model is below the given threshold
        Distances = (model, threshold) =>
        {
            List<int> inliers = new List<int>();
            for (int index = 0; index < x.Length; index++)
            {
                double residual = model.Compute(x[index]) - y[index];
                if (residual * residual < threshold)
                    inliers.Add(index);
            }
            return inliers.ToArray();
        }
    };

    // Finally, try to fit the regression model using RANSAC
    int[] idx;
    SimpleLinearRegression robust = ransac.Compute(data.Length, out idx);

    // RANSAC was unsuccessful: there is nothing to plot
    if (robust == null)
        return;

    // Output of the model fitted by RANSAC
    double[] rlrY = robust.Compute(x);

    // Scatter plot comparing the standard regression with the RANSAC-fitted one
    CreateScatterplot(graphInput, x, y, slrY, rlrY,
        x.Submatrix(idx), y.Submatrix(idx));
}
/// <summary>
/// Groups the deconvoluted rows read from <paramref name="filename"/>, merges groups that differ by
/// multiples of <paramref name="Mas"/>, drops groups with too few scans, matches the remaining groups
/// against the composition hypothesis and fills in the per-group feature values.
/// </summary>
/// <param name="filename">Passed to <c>GetDeconData.getdata</c> to load the rows.</param>
/// <param name="modelParameters">Supplies GroupingErrorEG, AdductToleranceEA, MatchErrorEM (each multiplied by 0.000001, i.e. used as ppm) and MinScanNumber.</param>
/// <param name="Mas">Mass offset used by the second grouping; when 0 the second grouping is skipped and NumModiStates is set to 0 for every group.</param>
/// <param name="comhyp">Composition hypothesis entries; re-sorted locally by MassWeight, descending.</param>
/// <returns>The list of matched and unmatched result groups with their feature values populated.</returns>
// NOTE(review): sortedDeconData[0] is read unconditionally, so this assumes getdata returns at least
// one row - confirm the caller guarantees that.
private List<ResultsGroup> Groupings(String filename, ParametersForm.ParameterSettings modelParameters, Double Mas, List<CompositionHypothesisEntry> comhyp)
{
    GetDeconData DeconDATA1 = new GetDeconData();
    List<string> elementIDs = new List<string>();
    List<string> molename = new List<string>();
    //Copy the element and molecule names from the first hypothesis entry that has any element names.
    for (int i = 0; i < comhyp.Count(); i++ )
    {
        if (comhyp[i].ElementNames.Count > 0)
        {
            for (int j = 0; j < comhyp[i].ElementNames.Count(); j++)
            {
                elementIDs.Add(comhyp[i].ElementNames[j]);
            }
            for (int j = 0; j < comhyp[i].MoleculeNames.Count(); j++)
            {
                molename.Add(comhyp[i].MoleculeNames[j]);
            }
            break;
        }
    }
    List<DeconRow> sortedDeconData = new List<DeconRow>();;
    sortedDeconData = DeconDATA1.getdata(filename);
    //First, sort the list in descending order of abundance.
    sortedDeconData = sortedDeconData.OrderByDescending(a => a.abundance).ToList();

    //################ Second, create a new list to store results from the first grouping. ###############
    List<ResultsGroup> fgResults = new List<ResultsGroup>();
    //The most abundant row seeds the first group.
    ResultsGroup GR2 = new ResultsGroup();
    //currentMaxBin is only incremented below; it is not read anywhere else in this method.
    Int32 currentMaxBin = new Int32();
    currentMaxBin = 1;
    GR2.DeconRow = sortedDeconData[0];
    GR2.MostAbundant = true;
    GR2.NumOfScan = 1;
    GR2.MinScanNum = sortedDeconData[0].ScanNum;
    GR2.MaxScanNum = sortedDeconData[0].ScanNum;
    GR2.ChargeStateList = new List<int>();
    GR2.ChargeStateList.Add(sortedDeconData[0].charge);
    GR2.AvgSigNoiseList = new List<Double>();
    GR2.AvgSigNoiseList.Add(sortedDeconData[0].SignalNoiseRatio);
    GR2.AvgAA2List = new List<double>();
    //Ratio of the monoisotopic abundance to (A+2 abundance + 1); the +1 keeps the divisor non-zero when the A+2 abundance is 0.
    GR2.AvgAA2List.Add(sortedDeconData[0].MonoisotopicAbundance / (sortedDeconData[0].MonoisotopicPlus2Abundance + 1));
    GR2.ScanNumList = new List<Int32>();
    GR2.ScanNumList.Add(sortedDeconData[0].ScanNum);
    GR2.NumModiStates = 1;
    GR2.TotalVolume = sortedDeconData[0].abundance * sortedDeconData[0].fwhm;
    GR2.ListAbundance = new List<double>();
    GR2.ListAbundance.Add(sortedDeconData[0].abundance);
    GR2.ListMonoMassWeight = new List<double>();
    GR2.ListMonoMassWeight.Add(sortedDeconData[0].MonoisotopicMassWeight);
    fgResults.Add(GR2);
    //Each remaining row joins the first existing group whose mass is within the grouping error,
    //or starts a new group when it has been compared against every existing group without a match.
    for (int j = 1; j < sortedDeconData.Count; j++)
    {
        for (int i = 0; i < fgResults.Count; i++)
        {
            //Obtain grouping error. Note: it is in ppm, so it needs to be multiplied by 0.000001.
            Double GroupingError = fgResults[i].DeconRow.MonoisotopicMassWeight * modelParameters.GroupingErrorEG * 0.000001;
            if ((sortedDeconData[j].MonoisotopicMassWeight < (fgResults[i].DeconRow.MonoisotopicMassWeight + GroupingError) && (sortedDeconData[j].MonoisotopicMassWeight > (fgResults[i].DeconRow.MonoisotopicMassWeight - GroupingError))))
            {
                //Widen the group's scan range, then accumulate this row's values into the group.
                if (fgResults[i].MaxScanNum < sortedDeconData[j].ScanNum)
                {
                    fgResults[i].MaxScanNum = sortedDeconData[j].ScanNum;
                }
                else if (fgResults[i].MinScanNum > sortedDeconData[j].ScanNum)
                {
                    fgResults[i].MinScanNum = sortedDeconData[j].ScanNum;
                }
                fgResults[i].NumOfScan = fgResults[i].NumOfScan + 1;
                fgResults[i].ScanNumList.Add(sortedDeconData[j].ScanNum);
                fgResults[i].TotalVolume = fgResults[i].TotalVolume + sortedDeconData[j].abundance * sortedDeconData[j].fwhm;
                fgResults[i].ChargeStateList.Add(sortedDeconData[j].charge);
                fgResults[i].AvgSigNoiseList.Add(sortedDeconData[j].SignalNoiseRatio);
                fgResults[i].AvgAA2List.Add(sortedDeconData[j].MonoisotopicAbundance / (sortedDeconData[j].MonoisotopicPlus2Abundance + 1));
                fgResults[i].ListAbundance.Add(sortedDeconData[j].abundance);
                fgResults[i].ListMonoMassWeight.Add(sortedDeconData[j].MonoisotopicMassWeight);
                break;
            }
            //No match against the last existing group: this row seeds a new group.
            //NOTE(review): fgResults.Add below grows the list while the inner loop is still running, so the
            //next iteration appears to compare row j against the group it just seeded; when GroupingError > 0
            //the row matches itself and is accumulated a second time (NumOfScan becomes 2, lists get a
            //duplicate entry, TotalVolume doubles). Confirm whether this is intended before changing it.
            if (i == fgResults.Count - 1)
            {
                ResultsGroup GR = new ResultsGroup();
                currentMaxBin = currentMaxBin + 1;
                GR.DeconRow = sortedDeconData[j];
                GR.MostAbundant = true;
                GR.NumOfScan = 1;
                GR.MinScanNum = sortedDeconData[j].ScanNum;
                GR.MaxScanNum = sortedDeconData[j].ScanNum;
                GR.ChargeStateList = new List<int>();
                GR.ChargeStateList.Add(sortedDeconData[j].charge);
                GR.AvgSigNoiseList = new List<Double>();
                GR.AvgSigNoiseList.Add(sortedDeconData[j].SignalNoiseRatio);
                GR.AvgAA2List = new List<double>();
                GR.AvgAA2List.Add(sortedDeconData[j].MonoisotopicAbundance / (sortedDeconData[j].MonoisotopicPlus2Abundance + 1));
                GR.ScanNumList = new List<int>();
                GR.ScanNumList.Add(sortedDeconData[j].ScanNum);
                GR.NumModiStates = 1;
                GR.TotalVolume = sortedDeconData[j].abundance * sortedDeconData[j].fwhm;
                GR.ListAbundance = new List<double>();
                GR.ListAbundance.Add(sortedDeconData[j].abundance);
                GR.ListMonoMassWeight = new List<double>();
                GR.ListMonoMassWeight.Add(sortedDeconData[j].MonoisotopicMassWeight);
                fgResults.Add(GR);
            }
        }
    }
    //Lastly, replace each group's representative mass with the abundance-weighted average of its member masses.
    for (int y = 0; y < fgResults.Count(); y++)
    {
        Double sumofTopPart = 0;
        for (int z = 0; z < fgResults[y].ListMonoMassWeight.Count(); z++)
        {
            sumofTopPart = sumofTopPart + fgResults[y].ListMonoMassWeight[z] * fgResults[y].ListAbundance[z];
        }
        fgResults[y].DeconRow.MonoisotopicMassWeight = sumofTopPart / fgResults[y].ListAbundance.Sum();
    }

    //######################## Here is the second grouping. ################################
    //Groups are sorted by ascending mass; a heavier group j is merged into group i when its mass equals
    //mass(i) + Mas * numModStates within the adduct tolerance.
    fgResults = fgResults.OrderBy(o => o.DeconRow.MonoisotopicMassWeight).ToList();
    if (Mas != 0)
    {
        for (int i = 0; i < fgResults.Count - 1; i++)
        {
            //Groups already merged into another group (MostAbundant == false) are not used as a merge target.
            if (fgResults[i].MostAbundant == true)
            {
                int numModStates = 1;
                for (int j = i + 1; j < fgResults.Count; j++)
                {
                    //Adduct tolerance is in ppm of group i's mass.
                    Double AdductTolerance = fgResults[i].DeconRow.MonoisotopicMassWeight * modelParameters.AdductToleranceEA * 0.000001;
                    if ((fgResults[i].DeconRow.MonoisotopicMassWeight >= (fgResults[j].DeconRow.MonoisotopicMassWeight - Mas * numModStates - AdductTolerance)) && (fgResults[i].DeconRow.MonoisotopicMassWeight <= (fgResults[j].DeconRow.MonoisotopicMassWeight - Mas * numModStates + AdductTolerance)))
                    {
                        //obtain max and min scan number (the else branches are self-assignments and change nothing)
                        if (fgResults[i].MaxScanNum < fgResults[j].MaxScanNum)
                        {
                            fgResults[i].MaxScanNum = fgResults[j].MaxScanNum;
                        }
                        else
                        {
                            fgResults[i].MaxScanNum = fgResults[i].MaxScanNum;
                        }
                        if (fgResults[i].MinScanNum > fgResults[j].MinScanNum)
                        {
                            fgResults[i].MinScanNum = fgResults[j].MinScanNum;
                        }
                        else
                        {
                            fgResults[i].MinScanNum = fgResults[i].MinScanNum;
                        }
                        //numOfScan
                        fgResults[i].NumOfScan = fgResults[i].NumOfScan + fgResults[j].NumOfScan;
                        fgResults[i].ScanNumList.AddRange(fgResults[j].ScanNumList);
                        //ChargeStateList
                        for (int h = 0; h < fgResults[j].ChargeStateList.Count; h++)
                        {
                            fgResults[i].ChargeStateList.Add(fgResults[j].ChargeStateList[h]);
                        }
                        //avgSigNoiseList
                        for (int h = 0; h < fgResults[j].AvgSigNoiseList.Count; h++)
                        {
                            fgResults[i].AvgSigNoiseList.Add(fgResults[j].AvgSigNoiseList[h]);
                        }
                        //avgAA2List
                        for (int h = 0; h < fgResults[j].AvgAA2List.Count; h++)
                        {
                            fgResults[i].AvgAA2List.Add(fgResults[j].AvgAA2List[h]);
                        }
                        //numModiStates: the next candidate is expected one more multiple of Mas away
                        numModStates++;
                        fgResults[i].NumModiStates = fgResults[i].NumModiStates + 1;
                        //Mark group j as merged so it is skipped as a merge target and in the matching step.
                        fgResults[j].MostAbundant = false;
                        //TotalVolume
                        fgResults[i].TotalVolume = fgResults[i].TotalVolume + fgResults[j].TotalVolume;
                        //If the merged group is more abundant, its row becomes the representative row of group i
                        //and the multiple counter restarts from the new representative mass.
                        if (fgResults[i].DeconRow.abundance < fgResults[j].DeconRow.abundance)
                        {
                            fgResults[i].DeconRow = fgResults[j].DeconRow;
                            numModStates = 1;
                        }
                    }
                    else if (fgResults[i].DeconRow.MonoisotopicMassWeight < (fgResults[j].DeconRow.MonoisotopicMassWeight - (Mas + AdductTolerance * 2) * numModStates))
                    {
                        //Save running time. Since the list is sorted, no later (heavier) group can match as an adduct.
                        break;
                    }
                }
            }
        }
    }
    else
    {
        //No mass offset supplied: there is no second grouping, so every group has zero modification states.
        for (int i = 0; i < fgResults.Count; i++)
        {
            fgResults[i].NumModiStates = 0;
        }
    }
    List<ResultsGroup> sgResults = new List<ResultsGroup>();
    //Implement the scan number threshold: sort by scan count (descending) and drop everything from the
    //first group whose NumOfScan is below MinScanNumber onwards.
    fgResults = fgResults.OrderByDescending(a => a.NumOfScan).ToList();
    //Count + 1 is a sentinel meaning "no group is below the threshold".
    Int32 scanCutOff = fgResults.Count() + 1;
    for (int t = 0; t < fgResults.Count(); t++)
    {
        if (fgResults[t].NumOfScan < modelParameters.MinScanNumber)
        {
            scanCutOff = t;
            break;
        }
    }
    if (scanCutOff != fgResults.Count() + 1)
    {
        fgResults.RemoveRange(scanCutOff, fgResults.Count() - scanCutOff);
    }

    //############# This is the matching part. It matches the composition hypothesis with the grouped decon data. ############
    //MolNames is allocated but not used anywhere else in this method.
    String[] MolNames = new String[17];
    //numOfMatches decides below which groups feed the linear regression models.
    //moleWeightforA and AARatio are filled during matching but not read anywhere else in this method.
    Int32 numOfMatches = new Int32();
    List<Double> moleWeightforA = new List<Double>();
    List<Double> AARatio = new List<Double>();
    //Both lists are sorted by descending mass so the hypothesis list can be walked forward alongside the groups.
    fgResults = fgResults.OrderByDescending(a => a.DeconRow.MonoisotopicMassWeight).ToList();
    comhyp = comhyp.OrderByDescending(b => b.MassWeight).ToList();
    bool hasMatch = false;
    int lastMatch = 0;
    for (int j = 0; j < fgResults.Count; j++)
    {
        if (fgResults[j].MostAbundant == true)
        {
            //Resume the hypothesis scan 4 entries before the last recorded position (clamped at 0).
            lastMatch = lastMatch - 4;
            if (lastMatch < 0)
                lastMatch = 0;
            for (int i = lastMatch; i < comhyp.Count; i++)
            {
                //Matching error is in ppm of the hypothesis mass.
                Double MatchingError = comhyp[i].MassWeight * modelParameters.MatchErrorEM * 0.000001;
                if ((fgResults[j].DeconRow.MonoisotopicMassWeight <= (comhyp[i].MassWeight + MatchingError)) && (fgResults[j].DeconRow.MonoisotopicMassWeight >= (comhyp[i].MassWeight - MatchingError)))
                {
                    //Record the match; keep scanning because later hypotheses may match the same group.
                    ResultsGroup GR = new ResultsGroup();
                    GR = matchPassbyValue(fgResults[j], comhyp[i]);
                    sgResults.Add(GR);
                    //Bookkeeping for the feature calculation
                    numOfMatches++;
                    moleWeightforA.Add(fgResults[j].DeconRow.MonoisotopicMassWeight);
                    AARatio.Add(fgResults[j].AvgAA2List.Average());
                    lastMatch = i + 1;
                    hasMatch = true;
                    continue;
                }
                //Since the data is sorted, there are no more matches below that row, so stop scanning.
                if (fgResults[j].DeconRow.MonoisotopicMassWeight > (comhyp[i].MassWeight + MatchingError))
                {
                    if (hasMatch == false)
                    {
                        //No hypothesis matched this group: add the group itself (same object, not a copy)
                        //as an unmatched result with an empty predicted composition.
                        ResultsGroup GR = new ResultsGroup();
                        CompositionHypothesisEntry comhypi = new CompositionHypothesisEntry();
                        GR = fgResults[j];
                        GR.Match = false;
                        GR.PredictedComposition = comhypi;
                        sgResults.Add(GR);
                        lastMatch = i;
                        break;
                    }
                    else
                    {
                        //The group already matched; reset the flag for the next group.
                        hasMatch = false;
                        break;
                    }
                }
            }
        }
    }

    //############## Last part: calculate the feature data needed for logistic regression ###################
    //Expected A and Centroid Scan Error need linear regression. The models are built here separately.
    //In these models the output is the Y axis and the input (monoisotopic mass) is the X axis.
    SimpleLinearRegression AA2regression = new SimpleLinearRegression();
    List<double> aainput = new List<double>();
    List<double> aaoutput = new List<double>();
    //Centroid Scan Error
    List<double> ccinput = new List<double>();
    List<double> ccoutput = new List<double>();
    if (numOfMatches > 3)
    {
        //More than 3 matches: fit the models on matched groups only.
        for (int i = 0; i < sgResults.Count; i++)
        {
            if (sgResults[i].Match == true)
            {
                if (sgResults[i].AvgAA2List.Average() != 0)
                {
                    aainput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
                    aaoutput.Add(sgResults[i].AvgAA2List.Average());
                }
                if (sgResults[i].DeconRow.abundance > 250)
                {
                    //Note: this branch uses the representative row's scan number, the branch below uses the average scan number.
                    ccoutput.Add(sgResults[i].DeconRow.ScanNum);
                    ccinput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
                }
            }
        }
    }
    else
    {
        //3 matches or fewer: fit the models on every result, matched or not.
        for (int i = 0; i < sgResults.Count; i++)
        {
            if (sgResults[i].AvgAA2List.Average() != 0)
            {
                aainput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
                aaoutput.Add(sgResults[i].AvgAA2List.Average());
            }
            if (sgResults[i].DeconRow.abundance > 250)
            {
                ccoutput.Add(sgResults[i].ScanNumList.Average());
                ccinput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
            }
        }
    }
    SimpleLinearRegression CSEregression = new SimpleLinearRegression();
    CSEregression.Regress(ccinput.ToArray(), ccoutput.ToArray());
    AA2regression.Regress(aainput.ToArray(), aaoutput.ToArray());
    //Compute the remaining features and store them in the grouping results
    for (int i = 0; i < sgResults.Count; i++)
    {
        //ScanDensity is: number of scans divided by (max scan number - min scan number + 15),
        //or 0 when the max and min scan numbers are equal.
        Double ScanDensity = new Double();
        Int32 MaxScanNumber = sgResults[i].MaxScanNum;
        Int32 MinScanNumber = sgResults[i].MinScanNum;
        Double NumOfScan = sgResults[i].NumOfScan;
        List<Int32> numChargeStatesList = sgResults[i].ChargeStateList.Distinct().ToList();
        Int32 numChargeStates = numChargeStatesList.Count;
        Double numModiStates = sgResults[i].NumModiStates;
        if ((MaxScanNumber - MinScanNumber) != 0)
            ScanDensity = NumOfScan / (MaxScanNumber - MinScanNumber + 15);
        else
            ScanDensity = 0;
        //Use this scan density for all molecules in this grouping.
        sgResults[i].NumChargeStates = numChargeStates;
        sgResults[i].ScanDensity = ScanDensity;
        sgResults[i].NumModiStates = numModiStates;
        //Centroid scan error: distance between the group's average scan number and the regression's prediction for its mass.
        sgResults[i].CentroidScanLR = CSEregression.Compute(sgResults[i].DeconRow.MonoisotopicMassWeight);
        sgResults[i].CentroidScan = Math.Abs(sgResults[i].ScanNumList.Average() - sgResults[i].CentroidScanLR);
        //Expected A: distance between the group's average A:A+2 ratio and the regression's prediction for its mass.
        sgResults[i].ExpectedA = Math.Abs(sgResults[i].AvgAA2List.Average() - AA2regression.Compute(sgResults[i].DeconRow.MonoisotopicMassWeight));
        sgResults[i].AvgSigNoise = sgResults[i].AvgSigNoiseList.Average();
    }
    //Clear the element and molecule name lists on every result; on the last iteration, attach the
    //names collected at the top of this method to the first result only.
    for (int i = 0; i < sgResults.Count(); i++ )
    {
        sgResults[i].PredictedComposition.ElementNames.Clear();
        sgResults[i].PredictedComposition.MoleculeNames.Clear();
        if (i == sgResults.Count() - 1)
        {
            sgResults[0].PredictedComposition.ElementNames = elementIDs;
            sgResults[0].PredictedComposition.MoleculeNames = molename;
        }
    }
    return sgResults;
}
/// <summary>
/// Reads the deconvoluted rows from <paramref name="filename"/>, groups them by
/// monoisotopic mass, merges adduct-shifted groups, drops groups that have too few
/// scans and finally fills in the per-group feature values.
/// </summary>
/// <param name="filename">Path handed to <c>GetDeconData.getdata</c>.</param>
/// <param name="paradata">
/// Settings; this method reads <c>GroupingErrorEG</c> and <c>AdductToleranceEA</c>
/// (both multiplied by 0.000001, i.e. treated as ppm) and <c>MinScanNumber</c>.
/// </param>
/// <returns>
/// The surviving groups, ordered by descending <c>NumOfScan</c>, each with
/// <c>Match</c> set to false. An empty list is returned when there are no rows.
/// </returns>
private List<ResultsGroup> Groupings(String filename, ParametersForm.ParameterSettings paradata)
{
    // Mass shift used by the second grouping (the NH3 grouping).
    const double AdductMassShift = 17.02654911;

    GetDeconData deconReader = new GetDeconData();
    List<DeconRow> sortedDeconData = deconReader.getdata(filename);

    // Nothing to group: return early instead of failing on sortedDeconData[0].
    if (sortedDeconData == null || sortedDeconData.Count == 0)
    {
        return new List<ResultsGroup>();
    }

    // First, sort the rows by descending abundance so that the most abundant
    // row of every group is the one that seeds it.
    sortedDeconData = sortedDeconData.OrderByDescending(a => a.abundance).ToList();

    //################ First grouping: bin rows by monoisotopic mass ###############
    List<ResultsGroup> fgResults = new List<ResultsGroup>();
    fgResults.Add(CreateGroupFromRow(sortedDeconData[0]));

    for (int j = 1; j < sortedDeconData.Count; j++)
    {
        DeconRow row = sortedDeconData[j];
        bool joinedExistingGroup = false;

        for (int i = 0; i < fgResults.Count; i++)
        {
            ResultsGroup existing = fgResults[i];

            // Grouping error is given in ppm, so it is multiplied by 0.000001.
            Double groupingError = existing.DeconRow.MonoisotopicMassWeight * paradata.GroupingErrorEG * 0.000001;

            bool insideWindow =
                row.MonoisotopicMassWeight < (existing.DeconRow.MonoisotopicMassWeight + groupingError) &&
                row.MonoisotopicMassWeight > (existing.DeconRow.MonoisotopicMassWeight - groupingError);

            if (insideWindow)
            {
                AddRowToGroup(existing, row);
                joinedExistingGroup = true;
                break;
            }
        }

        // The new group is appended only after the scan over the existing groups
        // has finished. Appending it inside that loop would let the loop reach the
        // freshly added group and merge the seed row into it a second time.
        if (!joinedExistingGroup)
        {
            fgResults.Add(CreateGroupFromRow(row));
        }
    }

    // Replace each group's mass by the abundance-weighted average of its members.
    for (int y = 0; y < fgResults.Count; y++)
    {
        ResultsGroup current = fgResults[y];
        Double weightedMassSum = 0;
        for (int z = 0; z < current.ListMonoMassWeight.Count; z++)
        {
            weightedMassSum = weightedMassSum + current.ListMonoMassWeight[z] * current.ListAbundance[z];
        }
        current.DeconRow.MonoisotopicMassWeight = weightedMassSum / current.ListAbundance.Sum();
    }

    //######################## Second grouping: NH3 adducts ########################
    fgResults = fgResults.OrderBy(o => o.DeconRow.MonoisotopicMassWeight).ToList();
    for (int i = 0; i < fgResults.Count - 1; i++)
    {
        ResultsGroup parent = fgResults[i];
        if (parent.MostAbundant == true)
        {
            int numModStates = 1;
            for (int j = i + 1; j < fgResults.Count; j++)
            {
                ResultsGroup adduct = fgResults[j];

                // Re-read on every pass: parent.DeconRow can be swapped below.
                Double parentMass = parent.DeconRow.MonoisotopicMassWeight;
                Double adductTolerance = parentMass * paradata.AdductToleranceEA * 0.000001;
                Double expectedParentMass = adduct.DeconRow.MonoisotopicMassWeight - AdductMassShift * numModStates;

                if (parentMass >= (expectedParentMass - adductTolerance) &&
                    parentMass <= (expectedParentMass + adductTolerance))
                {
                    // Widen the scan range of the parent to cover the adduct.
                    if (parent.MaxScanNum < adduct.MaxScanNum)
                    {
                        parent.MaxScanNum = adduct.MaxScanNum;
                    }
                    if (parent.MinScanNum > adduct.MinScanNum)
                    {
                        parent.MinScanNum = adduct.MinScanNum;
                    }

                    parent.NumOfScan = parent.NumOfScan + adduct.NumOfScan;
                    parent.ScanNumList.AddRange(adduct.ScanNumList);
                    parent.ChargeStateList.AddRange(adduct.ChargeStateList);
                    parent.AvgSigNoiseList.AddRange(adduct.AvgSigNoiseList);
                    parent.AvgAA2List.AddRange(adduct.AvgAA2List);

                    numModStates++;
                    parent.NumModiStates = parent.NumModiStates + 1;
                    adduct.MostAbundant = false;
                    parent.TotalVolume = parent.TotalVolume + adduct.TotalVolume;

                    // The more abundant row represents the merged group; the
                    // adduct counter restarts relative to that new row.
                    if (parent.DeconRow.abundance < adduct.DeconRow.abundance)
                    {
                        parent.DeconRow = adduct.DeconRow;
                        numModStates = 1;
                    }
                }
                else if (parentMass < (adduct.DeconRow.MonoisotopicMassWeight - (AdductMassShift + adductTolerance * 2) * numModStates))
                {
                    // Save running time. Since the list is sorted by mass, no
                    // later entry can match as an adduct either.
                    break;
                }
            }
        }
    }

    // Scan number threshold: after sorting by descending NumOfScan, everything
    // from the first group below the minimum onwards is removed.
    fgResults = fgResults.OrderByDescending(a => a.NumOfScan).ToList();
    int scanCutOff = fgResults.FindIndex(g => g.NumOfScan < paradata.MinScanNumber);
    if (scanCutOff >= 0)
    {
        fgResults.RemoveRange(scanCutOff, fgResults.Count - scanCutOff);
    }

    for (int i = 0; i < fgResults.Count; i++)
    {
        fgResults[i].Match = false;
    }

    // No group survived, so there is nothing to fit the regressions on.
    if (fgResults.Count == 0)
    {
        return fgResults;
    }

    //############## Feature data needed for logistic regression ###################
    // Expected A and Centroid Scan Error each need a linear regression.
    // In both models the mass is the input (X axis) and the feature the output (Y axis).
    List<double> aainput = new List<double>();
    List<double> aaoutput = new List<double>();
    List<double> ccinput = new List<double>();
    List<double> ccoutput = new List<double>();
    for (int i = 0; i < fgResults.Count; i++)
    {
        ResultsGroup current = fgResults[i];

        double averageAA2 = current.AvgAA2List.Average();
        if (averageAA2 != 0)
        {
            aainput.Add(current.DeconRow.MonoisotopicMassWeight);
            aaoutput.Add(averageAA2);
        }

        // Only groups whose representative row has abundance above 250 are
        // used to fit the centroid scan model.
        if (current.DeconRow.abundance > 250)
        {
            ccoutput.Add(current.ScanNumList.Average());
            ccinput.Add(current.DeconRow.MonoisotopicMassWeight);
        }
    }

    SimpleLinearRegression CSEregression = new SimpleLinearRegression();
    CSEregression.Regress(ccinput.ToArray(), ccoutput.ToArray());
    SimpleLinearRegression AA2regression = new SimpleLinearRegression();
    AA2regression.Regress(aainput.ToArray(), aaoutput.ToArray());

    // Compute the remaining features and store them on each group.
    for (int i = 0; i < fgResults.Count; i++)
    {
        ResultsGroup current = fgResults[i];

        Int32 maxScanNumber = current.MaxScanNum;
        Int32 minScanNumber = current.MinScanNum;
        Double numOfScan = current.NumOfScan;

        // Scan density is the number of scans divided by the scan range plus 15;
        // a group whose range is zero gets a density of 0.
        Double scanDensity = 0;
        if ((maxScanNumber - minScanNumber) != 0)
        {
            scanDensity = numOfScan / (maxScanNumber - minScanNumber + 15);
        }

        current.NumChargeStates = current.ChargeStateList.Distinct().ToList().Count;
        current.ScanDensity = scanDensity;
        current.CentroidScanLR = CSEregression.Compute(current.DeconRow.MonoisotopicMassWeight);
        current.CentroidScan = Math.Abs(current.ScanNumList.Average() - current.CentroidScanLR);
        current.ExpectedA = Math.Abs(current.AvgAA2List.Average() - AA2regression.Compute(current.DeconRow.MonoisotopicMassWeight));
        current.AvgSigNoise = current.AvgSigNoiseList.Average();
    }

    return fgResults;
}

// Builds a new single-row group seeded with the given row.
private static ResultsGroup CreateGroupFromRow(DeconRow row)
{
    ResultsGroup seeded = new ResultsGroup();
    seeded.PredictedComposition = new CompositionHypothesisEntry();
    seeded.DeconRow = row;
    seeded.MostAbundant = true;
    seeded.NumOfScan = 1;
    seeded.MinScanNum = row.ScanNum;
    seeded.MaxScanNum = row.ScanNum;
    seeded.ChargeStateList = new List<int>();
    seeded.ChargeStateList.Add(row.charge);
    seeded.AvgSigNoiseList = new List<Double>();
    seeded.AvgSigNoiseList.Add(row.SignalNoiseRatio);
    seeded.AvgAA2List = new List<double>();
    seeded.AvgAA2List.Add(row.MonoisotopicAbundance / (row.MonoisotopicPlus2Abundance + 1));
    seeded.ScanNumList = new List<Int32>();
    seeded.ScanNumList.Add(row.ScanNum);
    seeded.NumModiStates = 1;
    seeded.TotalVolume = row.abundance * row.fwhm;
    seeded.ListAbundance = new List<double>();
    seeded.ListAbundance.Add(row.abundance);
    seeded.ListMonoMassWeight = new List<double>();
    seeded.ListMonoMassWeight.Add(row.MonoisotopicMassWeight);
    return seeded;
}

// Merges one more row into an existing group (scan range, counters and lists).
private static void AddRowToGroup(ResultsGroup target, DeconRow row)
{
    // A scan above the current maximum cannot also be below the minimum,
    // so at most one of the two bounds moves.
    if (target.MaxScanNum < row.ScanNum)
    {
        target.MaxScanNum = row.ScanNum;
    }
    else if (target.MinScanNum > row.ScanNum)
    {
        target.MinScanNum = row.ScanNum;
    }

    target.NumOfScan = target.NumOfScan + 1;
    target.ScanNumList.Add(row.ScanNum);
    target.TotalVolume = target.TotalVolume + row.abundance * row.fwhm;
    target.ChargeStateList.Add(row.charge);
    target.AvgSigNoiseList.Add(row.SignalNoiseRatio);
    target.AvgAA2List.Add(row.MonoisotopicAbundance / (row.MonoisotopicPlus2Abundance + 1));
    target.ListAbundance.Add(row.abundance);
    target.ListMonoMassWeight.Add(row.MonoisotopicMassWeight);
}
/// <summary>
/// Runs RANSAC with an inlier detector that reports zero inliers on its first two
/// calls and behaves normally afterwards, then checks the trial counters, the
/// inlier indices and the predictions of the resulting model.
/// </summary>
public void zero_inliers_test()
{
    // Fix the random number generator
    Accord.Math.Random.Generator.Seed = 0;

    double[,] data = // This is the same data used in the RANSAC sample app
    {
        { 1.0, 0.79 }, { 3, 2.18 }, { 5, 5.99 }, { 7.0, 7.65 },
        { 9.0, 9.55 }, { 11, 11.89 }, { 13, 13.73 }, { 15.0, 14.77 },
        { 17.0, 18.00 }, { 1.2, 1.45 }, { 1.5, 1.18 }, { 1.8, 1.92 },
        { 2.1, 1.47 }, { 2.4, 2.41 }, { 2.7, 2.35 }, { 3.0, 3.41 },
        { 3.3, 3.78 }, { 3.6, 3.21 }, { 3.9, 4.76 }, { 4.2, 5.03 },
        { 4.5, 4.19 }, { 4.8, 3.81 }, { 5.1, 6.07 }, { 5.4, 5.74 },
        { 5.7, 6.39 }, { 6, 6.11 }, { 6.3, 6.86 }, { 6.6, 6.35 },
        { 6.9, 7.9 }, { 7.2, 8.04 }, { 7.5, 8.48 }, { 7.8, 8.07 },
        { 8.1, 8.22 }, { 8.4, 8.41 }, { 8.7, 9.4 }, { 9, 8.8 },
        { 9.3, 8.44 }, { 9.6, 9.32 }, { 9.9, 9.18 }, { 10.2, 9.86 },
        { 10.5, 10.16 }, { 10.8, 10.28 }, { 11.1, 11.07 }, { 11.4, 11.66 },
        { 11.7, 11.13 }, { 12, 11.55 }, { 12.3, 12.62 }, { 12.6, 12.27 },
        { 12.9, 12.33 }, { 13.2, 12.37 }, { 13.5, 12.75 }, { 13.8, 14.44 },
        { 14.1, 14.71 }, { 14.4, 13.72 }, { 14.7, 14.54 }, { 15, 14.67 },
        { 15.3, 16.04 }, { 15.6, 15.21 }, { 1, 3.9 }, { 2, 11.5 },
        { 3.0, 13.0 }, { 4, 0.9 }, { 5, 5.5 }, { 6, 16.2 },
        { 7.0, 0.8 }, { 8, 9.4 }, { 9, 9.5 }, { 10, 17.5 },
        { 11.0, 6.3 }, { 12, 12.6 }, { 13, 1.5 }, { 14, 1.5 },
        { 2.0, 10 }, { 3, 9 }, { 15, 2 }, { 15.5, 1.2 },
    };

    double[] x = data.GetColumn(0); // Extract the independent variable
    double[] y = data.GetColumn(1); // Extract the dependent variable

    // RANSAC hyperparameters
    int maxTrials = 1000;
    int minSamples = 20;
    double probability = 0.950;
    double errorThreshold = 1000;

    // Number of times the inlier detector has been invoked so far
    int count = 0;

    // Create a RANSAC algorithm to fit a simple linear regression
    var ransac = new RANSAC<SimpleLinearRegression>(minSamples)
    {
        Probability = probability,
        Threshold = errorThreshold,
        MaxEvaluations = maxTrials,

        // Fitting function: build a model from the sampled points
        Fitting = sample =>
        {
            double[] inputs = x.Submatrix(sample);
            double[] outputs = y.Submatrix(sample);

            var model = new SimpleLinearRegression();
            model.Regress(inputs, outputs);
            return model;
        },

        // No degeneracy checks are performed in this test
        Degenerate = sample => false,

        // Inlier detector: report 0 inliers twice, then proceed as normal
        Distances = (model, threshold) =>
        {
            count++;

            List<int> inliers = new List<int>();
            if (count > 2)
            {
                for (int i = 0; i < x.Length; i++)
                {
                    // A point is an inlier when its squared error
                    // is below the given threshold.
                    double error = model.Compute(x[i]) - y[i];
                    if (error * error < threshold)
                    {
                        inliers.Add(i);
                    }
                }
            }

            return inliers.ToArray();
        }
    };

    // Compute a regression model using the RANSAC algorithm
    int[] inlierIndices;
    SimpleLinearRegression robustRegression = ransac.Compute(data.Rows(), out inlierIndices);

    // Compute the output of the model fitted by RANSAC
    double[] ransacOutput = robustRegression.Compute(x);

    // Expected value first, actual value second
    Assert.AreEqual(0, ransac.TrialsNeeded);
    Assert.AreEqual(3, ransac.TrialsPerformed);

    int[] expectedInliers = new int[]
    {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75
    };

    double[] expectedOutput = new double[]
    {
        4.62124895918799, 5.37525473445784, 6.12926050972769, 6.88326628499754,
        7.63727206026739, 8.39127783553724, 9.14528361080709, 9.89928938607694,
        10.6532951613468, 4.69664953671498, 4.80975040300545, 4.92285126929593,
        5.03595213558641, 5.14905300187689, 5.26215386816736, 5.37525473445784,
        5.48835560074832, 5.6014564670388, 5.71455733332927, 5.82765819961975,
        5.94075906591023, 6.05385993220071, 6.16696079849118, 6.28006166478166,
        6.39316253107214, 6.50626339736262, 6.61936426365309, 6.73246512994357,
        6.84556599623405, 6.95866686252453, 7.071767728815, 7.18486859510548,
        7.29796946139596, 7.41107032768644, 7.52417119397691, 7.63727206026739,
        7.75037292655787, 7.86347379284835, 7.97657465913882, 8.0896755254293,
        8.20277639171978, 8.31587725801026, 8.42897812430073, 8.54207899059121,
        8.65517985688169, 8.76828072317216, 8.88138158946264, 8.99448245575312,
        9.1075833220436, 9.22068418833408, 9.33378505462455, 9.44688592091503,
        9.55998678720551, 9.67308765349599, 9.78618851978646, 9.89928938607694,
        10.0123902523674, 10.1254911186579, 4.62124895918799, 4.99825184682292,
        5.37525473445784, 5.75225762209277, 6.12926050972769, 6.50626339736262,
        6.88326628499754, 7.26026917263247, 7.63727206026739, 8.01427494790232,
        8.39127783553724, 8.76828072317216, 9.14528361080709, 9.52228649844202,
        4.99825184682292, 5.37525473445784, 9.89928938607694, 10.0877908298944
    };

    Assert.IsTrue(inlierIndices.IsEqual(expectedInliers));
    Assert.IsTrue(ransacOutput.IsEqual(expectedOutput, 1e-10));
}