/// <summary>
/// Centers the columns of <paramref name="x"/> and <paramref name="y"/> on their
/// (optionally weighted) means so that linear models can be fitted without an
/// explicit intercept term. When <paramref name="sampleWeight"/> is supplied, it is
/// the weighted column mean that is subtracted, so the weighted mean of the result
/// is zero rather than the plain mean.
/// </summary>
/// <remarks>
/// Sparse <paramref name="x"/> is never centered or scaled (its reported mean is all
/// zeros and its scale all ones), but <paramref name="y"/> is still centered.
/// </remarks>
/// <param name="x">Feature matrix; columns are centered (and optionally scaled).</param>
/// <param name="y">Target matrix; columns are centered.</param>
/// <param name="fitIntercept">
/// When <c>false</c> nothing is centered: <paramref name="x"/> and <paramref name="y"/>
/// are returned as passed in, with zero means and unit scales.
/// </param>
/// <param name="normalize">
/// When <c>true</c> (and <paramref name="x"/> is dense), each centered column of
/// <paramref name="x"/> is divided by the square root of its sum of squares.
/// Columns whose scale is exactly zero are divided by one instead.
/// </param>
/// <param name="sampleWeight">
/// Optional per-row weights used for the column means; <c>null</c> means an
/// unweighted mean. The weights are not used when computing the column scale.
/// </param>
/// <returns>
/// The processed <c>X</c> and <c>Y</c> together with the column means of both and the
/// column scale (<c>xStd</c>) that was applied to <c>X</c>.
/// </returns>
internal static CenterDataResult CenterData(
    Matrix<double> x,
    Matrix<double> y,
    bool fitIntercept,
    bool normalize = false,
    Vector<double> sampleWeight = null)
{
    // Zero vector by default; only replaced when an intercept is being fitted.
    Vector<double> yMean = new DenseVector(y.ColumnCount);
    Vector<double> xMean;
    Vector<double> xStd;

    if (!fitIntercept)
    {
        // No centering requested: report neutral statistics and pass the data through.
        xMean = DenseVector.Create(x.ColumnCount, i => 0);
        xStd = DenseVector.Create(x.ColumnCount, i => 1);

        return new CenterDataResult { X = x, Y = y, xMean = xMean, yMean = yMean, xStd = xStd };
    }

    if (x is SparseMatrix)
    {
        // Sparse input is left untouched, so the reported mean/scale are neutral.
        xMean = DenseVector.Create(x.ColumnCount, i => 0.0);
        xStd = DenseVector.Create(x.ColumnCount, i => 1.0);
    }
    else
    {
        if (sampleWeight != null)
        {
            // Weighted mean: sum of weight-scaled rows divided by the total weight.
            var weightedColumnSums = x.MulColumnVector(sampleWeight).SumOfEveryColumn();
            xMean = weightedColumnSums.Divide(sampleWeight.Sum());
        }
        else
        {
            xMean = x.MeanOfEveryColumn();
        }

        x = x.SubtractRowVector(xMean);

        if (!normalize)
        {
            xStd = DenseVector.Create(x.ColumnCount, i => 1.0);
        }
        else
        {
            // Accumulate the per-column sum of squares of the centered data.
            xStd = new DenseVector(x.ColumnCount);
            foreach (var indexedRow in x.RowEnumerator())
            {
                var rowValues = indexedRow.Item2;
                var squaredRow = rowValues.PointwiseMultiply(rowValues);
                xStd.Add(squaredRow, xStd);
            }

            xStd.MapInplace(Math.Sqrt);

            // A constant column has zero scale; use 1 so the division below is a no-op.
            for (int column = 0; column < xStd.Count; column++)
            {
                if (xStd[column] != 0)
                {
                    continue;
                }

                xStd[column] = 1;
            }

            // Result is written back into x (second argument is the destination).
            x.DivRowVector(xStd, x);
        }
    }

    if (sampleWeight != null)
    {
        var weightedTargetSums = y.MulColumnVector(sampleWeight).SumOfEveryColumn();
        yMean = weightedTargetSums / sampleWeight.Sum();
    }
    else
    {
        yMean = y.MeanOfEveryColumn();
    }

    // Subtract from a copy of y, as the original implementation does.
    y = y.Clone().SubtractRowVector(yMean);

    return new CenterDataResult { X = x, Y = y, xMean = xMean, yMean = yMean, xStd = xStd };
}