/// <summary>
/// Evaluates the RSS of the model extended with a candidate basis pair (a hinge and
/// its reflection), updating the cached transformed design matrix incrementally
/// instead of rebuilding it from scratch for every candidate knot.
/// </summary>
/// <param name="basis">Candidate basis containing the new hinge.</param>
/// <param name="basisReflected">Mirror (reflected) candidate basis.</param>
/// <param name="newKnotVal">Knot value being tried; drives the incremental column update.</param>
/// <param name="solver">Least-squares solver mapping (design matrix, y) to coefficients.</param>
/// <param name="transformedData">
/// Cached transformed design matrix; built in full on the first call (null input),
/// after which only its last two columns are updated in place.
/// </param>
/// <returns>
/// RSS of the extended model, or <see cref="double.MaxValue"/> when either new
/// column is degenerate (all-equal values starting from zero), which would make the
/// least-squares system singular.
/// </returns>
public double CheckNewBasisFast(Basis basis, Basis basisReflected, double newKnotVal, Func<double[][], double[], double[]> solver, ref double[][] transformedData)
{
    if (transformedData == null)
    {
        // First call for this candidate pair: build the full transformed matrix.
        List<Basis> tempNewBasises = new List<Basis>(this.Basises);
        tempNewBasises.Add(basis);
        tempNewBasises.Add(basisReflected);
        transformedData = Recalc(tempNewBasises, Regressors);
    }
    else
    {
        // Subsequent calls: only the two candidate columns depend on the knot,
        // so update them in place.
        int ncol = transformedData[0].Length - 2;
        int nrow = transformedData.Length;
        // b1Warning / b2Warning stay true only while every value in the respective
        // column equals the previous one (seeded with 0.0) — i.e. the column is
        // degenerate and the fit should be skipped.
        bool b1Warning = true;
        bool b2Warning = true;
        double b1Temp = 0.0;
        double b2Temp = 0.0;
        for (int i = 0; i < nrow; i++)
        {
            transformedData[i][ncol] += basis.CalcFastDependedOnPrevious(Regressors[i], newKnotVal, basis.Hinges[basis.Hinges.Count - 1].Value, i);
            transformedData[i][ncol + 1] += basisReflected.CalcFastDependedOnPrevious(Regressors[i], newKnotVal, basis.Hinges[basis.Hinges.Count - 1].Value, i);
            if (b1Warning)
            {
                b1Warning = b1Temp == transformedData[i][ncol];
                b1Temp = transformedData[i][ncol];
            }
            if (b2Warning)
            {
                // BUG FIX: the reflected column lives at ncol + 1. The original
                // checked column ncol here (copy-paste), so a degenerate reflected
                // column was never detected.
                b2Warning = b2Temp == transformedData[i][ncol + 1];
                b2Temp = transformedData[i][ncol + 1];
            }
        }
        if (b1Warning || b2Warning)
        {
            return double.MaxValue;
        }
    }
    var tempNewRegressionCoefficients = solver(transformedData, Y);
    var tempNewpredicted = RegressionToolkit.Predict(tempNewRegressionCoefficients.ToArray(), transformedData);
    var tempNewRSS = RegressionToolkit.CalcRSS(tempNewpredicted.ToArray(), Y);
    return tempNewRSS;
}
//public double[] YTransformed { get; set; }
/// <summary>
/// Computes the RSS of the model extended with the candidate basis pair by fully
/// rebuilding the transformed design matrix and refitting via least squares
/// (the slow, non-incremental counterpart of CheckNewBasisFast).
/// </summary>
/// <param name="basis">Candidate basis containing the new hinge.</param>
/// <param name="basisReflected">Mirror (reflected) candidate basis.</param>
/// <returns>RSS of the extended model.</returns>
public double CheckNewBasis(Basis basis, Basis basisReflected)
{
    var candidateBasises = new List<Basis>(this.Basises) { basis, basisReflected };
    var design = Recalc(candidateBasises, Regressors);
    var coefficients = RegressionToolkit.CalculateLeastSquares(design, Y);
    var predicted = RegressionToolkit.Predict(coefficients.ToArray(), design);
    return RegressionToolkit.CalcRSS(predicted.ToArray(), Y);
}
/// <summary>
/// Rebuilds the transformed design matrix from the current basis set and refits the
/// model, refreshing the cached coefficients, RSS and R-squared.
/// </summary>
public void Recalc()
{
    RegressorsTransformed = Recalc(this.Basises, Regressors);

    // Resize V to the (possibly grown) number of columns, then C can be
    // recomputed right here before the incremental Cholesky fit.
    _v = ResizeV(this._v, RegressorsTransformed[0].Length);
    _c = __calcC(RegressorsTransformed);

    _regressionCoefficients = PrepareAndCalcCholesskyNewColumns(RegressorsTransformed, Y).ToList();

    var fitted = RegressionToolkit.Predict(_regressionCoefficients.ToArray(), RegressorsTransformed).ToArray();
    _RSS = RegressionToolkit.CalcRSS(fitted, Y);
    _RSq = RegressionToolkit.CalcRSq(fitted, Y);
}
/// <summary>
/// Fits regression coefficients via Cholesky decomposition, recomputing the full
/// V matrix and C vector from scratch.
/// </summary>
/// <param name="x">Transformed design matrix (rows = observations).</param>
/// <param name="y">Response vector.</param>
/// <returns>Regression coefficients; element 0 is the recentered intercept.</returns>
public double[] PrepareAndCalcCholesskyFull(double[][] x, double[] y)
{
    List<double> columnMeans;
    _v = __calcV(x, out columnMeans);
    _c = __calcC(x);

    // Small ridge term on the diagonal keeps the decomposition numerically stable.
    for (int i = 0; i < _v.Length; i++)
    {
        _v[i][i] += 0.001;
    }

    var coefficients = RegressionToolkit.CalculateCholesskyRegression(_v, _c);

    // Recenter the intercept: mean response minus each coefficient's contribution
    // at its column mean (coefficients were fit on mean-centered columns).
    double intercept = y.Average();
    for (int i = 1; i < coefficients.Count; i++)
    {
        intercept -= coefficients[i] * columnMeans[i];
    }
    coefficients[0] = intercept;

    return coefficients.ToArray();
}
/// <summary>
/// Fits regression coefficients via Cholesky decomposition. When the cached V matrix
/// still matches the design matrix shape, only the two most recently added
/// rows/columns of V (the candidate basis pair) are recomputed; otherwise V is
/// rebuilt in full.
/// </summary>
/// <param name="x">Transformed design matrix (rows = observations).</param>
/// <param name="y">Response vector.</param>
/// <returns>Regression coefficients; element 0 is the recentered intercept.</returns>
public double[] PrepareAndCalcCholesskyNewColumns(double[][] x, double[] y)
{
    // This is expected to be called right after the bases were recomputed with a new knot.
    if (_v.Length != x[0].Length || _v[0].Length != x[0].Length || _c.Length != x[0].Length || _v.Length < 3)
    {
        // Cached V is stale or too small: rebuild it completely.
        //TODO: no need to recompute everything — only the columns added by the previous operation
        _v = __calcV(x, out xhat);
    }
    else
    {
        // RECOMPUTE ONLY THE LAST TWO ROWS AND THE LAST TWO COLUMNS OF V
        int f0 = _v.Length - 2;
        int f1 = _v.Length - 1;
        xhat[f0] = __calcMean(x, f0);
        xhat[f1] = __calcMean(x, f1);
        // Zero out the entries that are about to be recomputed.
        for (int i = f0; i <= f1; i++) // the two new columns
        {
            for (int j = 0; j < _v.Length; j++)
            {
                _v[i][j] = 0.0;
                _v[j][i] = 0.0;
            }
        }
        // Accumulate cross-products of the two new columns against all features.
        for (int k = 0; k < x.Length; k++)
        {
            for (int i = f0; i <= f1; i++) // the two new columns
            {
                for (int j = 0; j <= i; j++) // all features (Bj)
                {
                    _v[i][j] += x[k][j] * (x[k][i] - xhat[i]);
                    if (i != j)
                    {
                        _v[j][i] += x[k][i] * (x[k][j] - xhat[j]);
                    }
                }
            }
        }
    }
    // TAKEN FROM THE OLD IMPLEMENTATION (for now): small ridge term on the diagonal
    // keeps the decomposition numerically stable.
    for (int i = 0; i < _v.Length; i++)
    {
        _v[i][i] += 0.001;
    }
    // Debug cross-check against a fully recomputed V (VDEBUG):
    //for (int i = 0; i < _v.Length; i++)
    //    for (int j = 0; j < _v.Length; j++)
    //    {
    //        var d1 = _v[i][j] - VDEBUG[i][j];
    //        var d2 = _v[j][i] - VDEBUG[j][i];
    //        if (Math.Abs(d1) >= 0.1 || Math.Abs(d2) >= 0.1)
    //        {
    //            Console.WriteLine("DEBUG : {0}", d1);
    //            Console.WriteLine("DEBUG : {0}", d2);
    //        }
    //    }
    _c = __calcC(x);
    var regressionCoefficients = RegressionToolkit.CalculateCholesskyRegression(_v, _c);
    // Recenter the intercept: mean response minus each coefficient's contribution
    // at its column mean.
    regressionCoefficients[0] = y.Average();
    for (int i = 1; i < regressionCoefficients.Count; i++)
    {
        regressionCoefficients[0] -= regressionCoefficients[i] * xhat[i];
    }
    return(regressionCoefficients.ToArray());
}
/// <summary>
/// Builds a MARS-style regression model in two passes: a greedy forward pass that
/// repeatedly adds the hinge / reflected-hinge basis pair with the largest RSS
/// reduction, then a backward pruning pass that removes bases by lowest GCV.
/// NOTE(review): the response column is hard-coded to "mpg" and the method always
/// returns an empty list — presumably driver/test code; confirm before reuse.
/// </summary>
/// <param name="value">Column name used to build the regressor matrix via GetX.</param>
/// <returns>Currently always an empty list.</returns>
public List<double> Predict(string value)
{
    m = new Model(GetX(value), GetColumnDouble("mpg").ToArray());
    //hinge test
    var xs = GetX("mpg");
    int MAX_HINGES_IN_BASIS = 30;
    int MAX_BASISES = 15;
    double MAX_DELTA_RSS = 0.00000001; // NOTE(review): declared but never used below
    int DATASET_ROWS = m.Y.Length;
    //B0 — the constant (intercept) basis.
    m.AddBasis(new Basis(null, null, 1.0, DATASET_ROWS), null);
    // Forward pass: grow the basis set until no candidate reduces RSS or a limit is hit.
    do
    {
        int solutions = 0;
        for (int i = 0; i < m.Basises.Count; i++)
        {
            double PotentialRSS = m.RSS;
            bool betterFound = false;
            int varN = 0;
            int valN = 0;
            double[][] bData;
            //There is one restriction put on the formation of model terms: each input
            //can appear at most once in a product.
            for (int j = 0; j < m.Regressors[0].Length; j++)
            {
                if (m.Basises[i].IsInputAppearsInProduct(j))
                {
                    continue;
                }
                if (m.Basises[i].HingesCount >= MAX_HINGES_IN_BASIS)
                {
                    break;
                }
                Hinge h = new Hinge(j, 0.0);
                Hinge hReflected = h.ConstructNegative();
                Basis b = new Basis(m.Basises[i], h, DATASET_ROWS);
                Basis bReflected = new Basis(m.Basises[i], hReflected, DATASET_ROWS);
                bData = null; // reset the cached incremental design matrix for this regressor
                int[] kOrdered = m.GetArrayOrder(m.GetRegressorsColumn(j));
                m.uValue1 = double.MinValue;
                m.uValue2 = double.MinValue;
                // Walk candidate knots along the sorted order of this regressor column.
                for (int k = m.Regressors.Length - 2; k >= 0; k--)
                {
                    // Index into the regressors of the k-th element in descending order.
                    int ki0 = kOrdered[k];
                    int ki1 = kOrdered[k + 1];
                    // kOrdered[ki1] must be >= the zeroth one — the order is descending.
                    double k0 = m.Regressors[ki0][j];
                    double k1 = m.Regressors[ki1][j];
                    double kdiff = k1 - k0;
                    if (kdiff < 0)
                    {
                        throw new Exception("t should be <= u !");
                    }
                    //k0 -- t
                    //k1 -- u
                    h.Value = k0;
                    hReflected.Value = k0;
                    //double rss = m.CheckNewBasisCholessky(b, bReflected);
                    double rss = m.CheckNewBasisCholeskyFast(b, bReflected, k1, ref bData);
                    //double rss = m.CheckNewBasisEquation52(b, bReflected, 0.0, ref bData);
                    if (rss == double.MaxValue)
                    {
                        // Degenerate candidate (constant column) — skip this knot.
                        continue;
                    }
                    Console.WriteLine("Cholessky rss = " + rss);
                    // rss = m.CheckNewBasisFast(b, bReflected, 0.0, ref bData);
                    //Console.WriteLine("Fast rss = " + rss);
                    //double rss = m.CheckNewBasis(b, bReflected);
                    if (rss < PotentialRSS)
                    {
                        PotentialRSS = rss;
                        varN = j;
                        valN = k;
                        betterFound = true;
                    }
                }
            }
            if (betterFound)
            {
                solutions++;
                // NOTE(review): valN is an index into the SORTED order (kOrdered), yet it is
                // used here to index m.Regressors directly — verify this selects the intended knot.
                Hinge winnerHinge = new Hinge(varN, m.Regressors[valN][varN]);
                Hinge winnerHingeReflected = winnerHinge.ConstructNegative();
                Basis winnerBasis = new Basis(m.Basises[i], winnerHinge, DATASET_ROWS);
                Basis winnerBasisReflected = new Basis(m.Basises[i], winnerHingeReflected, DATASET_ROWS);
                m.AddBasis(winnerBasis, winnerBasisReflected);
                if (m.Basises.Count >= MAX_BASISES)
                {
                    break;
                }
            }
        }
        if (solutions == 0)
        {
            break; //no solutions anymore which decrease RSS
        }
        if (m.Basises.Count >= MAX_BASISES)
        {
            break;
        }
        if (m.Basises.Any(b => b.HingesCount > MAX_HINGES_IN_BASIS))
        {
            break;
        }
    } while (true);
    //Pruning pass
    double[] GCVs = new double[m.Basises.Count]; // NOTE(review): allocated but never filled
    // Dump the transformed design matrix and response for external inspection.
    using (System.IO.StreamWriter t = new StreamWriter("output.txt"))
    {
        t.WriteLine(RegressionToolkit.DoubleToR(m.RegressorsTransformed));
        t.WriteLine(RegressionToolkit.DoubleToR(m.Y));
    }
    // Backward pass: repeatedly remove the basis whose removal yields the lowest GCV,
    // stopping once only three bases remain.
    do
    {
        double lowestGCV = 1000000.0;
        int lowestGCVIndex = 1;
        Basis[] tempBasises = new Basis[m.Basises.Count];
        m.Basises.CopyTo(tempBasises);
        for (int i = 1; i < m.Basises.Count; i++)
        {
            // Tentatively drop basis i, record the resulting GCV, then restore the set.
            m.RemoveBasisAt(i);
            if (m.GCV < lowestGCV)
            {
                lowestGCV = m.GCV;
                lowestGCVIndex = i;
            }
            m.ResetBasis(tempBasises);
            //using (System.IO.StreamWriter t = new StreamWriter("output.txt"))
            //{
            //    t.WriteLine(RegressionToolkit.DoubleToR(m.RegressorsTransformed));
            //}
        }
        m.RemoveBasisAt(lowestGCVIndex);
        if (m.Basises.Count == 3)
        {
            break;
        }
    } while (true);
    Console.WriteLine(Model._warns);
    Console.WriteLine(RegressionToolkit._bad);
    Console.WriteLine(RegressionToolkit._good);
    return(new List<double>());
}