Esempio n. 1
0
        /// <summary>
        /// Returns the residual sum of squares obtained when <paramref name="basis"/> and
        /// <paramref name="basisReflected"/> are appended to the current bases.
        /// </summary>
        /// <param name="basis">Candidate basis; its value goes into the second-to-last column.</param>
        /// <param name="basisReflected">Reflected candidate basis; its value goes into the last column.</param>
        /// <param name="newKnotVal">Passed through to <c>CalcFastDependedOnPrevious</c> for both candidates.</param>
        /// <param name="solver">Produces regression coefficients from the design matrix and <c>Y</c>.</param>
        /// <param name="transformedData">
        /// Design matrix cache. When null it is rebuilt with <c>Recalc</c> (both candidates appended) and
        /// handed back to the caller; otherwise its last two columns are updated in place.
        /// </param>
        /// <returns>
        /// The RSS of the candidate model, or <see cref="double.MaxValue"/> when, after an in-place
        /// update, either of the two candidate columns is exactly 0.0 in every row.
        /// </returns>
        public double CheckNewBasisFast(Basis basis, Basis basisReflected, double newKnotVal, Func <double[][], double[], double[]> solver, ref double [][] transformedData)
        {
            if (transformedData == null)
            {
                // First evaluation for this candidate pair: build the full design matrix.
                List <Basis> tempNewBasises = new List <Basis>(this.Basises);
                tempNewBasises.Add(basis);
                tempNewBasises.Add(basisReflected);
                transformedData = Recalc(tempNewBasises, Regressors);
            }
            else
            {
                int ncol = transformedData[0].Length - 2;
                int nrow = transformedData.Length;

                // A flag stays true only while every value seen so far equals the previous one,
                // starting from 0.0 - i.e. while the whole column is exactly zero.
                bool b1Warning = true;
                bool b2Warning = true;

                double b1Temp = 0.0;
                double b2Temp = 0.0;

                for (int i = 0; i < nrow; i++)
                {
                    transformedData[i][ncol]     += basis.CalcFastDependedOnPrevious(Regressors[i], newKnotVal, basis.Hinges[basis.Hinges.Count - 1].Value, i);
                    transformedData[i][ncol + 1] += basisReflected.CalcFastDependedOnPrevious(Regressors[i], newKnotVal, basis.Hinges[basis.Hinges.Count - 1].Value, i);

                    if (b1Warning)
                    {
                        b1Warning = b1Temp == transformedData[i][ncol];
                        b1Temp    = transformedData[i][ncol];
                    }

                    if (b2Warning)
                    {
                        // Fixed: this check must look at the reflected column (ncol + 1);
                        // it previously re-read column ncol, so the reflected column was never checked.
                        b2Warning = b2Temp == transformedData[i][ncol + 1];
                        b2Temp    = transformedData[i][ncol + 1];
                    }
                }

                if (b1Warning || b2Warning)
                {
                    // Sentinel: a candidate column is all zeros, so no fit is attempted.
                    return double.MaxValue;
                }
            }

            var tempNewRegressionCoefficients = solver(transformedData, Y);
            var tempNewpredicted = RegressionToolkit.Predict(tempNewRegressionCoefficients.ToArray(), transformedData);
            var tempNewRSS       = RegressionToolkit.CalcRSS(tempNewpredicted.ToArray(), Y);

            return tempNewRSS;
        }
Esempio n. 2
0
        //public double[] YTransformed { get; set; }

        /// <summary>
        /// Computes the residual sum of squares of a trial model made of the current bases plus
        /// <paramref name="basis"/> and <paramref name="basisReflected"/>, using a full
        /// least-squares fit. The model's own basis list is not modified.
        /// </summary>
        /// <param name="basis">Candidate basis appended first.</param>
        /// <param name="basisReflected">Candidate basis appended second.</param>
        /// <returns>The value returned by <c>RegressionToolkit.CalcRSS</c> for the trial model.</returns>
        public double CheckNewBasis(Basis basis, Basis basisReflected)
        {
            // Copy the current bases and append both candidates to the copy.
            var candidateBasises = new List <Basis>(this.Basises) { basis, basisReflected };

            var designMatrix = Recalc(candidateBasises, Regressors);
            var coefficients = RegressionToolkit.CalculateLeastSquares(designMatrix, Y);
            var fitted       = RegressionToolkit.Predict(coefficients.ToArray(), designMatrix);

            return RegressionToolkit.CalcRSS(fitted.ToArray(), Y);
        }
Esempio n. 3
0
        /// <summary>
        /// Rebuilds the transformed regressors from the current bases, refits the regression
        /// coefficients and refreshes the cached RSS and R-squared values.
        /// </summary>
        public void Recalc()
        {
            RegressorsTransformed = Recalc(Basises, Regressors);

            // Give V the required size (one row/column per transformed regressor column).
            _v = ResizeV(_v, RegressorsTransformed[0].Length);

            // C can be recomputed right here.
            _c = __calcC(RegressorsTransformed);

            var coefficients = PrepareAndCalcCholesskyNewColumns(RegressorsTransformed, Y);
            _regressionCoefficients = coefficients.ToList();

            var fitted = RegressionToolkit.Predict(_regressionCoefficients.ToArray(), RegressorsTransformed);

            _RSS = RegressionToolkit.CalcRSS(fitted.ToArray(), Y);
            _RSq = RegressionToolkit.CalcRSq(fitted.ToArray(), Y);
        }
Esempio n. 4
0
        /// <summary>
        /// Recomputes V and C from scratch for <paramref name="x"/>, adds 0.001 to every diagonal
        /// entry of V, solves via <c>RegressionToolkit.CalculateCholesskyRegression</c> and then
        /// overwrites coefficient 0 with mean(y) minus the sum of coefficient[i] * columnMean[i].
        /// </summary>
        /// <param name="x">Design matrix, one row per observation.</param>
        /// <param name="y">Response values; only their average is used here.</param>
        /// <returns>The coefficients as an array, element 0 being the adjusted intercept.</returns>
        public double[] PrepareAndCalcCholesskyFull(double[][] x, double[] y)
        {
            List <double> columnMeans;

            _v = __calcV(x, out columnMeans);
            _c = __calcC(x);

            // Add a small constant to the diagonal of V before solving.
            for (int d = 0; d < _v.Length; d++)
            {
                _v[d][d] += 0.001;
            }

            var coefficients = RegressionToolkit.CalculateCholesskyRegression(_v, _c);

            // Intercept: start from the mean response and subtract each slope times its column mean.
            coefficients[0] = y.Average();

            for (int col = 1; col < coefficients.Count; col++)
            {
                coefficients[0] -= coefficients[col] * columnMeans[col];
            }

            return coefficients.ToArray();
        }
Esempio n. 5
0
        /// <summary>
        /// Solves for the regression coefficients of <paramref name="x"/>, reusing the cached V
        /// matrix when its size already matches and recomputing only its last two rows and columns.
        /// </summary>
        /// <remarks>
        /// When the cached V/C sizes do not match <paramref name="x"/> (or V is smaller than 3x3) V and
        /// the column means are recomputed in full. Otherwise only the last two rows/columns of V
        /// and the last two column means are refreshed; all other entries of V are kept as they are.
        /// The diagonal ridge term is applied only to entries that were just recomputed, so that it
        /// does not pile up on retained entries across repeated calls.
        /// NOTE(review): this relies on retained diagonal entries already carrying the ridge term from
        /// the call that computed them - confirm ResizeV preserves existing values.
        /// </remarks>
        /// <param name="x">Design matrix, one row per observation.</param>
        /// <param name="y">Response values; only their average is used here.</param>
        /// <returns>The coefficients as an array, element 0 being the adjusted intercept.</returns>
        public double[] PrepareAndCalcCholesskyNewColumns(double[][] x, double[] y)
        {
            // Small constant added to the diagonal of V before solving.
            const double diagonalRidge = 0.001;

            // This is meant to be called after the bases were recalculated with a new knot.
            if (_v.Length != x[0].Length ||
                _v[0].Length != x[0].Length ||
                _c.Length != x[0].Length ||
                _v.Length < 3)
            {
                //TODO: no need for a full recalculation, only the columns added by the previous operation
                _v = __calcV(x, out xhat);

                // Everything was recomputed, so every diagonal entry needs the ridge term.
                for (int i = 0; i < _v.Length; i++)
                {
                    _v[i][i] += diagonalRidge;
                }
            }
            else
            {
                // Recompute only the last two rows and the last two columns of V.
                int f0 = _v.Length - 2;
                int f1 = _v.Length - 1;

                xhat[f0] = __calcMean(x, f0);
                xhat[f1] = __calcMean(x, f1);

                // Zero out the entries that are about to be accumulated.
                for (int i = f0; i <= f1; i++) // the two columns
                {
                    for (int j = 0; j < _v.Length; j++)
                    {
                        _v[i][j] = 0.0;
                        _v[j][i] = 0.0;
                    }
                }

                // Accumulate over all observations.
                for (int k = 0; k < x.Length; k++)
                {
                    for (int i = f0; i <= f1; i++)   // the two columns
                    {
                        for (int j = 0; j <= i; j++) // all features (Bj)
                        {
                            _v[i][j] += x[k][j] * (x[k][i] - xhat[i]);
                            if (i != j)
                            {
                                _v[j][i] += x[k][i] * (x[k][j] - xhat[j]);
                            }
                        }
                    }
                }

                // Fixed: only the two freshly recomputed diagonal entries get the ridge term.
                // Adding it to the whole diagonal here made it grow on the retained entries
                // with every call.
                _v[f0][f0] += diagonalRidge;
                _v[f1][f1] += diagonalRidge;
            }

            _c = __calcC(x);

            var regressionCoefficients = RegressionToolkit.CalculateCholesskyRegression(_v, _c);

            // Intercept: start from the mean response and subtract each slope times its column mean.
            regressionCoefficients[0] = y.Average();

            for (int i = 1; i < regressionCoefficients.Count; i++)
            {
                regressionCoefficients[0] -= regressionCoefficients[i] * xhat[i];
            }

            return regressionCoefficients.ToArray();
        }
Esempio n. 6
0
        /// <summary>
        /// Builds a <c>Model</c> from the columns selected by <paramref name="value"/> and the "mpg"
        /// column, grows it by repeatedly adding the hinge pair that lowers the RSS the most, then
        /// prunes bases by GCV until three remain. Diagnostic output goes to the console and to
        /// "output.txt".
        /// </summary>
        /// <param name="value">Column selector passed to <c>GetX</c>.</param>
        /// <returns>Always an empty list; the fitted model is left in the <c>m</c> field.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the ordering returned by <c>GetArrayOrder</c> is not ascending for a column.
        /// </exception>
        public List <double> Predict(string value)
        {
            m = new Model(GetX(value), GetColumnDouble("mpg").ToArray());

            //hinge test
            // NOTE(review): result is unused; the call is kept because GetX's side effects are not visible here.
            var xs = GetX("mpg");

            const int MAX_HINGES_IN_BASIS = 30;
            const int MAX_BASISES         = 15;
            const int MIN_BASISES_AFTER_PRUNING = 3;
            int       DATASET_ROWS        = m.Y.Length;

            //B0
            m.AddBasis(new Basis(null, null, 1.0, DATASET_ROWS), null);

            // Forward pass: for every existing basis look for the (variable, knot) pair whose
            // hinge and reflected hinge reduce the RSS the most, and add it to the model.
            do
            {
                int solutions = 0;

                for (int i = 0; i < m.Basises.Count; i++)
                {
                    double PotentialRSS = m.RSS;

                    bool betterFound = false;

                    int    varN     = 0;
                    double bestKnot = 0.0;

                    //There is one restriction put on the formation of model terms: each input
                    //can appear at most once in a product.
                    for (int j = 0; j < m.Regressors[0].Length; j++)
                    {
                        if (m.Basises[i].IsInputAppearsInProduct(j))
                        {
                            continue;
                        }

                        if (m.Basises[i].HingesCount >= MAX_HINGES_IN_BASIS)
                        {
                            break;
                        }

                        Hinge h          = new Hinge(j, 0.0);
                        Hinge hReflected = h.ConstructNegative();

                        Basis b          = new Basis(m.Basises[i], h, DATASET_ROWS);
                        Basis bReflected = new Basis(m.Basises[i], hReflected, DATASET_ROWS);

                        // Reset the design-matrix cache for every new variable.
                        double[][] bData = null;

                        int[] kOrdered = m.GetArrayOrder(m.GetRegressorsColumn(j));
                        m.uValue1 = double.MinValue;
                        m.uValue2 = double.MinValue;
                        for (int k = m.Regressors.Length - 2; k >= 0; k--)
                        {
                            // Row indices of the k-th and (k+1)-th elements in sorted order.
                            int ki0 = kOrdered[k];
                            int ki1 = kOrdered[k + 1];     // the value at ki1 must be >= the value at ki0

                            double k0 = m.Regressors[ki0][j];
                            double k1 = m.Regressors[ki1][j];

                            double kdiff = k1 - k0;

                            if (kdiff < 0)
                            {
                                throw new InvalidOperationException("t should be <= u !");
                            }
                            //k0  -- t
                            //k1  -- u
                            h.Value          = k0;
                            hReflected.Value = k0;

                            double rss = m.CheckNewBasisCholeskyFast(b, bReflected, k1, ref bData);
                            if (rss == double.MaxValue)
                            {
                                continue;
                            }
                            Console.WriteLine("Cholessky rss = " + rss);

                            if (rss < PotentialRSS)
                            {
                                PotentialRSS = rss;
                                varN         = j;
                                // Fixed: remember the knot value that was actually evaluated.
                                // The old code stored k (a position in the sorted order) and later
                                // used it as a row index, which selects a different observation.
                                bestKnot     = k0;
                                betterFound  = true;
                            }
                        }
                    }

                    if (betterFound)
                    {
                        solutions++;

                        Hinge winnerHinge          = new Hinge(varN, bestKnot);
                        Hinge winnerHingeReflected = winnerHinge.ConstructNegative();

                        Basis winnerBasis          = new Basis(m.Basises[i], winnerHinge, DATASET_ROWS);
                        Basis winnerBasisReflected = new Basis(m.Basises[i], winnerHingeReflected, DATASET_ROWS);

                        m.AddBasis(winnerBasis, winnerBasisReflected);
                        if (m.Basises.Count >= MAX_BASISES)
                        {
                            break;
                        }
                    }
                }


                if (solutions == 0)
                {
                    break;                 //no solutions anymore which decrease RSS
                }
                if (m.Basises.Count >= MAX_BASISES)
                {
                    break;
                }
                if (m.Basises.Any(b => b.HingesCount > MAX_HINGES_IN_BASIS))
                {
                    break;
                }
            }while (true);


            //Pruning pass

            using (System.IO.StreamWriter t = new StreamWriter("output.txt"))
            {
                t.WriteLine(RegressionToolkit.DoubleToR(m.RegressorsTransformed));
                t.WriteLine(RegressionToolkit.DoubleToR(m.Y));
            }

            // Fixed: loop on "more than three bases left" instead of "stop when exactly three".
            // A model that starts with three or fewer bases used to keep removing and eventually
            // asked to remove index 1 from a list that no longer had it.
            while (m.Basises.Count > MIN_BASISES_AFTER_PRUNING)
            {
                // Fixed: seed with the largest double so the true minimum is always found,
                // even when every GCV exceeds the former 1000000.0 seed.
                double  lowestGCV      = double.MaxValue;
                int     lowestGCVIndex = 1;
                Basis[] tempBasises    = new Basis[m.Basises.Count];
                m.Basises.CopyTo(tempBasises);

                // Try removing each basis except B0 and remember which removal gives the lowest GCV.
                for (int i = 1; i < m.Basises.Count; i++)
                {
                    m.RemoveBasisAt(i);
                    double gcv = m.GCV;
                    if (gcv < lowestGCV)
                    {
                        lowestGCV      = gcv;
                        lowestGCVIndex = i;
                    }
                    m.ResetBasis(tempBasises);
                }
                m.RemoveBasisAt(lowestGCVIndex);
            }

            Console.WriteLine(Model._warns);
            Console.WriteLine(RegressionToolkit._bad);
            Console.WriteLine(RegressionToolkit._good);
            return new List <double>();
        }