コード例 #1
0
        //this is async for the calling Task.WhenAll
        //but does not necessarily need internal async awaits
        public async Task RunAlgorithmAsync(List <List <double> > data)
        {
            try
            {
                //minimal data requirement is first five cols
                if (_colNames.Count() < 5 ||
                    _mathTerms.Count() == 0)
                {
                    ErrorMessage = "Regression requires at least one dependent variable and one independent variable.";
                    return;
                }
                if (data.Count() < 5)
                {
                    //185 same as other analysis
                    ErrorMessage = "Regression requires at least 2 rows of observed data and 3 rows of scoring data.";
                    return;
                }
                //convert data to a Math.Net Matrix
                //v185 uses same ci technique as algos 2,3 and 4 -last 3 rows are used to generate ci
                List <List <double> > dataci = data.Skip(data.Count - _scoreRows).ToList();
                data.Reverse();
                List <List <double> > dataobs = data.Skip(_scoreRows).ToList();
                dataobs.Reverse();
                //actual observed values
                Vector <double> y  = Shared.GetYData(dataobs);
                Matrix <double> x  = Shared.GetDoubleMatrix(dataobs, _colNames, _depColNames);
                Matrix <double> ci = Shared.GetDoubleMatrix(dataci, _colNames, _depColNames);

                //model expected values - get the coefficents
                //use normal equations regression
                Vector <double> p = MultipleRegression.NormalEquations(x, y);
                //but note that this runs without errors in more cases but still does not give good results
                //Vector<double> p = MultipleRegression.QR(x, y);

                if (p.Count() != ci.Row(_scoreRows - 1).Count())
                {
                    //185 same as other analysis
                    ErrorMessage = "The scoring and training datasets have different numbers of columns.";
                    return;
                }
                //get the predicted yhats
                Vector <double> yhat = GetYHatandSetQTPred(y.Count, x, p, ci.Row(_scoreRows - 1).ToArray());
                //get the durbin-watson d statistic
                double d   = GetDurbinWatson(y, yhat);
                double SSE = 0;
                //sum of the square of the error (between the predicted, p, and observed, y);
                SSE = Distance.SSD(yhat, y);
                double rSquared = GoodnessOfFit.RSquared(yhat, y);
                //sum of the square of the regression (between the predicted, p, and observed mean, statsY.Mean);
                double SSR = 0;
                for (int i = 0; i < yhat.Count(); i++)
                {
                    SSR += Math.Pow((yhat[i] - y.Mean()), 2);
                }
                //set joint vars properties
                //degrees freedom
                double dfR         = x.ColumnCount - 1;
                double dfE         = x.RowCount - x.ColumnCount;
                int    idfR        = x.ColumnCount - 1;
                int    idfE        = x.RowCount - x.ColumnCount;
                double s2          = SSE / dfE;
                double s           = Math.Sqrt(s2);
                double MSR         = SSR / dfR;
                double MSE         = SSE / dfE;
                double FValue      = MSR / MSE;
                double adjRSquared = 1 - ((x.RowCount - 1) * (MSE / (SSE + SSR)));
                double pValue      = Shared.GetPValueForFDist(idfR, idfE, FValue);

                //correct 2 tailed t test
                //double TCritialValue = ExcelFunctions.TInv(0.05, idfE);
                //so do this
                double dbCI           = CalculatorHelpers.GetConfidenceIntervalProb(_confidenceInt);
                double tCriticalValue = ExcelFunctions.TInv(dbCI, idfE);
                //set each coeff properties
                //coeffs st error
                //use matrix math to get the standard error of coefficients
                Matrix <double> xt = x.Transpose();
                //matrix x'x
                Matrix <double> xx       = xt.Multiply(x);
                Matrix <double> xxminus1 = xx.Inverse();

                double   sxx  = 0;
                double[] xiSE = new double[x.ColumnCount];
                //coeff tstats
                double[] xiT = new double[x.ColumnCount];
                //lower value for pvalue
                double[] xiP = new double[x.ColumnCount];
                for (int i = 0; i < x.ColumnCount; i++)
                {
                    //use the matrix techniques shown on p 717 of Mendenhall and Sincich
                    sxx     = s * Math.Sqrt(xxminus1.Column(i)[i]);
                    xiSE[i] = sxx;
                    xiT[i]  = p[i] / sxx;
                    xiP[i]  = Shared.GetPValueForTDist(idfE, xiT[i], 0, 1);
                }
                double FCriticalValue    = 0;
                string FGreaterFCritical = string.Empty;
                if (_subalgorithm == Calculator1.MATH_SUBTYPES.subalgorithm8.ToString())
                {
                    //anova regression
                    //anova critical fvalue test
                    //FCriticalValue = ExcelFunctions.FInv(1 - _confidenceInt, idfR, idfE);
                    FCriticalValue    = ExcelFunctions.FInv(dbCI, idfR, idfE);
                    FGreaterFCritical = (FValue > FCriticalValue) ? "true" : "false";
                    SetAnovaIntervals(0, p, xiSE, tCriticalValue);
                    SetAnovaIntervals(1, p, xiSE, tCriticalValue);
                    SetAnovaIntervals(2, p, xiSE, tCriticalValue);
                }
                else
                {
                    //set QTM ci and pi intervals
                    SetQTIntervals(0, s, xxminus1, ci.Row(_scoreRows - 1).ToArray(), p, tCriticalValue);
                    SetQTIntervals(1, s, xxminus1, ci.Row(_scoreRows - 2).ToArray(), p, tCriticalValue);
                    SetQTIntervals(2, s, xxminus1, ci.Row(_scoreRows - 3).ToArray(), p, tCriticalValue);
                }
                //add the data to a string builder
                StringBuilder sb = new StringBuilder();
                sb.AppendLine("regression results");
                //dep var has to be in the 4 column always
                string sLine = string.Concat("dependent variable:  ", _colNames[3]);
                sb.AppendLine(sLine);
                string[] cols = new string[] { "source", "df", "SS", "MS" };
                sb.AppendLine(Shared.GetLine(cols, true));
                cols = new string[] { "model", dfR.ToString("F0"), SSR.ToString("F4"), MSR.ToString("F4") };
                sb.AppendLine(Shared.GetLine(cols, false));
                cols = new string[] { "error  ", dfE.ToString("F0"), SSE.ToString("F4"), MSE.ToString("F4") };
                sb.AppendLine(Shared.GetLine(cols, false));
                cols = new string[] { "total    ", (dfR + dfE).ToString("F0"), (SSR + SSE).ToString("F4") };
                sb.AppendLine(Shared.GetLine(cols, false));
                sb.AppendLine(string.Empty);
                cols = new string[] { "R-squared", rSquared.ToString("F4"), "Adj R-squared", adjRSquared.ToString("F4") };
                sb.AppendLine(Shared.GetLine(cols, false));
                cols = new string[] { "F value", FValue.ToString("F4"), "prob > F", pValue.ToString("F4") };
                sb.AppendLine(Shared.GetLine(cols, false));
                sb.AppendLine(string.Empty);
                cols = new string[] { GetName("variable"), "coefficient", "stand error", "T-ratio", "prob > T" };
                sb.AppendLine(Shared.GetLine(cols, true));
                for (int i = 0; i < p.Count(); i++)
                {
                    if (i == 0)
                    {
                        cols = new string[] { GetName(_depColNames[i]), p[i].ToString("F5"), xiSE[i].ToString("F4"), xiT[i].ToString("F4"), xiP[i].ToString("F4") };
                        sb.AppendLine(Shared.GetLine(cols, false));
                    }
                    else
                    {
                        cols = new string[] { GetName(_depColNames[i]), p[i].ToString("F5"), xiSE[i].ToString("F4"), xiT[i].ToString("F4"), xiP[i].ToString("F4") };
                        sb.AppendLine(Shared.GetLine(cols, false));
                    }
                }
                cols = new string[] { "durbin-watson: ", d.ToString("F4") };
                sb.AppendLine(Shared.GetLine(cols, false));
                if (_subalgorithm == Calculator1.MATH_SUBTYPES.subalgorithm8.ToString())
                {
                    cols = new string[] { "F Critical Value", FCriticalValue.ToString("F5"), "F > F Critical", FGreaterFCritical };
                    sb.AppendLine(Shared.GetLine(cols, true));
                    cols = new string[] { "estimate", "predicted", string.Concat("lower ", _confidenceInt.ToString(), "%"), string.Concat("upper ", _confidenceInt.ToString(), "%") };
                    sb.AppendLine(Shared.GetLine(cols, true));
                    cols = new string[] { "Col 0 Mean CI ", QTPredicted.ToString("F4"), QTL.ToString("F4"), QTU.ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                    cols = new string[] { "Col 1 - 0 Mean CI ", QTPredicted10.ToString("F4"), QTL10.ToString("F4"), QTU10.ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                    cols = new string[] { "Col 2 - 0 Mean CI ", QTPredicted20.ToString("F4"), QTL20.ToString("F4"), QTU20.ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                }
                else
                {
                    cols = new string[] { "estimate", "predicted", string.Concat("lower ", _confidenceInt.ToString(), "%"), string.Concat("upper ", _confidenceInt.ToString(), "%") };
                    sb.AppendLine(Shared.GetLine(cols, true));
                    cols = new string[] { "QTM CI ", QTPredicted.ToString("F4"), QTL.ToString("F4"), QTU.ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                    cols = new string[] { "QTM PI ", QTPredicted.ToString("F4"), (QTPredicted - QTPI).ToString("F4"), (QTPredicted + QTPI).ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                    string sRow = string.Concat("row ", data.Count - 2);
                    cols = new string[] { sRow };
                    sb.AppendLine(Shared.GetLine(cols, true));
                    cols = new string[] { "CI ", QTPredicted10.ToString("F4"), QTL10.ToString("F4"), QTU10.ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                    cols = new string[] { "PI ", QTPredicted10.ToString("F4"), (QTPredicted10 - QTPI10).ToString("F4"), (QTPredicted10 + QTPI10).ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                    sRow = string.Concat("row ", data.Count - 1);
                    cols = new string[] { sRow };
                    sb.AppendLine(Shared.GetLine(cols, true));
                    cols = new string[] { "CI ", QTPredicted20.ToString("F4"), QTL20.ToString("F4"), QTU20.ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                    cols = new string[] { "PI ", QTPredicted20.ToString("F4"), (QTPredicted20 - QTPI20).ToString("F4"), (QTPredicted20 + QTPI20).ToString("F4") };
                    sb.AppendLine(Shared.GetLine(cols, false));
                }
                if (this.MathResult.ToLower().StartsWith("http"))
                {
                    string sError    = string.Empty;
                    bool   bHasSaved = CalculatorHelpers.SaveTextInURI(
                        _params.ExtensionDocToCalcURI, sb.ToString(), this.MathResult, out sError);
                    if (!string.IsNullOrEmpty(sError))
                    {
                        this.MathResult += sError;
                    }
                }
                else
                {
                    this.MathResult = sb.ToString();
                }
            }
            catch (Exception ex)
            {
                this.ErrorMessage = ex.Message;
            }
        }
コード例 #2
0
        public static Vector <double> GetNormalizedVector(
            string subIndNormType, double startValue,
            bool scaleUp4Digits, double[] subIndicatorData)
        {
            //normalize them
            var             stats   = new MathNet.Numerics.Statistics.DescriptiveStatistics(subIndicatorData);
            Vector <double> siIndex = Vector <double> .Build.Dense(subIndicatorData);

            if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.none.ToString() ||
                string.IsNullOrEmpty(subIndNormType))
            {
                //data has already been normalized
            }
            else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.zscore.ToString())
            {
                //z-score: (x – mean(x)) / stddev(x)
                for (int x = 0; x < siIndex.Count; x++)
                {
                    siIndex[x] = (siIndex[x] - stats.Mean) / stats.StandardDeviation;
                    if (scaleUp4Digits)
                    {
                        //scale the 4 digits by multiplying by 10,000
                        siIndex[x] = siIndex[x] * 10000.00;
                        siIndex[x] = Math.Round(siIndex[x], 2);
                    }
                    else
                    {
                        siIndex[x] = CalculatorHelpers.CheckForNaNandRound4(siIndex[x]);
                    }
                }
            }
            else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.minmax.ToString())
            {
                //min-max: (x – min(x)) / (max(x) – min(x))
                for (int x = 0; x < siIndex.Count; x++)
                {
                    siIndex[x] = (siIndex[x] - stats.Minimum) / (stats.Maximum - stats.Minimum);
                    if (scaleUp4Digits)
                    {
                        //scale the 4 digits by multiplying by 10,000
                        siIndex[x] = siIndex[x] * 10000.00;
                        siIndex[x] = Math.Round(siIndex[x], 2);
                    }
                    else
                    {
                        siIndex[x] = CalculatorHelpers.CheckForNaNandRound4(siIndex[x]);
                    }
                }
            }
            else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.logistic.ToString())
            {
                for (int x = 0; x < siIndex.Count; x++)
                {
                    //logistic: 1 / (1 + exp(-x))
                    siIndex[x] = MathNet.Numerics.SpecialFunctions.Logistic(siIndex[x]);
                    //or
                    //siIndex[x] = 1 / (1 + Math.Exp(-siIndex[x]));
                    if (scaleUp4Digits)
                    {
                        //scale the 4 digits by multiplying by 10,000
                        siIndex[x] = siIndex[x] * 10000.00;
                        siIndex[x] = Math.Round(siIndex[x], 2);
                    }
                    else
                    {
                        siIndex[x] = CalculatorHelpers.CheckForNaNandRound4(siIndex[x]);
                    }
                }
            }
            else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.logit.ToString())
            {
                for (int x = 0; x < siIndex.Count; x++)
                {
                    //logit: inverese of logistic for y between 0 and 1
                    //this assumes x is actually y
                    siIndex[x] = MathNet.Numerics.SpecialFunctions.Logit(siIndex[x]);
                    if (scaleUp4Digits)
                    {
                        //scale the 4 digits by multiplying by 10,000
                        siIndex[x] = siIndex[x] * 10000.00;
                        siIndex[x] = Math.Round(siIndex[x], 2);
                    }
                    else
                    {
                        siIndex[x] = CalculatorHelpers.CheckForNaNandRound4(siIndex[x]);
                    }
                }
            }
            else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.tanh.ToString())
            {
                for (int x = 0; x < siIndex.Count; x++)
                {
                    //hyperbolic tangent
                    siIndex[x] = MathNet.Numerics.Trig.Tanh(siIndex[x]);
                    if (scaleUp4Digits)
                    {
                        //scale the 4 digits by multiplying by 10,000
                        siIndex[x] = siIndex[x] * 10000.00;
                        siIndex[x] = Math.Round(siIndex[x], 2);
                    }
                    else
                    {
                        siIndex[x] = CalculatorHelpers.CheckForNaNandRound4(siIndex[x]);
                    }
                }
            }
            else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.pnorm.ToString())
            {
                //p value for ttest with n-1
                double pValue = Shared.GetPValueForTDist(siIndex.Count() - 1,
                                                         startValue, stats.Mean, stats.Variance);
                pValue = CalculatorHelpers.CheckForNaNandRound4(pValue);
                //p norm
                siIndex = siIndex.Normalize(pValue);
                if (scaleUp4Digits)
                {
                    //scale the 4 digits by multiplying by 10,000
                    for (int x = 0; x < siIndex.Count; x++)
                    {
                        siIndex[x] = siIndex[x] * 10000.00;
                        siIndex[x] = Math.Round(siIndex[x], 2);
                    }
                }
            }
            else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.weights.ToString())
            {
                for (int x = 0; x < siIndex.Count; x++)
                {
                    //rand 2016 technique
                    siIndex[x] = siIndex[x] / startValue;
                    if (scaleUp4Digits)
                    {
                        //scale the 4 digits by multiplying by 10,000
                        siIndex[x] = siIndex[x] * 10000.00;
                        siIndex[x] = Math.Round(siIndex[x], 2);
                    }
                    else
                    {
                        siIndex[x] = CalculatorHelpers.CheckForNaNandRound4(siIndex[x]);
                    }
                }
            }
            else
            {
                //indicator 2 in drr1 (p and q, not norm and index)
            }
            //add them to parent cat index
            return(siIndex);
        }