/// <summary>
/// Fits a multiple linear regression (normal equations) to the observed rows of
/// <paramref name="data"/>, scores the trailing <c>_scoreRows</c> rows, and writes a
/// plain-text report (anova table, coefficient table, Durbin-Watson statistic and
/// confidence/prediction intervals) to <c>MathResult</c>.
/// </summary>
/// <remarks>
/// The method is declared async so that callers can hand it to Task.WhenAll; the body
/// contains no awaits and therefore runs synchronously.
/// Failures are not thrown: validation problems and any exception raised while
/// calculating are reported through <c>ErrorMessage</c>.
/// </remarks>
/// <param name="data">
/// Row-major numeric data. The last <c>_scoreRows</c> rows are used for scoring and the
/// rows before them are the observations. The list is read only; it is not reordered.
/// </param>
/// <returns>A task that is complete once the report (or the error message) has been set.</returns>
public async Task RunAlgorithmAsync(List<List<double>> data)
{
    try
    {
        //minimal data requirement is first five cols
        if (_colNames.Count() < 5 || _mathTerms.Count() == 0)
        {
            ErrorMessage = "Regression requires at least one dependent variable and one independent variable.";
            return;
        }
        if (data.Count < 5)
        {
            //185 same as other analysis
            ErrorMessage = "Regression requires at least 2 rows of observed data and 3 rows of scoring data.";
            return;
        }

        //v185 uses same ci technique as algos 2,3 and 4 - the last rows are used to generate ci
        //split without touching the caller's list: the previous implementation reversed
        //data in place (and left it reversed) just to drop the trailing scoring rows
        int obsRowCount = data.Count - _scoreRows;
        List<List<double>> dataci = data.Skip(obsRowCount).ToList();
        List<List<double>> dataobs = data.Take(obsRowCount).ToList();

        //convert data to Math.Net vectors and matrices
        //actual observed values
        Vector<double> y = Shared.GetYData(dataobs);
        Matrix<double> x = Shared.GetDoubleMatrix(dataobs, _colNames, _depColNames);
        Matrix<double> ci = Shared.GetDoubleMatrix(dataci, _colNames, _depColNames);

        //model expected values - get the coefficients
        //use normal equations regression
        Vector<double> p = MultipleRegression.NormalEquations(x, y);
        //but note that this runs without errors in more cases but still does not give good results
        //Vector<double> p = MultipleRegression.QR(x, y);
        if (p.Count != ci.Row(_scoreRows - 1).Count)
        {
            //185 same as other analysis
            ErrorMessage = "The scoring and training datasets have different numbers of columns.";
            return;
        }

        //get the predicted yhats
        Vector<double> yhat = GetYHatandSetQTPred(y.Count, x, p, ci.Row(_scoreRows - 1).ToArray());
        //get the durbin-watson d statistic
        double d = GetDurbinWatson(y, yhat);
        //sum of the square of the error (between the predicted, yhat, and observed, y)
        double SSE = Distance.SSD(yhat, y);
        double rSquared = GoodnessOfFit.RSquared(yhat, y);
        //sum of the square of the regression (between the predicted, yhat, and observed mean)
        //the mean is loop invariant, so it is computed once
        double yMean = y.Mean();
        double SSR = 0;
        for (int i = 0; i < yhat.Count; i++)
        {
            SSR += Math.Pow(yhat[i] - yMean, 2);
        }

        //set joint vars properties
        //degrees freedom (int versions feed the distribution helpers)
        int idfR = x.ColumnCount - 1;
        int idfE = x.RowCount - x.ColumnCount;
        double dfR = idfR;
        double dfE = idfE;
        double MSR = SSR / dfR;
        double MSE = SSE / dfE;
        //standard error of the regression is the root of the mean square error
        double s = Math.Sqrt(MSE);
        double FValue = MSR / MSE;
        double adjRSquared = 1 - ((x.RowCount - 1) * (MSE / (SSE + SSR)));
        double pValue = Shared.GetPValueForFDist(idfR, idfE, FValue);
        //correct 2 tailed t test
        double dbCI = CalculatorHelpers.GetConfidenceIntervalProb(_confidenceInt);
        double tCriticalValue = ExcelFunctions.TInv(dbCI, idfE);

        //set each coeff properties
        //use matrix math to get the standard error of coefficients: inverse of x'x
        Matrix<double> xxminus1 = x.Transpose().Multiply(x).Inverse();
        //coeffs st error
        double[] xiSE = new double[x.ColumnCount];
        //coeff tstats
        double[] xiT = new double[x.ColumnCount];
        //lower value for pvalue
        double[] xiP = new double[x.ColumnCount];
        for (int i = 0; i < x.ColumnCount; i++)
        {
            //use the matrix techniques shown on p 717 of Mendenhall and Sincich
            double sxx = s * Math.Sqrt(xxminus1[i, i]);
            xiSE[i] = sxx;
            xiT[i] = p[i] / sxx;
            xiP[i] = Shared.GetPValueForTDist(idfE, xiT[i], 0, 1);
        }

        bool isAnova = _subalgorithm == Calculator1.MATH_SUBTYPES.subalgorithm8.ToString();
        double FCriticalValue = 0;
        string FGreaterFCritical = string.Empty;
        if (isAnova)
        {
            //anova regression
            //anova critical fvalue test
            FCriticalValue = ExcelFunctions.FInv(dbCI, idfR, idfE);
            FGreaterFCritical = (FValue > FCriticalValue) ? "true" : "false";
            SetAnovaIntervals(0, p, xiSE, tCriticalValue);
            SetAnovaIntervals(1, p, xiSE, tCriticalValue);
            SetAnovaIntervals(2, p, xiSE, tCriticalValue);
        }
        else
        {
            //set QTM ci and pi intervals
            SetQTIntervals(0, s, xxminus1, ci.Row(_scoreRows - 1).ToArray(), p, tCriticalValue);
            SetQTIntervals(1, s, xxminus1, ci.Row(_scoreRows - 2).ToArray(), p, tCriticalValue);
            SetQTIntervals(2, s, xxminus1, ci.Row(_scoreRows - 3).ToArray(), p, tCriticalValue);
        }

        //add the data to a string builder
        StringBuilder sb = new StringBuilder();
        sb.AppendLine("regression results");
        //dep var has to be in the 4 column always
        sb.AppendLine(string.Concat("dependent variable: ", _colNames[3]));

        //anova table
        sb.AppendLine(Shared.GetLine(new string[] { "source", "df", "SS", "MS" }, true));
        sb.AppendLine(Shared.GetLine(new string[] { "model", dfR.ToString("F0"), SSR.ToString("F4"), MSR.ToString("F4") }, false));
        sb.AppendLine(Shared.GetLine(new string[] { "error  ", dfE.ToString("F0"), SSE.ToString("F4"), MSE.ToString("F4") }, false));
        sb.AppendLine(Shared.GetLine(new string[] { "total    ", (dfR + dfE).ToString("F0"), (SSR + SSE).ToString("F4") }, false));
        sb.AppendLine(string.Empty);

        //goodness of fit
        sb.AppendLine(Shared.GetLine(new string[] { "R-squared", rSquared.ToString("F4"), "Adj R-squared", adjRSquared.ToString("F4") }, false));
        sb.AppendLine(Shared.GetLine(new string[] { "F value", FValue.ToString("F4"), "prob > F", pValue.ToString("F4") }, false));
        sb.AppendLine(string.Empty);

        //coefficient table (the former i == 0 special case was identical to the general case)
        sb.AppendLine(Shared.GetLine(new string[] { GetName("variable"), "coefficient", "stand error", "T-ratio", "prob > T" }, true));
        for (int i = 0; i < p.Count; i++)
        {
            sb.AppendLine(Shared.GetLine(new string[]
            {
                GetName(_depColNames[i]),
                p[i].ToString("F5"),
                xiSE[i].ToString("F4"),
                xiT[i].ToString("F4"),
                xiP[i].ToString("F4")
            }, false));
        }
        sb.AppendLine(Shared.GetLine(new string[] { "durbin-watson: ", d.ToString("F4") }, false));

        //interval tables
        string lowerLabel = string.Concat("lower ", _confidenceInt.ToString(), "%");
        string upperLabel = string.Concat("upper ", _confidenceInt.ToString(), "%");
        if (isAnova)
        {
            sb.AppendLine(Shared.GetLine(new string[] { "F Critical Value", FCriticalValue.ToString("F5"), "F > F Critical", FGreaterFCritical }, true));
            sb.AppendLine(Shared.GetLine(new string[] { "estimate", "predicted", lowerLabel, upperLabel }, true));
            sb.AppendLine(Shared.GetLine(new string[] { "Col 0 Mean CI ", QTPredicted.ToString("F4"), QTL.ToString("F4"), QTU.ToString("F4") }, false));
            sb.AppendLine(Shared.GetLine(new string[] { "Col 1 - 0 Mean CI ", QTPredicted10.ToString("F4"), QTL10.ToString("F4"), QTU10.ToString("F4") }, false));
            sb.AppendLine(Shared.GetLine(new string[] { "Col 2 - 0 Mean CI ", QTPredicted20.ToString("F4"), QTL20.ToString("F4"), QTU20.ToString("F4") }, false));
        }
        else
        {
            sb.AppendLine(Shared.GetLine(new string[] { "estimate", "predicted", lowerLabel, upperLabel }, true));
            sb.AppendLine(Shared.GetLine(new string[] { "QTM CI ", QTPredicted.ToString("F4"), QTL.ToString("F4"), QTU.ToString("F4") }, false));
            sb.AppendLine(Shared.GetLine(new string[] { "QTM PI ", QTPredicted.ToString("F4"), (QTPredicted - QTPI).ToString("F4"), (QTPredicted + QTPI).ToString("F4") }, false));
            //NOTE(review): the "10" and "20" intervals are computed above from ci rows
            //_scoreRows - 2 and _scoreRows - 3, yet they are labelled data.Count - 2 and
            //data.Count - 1 respectively; confirm the labels point at the intended rows
            sb.AppendLine(Shared.GetLine(new string[] { string.Concat("row ", data.Count - 2) }, true));
            sb.AppendLine(Shared.GetLine(new string[] { "CI ", QTPredicted10.ToString("F4"), QTL10.ToString("F4"), QTU10.ToString("F4") }, false));
            sb.AppendLine(Shared.GetLine(new string[] { "PI ", QTPredicted10.ToString("F4"), (QTPredicted10 - QTPI10).ToString("F4"), (QTPredicted10 + QTPI10).ToString("F4") }, false));
            sb.AppendLine(Shared.GetLine(new string[] { string.Concat("row ", data.Count - 1) }, true));
            sb.AppendLine(Shared.GetLine(new string[] { "CI ", QTPredicted20.ToString("F4"), QTL20.ToString("F4"), QTU20.ToString("F4") }, false));
            sb.AppendLine(Shared.GetLine(new string[] { "PI ", QTPredicted20.ToString("F4"), (QTPredicted20 - QTPI20).ToString("F4"), (QTPredicted20 + QTPI20).ToString("F4") }, false));
        }

        //when MathResult already holds a url the report is saved there, otherwise it is stored inline
        //(ordinal, null-safe check: an unset MathResult now receives the report instead of
        //raising a null reference that discarded it)
        if (!string.IsNullOrEmpty(this.MathResult)
            && this.MathResult.StartsWith("http", StringComparison.OrdinalIgnoreCase))
        {
            string sError = string.Empty;
            CalculatorHelpers.SaveTextInURI(
                _params.ExtensionDocToCalcURI, sb.ToString(), this.MathResult, out sError);
            if (!string.IsNullOrEmpty(sError))
            {
                this.MathResult += sError;
            }
        }
        else
        {
            this.MathResult = sb.ToString();
        }
    }
    catch (Exception ex)
    {
        //callers inspect ErrorMessage rather than catching exceptions from the task
        this.ErrorMessage = ex.Message;
    }
}
/// <summary>
/// Builds a vector from <paramref name="subIndicatorData"/> and normalizes it with the
/// technique named by <paramref name="subIndNormType"/>.
/// </summary>
/// <param name="subIndNormType">
/// Name of a CalculatorHelpers.NORMALIZATION_TYPES member. A null or empty value, "none",
/// or any name not handled below returns the data unchanged.
/// </param>
/// <param name="startValue">
/// Divisor for the weights technique and the value passed to the t distribution helper
/// for the pnorm technique; not used by the other techniques.
/// </param>
/// <param name="scaleUp4Digits">
/// When true each normalized value is multiplied by 10,000 and rounded to 2 decimals;
/// when false the element-wise techniques pass each value through
/// CalculatorHelpers.CheckForNaNandRound4 instead (pnorm values are left as computed).
/// </param>
/// <param name="subIndicatorData">The raw sub indicator values.</param>
/// <returns>The normalized vector.</returns>
public static Vector<double> GetNormalizedVector(
    string subIndNormType, double startValue,
    bool scaleUp4Digits, double[] subIndicatorData)
{
    //NOTE(review): Math.NET documents Build.Dense(T[]) as binding directly to the array,
    //so the element-wise techniques presumably overwrite subIndicatorData - confirm callers expect that
    var summary = new MathNet.Numerics.Statistics.DescriptiveStatistics(subIndicatorData);
    Vector<double> normalized = Vector<double>.Build.Dense(subIndicatorData);

    if (string.IsNullOrEmpty(subIndNormType)
        || subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.none.ToString())
    {
        //data has already been normalized
        return normalized;
    }

    //pick the element-wise formula; pnorm is the only whole-vector technique
    Func<double, double> formula;
    if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.zscore.ToString())
    {
        //z-score: (x - mean(x)) / stddev(x)
        formula = v => (v - summary.Mean) / summary.StandardDeviation;
    }
    else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.minmax.ToString())
    {
        //min-max: (x - min(x)) / (max(x) - min(x))
        formula = v => (v - summary.Minimum) / (summary.Maximum - summary.Minimum);
    }
    else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.logistic.ToString())
    {
        //logistic: 1 / (1 + exp(-x))
        formula = v => MathNet.Numerics.SpecialFunctions.Logistic(v);
    }
    else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.logit.ToString())
    {
        //logit: inverse of logistic for y between 0 and 1
        //this assumes x is actually y
        formula = v => MathNet.Numerics.SpecialFunctions.Logit(v);
    }
    else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.tanh.ToString())
    {
        //hyperbolic tangent
        formula = v => MathNet.Numerics.Trig.Tanh(v);
    }
    else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.pnorm.ToString())
    {
        //p value for ttest with n-1
        double pValue = Shared.GetPValueForTDist(
            normalized.Count - 1, startValue, summary.Mean, summary.Variance);
        pValue = CalculatorHelpers.CheckForNaNandRound4(pValue);
        //p norm
        normalized = normalized.Normalize(pValue);
        if (scaleUp4Digits)
        {
            //scale the 4 digits by multiplying by 10,000
            for (int i = 0; i < normalized.Count; i++)
            {
                normalized[i] = Math.Round(normalized[i] * 10000.00, 2);
            }
        }
        return normalized;
    }
    else if (subIndNormType == CalculatorHelpers.NORMALIZATION_TYPES.weights.ToString())
    {
        //rand 2016 technique
        formula = v => v / startValue;
    }
    else
    {
        //indicator 2 in drr1 (p and q, not norm and index)
        return normalized;
    }

    for (int i = 0; i < normalized.Count; i++)
    {
        normalized[i] = formula(normalized[i]);
        //either scale the 4 digits by multiplying by 10,000, or clean up and round
        normalized[i] = scaleUp4Digits
            ? Math.Round(normalized[i] * 10000.00, 2)
            : CalculatorHelpers.CheckForNaNandRound4(normalized[i]);
    }
    //add them to parent cat index
    return normalized;
}