/// <summary> /// Generate dynamic correlations and/or covariance's as <see cref="string"/>s. /// </summary> /// <param name="ranges">The <see cref="Range"/>s of the data that needs correlations and/or covariance's.</param> /// <param name="doCalculate">A collection of <see cref="bool"/>s that indicate which summary statistic has to be calculated.</param> public CorrelationCovariance(List <Range> ranges, SummaryStatisticsBool doCalculate) { // Functions to calculate the values for the correlation and/or covariance. Func <object, object, string> funcCorr = (name1, name2) => "CORREL(" + name1 + "," + name2 + ")"; Func <object, object, string> funcCova = (name1, name2) => "COVARIANCE.S(" + name1 + "," + name2 + ")"; if (doCalculate.Correlation && doCalculate.Covariance) { Correlations = Compute(funcCorr, ranges); Covariances = Compute(funcCova, ranges); HasCorrelations = true; HasCovariances = true; } else if (doCalculate.Correlation) { Correlations = Compute(funcCorr, ranges); HasCorrelations = true; } else if (doCalculate.Covariance) { Covariances = Compute(funcCova, ranges); HasCovariances = true; } Dimension = (int)Math.Sqrt(Correlations?.Count ?? Covariances?.Count ?? 0); }
/// <summary> /// Check the input so everything is working like intended. /// </summary> /// <param name="dataSet">The <see cref="DataSet"/> source.</param> /// <param name="messageBoxCaption">The caption for the <see cref="MessageBox"/>.</param> /// <param name="doIncludeX">A <see cref="IReadOnlyList{T}"/> of <see cref="bool"/>s that corresponds to which <see cref="Data"/> in the <see cref="DataSet.DataList"/> should be included.</param> /// <param name="checkX">The format the <see cref="Data"/> should be in.</param> /// <param name="fewestX">(Optional) The fewest amount of <see cref="Data"/> that has to be included. Default is 1.</param> /// <param name="doIncludeY">(Optional) A <see cref="IReadOnlyList{T}"/> of <see cref="bool"/>s that corresponds to which <see cref="Data"/> in the <see cref="DataSet.DataList"/> should be included. Default is null.</param> /// <param name="checkY">(Optional) The format the <see cref="Data"/> should be in. Default is <see cref="DefaultCheck.Unknown"/>.</param> /// <param name="fewestY">(Optional) The fewest amount of <see cref="Data"/> that has to be included. Default is 0.</param> /// <param name="doCalculate">(Optional) The indicator for which Summary Statistics should be calculated. Default is null.</param> /// <param name="dataType">(Optional) The type of the <see cref="Data"/>. Default is <see cref="DataType.Data"/>.</param> /// <param name="isCategory">(Optional) Will display 'category' instead of 'variable' when true. Default is false.</param> /// <param name="doIsNumericCheck">(Optional) Will do the numeric checks when true. Default is true.</param> /// <remarks> /// <para><see cref="doIncludeX"/> is used for single (value) column checks, X-column checks and category column checks.</para> /// <para><see cref="doIncludeY"/> is used for Y-column checks and value column checks if <see cref="doIncludeX"/> is used for category column checks.</para> /// <para>The similar is true for <see cref="checkX"/> and <see cref="checkY"/>.</para> /// </remarks> public CheckInput(DataSet dataSet, string messageBoxCaption, IReadOnlyList <bool> doIncludeX, DefaultCheck checkX, int fewestX = 1, IReadOnlyList <bool> doIncludeY = null, DefaultCheck checkY = DefaultCheck.Unknown, int fewestY = 0, SummaryStatisticsBool doCalculate = null, DataType dataType = DataType.Data, bool isCategory = false, bool doIsNumericCheck = true) { // Check if there is Data. if (dataSet == null) { MessageBox.Show("There is no data to work with.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error); Successful = false; return; } // Check if Summary Statistics are calculated properly. if (doCalculate != null && !doCalculate.AtLeastOne) { MessageBox.Show("Nothing is being calculated. Select at least one.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error); Successful = false; return; } // Initialize some variables. _messageBoxCaption = messageBoxCaption; if (doIncludeX != null && doIncludeY == null) { doIncludeY = Enumerable.Repeat(false, doIncludeX.Count).ToList(); } // Create the Lists that holds the Data. Categories = new List <Data>(); Values = new List <Data>(); // Check for each Data in the DataSet if it's included. for (var i = 0; i < dataSet.DataList.Count; i++) { // If it's not included, go to the next one. if (doIncludeY != null && doIncludeX != null && !doIncludeX[i] && !doIncludeY[i]) { continue; } // Check if the variables are in the correct format. If they aren't, warn the user. if (doIncludeX != null && doIsNumericCheck && doIncludeX[i]) { switch (checkX) { case DefaultCheck.All: break; case DefaultCheck.Numeric: if (!Numeric(dataSet.DataList[i])) { return; } break; case DefaultCheck.Nonnumeric: if (!Nonnumeric(dataSet.DataList[i])) { return; } break; case DefaultCheck.None: break; case DefaultCheck.LastState: break; case DefaultCheck.Unknown: break; default: throw new ArgumentOutOfRangeException(nameof(checkX), checkX, null); } } // Check if the variables are in the correct format. If they aren't, warn the user. if (doIncludeY != null && doIsNumericCheck && doIncludeY[i]) { switch (checkY) { case DefaultCheck.All: break; case DefaultCheck.Numeric: if (!Numeric(dataSet.DataList[i])) { return; } break; case DefaultCheck.Nonnumeric: if (!Nonnumeric(dataSet.DataList[i])) { return; } break; case DefaultCheck.None: break; case DefaultCheck.LastState: break; case DefaultCheck.Unknown: break; default: throw new ArgumentOutOfRangeException(nameof(checkX), checkX, null); } } // Add the included Data to the list. if (checkY != DefaultCheck.Unknown) { if (doIncludeX != null && doIncludeX[i]) { Categories.Add(dataSet.DataList[i]); } if (doIncludeY != null && doIncludeY[i]) { Values.Add(dataSet.DataList[i]); } } else { if (doIncludeX != null && doIncludeX[i]) { Values.Add(dataSet.DataList[i]); } } // If the type of the Data is a Dummy, check for blank cells. if (dataType != DataType.Dummy) { continue; } foreach (var d in dataSet.DataList[i].GetValuesList()) { if (d != null) { continue; } var result = MessageBox.Show("'" + dataSet.DataList[i].Name + $"' has blank data and will show some invalid data.{Environment.NewLine}{Environment.NewLine}Do you wish to continue anyway?", "NoruST - " + messageBoxCaption, MessageBoxButtons.YesNo, MessageBoxIcon.Warning); if (result == DialogResult.Yes) { break; } Successful = false; return; } } // Check if the least amount of Data is selected. if (checkY != DefaultCheck.Unknown) { if ((fewestX < 0 && Categories.Count != Math.Abs(fewestX)) || Categories.Count < fewestX) { MessageBox.Show((fewestX < 0 ? "Exactly " : Categories.Count < fewestX ? "At least " : "") + Math.Abs(fewestX) + " " + (Math.Abs(fewestX) == 1 ? isCategory ? "category" : "variable" + " has" : isCategory ? "categories" : "variables" + " have") + " to be selected.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error); Successful = false; return; } if (checkY != DefaultCheck.Unknown && ((fewestY < 0 && Values.Count != Math.Abs(fewestY)) || Values.Count < fewestY)) { MessageBox.Show((fewestY < 0 ? "Exactly " : Values.Count < fewestY ? "At least " : "") + Math.Abs(fewestY) + " " + (Math.Abs(fewestY) == 1 ? "variable has" : "variables have") + " to be selected.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error); Successful = false; return; } } else { if ((fewestX < 0 && Values.Count != Math.Abs(fewestX)) || Values.Count < fewestX) { MessageBox.Show((fewestX < 0 ? "Exactly " : Values.Count < fewestX ? "At least " : "") + Math.Abs(fewestX) + " " + (Math.Abs(fewestX) == 1 ? "variable has" : "variables have") + " to be selected.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error); Successful = false; return; } } // If all checks succeeded, set it to true. Successful = true; }
/// <summary> /// Generate dynamic summary statistics as <see cref="string"/>s. /// </summary> /// <param name="range">The <see cref="Microsoft.Office.Interop.Excel.Range"/> of the data that needs summary statistics.</param> /// <param name="doCalculate">Defines which variables in the summary have to be calculated.</param> /// <param name="meanConfidenceLevel">(Optional) The confidence level for the <see cref="Mean"/>. Default is 0.</param> /// <param name="standardDeviationConfidenceLevel">(Optional) The confidence level for the <see cref="StandardDeviation"/>. Default is 0.</param> /// <param name="numberOfBins">(Optional) If a fixed number of bins, this is the value. Default is -1.</param> /// <remarks>In order for these calculations to be only based on the confidence level (this will always be a fixed value), the methods <see cref="SetMeanAlpha"/> and/or <see cref="SetStandardDeviationAlpha"/> must be called.</remarks> public SummaryStatistics(Range range, SummaryStatisticsBool doCalculate, int meanConfidenceLevel = 0, int standardDeviationConfidenceLevel = 0, int numberOfBins = -1) { // The DataSet name in Excel and functions. Name = ((Name)range.Name).Name; var function = Globals.ExcelAddIn.Application.WorksheetFunction; // Base if (doCalculate.Mean || doCalculate.BoxWhiskerPlot || doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval) { Mean = "AVERAGE(" + Name + ")"; } if (doCalculate.Variance) { Variance = "VAR.S(" + Name + ")"; } if (doCalculate.StandardDeviation || doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval) { StandardDeviation = "STDEV.S(" + Name + ")"; } if (doCalculate.Minimum || doCalculate.Range || doCalculate.BoxWhiskerPlot || doCalculate.Histogram) { Minimum = "MIN(" + Name + ")"; } if (doCalculate.Quartile1 || doCalculate.InterquartileRange || doCalculate.BoxWhiskerPlot) { Quartile1 = "QUARTILE.INC(" + Name + ",1)"; } if (doCalculate.Median || doCalculate.BoxWhiskerPlot) { Median = "MEDIAN(" + Name + ")"; } if (doCalculate.Quartile3 || doCalculate.InterquartileRange || doCalculate.BoxWhiskerPlot) { Quartile3 = "QUARTILE.INC(" + Name + ",3)"; } if (doCalculate.Maximum || doCalculate.Range || doCalculate.BoxWhiskerPlot || doCalculate.Histogram) { Maximum = "MAX(" + Name + ")"; } if (doCalculate.InterquartileRange || doCalculate.BoxWhiskerPlot) { InterquartileRange = Quartile3 + "-" + Quartile1; } if (doCalculate.Skewness) { Skewness = "SKEW(" + Name + ")"; } if (doCalculate.Kurtosis) { Kurtosis = "KURT(" + Name + ")"; } if (doCalculate.MeanAbsoluteDeviation) { MeanAbsoluteDeviation = "AVEDEV(" + Name + ")"; } if (doCalculate.Mode) { Mode = "MODE.SNGL(" + Name + ")"; try { Globals.ExcelAddIn.Application.WorksheetFunction.Mode_Sngl(range); HasMode = true; } catch { HasMode = false; } } if (doCalculate.Range || doCalculate.Histogram) { Range = Maximum + "-" + Minimum; } if (doCalculate.Count || doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval || doCalculate.Histogram) { Count = "COUNT(" + Name + ")"; } if (doCalculate.Sum) { Sum = "SUM(" + Name + ")"; } if (doCalculate.Outliers || doCalculate.BoxWhiskerPlot) { Outliers = new List <string>(); for (var i = 1; i <= range.Rows.Count; i++) { Outliers.Add("IF(OR(INDEX(" + Name + "," + i + ")<" + Quartile1 + "-1.5*(" + InterquartileRange + "),INDEX(" + Name + "," + i + ")>" + Quartile3 + "+1.5*(" + InterquartileRange + ")),IF(INDEX(" + Name + "," + i + ")<>\"\",INDEX(" + Name + "," + i + "),NA()),NA())"); } } // Box-Whisker Plot if (doCalculate.BoxWhiskerPlot) { Quartile1Median = Median + "-" + Quartile1; MedianQuartile3 = Quartile3 + "-" + Median; MinusWhisker = "IF(" + Quartile1 + "-" + Minimum + "<=1.5*(" + InterquartileRange + ")," + Quartile1 + "-" + Minimum + ",1.5*(" + InterquartileRange + "))"; PlusWhisker = "IF(" + Maximum + "-" + Quartile3 + "<=1.5*(" + InterquartileRange + ")," + Maximum + "-" + Quartile3 + ",1.5*(" + InterquartileRange + "))"; } // Confidence Interval if (doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval) { DegreesOfFreedom = Count + "-1"; } if (doCalculate.MeanConfidenceInterval) { MeanConfidenceLevel = meanConfidenceLevel / 100.0; MeanConfidenceInterval = "CONFIDENCE.T(" + MeanAlpha + "," + StandardDeviation + "," + Count + ")"; MeanLowerLimit = Mean + "-" + MeanConfidenceInterval; MeanUpperLimit = Mean + "+" + MeanConfidenceInterval; } if (doCalculate.StandardDeviationConfidenceInterval) { StandardDeviationConfidenceLevel = standardDeviationConfidenceLevel / 100.0; StandardDeviationConfidenceIntervalLowerLimit = "CHISQ.INV(1-(" + StandardDeviationAlpha + ")/2," + DegreesOfFreedom + ")"; StandardDeviationConfidenceIntervalUpperLimit = "CHISQ.INV((" + StandardDeviationAlpha + ")/2," + DegreesOfFreedom + ")"; StandardDeviationLowerLimit = StandardDeviation + "*SQRT((" + DegreesOfFreedom + ")/" + StandardDeviationConfidenceIntervalLowerLimit + ")"; StandardDeviationUpperLimit = StandardDeviation + "*SQRT((" + DegreesOfFreedom + ")/" + StandardDeviationConfidenceIntervalUpperLimit + ")"; } // Histogram if (doCalculate.Histogram) { NumberOfBins = numberOfBins < 0 ? (int)function.RoundUp(Math.Sqrt(function.Count(range)), 0) : numberOfBins; BinRange = "ROUND((" + Range + ")/" + NumberOfBins + ",0)"; } }