예제 #1
0
        /// <summary>
        /// Generate dynamic correlations and/or covariance's as <see cref="string"/>s.
        /// </summary>
        /// <param name="ranges">The <see cref="Range"/>s of the data that needs correlations and/or covariance's.</param>
        /// <param name="doCalculate">A collection of <see cref="bool"/>s that indicate which summary statistic has to be calculated.</param>
        public CorrelationCovariance(List <Range> ranges, SummaryStatisticsBool doCalculate)
        {
            // Functions to calculate the values for the correlation and/or covariance.
            Func <object, object, string> funcCorr = (name1, name2) => "CORREL(" + name1 + "," + name2 + ")";
            Func <object, object, string> funcCova = (name1, name2) => "COVARIANCE.S(" + name1 + "," + name2 + ")";

            if (doCalculate.Correlation && doCalculate.Covariance)
            {
                Correlations    = Compute(funcCorr, ranges);
                Covariances     = Compute(funcCova, ranges);
                HasCorrelations = true;
                HasCovariances  = true;
            }
            else if (doCalculate.Correlation)
            {
                Correlations    = Compute(funcCorr, ranges);
                HasCorrelations = true;
            }
            else if (doCalculate.Covariance)
            {
                Covariances    = Compute(funcCova, ranges);
                HasCovariances = true;
            }

            Dimension = (int)Math.Sqrt(Correlations?.Count ?? Covariances?.Count ?? 0);
        }
예제 #2
0
        /// <summary>
        /// Check the input so everything is working like intended.
        /// </summary>
        /// <param name="dataSet">The <see cref="DataSet"/> source.</param>
        /// <param name="messageBoxCaption">The caption for the <see cref="MessageBox"/>.</param>
        /// <param name="doIncludeX">A <see cref="IReadOnlyList{T}"/> of <see cref="bool"/>s that corresponds to which <see cref="Data"/> in the <see cref="DataSet.DataList"/> should be included.</param>
        /// <param name="checkX">The format the <see cref="Data"/> should be in.</param>
        /// <param name="fewestX">(Optional) The fewest amount of <see cref="Data"/> that has to be included. Default is 1.</param>
        /// <param name="doIncludeY">(Optional) A <see cref="IReadOnlyList{T}"/> of <see cref="bool"/>s that corresponds to which <see cref="Data"/> in the <see cref="DataSet.DataList"/> should be included. Default is null.</param>
        /// <param name="checkY">(Optional) The format the <see cref="Data"/> should be in. Default is <see cref="DefaultCheck.Unknown"/>.</param>
        /// <param name="fewestY">(Optional) The fewest amount of <see cref="Data"/> that has to be included. Default is 0.</param>
        /// <param name="doCalculate">(Optional) The indicator for which Summary Statistics should be calculated. Default is null.</param>
        /// <param name="dataType">(Optional) The type of the <see cref="Data"/>. Default is <see cref="DataType.Data"/>.</param>
        /// <param name="isCategory">(Optional) Will display 'category' instead of 'variable' when true. Default is false.</param>
        /// <param name="doIsNumericCheck">(Optional) Will do the numeric checks when true. Default is true.</param>
        /// <remarks>
        /// <para><see cref="doIncludeX"/> is used for single (value) column checks, X-column checks and category column checks.</para>
        /// <para><see cref="doIncludeY"/> is used for Y-column checks and value column checks if <see cref="doIncludeX"/> is used for category column checks.</para>
        /// <para>The similar is true for <see cref="checkX"/> and <see cref="checkY"/>.</para>
        /// </remarks>
        public CheckInput(DataSet dataSet, string messageBoxCaption, IReadOnlyList <bool> doIncludeX, DefaultCheck checkX, int fewestX = 1, IReadOnlyList <bool> doIncludeY = null, DefaultCheck checkY = DefaultCheck.Unknown, int fewestY = 0, SummaryStatisticsBool doCalculate = null, DataType dataType = DataType.Data, bool isCategory = false, bool doIsNumericCheck = true)
        {
            // Check if there is Data.
            if (dataSet == null)
            {
                MessageBox.Show("There is no data to work with.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                Successful = false;
                return;
            }

            // Check if Summary Statistics are calculated properly.
            if (doCalculate != null && !doCalculate.AtLeastOne)
            {
                MessageBox.Show("Nothing is being calculated. Select at least one.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                Successful = false;
                return;
            }

            // Initialize some variables.
            _messageBoxCaption = messageBoxCaption;
            if (doIncludeX != null && doIncludeY == null)
            {
                doIncludeY = Enumerable.Repeat(false, doIncludeX.Count).ToList();
            }

            // Create the Lists that holds the Data.
            Categories = new List <Data>();
            Values     = new List <Data>();

            // Check for each Data in the DataSet if it's included.
            for (var i = 0; i < dataSet.DataList.Count; i++)
            {
                //  If it's not included, go to the next one.
                if (doIncludeY != null && doIncludeX != null && !doIncludeX[i] && !doIncludeY[i])
                {
                    continue;
                }

                // Check if the variables are in the correct format. If they aren't, warn the user.
                if (doIncludeX != null && doIsNumericCheck && doIncludeX[i])
                {
                    switch (checkX)
                    {
                    case DefaultCheck.All:
                        break;

                    case DefaultCheck.Numeric:
                        if (!Numeric(dataSet.DataList[i]))
                        {
                            return;
                        }
                        break;

                    case DefaultCheck.Nonnumeric:
                        if (!Nonnumeric(dataSet.DataList[i]))
                        {
                            return;
                        }
                        break;

                    case DefaultCheck.None:
                        break;

                    case DefaultCheck.LastState:
                        break;

                    case DefaultCheck.Unknown:
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(checkX), checkX, null);
                    }
                }

                // Check if the variables are in the correct format. If they aren't, warn the user.
                if (doIncludeY != null && doIsNumericCheck && doIncludeY[i])
                {
                    switch (checkY)
                    {
                    case DefaultCheck.All:
                        break;

                    case DefaultCheck.Numeric:
                        if (!Numeric(dataSet.DataList[i]))
                        {
                            return;
                        }
                        break;

                    case DefaultCheck.Nonnumeric:
                        if (!Nonnumeric(dataSet.DataList[i]))
                        {
                            return;
                        }
                        break;

                    case DefaultCheck.None:
                        break;

                    case DefaultCheck.LastState:
                        break;

                    case DefaultCheck.Unknown:
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(checkX), checkX, null);
                    }
                }

                // Add the included Data to the list.
                if (checkY != DefaultCheck.Unknown)
                {
                    if (doIncludeX != null && doIncludeX[i])
                    {
                        Categories.Add(dataSet.DataList[i]);
                    }
                    if (doIncludeY != null && doIncludeY[i])
                    {
                        Values.Add(dataSet.DataList[i]);
                    }
                }
                else
                {
                    if (doIncludeX != null && doIncludeX[i])
                    {
                        Values.Add(dataSet.DataList[i]);
                    }
                }

                // If the type of the Data is a Dummy, check for blank cells.
                if (dataType != DataType.Dummy)
                {
                    continue;
                }
                foreach (var d in dataSet.DataList[i].GetValuesList())
                {
                    if (d != null)
                    {
                        continue;
                    }

                    var result = MessageBox.Show("'" + dataSet.DataList[i].Name + $"' has blank data and will show some invalid data.{Environment.NewLine}{Environment.NewLine}Do you wish to continue anyway?", "NoruST - " + messageBoxCaption, MessageBoxButtons.YesNo, MessageBoxIcon.Warning);

                    if (result == DialogResult.Yes)
                    {
                        break;
                    }

                    Successful = false;
                    return;
                }
            }

            // Check if the least amount of Data is selected.
            if (checkY != DefaultCheck.Unknown)
            {
                if ((fewestX < 0 && Categories.Count != Math.Abs(fewestX)) || Categories.Count < fewestX)
                {
                    MessageBox.Show((fewestX < 0 ? "Exactly " : Categories.Count < fewestX ? "At least " : "") + Math.Abs(fewestX) + " " + (Math.Abs(fewestX) == 1 ? isCategory ? "category" : "variable" + " has" : isCategory ? "categories" : "variables" + " have") + " to be selected.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                    Successful = false;
                    return;
                }
                if (checkY != DefaultCheck.Unknown && ((fewestY < 0 && Values.Count != Math.Abs(fewestY)) || Values.Count < fewestY))
                {
                    MessageBox.Show((fewestY < 0 ? "Exactly " : Values.Count < fewestY ? "At least " : "") + Math.Abs(fewestY) + " " + (Math.Abs(fewestY) == 1 ? "variable has" : "variables have") + " to be selected.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                    Successful = false;
                    return;
                }
            }
            else
            {
                if ((fewestX < 0 && Values.Count != Math.Abs(fewestX)) || Values.Count < fewestX)
                {
                    MessageBox.Show((fewestX < 0 ? "Exactly " : Values.Count < fewestX ? "At least " : "") + Math.Abs(fewestX) + " " + (Math.Abs(fewestX) == 1 ? "variable has" : "variables have") + " to be selected.", "NoruST - " + messageBoxCaption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                    Successful = false;
                    return;
                }
            }

            // If all checks succeeded, set it to true.
            Successful = true;
        }
예제 #3
0
        /// <summary>
        /// Generate dynamic summary statistics as <see cref="string"/>s.
        /// </summary>
        /// <param name="range">The <see cref="Microsoft.Office.Interop.Excel.Range"/> of the data that needs summary statistics.</param>
        /// <param name="doCalculate">Defines which variables in the summary have to be calculated.</param>
        /// <param name="meanConfidenceLevel">(Optional) The confidence level for the <see cref="Mean"/>. Default is 0.</param>
        /// <param name="standardDeviationConfidenceLevel">(Optional) The confidence level for the <see cref="StandardDeviation"/>. Default is 0.</param>
        /// <param name="numberOfBins">(Optional) If a fixed number of bins, this is the value. Default is -1.</param>
        /// <remarks>In order for these calculations to be only based on the confidence level (this will always be a fixed value), the methods <see cref="SetMeanAlpha"/> and/or <see cref="SetStandardDeviationAlpha"/> must be called.</remarks>
        public SummaryStatistics(Range range, SummaryStatisticsBool doCalculate, int meanConfidenceLevel = 0, int standardDeviationConfidenceLevel = 0, int numberOfBins = -1)
        {
            // The DataSet name in Excel and functions.
            Name = ((Name)range.Name).Name;
            var function = Globals.ExcelAddIn.Application.WorksheetFunction;

            // Base
            if (doCalculate.Mean || doCalculate.BoxWhiskerPlot || doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval)
            {
                Mean = "AVERAGE(" + Name + ")";
            }
            if (doCalculate.Variance)
            {
                Variance = "VAR.S(" + Name + ")";
            }
            if (doCalculate.StandardDeviation || doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval)
            {
                StandardDeviation = "STDEV.S(" + Name + ")";
            }
            if (doCalculate.Minimum || doCalculate.Range || doCalculate.BoxWhiskerPlot || doCalculate.Histogram)
            {
                Minimum = "MIN(" + Name + ")";
            }
            if (doCalculate.Quartile1 || doCalculate.InterquartileRange || doCalculate.BoxWhiskerPlot)
            {
                Quartile1 = "QUARTILE.INC(" + Name + ",1)";
            }
            if (doCalculate.Median || doCalculate.BoxWhiskerPlot)
            {
                Median = "MEDIAN(" + Name + ")";
            }
            if (doCalculate.Quartile3 || doCalculate.InterquartileRange || doCalculate.BoxWhiskerPlot)
            {
                Quartile3 = "QUARTILE.INC(" + Name + ",3)";
            }
            if (doCalculate.Maximum || doCalculate.Range || doCalculate.BoxWhiskerPlot || doCalculate.Histogram)
            {
                Maximum = "MAX(" + Name + ")";
            }
            if (doCalculate.InterquartileRange || doCalculate.BoxWhiskerPlot)
            {
                InterquartileRange = Quartile3 + "-" + Quartile1;
            }
            if (doCalculate.Skewness)
            {
                Skewness = "SKEW(" + Name + ")";
            }
            if (doCalculate.Kurtosis)
            {
                Kurtosis = "KURT(" + Name + ")";
            }
            if (doCalculate.MeanAbsoluteDeviation)
            {
                MeanAbsoluteDeviation = "AVEDEV(" + Name + ")";
            }
            if (doCalculate.Mode)
            {
                Mode = "MODE.SNGL(" + Name + ")";
                try
                {
                    Globals.ExcelAddIn.Application.WorksheetFunction.Mode_Sngl(range);
                    HasMode = true;
                }
                catch
                {
                    HasMode = false;
                }
            }
            if (doCalculate.Range || doCalculate.Histogram)
            {
                Range = Maximum + "-" + Minimum;
            }
            if (doCalculate.Count || doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval || doCalculate.Histogram)
            {
                Count = "COUNT(" + Name + ")";
            }
            if (doCalculate.Sum)
            {
                Sum = "SUM(" + Name + ")";
            }
            if (doCalculate.Outliers || doCalculate.BoxWhiskerPlot)
            {
                Outliers = new List <string>();
                for (var i = 1; i <= range.Rows.Count; i++)
                {
                    Outliers.Add("IF(OR(INDEX(" + Name + "," + i + ")<" + Quartile1 + "-1.5*(" + InterquartileRange + "),INDEX(" + Name + "," + i + ")>" + Quartile3 + "+1.5*(" + InterquartileRange + ")),IF(INDEX(" + Name + "," + i + ")<>\"\",INDEX(" + Name + "," + i + "),NA()),NA())");
                }
            }

            // Box-Whisker Plot
            if (doCalculate.BoxWhiskerPlot)
            {
                Quartile1Median = Median + "-" + Quartile1;
                MedianQuartile3 = Quartile3 + "-" + Median;
                MinusWhisker    = "IF(" + Quartile1 + "-" + Minimum + "<=1.5*(" + InterquartileRange + ")," + Quartile1 + "-" + Minimum + ",1.5*(" + InterquartileRange + "))";
                PlusWhisker     = "IF(" + Maximum + "-" + Quartile3 + "<=1.5*(" + InterquartileRange + ")," + Maximum + "-" + Quartile3 + ",1.5*(" + InterquartileRange + "))";
            }

            // Confidence Interval
            if (doCalculate.MeanConfidenceInterval || doCalculate.StandardDeviationConfidenceInterval)
            {
                DegreesOfFreedom = Count + "-1";
            }
            if (doCalculate.MeanConfidenceInterval)
            {
                MeanConfidenceLevel    = meanConfidenceLevel / 100.0;
                MeanConfidenceInterval = "CONFIDENCE.T(" + MeanAlpha + "," + StandardDeviation + "," + Count + ")";
                MeanLowerLimit         = Mean + "-" + MeanConfidenceInterval;
                MeanUpperLimit         = Mean + "+" + MeanConfidenceInterval;
            }
            if (doCalculate.StandardDeviationConfidenceInterval)
            {
                StandardDeviationConfidenceLevel = standardDeviationConfidenceLevel / 100.0;
                StandardDeviationConfidenceIntervalLowerLimit = "CHISQ.INV(1-(" + StandardDeviationAlpha + ")/2," + DegreesOfFreedom + ")";
                StandardDeviationConfidenceIntervalUpperLimit = "CHISQ.INV((" + StandardDeviationAlpha + ")/2," + DegreesOfFreedom + ")";
                StandardDeviationLowerLimit = StandardDeviation + "*SQRT((" + DegreesOfFreedom + ")/" + StandardDeviationConfidenceIntervalLowerLimit + ")";
                StandardDeviationUpperLimit = StandardDeviation + "*SQRT((" + DegreesOfFreedom + ")/" + StandardDeviationConfidenceIntervalUpperLimit + ")";
            }

            // Histogram
            if (doCalculate.Histogram)
            {
                NumberOfBins = numberOfBins < 0 ? (int)function.RoundUp(Math.Sqrt(function.Count(range)), 0) : numberOfBins;
                BinRange     = "ROUND((" + Range + ")/" + NumberOfBins + ",0)";
            }
        }