Example #1
        /// <summary>
        /// This method computes and prints a variety of summary statistics for each column of the training data.
        /// </summary>
        public static void UnderstandData(DataSetColumns dataSetColumns)
        {
            //Calculate and print the statistics for each column
            var stats = MetricStatistics.CalculateMetricStatisticsAsync(dataSetColumns.CPUCount, DateTime.Now, TimeSpan.FromSeconds(1)).Result;

            Console.WriteLine("CPUCount Statistics:");
            Console.WriteLine(stats.ToString());
            stats = MetricStatistics.CalculateMetricStatisticsAsync(dataSetColumns.TotalMemory, DateTime.Now, TimeSpan.FromSeconds(1)).Result;
            Console.WriteLine("TotalMemory Statistics:");
            Console.WriteLine(stats.ToString());
            stats = MetricStatistics.CalculateMetricStatisticsAsync(dataSetColumns.SystemUpTime, DateTime.Now, TimeSpan.FromSeconds(1)).Result;
            Console.WriteLine("SystemUpTime Statistics:");
            Console.WriteLine(stats.ToString());
            stats = MetricStatistics.CalculateMetricStatisticsAsync(dataSetColumns.SumTriggers, DateTime.Now, TimeSpan.FromSeconds(1)).Result;
            Console.WriteLine("SumTriggers Statistics:");
            Console.WriteLine(stats.ToString());
        }
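        // Note: the calls above block on .Result to keep the example synchronous. From an async
        // caller the same work could simply be awaited; the sketch below is illustrative only
        // (UnderstandDataAsync is a hypothetical name, the types are the ones used above).
        public static async Task UnderstandDataAsync(DataSetColumns dataSetColumns)
        {
            var stats = await MetricStatistics.CalculateMetricStatisticsAsync(dataSetColumns.CPUCount, DateTime.Now, TimeSpan.FromSeconds(1));
            Console.WriteLine("CPUCount Statistics:");
            Console.WriteLine(stats);
        }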
        /// <summary>
        /// Computes statistics for a data series (metric)
        /// </summary>
        /// <param name="values">Serie of data</param>
        /// <param name="startTime">Begining of the time period the statistics are computed for</param>
        /// <param name="duration">Length of the time period the statistics are computed for</param>
        /// <returns>Computed statistics</returns>
        public static Task<MetricStatistics> CalculateMetricStatisticsAsync(double[] values, DateTime startTime, TimeSpan duration)
        {
            if (values == null || values.Length < RequiredValuesCountForComputation)
            {
                throw new ArgumentException($"{nameof(values)} must be non-null and have a length >= {RequiredValuesCountForComputation}.");
            }

            return Task.Run(() =>
            {
                var retval = new MetricStatistics
                {
                    StartTime = startTime,
                    Duration = duration
                };

                var sortedArray = new double[values.Length];
                Array.Copy(values, sortedArray, values.Length);
                Array.Sort(sortedArray);

                var slopeIntercept = CalculateSlope(values);

                retval.Slope = slopeIntercept.Item1;
                retval.SlopeYIntercept = slopeIntercept.Item2;
                retval.MeanSquaredError = slopeIntercept.Item3;

                retval.Maximum = sortedArray[sortedArray.Length - 1];
                retval.Minimum = sortedArray[0];
                retval.Count = sortedArray.Length;
                retval.Sum = sortedArray.Sum();
                retval.Mean = retval.Sum / retval.Count;

                var quartiles = GetQuartiles(sortedArray);
                retval.Median = quartiles.Item2;
                retval.FirstQuartile = quartiles.Item1;
                retval.ThirdQuartile = quartiles.Item3;

                var stdDevSum = sortedArray.Sum(i => Math.Pow(i - retval.Mean, 2));
                retval.VarianceSum = stdDevSum;
                retval.Variance = stdDevSum / (retval.Count - 1);
                retval.StandardDeviation = Math.Sqrt(retval.Variance);
                retval.GesdValue = retval.StandardDeviation < double.Epsilon
                    ? 0.0
                    : Math.Max(retval.Mean - retval.Minimum, retval.Maximum - retval.Mean) / retval.StandardDeviation;

                var skewnessValue = values.Sum(d => Math.Pow(d - retval.Mean, 3));
                var kurtosisValue = values.Sum(d => Math.Pow(d - retval.Mean, 4));
                retval.Kurtosis = kurtosisValue / retval.Count / Math.Pow(retval.Variance, 2) - 3;
                retval.Skewness = Math.Sqrt((long)retval.Count * ((long)retval.Count - 1)) / (retval.Count - 2) *
                                  (skewnessValue / retval.Count) / Math.Pow(retval.StandardDeviation, 3);

                //reuse sortedArray to hold the absolute deviations from the median (re-sorted below)
                for (int i = 0; i < sortedArray.Length; i++)
                {
                    sortedArray[i] = Math.Abs(sortedArray[i] - retval.Median);
                }

                Array.Sort(sortedArray);

                retval.MedianAbsoluteDeviation = MedianOfSortedArrayRange(sortedArray, 0, sortedArray.Length - 1);

                retval.Cardinality = values.Distinct().Count();
                retval.CardinalityRatio = (double)retval.Cardinality / retval.Count;

                return retval;
            });
        }
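A minimal call site might look like the sketch below. It assumes only what the example above shows: the CalculateMetricStatisticsAsync signature and the MetricStatistics properties it assigns (Mean, Median, StandardDeviation, Skewness, Kurtosis, MedianAbsoluteDeviation). The sample values are made up, and the array must be long enough to satisfy RequiredValuesCountForComputation, whose value is not shown here.

        // Illustrative usage sketch (inside an async method).
        double[] cpuCounts = { 2, 4, 4, 8, 8, 8, 16, 16, 32, 64 };
        var stats = await MetricStatistics.CalculateMetricStatisticsAsync(cpuCounts, DateTime.Now, TimeSpan.FromSeconds(1));
        Console.WriteLine($"Mean: {stats.Mean}, Median: {stats.Median}, StdDev: {stats.StandardDeviation}");
        Console.WriteLine($"Skewness: {stats.Skewness}, Kurtosis: {stats.Kurtosis}, MAD: {stats.MedianAbsoluteDeviation}");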
Example #3
        private static void EvaluateModel(PredictionModel<NodeData, ClusterPrediction> model)
        {
            Console.WriteLine("Let's evaluate the model!");
            // How do our clusters look
            var testResults      = GetTestingData();
            var clusterDataSets  = new Dictionary<int, DataSetLists>();
            var clusterDistances = new Dictionary<uint, Dictionary<int, List<double>>>();

            for (int i = 0; i < testResults.Length; i++)
            {
                // View the cluster for each value in the data
                var prediction = model.Predict(testResults[i]);
                Console.WriteLine($"{testResults[i].Vendor} = Cluster: {prediction.PredictedClusterId} Distances: {string.Join(" ", prediction.Distances)}");
                // How would we evaluate our clusters to draw conclusions?
                var clusterId = (int)prediction.PredictedClusterId;
                if (!clusterDataSets.ContainsKey(clusterId))
                {
                    clusterDataSets[clusterId] = new DataSetLists();
                }
                clusterDataSets[clusterId].CPUCount.Add(testResults[i].CPUCount);
                clusterDataSets[clusterId].SumTriggers.Add(testResults[i].SumTriggers);
                clusterDataSets[clusterId].SystemUpTime.Add(testResults[i].SystemUpTime);
                clusterDataSets[clusterId].TotalMemory.Add(testResults[i].TotalMemory);

                // Analyze the distances within clusters and between clusters
                if (!clusterDistances.ContainsKey(prediction.PredictedClusterId))
                {
                    clusterDistances[prediction.PredictedClusterId] = new Dictionary<int, List<double>>();
                }
                for (int j = 0; j < prediction.Distances.Length; j++)
                {
                    if (!clusterDistances[prediction.PredictedClusterId].ContainsKey(j))
                    {
                        clusterDistances[prediction.PredictedClusterId][j] = new List<double>();
                    }
                    clusterDistances[prediction.PredictedClusterId][j].Add(prediction.Distances[j]);
                }
            }

            Console.WriteLine("------------- Statistical Analysis of Clusters ---------------");
            foreach (var item in clusterDistances)
            {
                Console.WriteLine($"Analysis of cluster {item.Key}");
                foreach (var cluster in item.Value)
                {
                    var stats = MetricStatistics.CalculateMetricStatisticsAsync(cluster.Value.ToArray(), DateTime.Now, TimeSpan.FromSeconds(1)).Result;
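                    // The predicted cluster ids are 1-based while the Distances array is 0-indexed,
                    // so index (item.Key - 1) holds the distance from an item to its own cluster.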
                    if ((item.Key - 1) == cluster.Key)
                    {
                        Console.WriteLine($"Statistics for items within the cluster {item.Key}:");
                    }
                    else
                    {
                        Console.WriteLine($"Statistics for items from {item.Key} to cluster {cluster.Key + 1}");
                    }
                    Console.WriteLine(stats.ToString());
                }
            }

            Console.WriteLine();
            Console.WriteLine("------------- Statistical Analysis of Model ---------------");
            foreach (var item in clusterDataSets)
            {
                Console.WriteLine($"#################### Cluster: {item.Key}");
                UnderstandData(item.Value.GetDataSetColumns());
            }
        }
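Both examples rely on DataSetLists and DataSetColumns helpers whose definitions are not shown here. The sketch below is one possible shape, inferred only from how they are used above (per-column lists that are accumulated per cluster and then converted to the double[] columns UnderstandData expects); the property types are assumptions.

        // Illustrative sketch only: field types are inferred from usage, not taken from the real source.
        public class DataSetColumns
        {
            public double[] CPUCount { get; set; }
            public double[] TotalMemory { get; set; }
            public double[] SystemUpTime { get; set; }
            public double[] SumTriggers { get; set; }
        }

        public class DataSetLists
        {
            public List<double> CPUCount { get; } = new List<double>();
            public List<double> TotalMemory { get; } = new List<double>();
            public List<double> SystemUpTime { get; } = new List<double>();
            public List<double> SumTriggers { get; } = new List<double>();

            // Converts the accumulated lists into the column arrays UnderstandData expects.
            public DataSetColumns GetDataSetColumns() => new DataSetColumns
            {
                CPUCount = CPUCount.ToArray(),
                TotalMemory = TotalMemory.ToArray(),
                SystemUpTime = SystemUpTime.ToArray(),
                SumTriggers = SumTriggers.ToArray()
            };
        }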