public void HistogramBox_ShowTest1()
        {
            // Draw 100 samples from the standard normal distribution
            const int sampleCount = 100;
            double[] samples = NormalDistribution.Standard.Generate(sampleCount);

            // Show the histogram for the samples, then call Hold() on the returned box
            var histogramBox = HistogramBox.Show(samples);
            histogramBox.Hold();
        }
        /// <summary>
        /// Loads the daily OHLC data set ("eurusd-daily-ohlc.csv"), shows line charts of the
        /// close prices and daily returns plus a histogram of the daily returns, and prints
        /// summary statistics of the "DailyReturn" column to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 50);

            // Read in the OHLC dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"<path-to-your-data-dir>";

            // Load the OHLC data into a data frame
            string ohlcDataPath = Path.Combine(dataDirPath, "eurusd-daily-ohlc.csv");

            Console.WriteLine("Loading {0}\n", ohlcDataPath);
            var ohlcDF = Frame.ReadCsv(
                ohlcDataPath,
                hasHeaders: true,
                inferTypes: true
                );

            // Daily returns with missing values replaced by 0.0; computed once and
            // reused by the line chart, the histogram and the quantile computation.
            var filledDailyReturn = ohlcDF.FillMissing(0.0)["DailyReturn"];

            // Time-series line chart of close prices
            var closePriceLineChart = DataSeriesBox.Show(
                ohlcDF.RowKeys.Select(x => (double)x),
                ohlcDF.GetColumn <double>("Close").ValuesAll
                );

            // Wait before resizing the chart through Invoke
            // (presumably to give the window time to be created -- TODO confirm)
            System.Threading.Thread.Sleep(3000);
            closePriceLineChart.Invoke(
                new Action(() =>
            {
                closePriceLineChart.Size = new System.Drawing.Size(700, 500);
            })
                );

            // Time-series line chart of daily returns
            var dailyReturnLineChart = DataSeriesBox.Show(
                ohlcDF.RowKeys.Select(x => (double)x),
                filledDailyReturn.ValuesAll
                );

            System.Threading.Thread.Sleep(3000);
            dailyReturnLineChart.Invoke(
                new Action(() =>
            {
                dailyReturnLineChart.Size = new System.Drawing.Size(700, 500);
            })
                );

            // Histogram of daily returns
            var dailyReturnHistogram = HistogramBox
                                       .Show(
                filledDailyReturn.ValuesAll.ToArray()
                )
                                       .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            dailyReturnHistogram.Invoke(
                new Action(() =>
            {
                dailyReturnHistogram.Size = new System.Drawing.Size(700, 500);
            })
                );

            // Check the distribution of daily returns
            // NOTE(review): these statistics are taken from the unfilled column, whereas the
            // quantiles below are taken from the zero-filled one -- confirm this is intended.
            var dailyReturn = ohlcDF["DailyReturn"];
            double returnMax    = dailyReturn.Max();
            double returnMean   = dailyReturn.Mean();
            double returnMin    = dailyReturn.Min();
            double returnStdDev = dailyReturn.StdDev();

            double[] quantiles = Accord.Statistics.Measures.Quantiles(
                filledDailyReturn.ValuesAll.ToArray(),
                new double[] { 0.25, 0.5, 0.75 }
                );

            Console.WriteLine("-- DailyReturn Distribution-- ");

            Console.WriteLine("Mean: \t\t\t{0:0.00}\nStdDev: \t\t{1:0.00}\n", returnMean, returnStdDev);

            Console.WriteLine(
                "Min: \t\t\t{0:0.00}\nQ1 (25% Percentile): \t{1:0.00}\nQ2 (Median): \t\t{2:0.00}\nQ3 (75% Percentile): \t{3:0.00}\nMax: \t\t\t{4:0.00}",
                returnMin, quantiles[0], quantiles[1], quantiles[2], returnMax
                );

            Console.WriteLine("\nDONE!!!");
            Console.ReadKey();
        }
        /// <summary>
        /// Explores the credit card data set ("creditcard.csv"): prints the frame shape and
        /// the row counts per "Class" value, prints quartiles and shows a histogram for every
        /// column other than "Class" and "Time", and shows three scatter plots built from
        /// consecutive column pairs of the frame.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 55);

            // Location of the Credit Card Fraud input data
            // TODO: change the path to point to your data directory
            string inputDir = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.10\input-data";
            string csvPath = Path.Combine(inputDir, "creditcard.csv");

            // Load the CSV into a data frame
            Console.WriteLine("Loading {0}\n\n", csvPath);
            var df = Frame.ReadCsv(csvPath, hasHeaders: true, inferTypes: true);

            Console.WriteLine("* Shape: {0}, {1}\n\n", df.RowCount, df.ColumnCount);

            // Row counts per target class (counted through the "V1" column)
            var classCounts = df.AggregateRowsBy <string, int>(
                new string[] { "Class" },
                new string[] { "V1" },
                rows => rows.ValueCount
                ).SortRows("V1");
            classCounts.RenameColumns(new string[] { "is_fraud", "count" });
            classCounts.Print();

            var classNames = classCounts.GetColumn <string>("is_fraud").Values.ToArray();
            var classTotals = classCounts["count"].Values.ToArray();
            var classBarChart = DataBarBox.Show(classNames, classTotals);
            classBarChart.SetTitle("Counts by Target Class");

            // Per-feature distributions
            HistogramBox.CheckForIllegalCrossThreadCalls = false;

            foreach (string columnName in df.ColumnKeys)
            {
                // Only the feature columns are summarized; "Class" and "Time" are skipped
                if (!columnName.Equals("Class") && !columnName.Equals("Time"))
                {
                    double[] columnValues = df[columnName].DropMissing().ValuesAll.ToArray();

                    // Quartiles (levels 0 and 1.0 are printed as Min and Max)
                    Console.WriteLine("\n\n-- {0} Distribution -- ", columnName);
                    double[] q = Accord.Statistics.Measures.Quantiles(
                        columnValues,
                        new double[] { 0, 0.25, 0.5, 0.75, 1.0 }
                        );
                    Console.WriteLine(
                        "Min: \t\t\t{0:0.00}\nQ1 (25% Percentile): \t{1:0.00}\nQ2 (Median): \t\t{2:0.00}\nQ3 (75% Percentile): \t{3:0.00}\nMax: \t\t\t{4:0.00}",
                        q[0], q[1], q[2], q[3], q[4]
                        );

                    // Histogram of the same values
                    var featureHistogram = HistogramBox.Show(columnValues, title: columnName);
                    featureHistogram.SetNumberOfBins(50);
                }
            }

            // Target class plotted over pairs of consecutive columns
            double[][] rowsAsArrays = BuildJaggedArray(
                df.ToArray2D <double>(), df.RowCount, df.ColumnCount
                );
            int[] classLabels = df.GetColumn <int>("Class").ValuesAll.ToArray();

            // Columns at positions 0 and 1
            double[][] columnPair = rowsAsArrays
                                    .Select(row => row.Take(2).ToArray())
                                    .ToArray();
            ScatterplotBox.Show("Feature #1 vs. Feature #2", columnPair, classLabels);

            // Columns at positions 1 and 2
            double[][] nextColumnPair = rowsAsArrays
                                        .Select(row => row.Skip(1).Take(2).ToArray())
                                        .ToArray();
            ScatterplotBox.Show("Feature #2 vs. Feature #3", nextColumnPair, classLabels);

            // Columns at positions 2 and 3
            nextColumnPair = rowsAsArrays
                             .Select(row => row.Skip(2).Take(2).ToArray())
                             .ToArray();
            ScatterplotBox.Show("Feature #3 vs. Feature #4", nextColumnPair, classLabels);

            Console.WriteLine("\n\n\n\n\nDONE!!!");
            Console.ReadKey();
        }
        /// <summary>
        /// Explores the house price data set ("train.csv"): prints and charts the value
        /// counts of two categorical and two ordinal columns, then shows histograms of three
        /// continuous columns ("1stFlrSF", "GarageArea", "SalePrice") both raw and
        /// log-transformed.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 50);

            // Read in the House Price dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.5\input-data";

            // Load the data into a data frame
            string dataPath = Path.Combine(dataDirPath, "train.csv");

            Console.WriteLine("Loading {0}\n", dataPath);
            var houseDF = Frame.ReadCsv(
                dataPath,
                hasHeaders: true,
                inferTypes: true
                );

            // Categorical Variable #1: Building Type
            Console.WriteLine("\nCategorical Variable #1: Building Type");
            var buildingTypeDistribution = houseDF.GetColumn <string>(
                "BldgType"
                ).GroupBy <string>(x => x.Value).Select(x => (double)x.Value.KeyCount);

            buildingTypeDistribution.Print();

            var buildingTypeBarChart = DataBarBox.Show(
                buildingTypeDistribution.Keys.ToArray(),
                buildingTypeDistribution.Values.ToArray()
                );

            buildingTypeBarChart.SetTitle("Building Type Distribution (Categorical)");
            // Wait before resizing the chart through Invoke
            // (presumably to give the window time to be created -- TODO confirm)
            System.Threading.Thread.Sleep(3000);
            buildingTypeBarChart.Invoke(
                new Action(() =>
            {
                buildingTypeBarChart.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Categorical Variable #2: Lot Configuration
            // (the heading previously repeated the "Building Type" text of section #1)
            Console.WriteLine("\nCategorical Variable #2: Lot Configuration");
            var lotConfigDistribution = houseDF.GetColumn <string>(
                "LotConfig"
                ).GroupBy <string>(x => x.Value).Select(x => (double)x.Value.KeyCount);

            lotConfigDistribution.Print();

            var lotConfigBarChart = DataBarBox.Show(
                lotConfigDistribution.Keys.ToArray(),
                lotConfigDistribution.Values.ToArray()
                );

            lotConfigBarChart.SetTitle("Lot Configuration Distribution (Categorical)");
            System.Threading.Thread.Sleep(3000);
            lotConfigBarChart.Invoke(
                new Action(() =>
            {
                lotConfigBarChart.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Ordinal Categorical Variable #1: Overall material and finish of the house
            Console.WriteLine("\nOrdinal Categorical #1: Overall material and finish of the house");
            var overallQualDistribution = houseDF.GetColumn <string>(
                "OverallQual"
                ).GroupBy <int>(
                x => Convert.ToInt32(x.Value)
                ).Select(
                x => (double)x.Value.KeyCount
                ).SortByKey().Reversed;

            overallQualDistribution.Print();

            var overallQualBarChart = DataBarBox.Show(
                overallQualDistribution.Keys.Select(x => x.ToString()),
                overallQualDistribution.Values.ToArray()
                );

            overallQualBarChart.SetTitle("Overall House Quality Distribution (Ordinal)");
            System.Threading.Thread.Sleep(3000);
            overallQualBarChart.Invoke(
                new Action(() =>
            {
                overallQualBarChart.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Ordinal Categorical Variable #2: Exterior Quality
            // The explicit key list fixes the order in which the categories are shown.
            Console.WriteLine("\nOrdinal Categorical #2: Exterior Quality");
            var exteriorQualDistribution = houseDF.GetColumn <string>(
                "ExterQual"
                ).GroupBy <string>(x => x.Value).Select(
                x => (double)x.Value.KeyCount
                )[new string[] { "Ex", "Gd", "TA", "Fa" }];

            exteriorQualDistribution.Print();

            var exteriorQualBarChart = DataBarBox.Show(
                exteriorQualDistribution.Keys.Select(x => x.ToString()),
                exteriorQualDistribution.Values.ToArray()
                );

            exteriorQualBarChart.SetTitle("Exterior Quality Distribution (Ordinal)");
            System.Threading.Thread.Sleep(3000);
            exteriorQualBarChart.Invoke(
                new Action(() =>
            {
                exteriorQualBarChart.Size = new System.Drawing.Size(1000, 700);
            })
                );

            HistogramBox.CheckForIllegalCrossThreadCalls = false;

            // Result of DropSparseRows(), computed once and shared by all six histograms
            // below instead of being rebuilt for each of them.
            var completeRowsDF = houseDF.DropSparseRows();

            // Continuous Variable #1-1: First Floor Square Feet
            var firstFloorHistogram = HistogramBox
                                      .Show(
                completeRowsDF["1stFlrSF"].ValuesAll.ToArray(),
                title: "First Floor Square Feet (Continuous)"
                )
                                      .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            firstFloorHistogram.Invoke(
                new Action(() =>
            {
                firstFloorHistogram.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Continuous Variable #1-2: Log of First Floor Square Feet
            var logFirstFloorHistogram = HistogramBox
                                         .Show(
                completeRowsDF["1stFlrSF"].Log().ValuesAll.ToArray(),
                title: "First Floor Square Feet - Log Transformed (Continuous)"
                )
                                         .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            logFirstFloorHistogram.Invoke(
                new Action(() =>
            {
                logFirstFloorHistogram.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Continuous Variable #2-1: Size of garage in square feet
            var garageHistogram = HistogramBox
                                  .Show(
                completeRowsDF["GarageArea"].ValuesAll.ToArray(),
                title: "Size of garage in square feet (Continuous)"
                )
                                  .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            garageHistogram.Invoke(
                new Action(() =>
            {
                garageHistogram.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Continuous Variable #2-2: Log of size of garage in square feet
            // NOTE(review): if "GarageArea" contains zeros the log is not finite --
            // confirm how the histogram treats such values.
            var logGarageHistogram = HistogramBox
                                     .Show(
                completeRowsDF["GarageArea"].Log().ValuesAll.ToArray(),
                title: "Size of garage in square feet - Log Transformed (Continuous)"
                )
                                     .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            logGarageHistogram.Invoke(
                new Action(() =>
            {
                logGarageHistogram.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Target Variable: Sale Price
            var salePriceHistogram = HistogramBox
                                     .Show(
                completeRowsDF["SalePrice"].ValuesAll.ToArray(),
                title: "Sale Price (Continuous)"
                )
                                     .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            salePriceHistogram.Invoke(
                new Action(() =>
            {
                salePriceHistogram.Size = new System.Drawing.Size(1000, 700);
            })
                );

            // Target Variable: Sale Price - Log Transformed
            var logSalePriceHistogram = HistogramBox
                                        .Show(
                completeRowsDF["SalePrice"].Log().ValuesAll.ToArray(),
                title: "Sale Price - Log Transformed (Continuous)"
                )
                                        .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            logSalePriceHistogram.Invoke(
                new Action(() =>
            {
                logSalePriceHistogram.Size = new System.Drawing.Size(1000, 700);
            })
                );


            Console.WriteLine("\nDONE!!!");
            Console.ReadKey();
        }
        /// <summary>
        /// Builds per-customer features from the cleaned online retail data
        /// ("data-clean.csv"): aggregates revenue, transaction counts, cancellations and
        /// averages by "CustomerID", filters out customers with negative revenue/quantity or
        /// a cancellation ratio of 0.5 or more, adds percentile-rank columns, shows
        /// histograms, and exports the result to "features.csv" in the same directory.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 50);

            // Read in the Online Retail dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.6\input-data";

            // Load the data into a data frame
            string dataPath = Path.Combine(dataDirPath, "data-clean.csv");

            Console.WriteLine("Loading {0}\n\n", dataPath);
            var ecommerceDF = Frame.ReadCsv(
                dataPath,
                hasHeaders: true,
                inferTypes: true
                );

            Console.WriteLine("* Shape: {0}, {1}\n\n", ecommerceDF.RowCount, ecommerceDF.ColumnCount);

            // 1. Net Revenue per Customer
            var revPerCustomerDF = ecommerceDF.AggregateRowsBy <double, double>(
                new string[] { "CustomerID" },
                new string[] { "Amount" },
                x => x.Sum()
                );
            // 2. # of Total Transactions per Customer
            var numTransactionsPerCustomerDF = ecommerceDF.AggregateRowsBy <double, double>(
                new string[] { "CustomerID" },
                new string[] { "Quantity" },
                x => x.ValueCount
                );
            // 3. # of Cancelled Transactions per Customer (rows with a negative Quantity)
            var numCancelledPerCustomerDF = ecommerceDF.AggregateRowsBy <double, double>(
                new string[] { "CustomerID" },
                new string[] { "Quantity" },
                x => x.Select(y => y.Value >= 0 ? 0.0 : 1.0).Sum()
                );
            // 4. Average UnitPrice per Customer
            var avgUnitPricePerCustomerDF = ecommerceDF.AggregateRowsBy <double, double>(
                new string[] { "CustomerID" },
                new string[] { "UnitPrice" },
                x => x.Sum() / x.ValueCount
                );
            // 5. Average Quantity per Customer
            var avgQuantityPerCustomerDF = ecommerceDF.AggregateRowsBy <double, double>(
                new string[] { "CustomerID" },
                new string[] { "Quantity" },
                x => x.Sum() / x.ValueCount
                );

            // Aggregate all results
            var featuresDF = Frame.CreateEmpty <int, string>();

            featuresDF.AddColumn("CustomerID", revPerCustomerDF.GetColumn <double>("CustomerID"));
            // NOTE(review): "Description" comes from the un-aggregated transaction frame while
            // every other column comes from a per-customer aggregate -- confirm this column
            // is intended to be part of the feature set.
            featuresDF.AddColumn("Description", ecommerceDF.GetColumn <string>("Description"));
            featuresDF.AddColumn("NetRevenue", revPerCustomerDF.GetColumn <double>("Amount"));
            featuresDF.AddColumn("NumTransactions", numTransactionsPerCustomerDF.GetColumn <double>("Quantity"));
            featuresDF.AddColumn("NumCancelled", numCancelledPerCustomerDF.GetColumn <double>("Quantity"));
            featuresDF.AddColumn("AvgUnitPrice", avgUnitPricePerCustomerDF.GetColumn <double>("UnitPrice"));
            featuresDF.AddColumn("AvgQuantity", avgQuantityPerCustomerDF.GetColumn <double>("Quantity"));
            featuresDF.AddColumn("PercentageCancelled", featuresDF["NumCancelled"] / featuresDF["NumTransactions"]);

            Console.WriteLine("\n\n* Feature Set:");
            featuresDF.Print();

            // Features whose distributions are reported, plotted and ranked below,
            // in the order in which they are processed.
            string[] featureNames =
            {
                "NetRevenue", "NumTransactions", "AvgUnitPrice", "AvgQuantity", "PercentageCancelled"
            };

            // Feature distributions before filtering
            foreach (string feature in featureNames)
            {
                PrintQuartiles(featuresDF, feature);
            }
            Console.WriteLine("\n\n* Feature DF Shape: ({0}, {1})", featuresDF.RowCount, featuresDF.ColumnCount);

            // 1. Drop Customers with Negative NetRevenue
            featuresDF = featuresDF.Rows[
                featuresDF["NetRevenue"].Where(x => x.Value >= 0.0).Keys
                         ];
            // 2. Drop Customers with Negative AvgQuantity
            featuresDF = featuresDF.Rows[
                featuresDF["AvgQuantity"].Where(x => x.Value >= 0.0).Keys
                         ];
            // 3. Drop Customers who have more cancel orders than purchase orders
            featuresDF = featuresDF.Rows[
                featuresDF["PercentageCancelled"].Where(x => x.Value < 0.5).Keys
                         ];

            Console.WriteLine("\n\n\n\n* After dropping customers with potential orphan cancel orders:");
            // Feature distributions after filtering
            foreach (string feature in featureNames)
            {
                PrintQuartiles(featuresDF, feature);
            }
            Console.WriteLine("\n\n* Feature DF Shape: ({0}, {1})", featuresDF.RowCount, featuresDF.ColumnCount);

            HistogramBox.CheckForIllegalCrossThreadCalls = false;

            // Histograms of the raw features; DropSparseRows() is evaluated once and shared.
            var completeFeaturesDF = featuresDF.DropSparseRows();
            foreach (string feature in featureNames)
            {
                HistogramBox
                .Show(
                    completeFeaturesDF[feature].ValuesAll.ToArray(),
                    title: feature + " Distribution"
                    )
                .SetNumberOfBins(50);
            }


            // Create Percentile Features
            foreach (string feature in featureNames)
            {
                var featureSeries = featuresDF[feature];
                // Materialize the column once instead of once per row inside the lambda.
                // NOTE(review): assumes StatsFunctions.PercentileRank only reads this array
                // -- confirm against its implementation.
                double[] featureValues = featureSeries.Values.ToArray();

                featuresDF.AddColumn(
                    feature + "Percentile",
                    featureSeries.Select(
                        x => StatsFunctions.PercentileRank(featureValues, x.Value)
                        )
                    );
            }
            Console.WriteLine("\n\n\n* Percentile Features:");
            featuresDF.Columns[
                new string[] { "NetRevenue", "NetRevenuePercentile", "NumTransactions", "NumTransactionsPercentile" }
            ].Print();

            // Histograms of the percentile features (sparse rows re-evaluated because the
            // frame has gained columns since the previous DropSparseRows() call).
            var completePercentilesDF = featuresDF.DropSparseRows();
            foreach (string feature in featureNames)
            {
                string percentileColumn = feature + "Percentile";

                HistogramBox
                .Show(
                    completePercentilesDF[percentileColumn].ValuesAll.ToArray(),
                    title: percentileColumn + " Distribution"
                    )
                .SetNumberOfBins(50);
            }

            string outputPath = Path.Combine(dataDirPath, "features.csv");

            Console.WriteLine("* Exporting features data: {0}", outputPath);
            featuresDF.SaveCsv(outputPath);

            Console.WriteLine("\n\n\n\nDONE!!");
            Console.ReadKey();
        }
        // Example #6
        // 0
        /// <summary>
        /// Distance-based anomaly detection on the PCA-transformed cyber attack features:
        /// loads the features and labels, computes distances for the rows labelled 0
        /// (normal), drops the largest 1% of those distances, and then evaluates detection
        /// of the remaining rows at target false alarm rates from 5% to 10%.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 60);

            // Read in the Cyber Attack dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.9\input-data";

            // Load the data into a data frame
            string dataPath = Path.Combine(dataDirPath, "pca-transformed-features.csv");
            Console.WriteLine("Loading {0}\n\n", dataPath);
            var featuresDF = Frame.ReadCsv(
                dataPath,
                hasHeaders: false,
                inferTypes: true
            );
            // The file has no header row, so name the columns component-1, component-2, ...
            featuresDF.RenameColumns(
                featuresDF.ColumnKeys.Select((x, i) => String.Format("component-{0}", i + 1))
            );

            // One integer label per line of the labels file
            int[] labels = File.ReadLines(
                Path.Combine(dataDirPath, "pca-transformed-labels.csv")
            ).Select(x => int.Parse(x)).ToArray();
            featuresDF.AddColumn("attack_category", labels);

            Console.WriteLine("* Shape: ({0}, {1})\n\n", featuresDF.RowCount, featuresDF.ColumnCount);

            // Row counts per attack category (counted through the "component-1" column)
            var count = featuresDF.AggregateRowsBy<string, int>(
                new string[] { "attack_category" },
                new string[] { "component-1" },
                x => x.ValueCount
            ).SortRows("component-1");
            count.RenameColumns(new string[] { "attack_category", "count" });
            count.Print();

            // First 13 components explain about 50% of the variance
            // First 19 components explain about 60% of the variance
            // First 27 components explain about 70% of the variance
            // First 34 components explain about 80% of the variance
            int numComponents = 27;
            string[] cols = featuresDF.ColumnKeys.Where((x, i) => i < numComponents).ToArray();

            // First, compute distances from the center/mean among normal events
            var normalDF = featuresDF.Rows[
                featuresDF["attack_category"].Where(x => x.Value == 0).Keys
            ].Columns[cols];

            double[][] normalData = BuildJaggedArray(
                normalDF.ToArray2D<double>(), normalDF.RowCount, cols.Length
            );
            double[] normalVariances = ComputeVariances(normalData);
            double[] rawDistances = ComputeDistances(normalData, normalVariances);

            // Filter out extreme values: keep the positions of the smallest 99% of distances
            int[] idxFiltered = Matrix.ArgSort(rawDistances)
                .Where((x, i) =>  i < rawDistances.Length * 0.99).ToArray();
            // Hash set gives constant-time membership checks; scanning the int[] for every
            // element made this filter quadratic in the number of normal rows.
            var keptIndices = new System.Collections.Generic.HashSet<int>(idxFiltered);
            double[] distances = rawDistances.Where((x, i) => keptIndices.Contains(i)).ToArray();

            // Mean and (population) standard deviation of the retained distances
            double meanDistance = distances.Average();
            double stdDistance = Math.Sqrt(
                distances
                .Select(x => Math.Pow(x - meanDistance, 2))
                .Sum() / distances.Length
            );

            Console.WriteLine(
                "\n\n* Normal - mean: {0:0.0000}, std: {1:0.0000}",
                meanDistance, stdDistance
            );

            HistogramBox.CheckForIllegalCrossThreadCalls = false;

            HistogramBox.Show(
                distances,
                title: "Distances"
            )
            .SetNumberOfBins(50);

            // Detection
            // Rows with a positive attack_category, selected once and reused for both the
            // feature matrix and the labels.
            var attackRowsDF = featuresDF.Rows[
                featuresDF["attack_category"].Where(x => x.Value > 0).Keys
            ];
            var attackDF = attackRowsDF.Columns[cols];

            double[][] attackData = BuildJaggedArray(
                attackDF.ToArray2D<double>(), attackDF.RowCount, cols.Length
            );
            // Distances of the attack rows use the variances estimated from the normal rows
            double[] attackDistances = ComputeDistances(attackData, normalVariances);
            int[] attackLabels = attackRowsDF.GetColumn<int>("attack_category").ValuesAll.ToArray();

            // 5-10% false alarm rate
            for (int i = 4; i < 10; i++)
            {
                double targetFalseAlarmRate = 0.01 * (i + 1);
                // Threshold is the (1 - rate) quantile of the retained normal distances
                double threshold = Accord.Statistics.Measures.Quantile(
                    distances,
                    1 - targetFalseAlarmRate
                );
                Console.WriteLine(threshold);
                // 1 = flagged (distance above the threshold), 0 = not flagged
                int[] detected = attackDistances.Select(x => x > threshold ? 1 : 0).ToArray();

                EvaluateResults(attackLabels, detected, targetFalseAlarmRate);
            }

            Console.WriteLine("\n\n\n\n\nDONE!!!");
            Console.ReadKey();
        }