/// <summary>
/// Loads "eurusd-daily-ohlc.csv" from the data directory, shows a line chart of the
/// "Close" column, a line chart and a 20-bin histogram of the "DailyReturn" column,
/// and prints summary statistics of "DailyReturn" to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
        {
            Console.SetWindowSize(100, 50);

            // Read in the OHLC dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"<path-to-your-data-dir>";

            // Load the OHLC data into a data frame
            string ohlcDataPath = Path.Combine(dataDirPath, "eurusd-daily-ohlc.csv");

            Console.WriteLine("Loading {0}\n", ohlcDataPath);
            var ohlcDF = Frame.ReadCsv(
                ohlcDataPath,
                hasHeaders: true,
                inferTypes: true
                );

            // Daily returns with missing values replaced by 0.0. Computed once and shared by
            // the line chart, the histogram and the quantile calculation below.
            var dailyReturnFilled = ohlcDF.FillMissing(0.0)["DailyReturn"];

            // Time-series line chart of close prices
            var closePriceLineChart = DataSeriesBox.Show(
                ohlcDF.RowKeys.Select(x => (double)x),
                ohlcDF.GetColumn <double>("Close").ValuesAll
                );

            // Pause before resizing; presumably this gives the chart window time to open
            // before Invoke is called on it.
            System.Threading.Thread.Sleep(3000);
            closePriceLineChart.Invoke(
                new Action(() =>
            {
                closePriceLineChart.Size = new System.Drawing.Size(700, 500);
            })
                );

            // Time-series line chart of daily returns
            var dailyReturnLineChart = DataSeriesBox.Show(
                ohlcDF.RowKeys.Select(x => (double)x),
                dailyReturnFilled.ValuesAll
                );

            System.Threading.Thread.Sleep(3000);
            dailyReturnLineChart.Invoke(
                new Action(() =>
            {
                dailyReturnLineChart.Size = new System.Drawing.Size(700, 500);
            })
                );

            // Histogram of daily returns
            var dailyReturnHistogram = HistogramBox
                                       .Show(dailyReturnFilled.ValuesAll.ToArray())
                                       .SetNumberOfBins(20);

            System.Threading.Thread.Sleep(3000);
            dailyReturnHistogram.Invoke(
                new Action(() =>
            {
                dailyReturnHistogram.Size = new System.Drawing.Size(700, 500);
            })
                );

            // Check the distribution of daily returns.
            // NOTE(review): these statistics are taken from the raw column, while the
            // quantiles below are taken from the zero-filled series - confirm that this
            // difference is intended.
            var dailyReturn = ohlcDF["DailyReturn"];
            double returnMax    = dailyReturn.Max();
            double returnMean   = dailyReturn.Mean();
            double returnMin    = dailyReturn.Min();
            double returnStdDev = dailyReturn.StdDev();

            // Q1, Q2 (median) and Q3 of the zero-filled daily returns
            double[] quantiles = Accord.Statistics.Measures.Quantiles(
                dailyReturnFilled.ValuesAll.ToArray(),
                new double[] { 0.25, 0.5, 0.75 }
                );

            Console.WriteLine("-- DailyReturn Distribution-- ");

            Console.WriteLine("Mean: \t\t\t{0:0.00}\nStdDev: \t\t{1:0.00}\n", returnMean, returnStdDev);

            Console.WriteLine(
                "Min: \t\t\t{0:0.00}\nQ1 (25% Percentile): \t{1:0.00}\nQ2 (Median): \t\t{2:0.00}\nQ3 (75% Percentile): \t{3:0.00}\nMax: \t\t\t{4:0.00}",
                returnMin, quantiles[0], quantiles[1], quantiles[2], returnMax
                );

            Console.WriteLine("\nDONE!!!");
            Console.ReadKey();
        }
        /// <summary>
        /// Loads "train.csv" (a "label" column plus pixel columns), splits its rows at random
        /// into a train and a test set, charts the label counts of both sets, fits a
        /// standardized PCA on the train set and writes the explained variance and the
        /// PCA-transformed train/test sets to CSV files in the data directory.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 60);

            // Read in the Image Features dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.8\input-data";

            // Load the data into a data frame
            string dataPath = Path.Combine(dataDirPath, "train.csv");

            Console.WriteLine("Loading {0}\n\n", dataPath);
            var featuresDF = Frame.ReadCsv(
                dataPath,
                hasHeaders: true,
                inferTypes: true
                );

            Console.WriteLine("* Shape: {0}, {1}\n\n", featuresDF.RowCount, featuresDF.ColumnCount);

            double trainSetProportion = 0.7;

            // Draw the random split exactly once. The query must be materialized: a deferred
            // query would call rnd.NextDouble() again on every enumeration and produce a
            // different "train set" each time it is used.
            var rnd = new Random();
            int[] trainIdx = featuresDF.RowKeys
                             .Where(x => rnd.NextDouble() <= trainSetProportion)
                             .ToArray();
            // Every row key that is not in the train set goes to the test set.
            int[] testIdx = featuresDF.RowKeys.Except(trainIdx).ToArray();

            var trainset = featuresDF.Rows[trainIdx];
            var testset  = featuresDF.Rows[testIdx];

            var trainLabels = trainset.GetColumn <int>("label").Values.ToArray();

            // Keep only the feature columns that have a value above zero somewhere in the train set
            string[] nonZeroPixelCols = trainset.ColumnKeys.Where(x => trainset[x].Max() > 0 && !x.Equals("label")).ToArray();

            // Train-set feature matrix: one double[] per row
            double[][] data = trainset.Columns[nonZeroPixelCols].Rows.Select(
                x => Array.ConvertAll <object, double>(x.Value.ValuesAll.ToArray(), o => Convert.ToDouble(o))
                ).ValuesAll.ToArray();

            Console.WriteLine("* Shape: {0}, {1}\n\n", data.Length, data[0].Length);

            // Number of rows per label in the train set (counted through the "pixel0" column)
            var digitCount = trainset.AggregateRowsBy <string, int>(
                new string[] { "label" },
                new string[] { "pixel0" },
                x => x.ValueCount
                ).SortRows("pixel0");

            digitCount.Print();

            DataBarBox.Show(
                digitCount.GetColumn <string>("label").Values.ToArray(),
                digitCount["pixel0"].Values.ToArray()
                ).SetTitle(
                "Train Set - Digit Count"
                );

            // Same count for the test set
            digitCount = testset.AggregateRowsBy <string, int>(
                new string[] { "label" },
                new string[] { "pixel0" },
                x => x.ValueCount
                ).SortRows("pixel0");

            digitCount.Print();

            DataBarBox.Show(
                digitCount.GetColumn <string>("label").Values.ToArray(),
                digitCount["pixel0"].Values.ToArray()
                ).SetTitle(
                "Test Set - Digit Count"
                );

            // Fit the PCA on the train set only
            var pca = new PrincipalComponentAnalysis(
                PrincipalComponentMethod.Standardize
                );

            pca.Learn(data);

            double[][] transformed      = pca.Transform(data);
            double[][] first2Components = transformed.Select(x => x.Where((y, i) => i < 2).ToArray()).ToArray();
            ScatterplotBox.Show("Component #1 vs. Component #2", first2Components, trainLabels);

            DataSeriesBox.Show(
                pca.Components.Select((x, i) => (double)i),
                pca.Components.Select(x => x.CumulativeProportion)
                ).SetTitle("Explained Variance");
            System.IO.File.WriteAllLines(
                Path.Combine(dataDirPath, "explained-variance.csv"),
                pca.Components.Select((x, i) => String.Format("{0},{1:0.0000}", i, x.CumulativeProportion))
                );

            // "transformed" already holds the PCA projection of the train-set matrix,
            // so it is exported directly instead of transforming the same rows again.
            Console.WriteLine("exporting train set...");
            System.IO.File.WriteAllLines(
                Path.Combine(dataDirPath, "pca-train.csv"),
                transformed.Select((x, i) => String.Format("{0},{1}", String.Join(",", x), trainset["label"].GetAt(i)))
                );

            Console.WriteLine("exporting test set...");
            var testTransformed = pca.Transform(
                testset.Columns[nonZeroPixelCols].Rows.Select(
                    x => Array.ConvertAll <object, double>(x.Value.ValuesAll.ToArray(), o => Convert.ToDouble(o))
                    ).ValuesAll.ToArray()
                );

            System.IO.File.WriteAllLines(
                Path.Combine(dataDirPath, "pca-test.csv"),
                testTransformed.Select((x, i) => String.Format("{0},{1}", String.Join(",", x), testset["label"].GetAt(i)))
                );

            Console.WriteLine("\n\n\n\n\nDONE!!!");
            Console.ReadKey();
        }
예제 #3
0
        /// <summary>
        /// Builds a numeric feature set from "data.csv" (one-hot encoding the categorical
        /// columns and integer-encoding "attack_category"), saves it, fits a standardized
        /// PCA on a random sample of rows whose category is 0, projects a mixed sample
        /// through it, plots consecutive component pairs and exports the results as CSV files.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 60);

            // Location of the Cyber Attack dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.9\input-data";
            string dataPath    = Path.Combine(dataDirPath, "data.csv");

            Console.WriteLine("Loading {0}\n\n", dataPath);
            var rawDF = Frame.ReadCsv(dataPath, hasHeaders: true, inferTypes: true);

            // Columns that get one-hot encoded
            string[] categoricalVars = { "protocol_type", "service", "flag", "land" };

            // Integer code for each value of the target column
            IDictionary <string, int> targetVarEncoding = new Dictionary <string, int>
            {
                { "normal", 0 },
                { "dos", 1 },
                { "probe", 2 },
                { "r2l", 3 },
                { "u2r", 4 }
            };

            var featuresDF = Frame.CreateEmpty <int, string>();

            foreach (string col in rawDF.ColumnKeys)
            {
                // "attack_type" is left out of the feature set
                if (col.Equals("attack_type"))
                {
                    continue;
                }

                // Target column: replace the category name with its integer code
                if (col.Equals("attack_category"))
                {
                    var encodedTarget = rawDF.GetColumn <string>(col).Select(x => targetVarEncoding[x.Value]);
                    featuresDF.AddColumn(col, encodedTarget);
                    continue;
                }

                // Categorical column: add every column produced by the one-hot encoder
                if (categoricalVars.Contains(col))
                {
                    var oneHotDF = EncodeOneHot(rawDF.GetColumn <string>(col), col);

                    foreach (string oneHotCol in oneHotDF.ColumnKeys)
                    {
                        featuresDF.AddColumn(oneHotCol, oneHotDF.GetColumn <int>(oneHotCol));
                    }
                    continue;
                }

                // Any other column: copy it, turning NaN into 0.0
                var cleaned = rawDF[col].Select((x, i) => double.IsNaN(x.Value) ? 0.0 : x.Value);
                featuresDF.AddColumn(col, cleaned);
            }

            Console.WriteLine("* Shape: {0}, {1}\n\n", featuresDF.RowCount, featuresDF.ColumnCount);
            Console.WriteLine("* Exporting feature set...");
            featuresDF.SaveCsv(Path.Combine(dataDirPath, "features.csv"));

            // Randomly sample up to 90000 rows of category 0 and up to 10000 rows of any other category
            var rnd            = new Random();
            var attackCategory = featuresDF["attack_category"];

            int[] normalIdx = attackCategory
                              .Where(x => x.Value == 0)
                              .Keys
                              .OrderBy(x => rnd.Next())
                              .Take(90000)
                              .ToArray();
            int[] attackIdx = attackCategory
                              .Where(x => x.Value > 0)
                              .Keys
                              .OrderBy(x => rnd.Next())
                              .Take(10000)
                              .ToArray();
            int[] totalIdx = normalIdx.Concat(attackIdx).ToArray();

            var normalSet  = featuresDF.Rows[normalIdx];
            var sampledSet = featuresDF.Rows[totalIdx];

            // Feature columns whose value is not constant across the category-0 sample
            string[] nonZeroValueCols = normalSet.ColumnKeys
                                        .Where(x => !x.Equals("attack_category") && normalSet[x].Max() != normalSet[x].Min())
                                        .ToArray();

            double[][] normalData = BuildJaggedArray(
                normalSet.Columns[nonZeroValueCols].ToArray2D <double>(),
                normalSet.RowCount,
                nonZeroValueCols.Length
                );
            double[][] wholeData = BuildJaggedArray(
                sampledSet.Columns[nonZeroValueCols].ToArray2D <double>(),
                totalIdx.Length,
                nonZeroValueCols.Length
                );
            int[] labels = sampledSet.GetColumn <int>("attack_category").ValuesAll.ToArray();

            // The PCA is learned from the category-0 sample only, then applied to the mixed sample
            var pca = new PrincipalComponentAnalysis(PrincipalComponentMethod.Standardize);
            pca.Learn(normalData);

            double[][] transformed = pca.Transform(wholeData);

            // One scatter plot per pair of consecutive components, starting with components 1 and 2
            string[] scatterTitles =
            {
                "Component #1 vs. Component #2",
                "Component #2 vs. Component #3",
                "Component #3 vs. Component #4",
                "Component #4 vs. Component #5",
                "Component #5 vs. Component #6"
            };

            for (int t = 0; t < scatterTitles.Length; t++)
            {
                int firstComponent = t;
                double[][] componentPair = transformed
                                           .Select(row => row.Skip(firstComponent).Take(2).ToArray())
                                           .ToArray();
                ScatterplotBox.Show(scatterTitles[t], componentPair, labels);
            }

            // Cumulative explained variance, keeping only the values below 1
            double[] explainedVariance = pca.Components
                                         .Select(x => x.CumulativeProportion)
                                         .Where(x => x < 1)
                                         .ToArray();

            DataSeriesBox.Show(
                explainedVariance.Select((x, i) => (double)i),
                explainedVariance
                ).SetTitle("Explained Variance");

            string explainedVariancePath = Path.Combine(dataDirPath, "explained-variance.csv");
            System.IO.File.WriteAllLines(
                explainedVariancePath,
                explainedVariance.Select((x, i) => String.Format("{0},{1:0.0000}", i, x))
                );

            Console.WriteLine("* Exporting pca-transformed feature set...");

            string transformedPath = Path.Combine(dataDirPath, "pca-transformed-features.csv");
            System.IO.File.WriteAllLines(transformedPath, transformed.Select(x => String.Join(",", x)));

            string labelsPath = Path.Combine(dataDirPath, "pca-transformed-labels.csv");
            System.IO.File.WriteAllLines(labelsPath, labels.Select(x => x.ToString()));


            Console.WriteLine("\n\n\n\n\nDONE!!!");
            Console.ReadKey();
        }
예제 #4
0
        /// <summary>
        /// Derives technical-indicator features from "eurusd-daily-ohlc.csv": moving averages,
        /// distances from them, Bollinger bands and lagged copies of selected columns.
        /// Two line charts are shown along the way, rows with missing values are dropped,
        /// and the result is saved as "eurusd-features.csv".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 69);

            // Location of the OHLC dataset
            // TODO: change the path to point to your data directory
            string dataDirPath  = @"<path-to-your-dir>";
            string ohlcDataPath = Path.Combine(dataDirPath, "eurusd-daily-ohlc.csv");

            Console.WriteLine("Loading {0}", ohlcDataPath);
            var ohlcDF = Frame.ReadCsv(ohlcDataPath, hasHeaders: true, inferTypes: true);

            // 1. Moving averages of the close price over 10, 20, 50 and 200 rows
            int[] maPeriods = { 10, 20, 50, 200 };

            foreach (int period in maPeriods)
            {
                int windowSize = period;
                ohlcDF.AddColumn(
                    period + "_MA",
                    ohlcDF.Window(windowSize).Select(x => x.Value["Close"].Mean())
                    );
            }

            // Line chart of close prices & moving averages for row keys strictly between 4400 and 4900
            var maChartRows = ohlcDF.Where(x => x.Key > 4400 && x.Key < 4900);
            var maLineChart = DataSeriesBox.Show(
                maChartRows.RowKeys.Select(x => (double)x),
                maChartRows.GetColumn <double>("Close").ValuesAll,
                maChartRows.GetColumn <double>("10_MA").ValuesAll,
                maChartRows.GetColumn <double>("20_MA").ValuesAll,
                maChartRows.GetColumn <double>("50_MA").ValuesAll,
                maChartRows.GetColumn <double>("200_MA").ValuesAll
                );

            // Wait three seconds, then resize the chart window through Invoke
            System.Threading.Thread.Sleep(3000);
            maLineChart.Invoke(
                new Action(() => { maLineChart.Size = new System.Drawing.Size(900, 700); })
                );

            // Distance of the close price from each moving average
            foreach (int period in maPeriods)
            {
                string maColumn = period + "_MA";
                ohlcDF.AddColumn("Close_minus_" + maColumn, ohlcDF["Close"] - ohlcDF[maColumn]);
            }

            // 2. Bollinger bands: 20-row moving average plus/minus two 20-row standard deviations
            ohlcDF.AddColumn("20_day_std", ohlcDF.Window(20).Select(x => x.Value["Close"].StdDev()));

            var twoStdDev = ohlcDF["20_day_std"] * 2;
            ohlcDF.AddColumn("BollingerUpperBound", ohlcDF["20_MA"] + twoStdDev);
            ohlcDF.AddColumn("BollingerLowerBound", ohlcDF["20_MA"] - twoStdDev);

            // Line chart of close prices & Bollinger bands over the same row-key range
            var bbChartRows = ohlcDF.Where(x => x.Key > 4400 && x.Key < 4900);
            var bbLineChart = DataSeriesBox.Show(
                bbChartRows.RowKeys.Select(x => (double)x),
                bbChartRows.GetColumn <double>("Close").ValuesAll,
                bbChartRows.GetColumn <double>("BollingerUpperBound").ValuesAll,
                bbChartRows.GetColumn <double>("20_MA").ValuesAll,
                bbChartRows.GetColumn <double>("BollingerLowerBound").ValuesAll
                );

            System.Threading.Thread.Sleep(3000);
            bbLineChart.Invoke(
                new Action(() => { bbLineChart.Size = new System.Drawing.Size(900, 700); })
                );

            // Distance of the close price from each Bollinger band
            foreach (string bound in new string[] { "BollingerUpperBound", "BollingerLowerBound" })
            {
                ohlcDF.AddColumn("Close_minus_" + bound, ohlcDF["Close"] - ohlcDF[bound]);
            }

            // 3. Lagging variables: for each source column, copies shifted by 1 to 5 rows,
            //    named "<column>_T-<lag>"
            string[] laggedColumns =
            {
                "DailyReturn",
                "Close_minus_10_MA",
                "Close_minus_20_MA",
                "Close_minus_50_MA",
                "Close_minus_200_MA",
                "Close_minus_BollingerUpperBound"
            };

            foreach (string sourceColumn in laggedColumns)
            {
                for (int lag = 1; lag <= 5; lag++)
                {
                    ohlcDF.AddColumn(sourceColumn + "_T-" + lag, ohlcDF[sourceColumn].Shift(lag));
                }
            }

            Console.WriteLine("Saving features DF into a CSV file...");

            // Report the frame shape before and after dropping rows that contain missing values
            Console.WriteLine("\n\nDF Shape BEFORE Dropping Missing Values: ({0}, {1})", ohlcDF.RowCount, ohlcDF.ColumnCount);
            ohlcDF = ohlcDF.DropSparseRows();
            Console.WriteLine("\nDF Shape AFTER Dropping Missing Values: ({0}, {1})\n\n", ohlcDF.RowCount, ohlcDF.ColumnCount);

            string featuresPath = Path.Combine(dataDirPath, "eurusd-features.csv");
            ohlcDF.SaveCsv(featuresPath);

            Console.WriteLine("\nDONE!!!");
            Console.ReadKey();
        }
        /// <summary>
        /// Loads "creditcard.csv", fits a standardized PCA on the rows whose "Class" is 0,
        /// projects every row through it, plots consecutive component pairs coloured by
        /// class, and exports the explained variance and the transformed features as CSV files.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.SetWindowSize(100, 60);

            // Location of the Credit Card Fraud dataset
            // TODO: change the path to point to your data directory
            string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.10\input-data";
            string dataPath    = Path.Combine(dataDirPath, "creditcard.csv");

            Console.WriteLine("Loading {0}\n\n", dataPath);
            var df = Frame.ReadCsv(dataPath, hasHeaders: true, inferTypes: true);

            Console.WriteLine("* Shape: {0}, {1}\n\n", df.RowCount, df.ColumnCount);

            // Every column except "Time" and "Class" is used as a feature
            string[] featureCols = df.ColumnKeys
                                   .Where(name => !(name.Equals("Time") || name.Equals("Class")))
                                   .ToArray();

            // Feature columns of the rows whose "Class" value is 0
            var noFraudKeys = df["Class"].Where(x => x.Value == 0.0).Keys;
            var noFraudData = df.Rows[noFraudKeys].Columns[featureCols];

            double[][] data = BuildJaggedArray(
                noFraudData.ToArray2D <double>(),
                noFraudData.RowCount,
                featureCols.Length
                );

            // Feature matrix and class labels of all rows
            double[][] wholeData = BuildJaggedArray(
                df.Columns[featureCols].ToArray2D <double>(),
                df.RowCount,
                featureCols.Length
                );
            int[] labels = df.GetColumn <int>("Class").ValuesAll.ToArray();

            // The PCA is learned from the class-0 rows only, then applied to every row
            var pca = new PrincipalComponentAnalysis(PrincipalComponentMethod.Standardize);
            pca.Learn(data);

            double[][] transformed = pca.Transform(wholeData);

            // One scatter plot per pair of consecutive components, starting with components 1 and 2
            string[] scatterTitles =
            {
                "Component #1 vs. Component #2",
                "Component #2 vs. Component #3",
                "Component #3 vs. Component #4",
                "Component #4 vs. Component #5"
            };

            for (int t = 0; t < scatterTitles.Length; t++)
            {
                int firstComponent = t;
                double[][] componentPair = transformed
                                           .Select(row => row.Skip(firstComponent).Take(2).ToArray())
                                           .ToArray();
                ScatterplotBox.Show(scatterTitles[t], componentPair, labels);
            }

            // Cumulative explained variance per component: chart and CSV export
            DataSeriesBox.Show(
                pca.Components.Select((x, i) => (double)i),
                pca.Components.Select(x => x.CumulativeProportion)
                ).SetTitle("Explained Variance");

            string explainedVariancePath = Path.Combine(dataDirPath, "explained-variance.csv");
            System.IO.File.WriteAllLines(
                explainedVariancePath,
                pca.Components.Select((x, i) => String.Format("{0},{1:0.0000}", i + 1, x.CumulativeProportion))
                );

            Console.WriteLine("exporting train set...");

            // Each output line holds the transformed components followed by the row's class label
            string featuresPath = Path.Combine(dataDirPath, "pca-features.csv");
            System.IO.File.WriteAllLines(
                featuresPath,
                transformed.Select((x, i) => String.Format("{0},{1}", String.Join(",", x), labels[i]))
                );


            Console.WriteLine("\n\n\n\n\nDONE!!!");
            Console.ReadKey();
        }