/// <summary>
/// Switches the UI to the image-processing step: enables the start button,
/// clears the drawing canvas, rebuilds the index-view image and fills the
/// right-bottom panel with a freshly created histogram box.
/// </summary>
/// <remarks>
/// The histogram range is read from <c>nudHistDW</c>/<c>nudHistUP</c> only when
/// <c>ckbSetHistRange</c> is checked; otherwise the range 0..255 is used.
/// <c>float.Parse</c> still throws if those text boxes hold non-numeric text.
/// </remarks>
void ChangeFront2ImgProcStep()
{
    btnStartProcssing.IsEnabled = true;
    Removeevent(CornerCanvs);
    ClearLRFrame();

    // Delete every rectangle (child element) currently on the main canvas.
    canvas.Children.Clear();

    titleRB.Text = "Histogram";
    titleLT.Text = "Indexing View";

    // IsChecked is nullable; "== true" treats an indeterminate state as unchecked
    // instead of throwing.
    Core.CreateEstedChipFunc(
        Core.PData.CornerPos_Img,
        ckbEst4Pos.IsChecked == true ? EstChipPosMode.With4Point : EstChipPosMode.With2Point);

    Core.IndexViewImg = new Image<Bgr, byte>(Core.PData.ChipWNum, Core.PData.ChipHNum);
    Core.IndexViewImg.Data = MatPattern(Core.PData.ChipHNum, Core.PData.ChipWNum, 3);
    imgIndex.ImageSource = BitmapSrcConvert.ToBitmapSource(Core.IndexViewImg);
    imgRB.ImageSource = null;

    WinHost = CreateWinHost(canvasLT);
    HistoBox = new HistogramBox();
    canvasRB.Children.Clear();

    bool useCustomRange = ckbSetHistRange.IsChecked == true;
    AddHist2Box(
        HistoBox,
        ref HistogramList,
        HistoFromImage(Core.OriginImg, Core.BinSize),
        useCustomRange ? float.Parse(nudHistDW.Text) : 0,
        useCustomRange ? float.Parse(nudHistUP.Text) : 255);
    HistoBox.Refresh();

    // Host the histogram control and show it in the right-bottom canvas.
    WinHost.Child = HistoBox;
    canvasRB.Children.Add(WinHost);
}
/// <summary>
/// Generates 100 samples from <c>NormalDistribution.Standard</c>, shows them
/// in a histogram box and calls <c>Hold()</c> on the returned box.
/// </summary>
public void HistogramBox_ShowTest1()
{
    const int sampleCount = 100;

    // Draw the samples from the standard normal distribution.
    double[] samples = NormalDistribution.Standard.Generate(sampleCount);

    var histogramWindow = HistogramBox.Show(samples);
    histogramWindow.Hold();
}
/// <summary>
/// Switches the UI to the image-processing step: enables the start button,
/// clears the drawing canvas, recomputes the estimated chip positions from the
/// start/end image positions, rebuilds the index-view image and places an
/// empty histogram box in the right-bottom panel.
/// </summary>
void ChangeFront2ImgProcStep()
{
    btnStartProcssing.IsEnabled = true;
    Removeevent(canvasLT, canvasRB);
    ClearLRFrame();

    // Delete every rectangle (child element) currently on the main canvas.
    canvas.Children.Clear();

    titleRB.Text = "Histogram";
    titleLT.Text = "Indexing View";

    // Arguments: extent along index 1, extent along index 0, then the start
    // offsets in the same order.
    Core.EstedChipPos = ImgPFunc.FnCreateEstedChipPos(
        Core.PData.EndImgPos[1] - Core.PData.StrImgPos[1],
        Core.PData.EndImgPos[0] - Core.PData.StrImgPos[0],
        Core.PData.StrImgPos[1],
        Core.PData.StrImgPos[0]);

    Core.IndexViewImg = new Image<Bgr, byte>(Core.PData.ChipWNum, Core.PData.ChipHNum);
    Core.IndexViewImg.Data = MatPattern(Core.PData.ChipHNum, Core.PData.ChipWNum, 3);
    imgLT.ImageSource = BitmapSourceConvert.ToBitmapSource(Core.IndexViewImg);
    imgRB.ImageSource = null;

    // Disabled experiment kept from the original:
    //var passfun = curry(HistoFromImage(Core.BinSize))(Core.OriginImg);

    // Passing Form
    WinHost = CreateWinHost(canvasLT);
    HistoBox = new HistogramBox();
    canvasRB.Children.Clear();
    WinHost.Child = HistoBox;
    canvasRB.Children.Add(WinHost);
}
/// <summary>
/// Computes a 256-bin histogram (range 0..255) of one plane of
/// <paramref name="image"/> and shows it as the only histogram in
/// <paramref name="histogramBox"/>.
/// </summary>
/// <param name="image">Source BGR image.</param>
/// <param name="color">
/// Selects the plane: <c>Color.Black</c> = grayscale conversion,
/// <c>Color.Blue</c>/<c>Color.Green</c>/<c>Color.Red</c> = the matching channel.
/// Any other color makes the method return without touching the box.
/// </param>
/// <param name="histogramBox">Box that is cleared, filled and refreshed.</param>
private void AddHistogram(Image<Bgr, byte> image, Color color, HistogramBox histogramBox)
{
    // Channel order of a Bgr image is 0 = blue, 1 = green, 2 = red.
    Image<Gray, byte> plane;
    if (color == Color.Black)
    {
        plane = image.Convert<Gray, byte>();
    }
    else if (color == Color.Blue)
    {
        plane = image[0];
    }
    else if (color == Color.Green)
    {
        plane = image[1];
    }
    else if (color == Color.Red)
    {
        plane = image[2];
    }
    else
    {
        // Unsupported color: nothing to draw.
        return;
    }

    // The plane, the histogram and the Mat are all temporaries that wrap native
    // memory, so release them deterministically.
    using (plane)
    using (DenseHistogram histogram = new DenseHistogram(256, new RangeF(0f, 255f)))
    using (Mat mat = new Mat())
    {
        histogram.Calculate(new Image<Gray, byte>[] { plane }, false, null);
        histogram.CopyTo(mat);

        histogramBox.ClearHistogram();
        // NOTE(review): assumes AddHistogram copies the bin values out of mat
        // rather than keeping a reference to it - confirm for the Emgu CV version in use.
        histogramBox.AddHistogram(color.ToString(), color, mat, 256, new float[] { 0f, 255f });
        histogramBox.Refresh();
    }
}
/// <summary>
/// Builds the histogram array by invoking <paramref name="createhist"/> with the
/// requested range, stores it in the caller's variable, and adds every non-null
/// entry to <paramref name="box"/> in black.
/// </summary>
/// <param name="box">Histogram box that receives the histograms.</param>
/// <param name="histogramArr">Receives the array returned by <paramref name="createhist"/>.</param>
/// <param name="createhist">Callable invoked as <c>createhist(dw, up)</c>.</param>
/// <param name="dw">Lower bound of the histogram range.</param>
/// <param name="up">Upper bound of the histogram range.</param>
void AddHist2Box(HistogramBox box, ref DenseHistogram[] histogramArr, dynamic createhist, float dw, float up)
{
    histogramArr = createhist(dw, up);

    foreach (DenseHistogram histogram in histogramArr)
    {
        // Empty slots are skipped.
        if (histogram == null)
        {
            continue;
        }

        box.AddHistogram(null, System.Drawing.Color.Black, histogram, Core.BinSize, new float[] { dw, up });
    }
}
/// <summary>
/// Builds the histogram array by invoking <paramref name="createhist"/> and adds
/// every non-null entry to <paramref name="box"/> in black. Entry 0 is labelled
/// "Intensity", all others "Size".
/// </summary>
/// <param name="box">Histogram box that receives the histograms.</param>
/// <param name="histogramArr">
/// Overwritten locally with the result of <paramref name="createhist"/>. The
/// parameter is passed by value, so the caller's variable is NOT updated.
/// </param>
/// <param name="createhist">Callable invoked as <c>createhist()</c>.</param>
void AddHist2Box(HistogramBox box, DenseHistogram[] histogramArr, dynamic createhist)
{
    histogramArr = createhist();

    // Upper range bound: chip height * chip width * 255.
    float histmax = (float)(Core.PData.ChipHSize * Core.PData.ChipWSize * 255);

    for (int i = 0; i < histogramArr.Length; i++)
    {
        if (histogramArr[i] == null)
        {
            continue;
        }

        string label = i == 0 ? "Intensity" : "Size";
        box.AddHistogram(label, System.Drawing.Color.Black, histogramArr[i], Core.BinSize, new float[] { 0, histmax });
    }
}
/// <summary>
/// Builds the test form: reads a 16-bit test image, shows it in a
/// <c>Pi2PictureBox</c>, and attaches a <c>HistogramBox</c> to that picture box.
/// </summary>
public PictureBoxTestForm()
{
    InitializeComponent();

    // Read the test image through a new Pi2 instance.
    pi = new Pi2();
    Pi2Image sourceImage = pi.NewImage(ImageDataType.UInt16);
    pi.Read(sourceImage, "../../testing/uint16.png");

    // Picture box showing the image; the last constructor argument is half of
    // the UInt16 maximum (integer division).
    box = new Pi2PictureBox(sourceImage, 2, 0, ushort.MaxValue / 2);
    box.AutoResize = true;
    box.Location = new Point(50, 200);
    Controls.Add(box);

    // Histogram control bound to the picture box.
    hist = new HistogramBox();
    hist.Location = new Point(300, 30);
    hist.PictureBox = box;
    Controls.Add(hist);

    // Initial refresh of both controls.
    box.UpdateImage();
    hist.UpdateHistogram();
}
/// <summary>
/// Click handler of the "select photo" button: loads the chosen image and shows
/// its color, grayscale and binary versions plus the grayscale histogram.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void BtnFotoSec_Click_1(object sender, EventArgs e)
{
    string FotoGetir = DosyaAdiGetir();

    // NOTE(review): presumably DosyaAdiGetir returns null/empty when the user
    // cancels the file selection - confirm. Without this guard the Image
    // constructor below would throw on such a value.
    if (string.IsNullOrEmpty(FotoGetir))
    {
        return;
    }

    // Color image
    Image<Bgr, byte> renkliFoto = new Image<Bgr, byte>(FotoGetir);
    FotoImageBox.Image = renkliFoto;

    // Grayscale image
    Image<Gray, byte> griFoto = GriGetir(renkliFoto);
    GriImageBox.Image = griFoto;

    // Binary image
    Image<Gray, byte> binaryFoto = BinaryGetir(griFoto);
    BinaryImageBox.Image = binaryFoto;

    // Histogram of the grayscale image
    Mat m = HistogramGetir(griFoto);
    HistogramBox.ClearHistogram();
    HistogramBox.AddHistogram("Gri Histogram", Color.Gray, m, 256, new float[] { 0.0f, 256.0f });
    HistogramBox.Refresh();
}
/// <summary>
/// Loads the EUR/USD daily OHLC data set, charts the close prices and daily
/// returns, shows a histogram of the daily returns and prints summary
/// statistics of the daily-return distribution.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.SetWindowSize(100, 50);

    // Read in the OHLC dataset
    // TODO: change the path to point to your data directory
    string dataDirPath = @"<path-to-your-data-dir>";

    // Load the OHLC data into a data frame
    string ohlcDataPath = Path.Combine(dataDirPath, "eurusd-daily-ohlc.csv");
    Console.WriteLine("Loading {0}\n", ohlcDataPath);

    var ohlcDF = Frame.ReadCsv(
        ohlcDataPath,
        hasHeaders: true,
        inferTypes: true
    );

    // Time-series line chart of close prices
    var closePriceLineChart = DataSeriesBox.Show(
        ohlcDF.RowKeys.Select(x => (double)x),
        ohlcDF.GetColumn<double>("Close").ValuesAll
    );

    System.Threading.Thread.Sleep(3000);
    closePriceLineChart.Invoke(
        new Action(() =>
        {
            closePriceLineChart.Size = new System.Drawing.Size(700, 500);
        })
    );

    // Daily returns with missing values replaced by 0.0. Computed once and
    // reused below so the frame is not refilled for every consumer.
    var filledDailyReturns = ohlcDF.FillMissing(0.0)["DailyReturn"];

    // Time-series line chart of daily returns
    var dailyReturnLineChart = DataSeriesBox.Show(
        ohlcDF.RowKeys.Select(x => (double)x),
        filledDailyReturns.ValuesAll
    );

    System.Threading.Thread.Sleep(3000);
    dailyReturnLineChart.Invoke(
        new Action(() =>
        {
            dailyReturnLineChart.Size = new System.Drawing.Size(700, 500);
        })
    );

    // Histogram of daily returns
    var dailyReturnHistogram = HistogramBox
        .Show(filledDailyReturns.ValuesAll.ToArray())
        .SetNumberOfBins(20);

    System.Threading.Thread.Sleep(3000);
    dailyReturnHistogram.Invoke(
        new Action(() =>
        {
            dailyReturnHistogram.Size = new System.Drawing.Size(700, 500);
        })
    );

    // Check the distribution of daily returns.
    // These statistics use the original (unfilled) column, as before.
    var dailyReturns = ohlcDF["DailyReturn"];
    double returnMax = dailyReturns.Max();
    double returnMean = dailyReturns.Mean();
    double returnMedian = dailyReturns.Median();
    double returnMin = dailyReturns.Min();
    double returnStdDev = dailyReturns.StdDev();

    // Quartiles are taken over the zero-filled values.
    double[] quantiles = Accord.Statistics.Measures.Quantiles(
        filledDailyReturns.ValuesAll.ToArray(),
        new double[] { 0.25, 0.5, 0.75 }
    );

    Console.WriteLine("-- DailyReturn Distribution-- ");

    Console.WriteLine("Mean: \t\t\t{0:0.00}\nStdDev: \t\t{1:0.00}\n", returnMean, returnStdDev);

    Console.WriteLine(
        "Min: \t\t\t{0:0.00}\nQ1 (25% Percentile): \t{1:0.00}\nQ2 (Median): \t\t{2:0.00}\nQ3 (75% Percentile): \t{3:0.00}\nMax: \t\t\t{4:0.00}",
        returnMin, quantiles[0], quantiles[1], quantiles[2], returnMax
    );

    Console.WriteLine("\nDONE!!!");
    Console.ReadKey();
}
/// <summary>
/// Exploratory analysis of the credit card fraud data set: prints the target
/// class counts, prints quartiles and shows a histogram for every feature
/// column, then shows scatter plots of consecutive column pairs coloured by
/// class label.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.SetWindowSize(100, 55);

    // Read in the Credit Card Fraud dataset
    // TODO: change the path to point to your data directory
    string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.10\input-data";

    // Load the data into a data frame
    string dataPath = Path.Combine(dataDirPath, "creditcard.csv");
    Console.WriteLine("Loading {0}\n\n", dataPath);
    var df = Frame.ReadCsv(dataPath, hasHeaders: true, inferTypes: true);

    Console.WriteLine("* Shape: {0}, {1}\n\n", df.RowCount, df.ColumnCount);

    // Target variable distribution
    string[] groupByColumns = new string[] { "Class" };
    string[] countedColumns = new string[] { "V1" };
    var targetVarCount = df
        .AggregateRowsBy<string, int>(groupByColumns, countedColumns, x => x.ValueCount)
        .SortRows("V1");
    targetVarCount.RenameColumns(new string[] { "is_fraud", "count" });

    targetVarCount.Print();

    var classCountChart = DataBarBox.Show(
        targetVarCount.GetColumn<string>("is_fraud").Values.ToArray(),
        targetVarCount["count"].Values.ToArray()
    );
    classCountChart.SetTitle("Counts by Target Class");

    // Feature distributions
    HistogramBox.CheckForIllegalCrossThreadCalls = false;

    double[] quartileProbabilities = new double[] { 0, 0.25, 0.5, 0.75, 1.0 };
    foreach (string col in df.ColumnKeys)
    {
        // Only the feature columns are profiled; skip the target and the time column.
        bool isFeatureColumn = col != "Class" && col != "Time";
        if (!isFeatureColumn)
        {
            continue;
        }

        double[] values = df[col].DropMissing().ValuesAll.ToArray();

        // Compute Quartiles
        Console.WriteLine("\n\n-- {0} Distribution -- ", col);
        double[] quartiles = Accord.Statistics.Measures.Quantiles(values, quartileProbabilities);
        Console.WriteLine(
            "Min: \t\t\t{0:0.00}\nQ1 (25% Percentile): \t{1:0.00}\nQ2 (Median): \t\t{2:0.00}\nQ3 (75% Percentile): \t{3:0.00}\nMax: \t\t\t{4:0.00}",
            quartiles[0], quartiles[1], quartiles[2], quartiles[3], quartiles[4]
        );

        // Visualize Distributions
        var featureHistogram = HistogramBox.Show(values, title: col);
        featureHistogram.SetNumberOfBins(50);
    }

    // Target Var Distributions on 2-dimensional feature space
    double[][] data = BuildJaggedArray(
        df.ToArray2D<double>(), df.RowCount, df.ColumnCount
    );
    int[] labels = df.GetColumn<int>("Class").ValuesAll.ToArray();

    // Columns 0-1 of every row
    double[][] first2Components = data.Select(row => row.Take(2).ToArray()).ToArray();
    ScatterplotBox.Show("Feature #1 vs. Feature #2", first2Components, labels);

    // Columns 1-2 of every row
    double[][] second2Components = data.Select(row => row.Skip(1).Take(2).ToArray()).ToArray();
    ScatterplotBox.Show("Feature #2 vs. Feature #3", second2Components, labels);

    // Columns 2-3 of every row
    double[][] third2Components = data.Select(row => row.Skip(2).Take(2).ToArray()).ToArray();
    ScatterplotBox.Show("Feature #3 vs. Feature #4", third2Components, labels);

    Console.WriteLine("\n\n\n\n\nDONE!!!");
    Console.ReadKey();
}
/// <summary>
/// Exploratory analysis of the house price data set: shows bar charts for two
/// categorical and two ordinal variables, then histograms (raw and
/// log-transformed) for first-floor area, garage area and sale price.
/// Each chart is resized three seconds after it is shown.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.SetWindowSize(100, 50);

    // Read in the House Price dataset
    // TODO: change the path to point to your data directory
    string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.5\input-data";

    // Load the data into a data frame
    string dataPath = Path.Combine(dataDirPath, "train.csv");
    Console.WriteLine("Loading {0}\n", dataPath);
    var houseDF = Frame.ReadCsv(
        dataPath,
        hasHeaders: true,
        inferTypes: true
    );

    // Categorical Variable #1: Building Type
    Console.WriteLine("\nCategorical Variable #1: Building Type");
    var buildingTypeDistribution = houseDF.GetColumn<string>(
        "BldgType"
    ).GroupBy<string>(x => x.Value).Select(x => (double)x.Value.KeyCount);
    buildingTypeDistribution.Print();

    var buildingTypeBarChart = DataBarBox.Show(
        buildingTypeDistribution.Keys.ToArray(),
        buildingTypeDistribution.Values.ToArray()
    );
    buildingTypeBarChart.SetTitle("Building Type Distribution (Categorical)");
    System.Threading.Thread.Sleep(3000);
    buildingTypeBarChart.Invoke(
        new Action(() =>
        {
            buildingTypeBarChart.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Categorical Variable #2: Lot Configuration
    // (the heading previously repeated the Building Type text by mistake)
    Console.WriteLine("\nCategorical Variable #2: Lot Configuration");
    var lotConfigDistribution = houseDF.GetColumn<string>(
        "LotConfig"
    ).GroupBy<string>(x => x.Value).Select(x => (double)x.Value.KeyCount);
    lotConfigDistribution.Print();

    var lotConfigBarChart = DataBarBox.Show(
        lotConfigDistribution.Keys.ToArray(),
        lotConfigDistribution.Values.ToArray()
    );
    lotConfigBarChart.SetTitle("Lot Configuration Distribution (Categorical)");
    System.Threading.Thread.Sleep(3000);
    lotConfigBarChart.Invoke(
        new Action(() =>
        {
            lotConfigBarChart.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Ordinal Categorical Variable #1: Overall material and finish of the house
    Console.WriteLine("\nOrdinal Categorical #1: Overall material and finish of the house");
    var overallQualDistribution = houseDF.GetColumn<string>(
        "OverallQual"
    ).GroupBy<int>(
        x => Convert.ToInt32(x.Value)
    ).Select(
        x => (double)x.Value.KeyCount
    ).SortByKey().Reversed;
    overallQualDistribution.Print();

    var overallQualBarChart = DataBarBox.Show(
        overallQualDistribution.Keys.Select(x => x.ToString()),
        overallQualDistribution.Values.ToArray()
    );
    overallQualBarChart.SetTitle("Overall House Quality Distribution (Ordinal)");
    System.Threading.Thread.Sleep(3000);
    overallQualBarChart.Invoke(
        new Action(() =>
        {
            overallQualBarChart.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Ordinal Categorical Variable #2: Exterior Quality
    Console.WriteLine("\nOrdinal Categorical #2: Exterior Quality");
    var exteriorQualDistribution = houseDF.GetColumn<string>(
        "ExterQual"
    ).GroupBy<string>(x => x.Value).Select(
        x => (double)x.Value.KeyCount
    )[new string[] { "Ex", "Gd", "TA", "Fa" }];
    exteriorQualDistribution.Print();

    var exteriorQualBarChart = DataBarBox.Show(
        exteriorQualDistribution.Keys.Select(x => x.ToString()),
        exteriorQualDistribution.Values.ToArray()
    );
    exteriorQualBarChart.SetTitle("Exterior Quality Distribution (Ordinal)");
    System.Threading.Thread.Sleep(3000);
    exteriorQualBarChart.Invoke(
        new Action(() =>
        {
            exteriorQualBarChart.Size = new System.Drawing.Size(1000, 700);
        })
    );

    HistogramBox.CheckForIllegalCrossThreadCalls = false;

    // Rows without missing values. houseDF is not modified below, so the
    // filtered frame is computed once and shared by all histograms.
    var denseHouseDF = houseDF.DropSparseRows();

    // Continuous Variable #1-1: First Floor Square Feet
    var firstFloorHistogram = HistogramBox
        .Show(
            denseHouseDF["1stFlrSF"].ValuesAll.ToArray(),
            title: "First Floor Square Feet (Continuous)"
        )
        .SetNumberOfBins(20);
    System.Threading.Thread.Sleep(3000);
    firstFloorHistogram.Invoke(
        new Action(() =>
        {
            firstFloorHistogram.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Continuous Variable #1-2: Log of First Floor Square Feet
    var logFirstFloorHistogram = HistogramBox
        .Show(
            denseHouseDF["1stFlrSF"].Log().ValuesAll.ToArray(),
            title: "First Floor Square Feet - Log Transformed (Continuous)"
        )
        .SetNumberOfBins(20);
    System.Threading.Thread.Sleep(3000);
    logFirstFloorHistogram.Invoke(
        new Action(() =>
        {
            logFirstFloorHistogram.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Continuous Variable #2-1: Size of garage in square feet
    var garageHistogram = HistogramBox
        .Show(
            denseHouseDF["GarageArea"].ValuesAll.ToArray(),
            title: "Size of garage in square feet (Continuous)"
        )
        .SetNumberOfBins(20);
    System.Threading.Thread.Sleep(3000);
    garageHistogram.Invoke(
        new Action(() =>
        {
            garageHistogram.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Continuous Variable #2-2: Log of size of garage in square feet
    var logGarageHistogram = HistogramBox
        .Show(
            denseHouseDF["GarageArea"].Log().ValuesAll.ToArray(),
            title: "Size of garage in square feet - Log Transformed (Continuous)"
        )
        .SetNumberOfBins(20);
    System.Threading.Thread.Sleep(3000);
    logGarageHistogram.Invoke(
        new Action(() =>
        {
            logGarageHistogram.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Target Variable: Sale Price
    var salePriceHistogram = HistogramBox
        .Show(
            denseHouseDF["SalePrice"].ValuesAll.ToArray(),
            title: "Sale Price (Continuous)"
        )
        .SetNumberOfBins(20);
    System.Threading.Thread.Sleep(3000);
    salePriceHistogram.Invoke(
        new Action(() =>
        {
            salePriceHistogram.Size = new System.Drawing.Size(1000, 700);
        })
    );

    // Target Variable: Sale Price - Log Transformed
    var logSalePriceHistogram = HistogramBox
        .Show(
            denseHouseDF["SalePrice"].Log().ValuesAll.ToArray(),
            title: "Sale Price - Log Transformed (Continuous)"
        )
        .SetNumberOfBins(20);
    System.Threading.Thread.Sleep(3000);
    logSalePriceHistogram.Invoke(
        new Action(() =>
        {
            logSalePriceHistogram.Size = new System.Drawing.Size(1000, 700);
        })
    );

    Console.WriteLine("\nDONE!!!");
    Console.ReadKey();
}
/// <summary>
/// Feature engineering for the online retail data set: aggregates per-customer
/// features, drops customers with negative revenue/quantity or too many
/// cancellations, adds percentile-rank features, shows a histogram for every
/// feature and writes the result to <c>features.csv</c> in the data directory.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.SetWindowSize(100, 50);

    // Read in the Online Retail dataset
    // TODO: change the path to point to your data directory
    string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.6\input-data";

    // Load the data into a data frame
    string dataPath = Path.Combine(dataDirPath, "data-clean.csv");
    Console.WriteLine("Loading {0}\n\n", dataPath);
    var ecommerceDF = Frame.ReadCsv(
        dataPath,
        hasHeaders: true,
        inferTypes: true
    );
    Console.WriteLine("* Shape: {0}, {1}\n\n", ecommerceDF.RowCount, ecommerceDF.ColumnCount);

    // 1. Net Revenue per Customer
    var revPerCustomerDF = ecommerceDF.AggregateRowsBy<double, double>(
        new string[] { "CustomerID" },
        new string[] { "Amount" },
        x => x.Sum()
    );
    // 2. # of Total Transactions per Customer
    var numTransactionsPerCustomerDF = ecommerceDF.AggregateRowsBy<double, double>(
        new string[] { "CustomerID" },
        new string[] { "Quantity" },
        x => x.ValueCount
    );
    // 3. # of Cancelled Transactions per Customer
    var numCancelledPerCustomerDF = ecommerceDF.AggregateRowsBy<double, double>(
        new string[] { "CustomerID" },
        new string[] { "Quantity" },
        x => x.Select(y => y.Value >= 0 ? 0.0 : 1.0).Sum()
    );
    // 4. Average UnitPrice per Customer
    var avgUnitPricePerCustomerDF = ecommerceDF.AggregateRowsBy<double, double>(
        new string[] { "CustomerID" },
        new string[] { "UnitPrice" },
        x => x.Sum() / x.ValueCount
    );
    // 5. Average Quantity per Customer
    var avgQuantityPerCustomerDF = ecommerceDF.AggregateRowsBy<double, double>(
        new string[] { "CustomerID" },
        new string[] { "Quantity" },
        x => x.Sum() / x.ValueCount
    );

    // Aggregate all results
    var featuresDF = Frame.CreateEmpty<int, string>();
    featuresDF.AddColumn("CustomerID", revPerCustomerDF.GetColumn<double>("CustomerID"));
    featuresDF.AddColumn("Description", ecommerceDF.GetColumn<string>("Description"));
    featuresDF.AddColumn("NetRevenue", revPerCustomerDF.GetColumn<double>("Amount"));
    featuresDF.AddColumn("NumTransactions", numTransactionsPerCustomerDF.GetColumn<double>("Quantity"));
    featuresDF.AddColumn("NumCancelled", numCancelledPerCustomerDF.GetColumn<double>("Quantity"));
    featuresDF.AddColumn("AvgUnitPrice", avgUnitPricePerCustomerDF.GetColumn<double>("UnitPrice"));
    featuresDF.AddColumn("AvgQuantity", avgQuantityPerCustomerDF.GetColumn<double>("Quantity"));
    featuresDF.AddColumn("PercentageCancelled", featuresDF["NumCancelled"] / featuresDF["NumTransactions"]);

    Console.WriteLine("\n\n* Feature Set:");
    featuresDF.Print();

    // Features that are profiled, plotted and ranked below, in display order.
    string[] featureNames = new string[]
    {
        "NetRevenue", "NumTransactions", "AvgUnitPrice", "AvgQuantity", "PercentageCancelled"
    };

    // Feature distributions before filtering
    foreach (string feature in featureNames)
    {
        PrintQuartiles(featuresDF, feature);
    }
    Console.WriteLine("\n\n* Feature DF Shape: ({0}, {1})", featuresDF.RowCount, featuresDF.ColumnCount);

    // 1. Drop Customers with Negative NetRevenue
    featuresDF = featuresDF.Rows[
        featuresDF["NetRevenue"].Where(x => x.Value >= 0.0).Keys
    ];
    // 2. Drop Customers with Negative AvgQuantity
    featuresDF = featuresDF.Rows[
        featuresDF["AvgQuantity"].Where(x => x.Value >= 0.0).Keys
    ];
    // 3. Drop Customers who have more cancel orders than purchase orders
    featuresDF = featuresDF.Rows[
        featuresDF["PercentageCancelled"].Where(x => x.Value < 0.5).Keys
    ];

    Console.WriteLine("\n\n\n\n* After dropping customers with potential orphan cancel orders:");
    // Feature distributions after filtering
    foreach (string feature in featureNames)
    {
        PrintQuartiles(featuresDF, feature);
    }
    Console.WriteLine("\n\n* Feature DF Shape: ({0}, {1})", featuresDF.RowCount, featuresDF.ColumnCount);

    HistogramBox.CheckForIllegalCrossThreadCalls = false;

    // Histograms of the raw features. The frame is not modified inside the
    // loop, so the sparse-row filter is applied once.
    var denseFeaturesDF = featuresDF.DropSparseRows();
    foreach (string feature in featureNames)
    {
        HistogramBox
            .Show(
                denseFeaturesDF[feature].ValuesAll.ToArray(),
                title: feature + " Distribution"
            )
            .SetNumberOfBins(50);
    }

    // Create Percentile Features
    foreach (string feature in featureNames)
    {
        // Materialise the column once instead of once per row inside the lambda.
        // NOTE(review): assumes StatsFunctions.PercentileRank does not depend on
        // receiving a fresh copy of the array on every call - confirm.
        double[] featureValues = featuresDF[feature].Values.ToArray();
        featuresDF.AddColumn(
            feature + "Percentile",
            featuresDF[feature].Select(
                x => StatsFunctions.PercentileRank(featureValues, x.Value)
            )
        );
    }
    Console.WriteLine("\n\n\n* Percentile Features:");
    featuresDF.Columns[
        new string[] { "NetRevenue", "NetRevenuePercentile", "NumTransactions", "NumTransactionsPercentile" }
    ].Print();

    // Histograms of the percentile features (filter re-applied because the
    // percentile columns were added after the previous filter).
    var densePercentileDF = featuresDF.DropSparseRows();
    foreach (string feature in featureNames)
    {
        string percentileColumn = feature + "Percentile";
        HistogramBox
            .Show(
                densePercentileDF[percentileColumn].ValuesAll.ToArray(),
                title: percentileColumn + " Distribution"
            )
            .SetNumberOfBins(50);
    }

    string outputPath = Path.Combine(dataDirPath, "features.csv");
    Console.WriteLine("* Exporting features data: {0}", outputPath);
    featuresDF.SaveCsv(outputPath);

    Console.WriteLine("\n\n\n\nDONE!!");
    Console.ReadKey();
}
/// <summary>
/// Anomaly detection on the PCA-transformed cyber attack data set: computes
/// distances of normal events using the per-component variances of the normal
/// data, derives thresholds for 5-10% target false alarm rates from the
/// distance distribution, and evaluates detection of attack events at each
/// threshold.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.SetWindowSize(100, 60);

    // Read in the Cyber Attack dataset
    // TODO: change the path to point to your data directory
    string dataDirPath = @"\\Mac\Home\Documents\c-sharp-machine-learning\ch.9\input-data";

    // Load the data into a data frame
    string dataPath = Path.Combine(dataDirPath, "pca-transformed-features.csv");
    Console.WriteLine("Loading {0}\n\n", dataPath);
    var featuresDF = Frame.ReadCsv(
        dataPath,
        hasHeaders: false,
        inferTypes: true
    );
    featuresDF.RenameColumns(
        featuresDF.ColumnKeys.Select((x, i) => String.Format("component-{0}", i + 1))
    );

    int[] labels = File.ReadLines(
        Path.Combine(dataDirPath, "pca-transformed-labels.csv")
    ).Select(x => int.Parse(x)).ToArray();
    featuresDF.AddColumn("attack_category", labels);

    Console.WriteLine("* Shape: ({0}, {1})\n\n", featuresDF.RowCount, featuresDF.ColumnCount);

    var count = featuresDF.AggregateRowsBy<string, int>(
        new string[] { "attack_category" },
        new string[] { "component-1" },
        x => x.ValueCount
    ).SortRows("component-1");
    count.RenameColumns(new string[] { "attack_category", "count" });
    count.Print();

    // First 13 components explain about 50% of the variance
    // First 19 components explain about 60% of the variance
    // First 27 components explain about 70% of the variance
    // First 34 components explain about 80% of the variance
    int numComponents = 27;
    string[] cols = featuresDF.ColumnKeys.Where((x, i) => i < numComponents).ToArray();

    // First, compute distances from the center/mean among normal events
    var normalDF = featuresDF.Rows[
        featuresDF["attack_category"].Where(x => x.Value == 0).Keys
    ].Columns[cols];

    double[][] normalData = BuildJaggedArray(
        normalDF.ToArray2D<double>(), normalDF.RowCount, cols.Length
    );
    double[] normalVariances = ComputeVariances(normalData);
    double[] rawDistances = ComputeDistances(normalData, normalVariances);

    // Filter out extreme values: keep the indices of the smallest 99% of distances.
    int[] idxFiltered = Matrix.ArgSort(rawDistances)
        .Where((x, i) => i < rawDistances.Length * 0.99).ToArray();

    // A hash set gives O(1) membership tests; Contains on the array made this
    // filter quadratic in the number of normal events. Original order is kept.
    var keptIndices = new System.Collections.Generic.HashSet<int>(idxFiltered);
    double[] distances = rawDistances.Where((x, i) => keptIndices.Contains(i)).ToArray();

    double meanDistance = distances.Average();
    double stdDistance = Math.Sqrt(
        distances
        .Select(x => Math.Pow(x - meanDistance, 2))
        .Sum() / distances.Length
    );

    Console.WriteLine(
        "\n\n* Normal - mean: {0:0.0000}, std: {1:0.0000}",
        meanDistance, stdDistance
    );

    HistogramBox.CheckForIllegalCrossThreadCalls = false;

    HistogramBox.Show(
        distances,
        title: "Distances"
    )
    .SetNumberOfBins(50);

    // Detection
    // Select the attack rows once and derive both the features and the labels from them.
    var attackRowsDF = featuresDF.Rows[
        featuresDF["attack_category"].Where(x => x.Value > 0).Keys
    ];
    var attackDF = attackRowsDF.Columns[cols];

    double[][] attackData = BuildJaggedArray(
        attackDF.ToArray2D<double>(), attackDF.RowCount, cols.Length
    );
    double[] attackDistances = ComputeDistances(attackData, normalVariances);
    int[] attackLabels = attackRowsDF.GetColumn<int>("attack_category").ValuesAll.ToArray();

    // 5-10% false alarm rate
    for (int i = 4; i < 10; i++)
    {
        double targetFalseAlarmRate = 0.01 * (i + 1);
        double threshold = Accord.Statistics.Measures.Quantile(
            distances,
            1 - targetFalseAlarmRate
        );
        Console.WriteLine(threshold);

        // An event is flagged when its distance exceeds the threshold.
        int[] detected = attackDistances.Select(x => x > threshold ? 1 : 0).ToArray();

        EvaluateResults(attackLabels, detected, targetFalseAlarmRate);
    }

    Console.WriteLine("\n\n\n\n\nDONE!!!");
    Console.ReadKey();
}