Пример #1
0
        /// <summary>
        /// Checks that a null appended after ten integer values is reported as invalid
        /// while every value in front of it is reported as valid.
        /// </summary>
        public void TestValidity()
        {
            var column = new PrimitiveDataFrameColumn <int>("Int1", Enumerable.Range(0, 10).Select(value => value));
            column.Append(null);

            // Index 10 is the slot that holds the appended null.
            Assert.False(column.IsValid(10));

            // Every index before the last one was populated from the range above.
            long index = 0;
            while (index < column.Length - 1)
            {
                Assert.True(column.IsValid(index));
                index++;
            }
        }
        /// <summary>
        /// Builds a ten-row table with a "date" column (1 May 2020 through 10 May 2020)
        /// and a "price" column (day of month plus 10.27).
        /// </summary>
        /// <returns>A new <c>DataFrame</c> containing the two columns, in that order.</returns>
        public DataFrame CreatePriceTable()
        {
            var dateColumn  = new PrimitiveDataFrameColumn <DateTime>("date");
            var priceColumn = new PrimitiveDataFrameColumn <decimal>("price");

            int day = 1;
            while (day <= 10)
            {
                dateColumn.Append(new DateTime(2020, 5, day));
                priceColumn.Append(day + 10.27m);
                day++;
            }

            return new DataFrame(dateColumn, priceColumn);
        }
Пример #3
0
        /// <summary>
        /// Projects every element of <paramref name="column"/> through <paramref name="func"/>
        /// and collects the results in a new, unnamed column.
        /// </summary>
        /// <typeparam name="T">Element type of the source column.</typeparam>
        /// <typeparam name="TResult">Element type of the resulting column.</typeparam>
        /// <param name="column">The column whose values are transformed.</param>
        /// <param name="func">Transformation applied to each non-null value.</param>
        /// <returns>
        /// A new column with one entry per source entry. Null source entries stay null
        /// in the result and <paramref name="func"/> is not invoked for them.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="column"/> or <paramref name="func"/> is null.
        /// </exception>
        public static PrimitiveDataFrameColumn <TResult> Apply <T, TResult>(this PrimitiveDataFrameColumn <T> column,
                                                                            Func <T, TResult> func)
            where T : unmanaged
            where TResult : unmanaged
        {
            if (column == null)
            {
                throw new ArgumentNullException(nameof(column));
            }
            if (func == null)
            {
                throw new ArgumentNullException(nameof(func));
            }

            var resultColumn = new PrimitiveDataFrameColumn <TResult>(string.Empty, 0);

            foreach (T? row in column)
            {
                // Reading .Value on a null entry would throw InvalidOperationException,
                // so nulls are passed through instead of being handed to func.
                if (row.HasValue)
                {
                    resultColumn.Append(func(row.Value));
                }
                else
                {
                    resultColumn.Append(null);
                }
            }

            return(resultColumn);
        }
Пример #4
0
        /// <summary>
        /// Sums the character counts of every value in the "name" column and returns a
        /// frame pairing that total with the first value of the "age" column.
        /// </summary>
        /// <param name="dataFrame">Input frame; its "name" and "age" columns are read.</param>
        /// <returns>
        /// A frame with columns "age" and "nameCharCount". It has one row when the input
        /// has at least one row, and no rows otherwise.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// The input has rows but its "name" column is not an <c>ArrowStringDataFrameColumn</c>.
        /// </exception>
        private static FxDataFrame CountCharacters(FxDataFrame dataFrame)
        {
            int characterCount = 0;

            var characterCountColumn = new PrimitiveDataFrameColumn <int>("nameCharCount");
            var ageColumn            = new PrimitiveDataFrameColumn <int>("age");
            ArrowStringDataFrameColumn fieldColumn = dataFrame["name"] as ArrowStringDataFrameColumn;

            long rowCount = dataFrame.Rows.Count;

            // The "as" cast yields null for any other column type; fail with a clear
            // message instead of a NullReferenceException inside the loop.
            if (fieldColumn == null && rowCount > 0)
            {
                throw new System.InvalidOperationException(
                    "Column \"name\" must be an ArrowStringDataFrameColumn.");
            }

            for (long i = 0; i < rowCount; ++i)
            {
                // A null name contributes zero characters rather than throwing.
                characterCount += fieldColumn[i]?.Length ?? 0;
            }

            if (rowCount > 0)
            {
                characterCountColumn.Append(characterCount);
                ageColumn.Append((int?)dataFrame["age"][0]);
            }

            return(new FxDataFrame(ageColumn, characterCountColumn));
        }
Пример #5
0
        /// <summary>
        /// Exercises <c>NullCount</c> bookkeeping on primitive and string columns across
        /// construction, appends and indexer assignments, then checks that interleaved
        /// null and non-null appends read back in order.
        /// </summary>
        public void TestNullCounts()
        {
            // Ten values from a range plus one appended null.
            var rangeColumn = new PrimitiveDataFrameColumn <int>("Int1", Enumerable.Range(0, 10).Select(value => value));
            rangeColumn.Append(null);
            Assert.Equal(1, rangeColumn.NullCount);

            // An empty primitive column has no nulls.
            var emptyColumn = new PrimitiveDataFrameColumn <int>("Int2");
            Assert.Equal(0, emptyColumn.NullCount);

            // A primitive column created with a length starts out entirely null.
            var presizedColumn = new PrimitiveDataFrameColumn <int>("Int3", 10);
            Assert.Equal(10, presizedColumn.NullCount);

            // Null counts must follow assignments on primitive columns.
            emptyColumn.Append(null);
            emptyColumn.Append(1);
            Assert.Equal(1, emptyColumn.NullCount);

            emptyColumn[1] = 10;
            Assert.Equal(1, emptyColumn.NullCount);

            emptyColumn[1] = null;
            Assert.Equal(2, emptyColumn.NullCount);

            emptyColumn[1] = 5;
            Assert.Equal(1, emptyColumn.NullCount);

            // Overwriting an existing null with null must not change the count.
            emptyColumn[0] = null;
            Assert.Equal(1, emptyColumn.NullCount);

            // Null counts on string columns, first straight after construction.
            var emptyStrings = new StringDataFrameColumn("String", 0);
            Assert.Equal(0, emptyStrings.NullCount);

            var presizedStrings = new StringDataFrameColumn("String1", 5);
            Assert.Equal(0, presizedStrings.NullCount);

            var numberStrings = new StringDataFrameColumn("String", Enumerable.Range(0, 10).Select(value => value.ToString()));
            Assert.Equal(0, numberStrings.NullCount);

            var nullStrings = new StringDataFrameColumn("String", Enumerable.Range(0, 10).Select(value => (string)null));
            Assert.Equal(10, nullStrings.NullCount);

            // ...and then with the same append/assign sequence used for the primitive column.
            emptyStrings.Append(null);
            Assert.Equal(1, emptyStrings.NullCount);

            emptyStrings.Append("foo");
            Assert.Equal(1, emptyStrings.NullCount);

            emptyStrings[1] = "bar";
            Assert.Equal(1, emptyStrings.NullCount);

            emptyStrings[1] = null;
            Assert.Equal(2, emptyStrings.NullCount);

            emptyStrings[1] = "foo";
            Assert.Equal(1, emptyStrings.NullCount);

            emptyStrings[0] = null;
            Assert.Equal(1, emptyStrings.NullCount);

            // Interleaved nulls and values must read back at the positions they were appended.
            var mixedColumn = new PrimitiveDataFrameColumn <int>("Int");
            mixedColumn.Append(0);
            mixedColumn.Append(1);
            mixedColumn.Append(null);
            mixedColumn.Append(2);
            mixedColumn.Append(null);
            mixedColumn.Append(3);

            Assert.Equal(0, mixedColumn[0]);
            Assert.Equal(1, mixedColumn[1]);
            Assert.Null(mixedColumn[2]);
            Assert.Equal(2, mixedColumn[3]);
            Assert.Null(mixedColumn[4]);
            Assert.Equal(3, mixedColumn[5]);
        }
        /// <summary>
        /// Predict/Forecast based on time-series.
        /// </summary>
        /// <remarks>
        /// Steps, in order: loads the confirmed-cases CSV into a data frame and prints its
        /// head, tail and description; builds a chart of confirmed cases over time; loads the
        /// same file through ML.NET, splits it into train and test sets at the 80% row, fits an
        /// SSA forecaster on the training rows, prints MAE and RMSE for the test rows, saves the
        /// time-series engine to <c>MODEL_PATH</c>, prints a table of forecast versus actual
        /// values, and builds two prediction charts. The <c>display(...)</c> calls are
        /// commented out, so the charts are constructed but not rendered here.
        /// </remarks>
        public void Forecast()
        {
            #region Load Data

            var predictedDataFrame = DataFrame.LoadCsv(CONFIRMED_DATASET_FILE);

            #endregion

            #region Display data

            // Top 5 Rows
            var topRows = predictedDataFrame.Head(5);
            Console.WriteLine("------- Head: Top Rows(5) -------");
            topRows.PrettyPrint();

            // Bottom 5 Rows
            var bottomRows = predictedDataFrame.Tail(5);
            Console.WriteLine("------- Tail: Bottom Rows(5) -------");
            bottomRows.PrettyPrint();

            // Description
            var description = predictedDataFrame.Description();
            Console.WriteLine("------- Description -------");
            description.PrettyPrint();

            #endregion

            #region Visualization

            #region Number of Confirmed cases over Time

            // Number of confirmed cases over time
            var totalConfirmedDateColumn = predictedDataFrame.Columns[DATE_COLUMN];
            var totalConfirmedColumn     = predictedDataFrame.Columns[TOTAL_CONFIRMED_COLUMN];

            var dates = new List <DateTime>();
            var totalConfirmedCases = new List <string>();
            for (int index = 0; index < totalConfirmedDateColumn.Length; index++)
            {
                dates.Add(Convert.ToDateTime(totalConfirmedDateColumn[index]));
                totalConfirmedCases.Add(totalConfirmedColumn[index].ToString());
            }

            var title = "Number of Confirmed Cases over Time";
            var confirmedTimeGraph = new Graph.Scattergl()
            {
                x    = dates.ToArray(),
                y    = totalConfirmedCases.ToArray(),
                mode = "lines+markers"
            };

            var chart = Chart.Plot(confirmedTimeGraph);
            chart.WithTitle(title);
            // display(chart);

            #endregion

            #endregion

            #region Prediction

            #region Load Data - ML Context

            var context = new MLContext();
            var data    = context.Data.LoadFromTextFile <ConfirmedData>(CONFIRMED_DATASET_FILE, hasHeader: true, separatorChar: ',');

            #region Split dataset

            // The value found at the 80% row is used as the boundary for both filters
            // below: rows below it form the training set, rows from it upwards the test set.
            var totalRows        = (int)data.GetColumn <float>("TotalConfirmed").ToList().Count;
            int numTrain         = (int)(0.8 * totalRows);
            var confirmedAtSplit = (int)data.GetColumn <float>("TotalConfirmed").ElementAt(numTrain);
            var startingDate     = data.GetColumn <DateTime>("Date").FirstOrDefault();
            var dateAtSplit      = data.GetColumn <DateTime>("Date").ElementAt(numTrain);

            IDataView trainData = context.Data.FilterRowsByColumn(data, "TotalConfirmed", upperBound: confirmedAtSplit);
            IDataView testData  = context.Data.FilterRowsByColumn(data, "TotalConfirmed", lowerBound: confirmedAtSplit);

            Console.WriteLine($"Training dataset range : {startingDate.ToShortDateString()} to {dateAtSplit.ToShortDateString()}");

            #endregion

            #endregion

            #region ML Pipeline

            var pipeline = context.Forecasting.ForecastBySsa(
                nameof(ConfirmedForecast.Forecast),
                nameof(ConfirmedData.TotalConfirmed),
                WINDOW_SIZE,
                SERIES_LENGTH,
                trainSize: numTrain,
                horizon: HORIZON,
                confidenceLevel: CONFIDENCE_LEVEL,
                confidenceLowerBoundColumn: nameof(ConfirmedForecast.LowerBoundConfirmed),
                confidenceUpperBoundColumn: nameof(ConfirmedForecast.UpperBoundConfirmed));

            #endregion

            #region Train Model

            var model = pipeline.Fit(trainData);

            #endregion

            #region Evaluate

            IDataView predictions = model.Transform(testData);

            IEnumerable <float> actual =
                context.Data.CreateEnumerable <ConfirmedData>(testData, true)
                .Select(observed => observed.TotalConfirmed);

            IEnumerable <float> forecast =
                context.Data.CreateEnumerable <ConfirmedForecast>(predictions, true)
                .Select(prediction => prediction.Forecast[0]);

            // Materialise the per-row errors once: both metrics below enumerate them, and a
            // deferred Zip would re-read both data views for each metric.
            var metrics = actual.Zip(forecast, (actualValue, forecastValue) => actualValue - forecastValue).ToList();

            var MAE  = metrics.Average(error => Math.Abs(error));               // Mean Absolute Error
            var RMSE = Math.Sqrt(metrics.Average(error => Math.Pow(error, 2))); // Root Mean Squared Error

            Console.ForegroundColor = ConsoleColor.Cyan;
            Console.WriteLine("Evaluation Metrics");
            Console.WriteLine("---------------------");
            Console.WriteLine($"Mean Absolute Error: {MAE:F3}");
            Console.WriteLine($"Root Mean Squared Error: {RMSE:F3}\n");

            #endregion

            #region Save Model

            var forecastingEngine = model.CreateTimeSeriesEngine <ConfirmedData, ConfirmedForecast>(context);
            forecastingEngine.CheckPoint(context, MODEL_PATH);

            #endregion

            #region Prediction/Forecasting - 7 days

            var forecasts = forecastingEngine.Predict();

            // Pair the first HORIZON test rows with the forecast values at the same index.
            var forecastOutputs = context.Data.CreateEnumerable <ConfirmedData>(testData, reuseRowObject: false)
                                  .Take(HORIZON)
                                  .Select((ConfirmedData confirmedData, int index) =>
            {
                // The lower bound is clamped at zero.
                float lowerEstimate = Math.Max(0, forecasts.LowerBoundConfirmed[index]);
                float estimate      = forecasts.Forecast[index];
                float upperEstimate = forecasts.UpperBoundConfirmed[index];

                return(new ForecastOutput
                {
                    ActualConfirmed = confirmedData.TotalConfirmed,
                    Date = confirmedData.Date,
                    Forecast = estimate,
                    LowerEstimate = lowerEstimate,
                    UpperEstimate = upperEstimate
                });
            });

            PrimitiveDataFrameColumn <DateTime> forecastDates        = new PrimitiveDataFrameColumn <DateTime>("Date");
            PrimitiveDataFrameColumn <float>    actualConfirmedCases = new PrimitiveDataFrameColumn <float>("ActualConfirmed");
            PrimitiveDataFrameColumn <float>    forecastCases        = new PrimitiveDataFrameColumn <float>("Forecast");
            PrimitiveDataFrameColumn <float>    lowerEstimates       = new PrimitiveDataFrameColumn <float>("LowerEstimate");
            PrimitiveDataFrameColumn <float>    upperEstimates       = new PrimitiveDataFrameColumn <float>("UpperEstimate");

            foreach (var output in forecastOutputs)
            {
                forecastDates.Append(output.Date);
                actualConfirmedCases.Append(output.ActualConfirmed);
                forecastCases.Append(output.Forecast);
                lowerEstimates.Append(output.LowerEstimate);
                upperEstimates.Append(output.UpperEstimate);
            }

            Console.WriteLine("Total Confirmed Cases Forecast");
            Console.WriteLine("---------------------");
            var forecastDataFrame = new DataFrame(forecastDates, actualConfirmedCases, lowerEstimates, forecastCases, upperEstimates);
            forecastDataFrame.PrettyPrint();

            Console.WriteLine(Environment.NewLine);
            Console.ForegroundColor = ConsoleColor.White;

            #endregion

            #region Prediction Visualization

            // var lastDate =  // DateTime.Parse(dates.LastOrDefault());
            var predictionStartDate = dateAtSplit.AddDays(-1); // lastDate.AddDays(1);

            var newDates  = new List <DateTime>();
            var fullDates = new List <DateTime>();
            fullDates.AddRange(dates.Take(numTrain));

            var fullTotalConfirmedCases = new List <string>();
            fullTotalConfirmedCases.AddRange(totalConfirmedCases.Take(numTrain));

            // Append one forecast point per day after the prediction start date.
            for (int index = 0; index < HORIZON; index++)
            {
                var nextDate = predictionStartDate.AddDays(index + 1);
                newDates.Add(nextDate);
                fullTotalConfirmedCases.Add(forecasts.Forecast[index].ToString());
            }

            fullDates.AddRange(newDates);

            // Red vertical line at the prediction start date.
            var layout = new Layout.Layout();
            layout.shapes = new List <Graph.Shape>
            {
                new Graph.Shape
                {
                    x0   = predictionStartDate.ToShortDateString(),
                    x1   = predictionStartDate.ToShortDateString(),
                    y0   = "0",
                    y1   = "1",
                    xref = 'x',
                    yref = "paper",
                    line = new Graph.Line()
                    {
                        color = "red", width = 2
                    }
                }
            };

            var predictionChart = Chart.Plot(
                new[]
            {
                new Graph.Scattergl()
                {
                    x    = fullDates.ToArray(),
                    y    = fullTotalConfirmedCases.ToArray(),
                    mode = "lines+markers"
                }
            },
                layout
                );

            predictionChart.WithTitle("Number of Confirmed Cases over Time");
            // display(predictionChart);

            Graph.Scattergl[] scatters =
            {
                new Graph.Scattergl()
                {
                    x    = newDates,
                    y    = forecasts.UpperBoundConfirmed,
                    fill = "tonexty",
                    name = "Upper bound"
                },
                new Graph.Scattergl()
                {
                    x    = newDates,
                    y    = forecasts.Forecast,
                    fill = "tonexty",
                    name = "Forecast"
                },
                new Graph.Scattergl()
                {
                    x    = newDates,
                    y    = forecasts.LowerBoundConfirmed,
                    fill = "tonexty",
                    name = "Lower bound"
                }
            };


            // Size the chart created here; the earlier code resized the unrelated first
            // chart ("chart") by mistake.
            var predictionChart2 = Chart.Plot(scatters);
            predictionChart2.Width  = 600;
            predictionChart2.Height = 600;
            // display(predictionChart2);

            #endregion

            #endregion
        }
Пример #7
0
        /// <summary>
        /// Demo entry point: builds a ten-row sensor data frame with random temperatures,
        /// flags rows by temperature, copies the frame into a <c>DataTable</c>, then filters,
        /// groups and charts the data.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello World!");

            var createdDate = new PrimitiveDataFrameColumn <DateTime>("CreatedDate");
            var temp        = new PrimitiveDataFrameColumn <float>("Temp");
            var status      = new PrimitiveDataFrameColumn <bool>("Status", 10);
            var deviceName  = new StringDataFrameColumn("DeviceName", 10);
            var actions     = new StringDataFrameColumn("Actions", 10);
            var factory     = new StringDataFrameColumn("Factory", 10);

            var random = new Random(Environment.TickCount);

            // Fill ten rows. The temperature is drawn before the factory number so the
            // sequence of random draws stays the same.
            for (int day = 1; day <= 10; day++)
            {
                createdDate.Append(DateTime.Now.AddDays(day));
                temp.Append(random.Next(25, 50));
                deviceName[day - 1] = $"device-{day}";
                factory[day - 1]    = $"factory-{random.Next(1, 3)}";
            }

            var frame = new DataFrame(createdDate, deviceName, factory, temp, status, actions);

            frame.Info();

            // A row's status is true when its temperature is at most 30.
            for (long index = 0; index < temp.Length; index++)
            {
                status[index] = temp[index] <= 30;
            }

            // Rows with a false status get an action message (column 5 is "Actions").
            for (long index = 0; index < status.Length; index++)
            {
                if (status[index].Value)
                {
                    continue;
                }
                frame[index, 5] = "device perlu di reset";
            }

            // Remaining rows get "-" as their action.
            frame["Actions"].FillNulls("-", inPlace: true);

            // Mirror the frame into a DataTable, every cell stored as its string form.
            var table = new DataTable("data sensor");

            foreach (var column in frame.Columns)
            {
                string columnName = column.Name.Replace(" ", "").Trim();
                table.Columns.Add(columnName);
            }
            table.AcceptChanges();

            for (long rowIndex = 0; rowIndex < frame.Rows.Count; rowIndex++)
            {
                DataFrameRow sourceRow = frame.Rows[rowIndex];
                var          tableRow  = table.NewRow();
                int          position  = 0;
                foreach (var cell in sourceRow)
                {
                    tableRow[position++] = cell.ToString();
                }
                table.Rows.Add(tableRow);
            }
            table.AcceptChanges();

            /*
             * Formatter<DataTable>.Register((df, writer) =>
             * {
             *  var headers = new List<IHtmlContent>();
             *  headers.Add(th(i("index")));
             *  foreach (DataColumn dc in df.Columns)
             *  {
             *      headers.Add((IHtmlContent)th(dc.ColumnName));
             *  }
             *
             *  var rows = new List<List<IHtmlContent>>();
             *  var take = 20;
             *  for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)
             *  {
             *      var cells = new List<IHtmlContent>();
             *      cells.Add(td(i));
             *      DataRow obj = df.Rows[i];
             *
             *      for (int x = 0; x < df.Columns.Count;x++)
             *      {
             *          cells.Add(td(obj[x].ToString()));
             *      }
             *
             *
             *      rows.Add(cells);
             *  }
             *
             *  var t = table(
             *      thead(
             *          headers),
             *      tbody(
             *          rows.Select(
             *              r => tr(r))));
             *
             *  writer.Write(t);
             * }, "text/html");
             */

            // Keep only the rows whose action is not "-".
            PrimitiveDataFrameColumn <bool> needsAction = frame["Actions"].ElementwiseNotEquals("-");
            DataFrame filtered = frame.Filter(needsAction);

            // Per-factory row counts and mean temperature.
            GroupBy byFactory = frame.GroupBy("Factory");

            DataFrame groupCounts  = byFactory.Count();
            DataFrame tempGroupAvg = byFactory.Mean("Temp");

            // Column 0 is the created date and column 3 the temperature.
            var points = frame.Rows.Select(
                row => Tuple.Create(Convert.ToDateTime(row[0]), Convert.ToSingle(row[3])));
            var lineChart = Chart.Line(points);

            lineChart.WithTitle("Temperature per Date");
            //display(lineChart);
        }