/// <summary>
/// Builds a three-row sample frame with "name", "price" and "is_available" columns.
/// </summary>
/// <returns>A new <see cref="DataFrame"/> containing the three sample rows.</returns>
private static DataFrame CreateDataFrame()
        {
            string[] sampleNames = { "apple", "pineapple", "durian" };
            decimal[] samplePrices = { 12.2m, 22.12m, 3_000_000m };
            bool[] sampleAvailability = { false, false, true };

            var nameColumn = new StringDataFrameColumn("name", sampleNames.Length);
            var priceColumn = new PrimitiveDataFrameColumn<decimal>("price", samplePrices.Length);
            var availabilityColumn = new BooleanDataFrameColumn("is_available", sampleAvailability.Length);

            // The columns are created pre-sized, so every slot is filled through the indexer.
            for (int row = 0; row < sampleNames.Length; row++)
            {
                nameColumn[row] = sampleNames[row];
                priceColumn[row] = samplePrices[row];
                availabilityColumn[row] = sampleAvailability[row];
            }

            return new DataFrame(nameColumn, priceColumn, availabilityColumn);
        }
Пример #2
0
        /// <summary>
        /// Runs a <see cref="DataFrameWorkerFunction"/> over a string column and a bool column and checks
        /// that each result is the incoming flag OR-ed with "the string contains 'true'".
        /// </summary>
        public void TestDataFrameWorkerFunctionForBool()
        {
            var workerFunction = new DataFrameWorkerFunction(
                new DataFrameUdfWrapper<ArrowStringDataFrameColumn, PrimitiveDataFrameColumn<bool>, PrimitiveDataFrameColumn<bool>>(
                    (texts, flags) =>
                    {
                        long row = 0;
                        while (row < texts.Length)
                        {
                            // The flag is tested first, so Contains only runs when the flag is false.
                            bool merged = flags[row].Value || texts[row].Contains("true");
                            flags[row] = merged;
                            ++row;
                        }

                        return flags;
                    }).Execute);

            var arrowStrings = (StringArray)ToArrowArray(new[] { "arg1_true", "arg1_true", "arg1_false", "arg1_false" });
            ArrowStringDataFrameColumn stringInput = ToArrowStringDataFrameColumn(arrowStrings);

            // Flags alternate true, false, true, false.
            var boolInput = new PrimitiveDataFrameColumn<bool>("Bool", Enumerable.Range(0, 4).Select(x => x % 2 == 0));
            var udfArguments = new DataFrameColumn[] { stringInput, boolInput };

            var results = (PrimitiveDataFrameColumn<bool>)workerFunction.Func(udfArguments, new[] { 0, 1 });

            Assert.Equal(4, results.Length);
            Assert.True(results[0]);
            Assert.True(results[1]);
            Assert.True(results[2]);
            Assert.False(results[3]);
        }
Пример #3
0
        /// <summary>
        /// Checks NullCount, Length and IsValid after AppendMany with null and non-null values,
        /// and after individual cells are overwritten through the indexer.
        /// </summary>
        public void TestAppendMany()
        {
            var column = new PrimitiveDataFrameColumn<int>("Int1");

            // Five nulls: every slot must be reported as invalid.
            column.AppendMany(null, 5);
            Assert.Equal(5, column.NullCount);
            Assert.Equal(5, column.Length);
            for (int row = 0; row < column.Length; row++)
            {
                Assert.False(column.IsValid(row));
            }

            // Five more slots holding the value 5: null count is unchanged and the new slots are valid.
            column.AppendMany(5, 5);
            Assert.Equal(5, column.NullCount);
            Assert.Equal(10, column.Length);
            for (int row = 5; row < column.Length; row++)
            {
                Assert.True(column.IsValid(row));
            }

            // Overwriting a null with a value lowers the null count.
            column[2] = 10;
            Assert.Equal(4, column.NullCount);
            Assert.True(column.IsValid(2));

            // Overwriting a value with null raises it again.
            column[7] = null;
            Assert.Equal(5, column.NullCount);
            Assert.False(column.IsValid(7));
        }
        /// <summary>
        /// Builds a flow table with "date", "cash" and "inflow" columns from the given trades.
        /// </summary>
        /// <param name="allTrades">Trades to convert; one row is written per trade, in list order.</param>
        /// <returns>The per-trade rows grouped by "date" and summed.</returns>
        private DataFrame CreateFlowTable(List<Trade> allTrades)
        {
            const int DateColumn = 0;
            const int CashColumn = 1;
            const int InflowColumn = 2;

            int tradeCount = allTrades.Count;
            var dates = new PrimitiveDataFrameColumn<DateTime>("date", tradeCount);
            var cash = new PrimitiveDataFrameColumn<decimal>("cash", tradeCount);
            var inflow = new PrimitiveDataFrameColumn<decimal>("inflow", tradeCount);
            DataFrame flows = new DataFrame(dates, cash, inflow);

            // Running total written to the cash column; only non-purchase trades increase it.
            decimal runningCash = Decimal.Zero;

            for (int row = 0; row < tradeCount; row++)
            {
                Trade trade = allTrades[row];
                decimal amount = trade.Quantity * trade.Price;
                bool isPurchase = amount > 0;

                if (!isPurchase)
                {
                    // Sell: the absolute trade amount is added to the running cash.
                    runningCash = Math.Abs(amount) + runningCash;
                }

                flows[row, DateColumn] = trade.TradeDate;
                flows[row, CashColumn] = runningCash;

                // Purchases record their amount as inflow; everything else records zero.
                flows[row, InflowColumn] = isPurchase ? amount : Decimal.Zero;
            }

            return flows.GroupBy("date").Sum();
        }
Пример #5
0
        /// <summary>
        /// Returns a copy of the portfolio table extended with a "TotalMarketValue" column that holds,
        /// for every row, the sum of columns 1 through the last original column, rounded to two decimals.
        /// </summary>
        /// <returns>
        /// The extended copy, or the portfolio table itself (unchanged) when it has fewer than two columns.
        /// </returns>
        public DataFrame GetValuation()
        {
            if (this.PortfolioTable.Columns.Count < 2)
            {
                // Nothing to total.
                // NOTE(review): this returns the live table rather than a copy - confirm callers do not mutate it.
                return this.PortfolioTable;
            }

            DataFrame resultTable = this.PortfolioTable.Clone();
            int lastValueColumn = resultTable.Columns.Count - 1;

            // Rows.Count is a property; the LINQ Count() extension enumerates every row to reach the same number.
            int numberOfRows = (int)resultTable.Rows.Count;

            var totalMarketValue = new PrimitiveDataFrameColumn<decimal>("TotalMarketValue", numberOfRows);
            resultTable.Columns.Add(totalMarketValue);
            int totalColumnIndex = lastValueColumn + 1;

            for (int row = 0; row < numberOfRows; row++)
            {
                decimal total = 0;

                // Column 0 is skipped (the original comment identifies it as the date); null cells are ignored.
                for (int col = 1; col <= lastValueColumn; col++)
                {
                    decimal? value = (decimal?)resultTable[row, col];
                    if (value.HasValue)
                    {
                        total += value.Value;
                    }
                }

                resultTable[row, totalColumnIndex] = Math.Round(total, 2);
            }

            return resultTable;
        }
        /// <summary>
        /// Checks that the <see cref="IDataView"/> schema of a DataFrame follows column removal,
        /// insertion and replacement on the underlying frame.
        /// </summary>
        public void TestIDataViewSchemaInvalidate()
        {
            DataFrame frame = MakeDataFrameWithAllMutableColumnTypes(10, withNulls: false);
            IDataView view = frame;

            // Starting point: 14 columns.
            Assert.Equal(14, view.Schema.Count);

            // Removing a column shrinks the schema.
            frame.Columns.Remove("Bool");
            Assert.Equal(13, view.Schema.Count);

            // Inserting at index 0 grows the schema and puts the new column first.
            DataFrameColumn boolColumn = new PrimitiveDataFrameColumn<bool>(
                "Bool",
                Enumerable.Range(0, (int)frame.RowCount).Select(x => x % 2 == 1));
            frame.Columns.Insert(0, boolColumn);
            DataViewSchema schemaAfterInsert = view.Schema;
            Assert.Equal(14, schemaAfterInsert.Count);
            Assert.Equal("Bool", schemaAfterInsert[0].Name);

            // Replacing a column by index is visible under the replacement's name.
            DataFrameColumn boolClone = boolColumn.Clone();
            boolClone.SetName("BoolClone");
            frame.Columns[1] = boolClone;
            Assert.Equal("BoolClone", view.Schema[1].Name);
        }
        /// <summary>
        /// Checks that a DataFrame with three zero-length columns still produces at least one Arrow
        /// record batch, and that each batch compares equal after being written to and read back from
        /// an Arrow stream.
        /// </summary>
        public void TestEmptyDataFrameRecordBatch()
        {
            PrimitiveDataFrameColumn<int> ageColumn = new PrimitiveDataFrameColumn<int>("Age");
            PrimitiveDataFrameColumn<int> lengthColumn = new PrimitiveDataFrameColumn<int>("CharCount");
            ArrowStringDataFrameColumn stringColumn = new ArrowStringDataFrameColumn("Empty");
            DataFrame df = new DataFrame(new List<DataFrameColumn>()
            {
                ageColumn, lengthColumn, stringColumn
            });

            IEnumerable<RecordBatch> recordBatches = df.ToArrowRecordBatches();
            bool foundARecordBatch = false;

            foreach (RecordBatch recordBatch in recordBatches)
            {
                foundARecordBatch = true;

                // The stream, writer and reader are all disposable; release them once the round trip is done.
                using (MemoryStream stream = new MemoryStream())
                using (ArrowStreamWriter writer = new ArrowStreamWriter(stream, recordBatch.Schema))
                {
                    // The test method is synchronous, so the async write is awaited by blocking.
                    writer.WriteRecordBatchAsync(recordBatch).GetAwaiter().GetResult();

                    // Rewind so the reader starts at the beginning of what was just written.
                    stream.Position = 0;
                    using (ArrowStreamReader reader = new ArrowStreamReader(stream))
                    {
                        RecordBatch readRecordBatch = reader.ReadNextRecordBatch();
                        while (readRecordBatch != null)
                        {
                            RecordBatchComparer.CompareBatches(recordBatch, readRecordBatch);
                            readRecordBatch = reader.ReadNextRecordBatch();
                        }
                    }
                }
            }

            Assert.True(foundARecordBatch);
        }
Пример #8
0
        /// <summary>
        /// Computes <c>price * (1 - discount)</c> using the columns' arithmetic operators.
        /// </summary>
        /// <param name="price">Price column.</param>
        /// <param name="discount">Discount column; must have the same length as <paramref name="price"/>.</param>
        /// <returns>The product, cast to a double column.</returns>
        /// <exception cref="ArgumentException">The two columns differ in length.</exception>
        internal static PrimitiveDataFrameColumn<double> ComputeDiscountPrice(PrimitiveDataFrameColumn<double> price, PrimitiveDataFrameColumn<double> discount)
        {
            bool sameLength = price.Length == discount.Length;
            if (!sameLength)
            {
                throw new ArgumentException("Arrays need to be the same length");
            }

            var remainingFraction = 1 - discount;
            var discounted = price * remainingFraction;

            return (PrimitiveDataFrameColumn<double>)discounted;
        }
Пример #9
0
        /// <summary>
        /// Computes <c>price * (1 - discount) * (1 + tax)</c> using the columns' arithmetic operators.
        /// </summary>
        /// <param name="price">Price column.</param>
        /// <param name="discount">Discount column; must have the same length as <paramref name="price"/>.</param>
        /// <param name="tax">Tax column; must have the same length as <paramref name="price"/>.</param>
        /// <returns>The product, cast to a double column.</returns>
        /// <exception cref="ArgumentException">The columns do not all have the same length.</exception>
        internal static PrimitiveDataFrameColumn<double> ComputeTotal(PrimitiveDataFrameColumn<double> price, PrimitiveDataFrameColumn<double> discount, PrimitiveDataFrameColumn<double> tax)
        {
            bool lengthsMatch = (price.Length == discount.Length) && (price.Length == tax.Length);
            if (!lengthsMatch)
            {
                throw new ArgumentException("Arrays need to be the same length");
            }

            var remainingFraction = 1 - discount;
            var taxMultiplier = 1 + tax;
            var discounted = price * remainingFraction;
            var total = discounted * taxMultiplier;

            return (PrimitiveDataFrameColumn<double>)total;
        }
Пример #10
0
        /// <summary>
        /// Checks that NullCount stays correct when cells of a pre-sized column are written through the indexer.
        /// </summary>
        public void TestNullCountWithIndexers()
        {
            var column = new PrimitiveDataFrameColumn<int>("Int", 5);

            // A column created with a length starts out with every slot null.
            Assert.Equal(5, column.NullCount);

            // Writing null over a null must not change the count.
            column[2] = null;
            Assert.Equal(5, column.NullCount);

            // Writing a value over a null lowers the count by one.
            column[2] = 5;
            Assert.Equal(4, column.NullCount);
        }
Пример #11
0
        /// <summary>
        /// Creates a frame with two int columns: "Int1" counting up from 0 and "Int2" counting up from 10.
        /// </summary>
        /// <param name="length">Number of rows in each column.</param>
        /// <returns>The new two-column frame.</returns>
        static public DataFrame MakeTestTableWithTwoColumns(int length)
        {
            var fromZero = Enumerable.Range(0, length);
            var fromTen = Enumerable.Range(10, length);

            BaseDataFrameColumn firstColumn = new PrimitiveDataFrameColumn<int>("Int1", fromZero);
            BaseDataFrameColumn secondColumn = new PrimitiveDataFrameColumn<int>("Int2", fromTen);

            Data.DataFrame table = new Data.DataFrame();
            table.InsertColumn(0, firstColumn);
            table.InsertColumn(1, secondColumn);

            return table;
        }
Пример #12
0
        /// <summary>
        /// Checks IsValid on a column of ten values followed by one appended null.
        /// </summary>
        public void TestValidity()
        {
            var column = new PrimitiveDataFrameColumn<int>("Int1", Enumerable.Range(0, 10).Select(x => x));
            column.Append(null);

            // The appended null sits at index 10.
            Assert.False(column.IsValid(10));

            // Every slot before the last one holds a value.
            long row = 0;
            while (row < column.Length - 1)
            {
                Assert.True(column.IsValid(row));
                row++;
            }
        }
        /// <summary>
        /// Converts a list of security prices into a frame with a "date" column and a "price" column.
        /// </summary>
        /// <param name="securityPrices">Source rows; <c>Date</c> and <c>ClosePrice</c> are read from each.</param>
        /// <returns>A two-column frame with one row per entry of <paramref name="securityPrices"/>.</returns>
        private DataFrame CreatePriceTable(List<SecurityPrices> securityPrices)
        {
            var quoteDates = new List<DateTime>(securityPrices.Count);
            var closePrices = new List<decimal>(securityPrices.Count);

            foreach (SecurityPrices quote in securityPrices)
            {
                quoteDates.Add(quote.Date);
                closePrices.Add(quote.ClosePrice);
            }

            return new DataFrame(
                new PrimitiveDataFrameColumn<DateTime>("date", quoteDates),
                new PrimitiveDataFrameColumn<decimal>("price", closePrices));
        }
Пример #14
0
        /// <summary>
        /// Checks that, once the DataFrame formatter is registered, the "text/html" display string
        /// of a frame contains "table_".
        /// </summary>
        public void DataFrame_InputDataFrame_ReturnsHtml()
        {
            // Arrange: two columns of length 3; both are given the same length so the frame can be built.
            var frame = new DataFrame(
                new PrimitiveDataFrameColumn<int>("Ints", 3),
                new StringDataFrameColumn("Strings", 3));

            // Act
            Formatters.Register<DataFrame>();
            var rendered = frame.ToDisplayString("text/html");

            // Assert
            rendered.Should().Contain("table_");
        }
Пример #15
0
        /// <summary>
        /// Creates the cache with an empty tick frame made of nine named, zero-length columns.
        /// </summary>
        public TickCache()
        {
            _tickFrame = new DataFrame(
                new PrimitiveDataFrameColumn<DateTime>("timestamp"),
                new UInt32DataFrameColumn("instrumenttoken"),
                new BooleanDataFrameColumn("tradable"),
                new DecimalDataFrameColumn("open"),
                new DecimalDataFrameColumn("high"),
                new DecimalDataFrameColumn("low"),
                new DecimalDataFrameColumn("close"),
                new DecimalDataFrameColumn("lastprice"),
                new UInt64DataFrameColumn("volume"));
        }
        /// <summary>
        /// Builds a ten-row price table: dates 2020-05-01 through 2020-05-10 with prices 11.27 through 20.27.
        /// </summary>
        /// <returns>A frame with a "date" column and a "price" column.</returns>
        public DataFrame CreatePriceTable()
        {
            const int DayCount = 10;

            var dateColumn = new PrimitiveDataFrameColumn<DateTime>("date");
            var priceColumn = new PrimitiveDataFrameColumn<decimal>("price");

            int day = 1;
            while (day <= DayCount)
            {
                dateColumn.Append(new DateTime(2020, 5, day));
                priceColumn.Append(day + 10.27m);
                day++;
            }

            return new DataFrame(dateColumn, priceColumn);
        }
Пример #17
0
        /// <summary>
        /// Projects every value of <paramref name="column"/> through <paramref name="func"/> into a new column
        /// with an empty name.
        /// </summary>
        /// <typeparam name="T">Element type of the source column.</typeparam>
        /// <typeparam name="TResult">Element type of the resulting column.</typeparam>
        /// <param name="column">Column whose values are projected.</param>
        /// <param name="func">Projection applied to each non-null value.</param>
        /// <returns>
        /// A new column with one entry per source entry; null source entries yield null result entries.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="column"/> or <paramref name="func"/> is <c>null</c>.
        /// </exception>
        public static PrimitiveDataFrameColumn<TResult> Apply<T, TResult>(this PrimitiveDataFrameColumn<T> column,
                                                                            Func<T, TResult> func)
            where T : unmanaged
            where TResult : unmanaged
        {
            if (column is null)
            {
                throw new ArgumentNullException(nameof(column));
            }

            if (func is null)
            {
                throw new ArgumentNullException(nameof(func));
            }

            var resultColumn = new PrimitiveDataFrameColumn<TResult>(string.Empty, 0);

            foreach (T? value in column)
            {
                if (value.HasValue)
                {
                    resultColumn.Append(func(value.Value));
                }
                else
                {
                    // Reading .Value on a null entry throws InvalidOperationException, so nulls are carried over as-is.
                    resultColumn.Append(null);
                }
            }

            return resultColumn;
        }
Пример #18
0
        /// <summary>
        /// Builds an int column of ten values from an Arrow record batch and checks that the read-only data
        /// buffers match the column's indexer and that the null bitmap has a set bit for every element.
        /// </summary>
        public void TestPrimitiveColumnGetReadOnlyBuffers()
        {
            RecordBatch batch = new RecordBatch.Builder()
                                .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
                                .Build();
            DataFrame frame = DataFrame.FromArrowRecordBatch(batch);
            var column = frame["Column1"] as PrimitiveDataFrameColumn<int>;

            // Tests the bit at position (index mod 8) of a single bitmap byte.
            bool IsBitSet(byte bitMapByte, int index)
            {
                return ((bitMapByte >> (index & 7)) & 1) != 0;
            }

            IEnumerator<ReadOnlyMemory<int>> dataEnumerator = column.GetReadOnlyDataBuffers().GetEnumerator();
            IEnumerator<ReadOnlyMemory<byte>> bitMapEnumerator = column.GetReadOnlyNullBitMapBuffers().GetEnumerator();

            // Data buffers and bitmap buffers are walked in lock step.
            for (long bufferIndex = 0; dataEnumerator.MoveNext() && bitMapEnumerator.MoveNext(); bufferIndex++)
            {
                ReadOnlySpan<int> values = dataEnumerator.Current.Span;
                ReadOnlySpan<byte> bitMap = bitMapEnumerator.Current.Span;

                for (int j = 0; j < values.Length; j++)
                {
                    // Each buffer has a max length of int.MaxValue
                    Assert.Equal(values[j], column[j + bufferIndex * int.MaxValue]);
                }

                // No nulls in this column, so each bit must be set
                for (int byteIndex = 0; byteIndex < bitMap.Length; byteIndex++)
                {
                    for (int bit = 0; bit < 8; bit++)
                    {
                        // Leave this byte once the bit position reaches the column length.
                        if (byteIndex * 8 + bit == column.Length)
                        {
                            break;
                        }

                        Assert.True(IsBitSet(bitMap[byteIndex], bit));
                    }
                }
            }
        }
Пример #19
0
        /// <summary>
        /// Adds a position to the portfolio and merges its daily valuation into the portfolio table.
        /// </summary>
        /// <remarks>
        /// The first position's valuation table is cloned to become the portfolio table. Every later
        /// position adds a "{symbol}_MarketValue" column that is filled by walking both tables in row order
        /// and comparing column 0.
        /// </remarks>
        /// <param name="position">Position to add; its daily valuation is read via GetDailyValuation.</param>
        public void AddPositon(PositionFormulas position)
        {
            // Record the position in the list of positions.
            this.positions.Add(position);

            DataFrame positionValuation = position.GetDailyValuation();

            if (this.PortfolioTable.Columns.Count == 0)
            {
                // First position: its valuation table becomes the portfolio table.
                this.PortfolioTable = positionValuation.Clone();
                return;
            }

            // The portfolio already contains securities, so the position gets its own new column.
            // Rows.Count is a property; the LINQ Count() extension enumerates every row to reach the same number.
            int numberOfRows = (int)this.PortfolioTable.Rows.Count;
            long valuationRows = positionValuation.Rows.Count;

            string newColName = $"{position.symbol}_MarketValue";
            PrimitiveDataFrameColumn<decimal> newCol = new PrimitiveDataFrameColumn<decimal>(newColName, numberOfRows);
            this.PortfolioTable.Columns.Add(newCol);

            const int dateCol = 0;
            int newColIndex = this.PortfolioTable.Columns.Count - 1;
            int secondaryRow = 0;

            // Stop as soon as every valuation row has been consumed; later portfolio rows are not written here.
            for (int row = 0; row < numberOfRows && secondaryRow < valuationRows; row++)
            {
                if (this.PortfolioTable[row, dateCol].Equals(positionValuation[secondaryRow, dateCol]))
                {
                    // Dates match: copy the position's valuation for that date and advance to its next row.
                    this.PortfolioTable[row, newColIndex] = positionValuation[secondaryRow, 1];
                    secondaryRow++;
                }
                else
                {
                    // No valuation for this portfolio date.
                    this.PortfolioTable[row, newColIndex] = Decimal.Zero;
                }
            }
        }
        /// <summary>
        /// Checks the "pct_change" column of the daily performance for a position made of two short
        /// trades entered on the same day.
        /// </summary>
        public void TestDailyShortPositionPerformance()
        {
            var position = new PositionFormulas(testSymbol);
            Trade firstShort = CreateTransaction(-500, testSymbol, 12.5m, new DateTime(2020, 5, 4));
            Trade secondShort = CreateTransaction(-600, testSymbol, 15m, new DateTime(2020, 5, 4));

            foreach (Trade trade in new[] { firstShort, secondShort })
            {
                position.AddTransaction(trade);
            }

            position.CalculateDailyPerformance(CreatePriceTable());

            var expected = new PrimitiveDataFrameColumn<decimal>(
                "pct_change",
                new decimal[] { -2.931m, -10.145m, -17.358m, -24.571m, -31.784m, -38.997m, -46.210m });

            Assert.Equal(expected, position.GetDailyPerformance().Columns["pct_change"]);
        }
        /// <summary>
        /// Checks the "{testSymbol}_MarketValue" column of the daily valuation for a position built from
        /// two purchases made on different days.
        /// </summary>
        public void TestDailyValuation()
        {
            string marketValueColumn = $"{testSymbol}_MarketValue";

            var position = new PositionFormulas(testSymbol);
            Trade firstPurchase = CreateTransaction(500, testSymbol, 12.5m, new DateTime(2020, 5, 4));
            Trade secondPurchase = CreateTransaction(600, testSymbol, 15m, new DateTime(2020, 5, 8));

            foreach (Trade trade in new[] { firstPurchase, secondPurchase })
            {
                position.AddTransaction(trade);
            }

            position.CalculateDailyValuation(CreatePriceTable());

            var expected = new PrimitiveDataFrameColumn<decimal>(
                marketValueColumn,
                new decimal[] { 7135m, 7635m, 8135m, 8635m, 20097m, 21197m, 22297m });

            Assert.Equal(expected, position.GetDailyValuation().Columns[marketValueColumn]);
        }
Пример #22
0
        /// <summary>
        /// Produces a frame with an "age" column and a "nameCharCount" column. When the input has rows,
        /// the result has one row: the first "age" value and the total length of all "name" strings.
        /// </summary>
        /// <param name="dataFrame">Input frame; its "name" and "age" columns are read.</param>
        /// <returns>A frame of (age, nameCharCount) with one row, or no rows when the input is empty.</returns>
        private static FxDataFrame CountCharacters(FxDataFrame dataFrame)
        {
            var ageColumn = new PrimitiveDataFrameColumn<int>("age");
            var characterCountColumn = new PrimitiveDataFrameColumn<int>("nameCharCount");
            var names = dataFrame["name"] as ArrowStringDataFrameColumn;

            long rowCount = dataFrame.Rows.Count;

            // Sum the length of every name.
            int totalCharacters = 0;
            for (long row = 0; row < rowCount; ++row)
            {
                totalCharacters += names[row].Length;
            }

            // An empty input yields empty output columns.
            if (rowCount > 0)
            {
                characterCountColumn.Append(totalCharacters);
                ageColumn.Append((int?)dataFrame["age"][0]);
            }

            return new FxDataFrame(ageColumn, characterCountColumn);
        }
        /// <summary>
        /// Copies one column of primitive values into a new PrimitiveDataFrameColumn
        /// and adds that column to CSharpDataFrame.Columns.
        /// </summary>
        /// <typeparam name="T">Unmanaged element type of the column.</typeparam>
        /// <param name="columnNumber">Index into _columns; supplies the column name and its Nullable flag.</param>
        /// <param name="rowsNumber">Number of rows to copy.</param>
        /// <param name="colData">Pointer to the column values, read as <paramref name="rowsNumber"/> elements of T.</param>
        /// <param name="colMap">
        /// Pointer to one int per row; for a nullable column, a row whose entry equals SQL_NULL_DATA is not copied.
        /// </param>
        private unsafe void AddDataFrameColumn<T>(
            ushort columnNumber,
            ulong rowsNumber,
            void* colData,
            int* colMap
            ) where T : unmanaged
        {
            int rowCount = (int)rowsNumber;
            Span<T> values = new Span<T>(colData, rowCount);
            Span<int> indicators = new Span<int>(colMap, rowCount);
            var target = new PrimitiveDataFrameColumn<T>(_columns[columnNumber].Name, rowCount);

            for (int row = 0; row < rowCount; ++row)
            {
                // The indicator is only consulted for nullable columns.
                bool isNull = _columns[columnNumber].Nullable != 0 && indicators[row] == SQL_NULL_DATA;
                if (isNull)
                {
                    // The slot keeps whatever the freshly created column holds.
                    continue;
                }

                target[row] = values[row];
            }

            CSharpDataFrame.Columns.Add(target);
        }
Пример #24
0
        /// <summary>
        /// Exercises column insertion, replacement and removal on a DataFrame, including the argument
        /// checks for a mismatched length, a duplicate name and an out-of-range index.
        /// </summary>
        public void ColumnAndTableCreationTest()
        {
            BaseDataFrameColumn ints = new PrimitiveDataFrameColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
            BaseDataFrameColumn floats = new PrimitiveDataFrameColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
            DataFrame frame = new DataFrame();

            // Two valid inserts.
            frame.InsertColumn(0, ints);
            frame.InsertColumn(1, floats);
            Assert.Equal(10, frame.RowCount);
            Assert.Equal(2, frame.ColumnCount);
            Assert.Equal(10, frame.Column(0).Length);
            Assert.Equal("IntColumn", frame.Column(0).Name);
            Assert.Equal(10, frame.Column(1).Length);
            Assert.Equal("FloatColumn", frame.Column(1).Name);

            // Invalid inserts: wrong length, duplicate name, index out of range.
            BaseDataFrameColumn tooLong = new PrimitiveDataFrameColumn<float>("BigColumn", Enumerable.Range(0, 11).Select(x => (float)x));
            BaseDataFrameColumn duplicateName = new PrimitiveDataFrameColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));

            Assert.Throws<System.ArgumentException>(() => frame.InsertColumn(2, tooLong));
            Assert.Throws<System.ArgumentException>(() => frame.InsertColumn(2, duplicateName));
            Assert.Throws<System.ArgumentException>(() => frame.InsertColumn(10, duplicateName));

            // The failed inserts must not have changed the frame.
            Assert.Equal(2, frame.ColumnCount);

            // Replacing column 1 with a column named like column 0 is rejected.
            BaseDataFrameColumn sameNameInts = new PrimitiveDataFrameColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
            Assert.Throws<System.ArgumentException>(() => frame.SetColumn(1, sameNameInts));

            // Replacing it with a uniquely named column succeeds.
            BaseDataFrameColumn otherInts = new PrimitiveDataFrameColumn<int>("IntColumn1", Enumerable.Range(0, 10).Select(x => x));
            frame.SetColumn(1, otherInts);
            Assert.True(otherInts == frame.Column(1));

            // Removing column 1 leaves only the original int column.
            frame.RemoveColumn(1);
            Assert.Equal(1, frame.ColumnCount);
            Assert.True(ints == frame.Column(0));
        }
        /// <summary>
        /// Builds the holding-period-return (HPR) table for a user's portfolio.
        /// </summary>
        /// <remarks>
        /// Starting from the portfolio valuation, the method appends "cash", "inflow", "PortfolioValue",
        /// "Holding Period Return" and "Holding Period Return Indexed" columns, in that order.
        /// The column just before "cash" is treated as the portfolio's market value.
        /// </remarks>
        /// <param name="portfolioId">Portfolio identifier passed to the repository.</param>
        /// <param name="userId">User identifier passed to the repository.</param>
        /// <returns>The extended valuation table, or <c>null</c> when the user has no trades.</returns>
        public DataFrame GetPortfolioHPR(int? portfolioId, string userId)
        {
            List<Trade> allTrades = _repo.GetAllUserTrades(portfolioId, userId);
            if (allTrades.Count == 0)
            {
                return null;
            }

            // Cash and inflow per trade date.
            DataFrame flowFrame = CreateFlowTable(allTrades);

            string portfolioName = _repo.GetPortfolioName(portfolioId);
            PortfolioData userPortfolio = GetPortfolioData(portfolioName, allTrades);
            DataFrame portfolioValuation = userPortfolio.GetValuation();

            // Rows.Count is a property; the LINQ Count() extension enumerates every row to reach the same number.
            int pvSize = (int)portfolioValuation.Rows.Count;
            long flowRows = flowFrame.Rows.Count;

            portfolioValuation.Columns.Add(new PrimitiveDataFrameColumn<decimal>("cash", pvSize));
            portfolioValuation.Columns.Add(new PrimitiveDataFrameColumn<decimal>("inflow", pvSize));

            int valueIndex = portfolioValuation.Columns.Count - 3;
            int cashIndex = portfolioValuation.Columns.Count - 2;
            int inflowIndex = portfolioValuation.Columns.Count - 1;

            // Copy flow rows onto valuation rows with the same date, walking both tables in order.
            // Rows that get no match keep their initial null and are zero-filled further down.
            int secondaryRow = 0;
            for (int row = 0; row < pvSize && secondaryRow < flowRows; row++)
            {
                if (portfolioValuation[row, 0].Equals(flowFrame[secondaryRow, 0]))
                {
                    portfolioValuation[row, cashIndex] = flowFrame[secondaryRow, 1];
                    portfolioValuation[row, inflowIndex] = flowFrame[secondaryRow, 2];
                    secondaryRow++;
                }
            }

            // Forward-fill the cash column: once a non-zero cash value has been seen,
            // later null or zero cells take the most recent non-zero value.
            bool toFill = false;
            decimal prevCash = decimal.Zero;
            for (int row = 0; row < pvSize; row++)
            {
                decimal? cash = (decimal?)portfolioValuation[row, cashIndex];
                if (cash.HasValue && cash.Value != Decimal.Zero)
                {
                    toFill = true;
                    prevCash = cash.Value;
                }
                else if (toFill)
                {
                    portfolioValuation[row, cashIndex] = prevCash;
                }
            }

            // Whatever is still null (no flow seen yet) becomes zero.
            portfolioValuation.Columns.GetPrimitiveColumn<decimal>("cash").FillNulls(Decimal.Zero, true);
            portfolioValuation.Columns.GetPrimitiveColumn<decimal>("inflow").FillNulls(Decimal.Zero, true);

            // Total portfolio value = cash + market value.
            portfolioValuation.Columns.Add(new PrimitiveDataFrameColumn<decimal>("PortfolioValue", pvSize));
            int portfolioValIndex = portfolioValuation.Columns.Count - 1;
            for (int row = 0; row < pvSize; row++)
            {
                portfolioValuation[row, portfolioValIndex] =
                    (decimal)portfolioValuation[row, cashIndex] + (decimal)portfolioValuation[row, valueIndex];
            }

            // Holding period return in percent, relative to the previous row's value plus this row's inflow.
            // Row 0 has no previous row and is left unset.
            portfolioValuation.Columns.Add(new PrimitiveDataFrameColumn<decimal>("Holding Period Return", pvSize));
            int hprIndex = portfolioValuation.Columns.Count - 1;
            for (int row = 1; row < pvSize; row++)
            {
                decimal currentValue = (decimal)portfolioValuation[row, portfolioValIndex];
                decimal previousValue = (decimal)portfolioValuation[row - 1, portfolioValIndex];
                decimal inflow = (decimal)portfolioValuation[row, inflowIndex];

                // NOTE(review): decimal division throws DivideByZeroException when previousValue + inflow is 0.
                decimal hpr = (currentValue / (previousValue + inflow) - 1) * 100;
                portfolioValuation[row, hprIndex] = Math.Round(hpr, 3);
            }

            // HPR expressed as an index that starts at 100 and compounds row by row.
            portfolioValuation.Columns.Add(new PrimitiveDataFrameColumn<decimal>("Holding Period Return Indexed", pvSize));
            int hprIndexedIndex = portfolioValuation.Columns.Count - 1;

            // Row 0 only exists when the valuation has rows; skip the seed value otherwise.
            if (pvSize > 0)
            {
                portfolioValuation[0, hprIndexedIndex] = 100m; // initial index
            }

            for (int row = 1; row < pvSize; row++)
            {
                decimal previousIndex = (decimal)portfolioValuation[row - 1, hprIndexedIndex];
                decimal hprPercent = (decimal)portfolioValuation[row, hprIndex];
                decimal indexed = previousIndex * ((hprPercent / 100) + 1);
                portfolioValuation[row, hprIndexedIndex] = Math.Round(indexed, 3);
            }

            System.Diagnostics.Debug.WriteLine(portfolioValuation);
            return portfolioValuation;
        }
Пример #26
0
        /// <summary>
        /// Checks NullCount on primitive and string columns after construction, Append and indexer
        /// assignments, and checks that appended nulls read back as null.
        /// </summary>
        public void TestNullCounts()
        {
            // Construction from values, then one appended null.
            var fromValues = new PrimitiveDataFrameColumn<int>("Int1", Enumerable.Range(0, 10).Select(x => x));
            fromValues.Append(null);
            Assert.Equal(1, fromValues.NullCount);

            // Empty column.
            var emptyInts = new PrimitiveDataFrameColumn<int>("Int2");
            Assert.Equal(0, emptyInts.NullCount);

            // Pre-sized column.
            var presizedInts = new PrimitiveDataFrameColumn<int>("Int3", 10);
            Assert.Equal(10, presizedInts.NullCount);

            // Test null counts with assignments on Primitive Columns
            emptyInts.Append(null);
            emptyInts.Append(1);
            Assert.Equal(1, emptyInts.NullCount);
            emptyInts[1] = 10;
            Assert.Equal(1, emptyInts.NullCount);
            emptyInts[1] = null;
            Assert.Equal(2, emptyInts.NullCount);
            emptyInts[1] = 5;
            Assert.Equal(1, emptyInts.NullCount);
            emptyInts[0] = null;
            Assert.Equal(1, emptyInts.NullCount);

            // Test null counts with assignments on String Columns
            var emptyStrings = new StringDataFrameColumn("String", 0);
            Assert.Equal(0, emptyStrings.NullCount);

            var presizedStrings = new StringDataFrameColumn("String1", 5);
            Assert.Equal(0, presizedStrings.NullCount);

            var numberStrings = new StringDataFrameColumn("String", Enumerable.Range(0, 10).Select(x => x.ToString()));
            Assert.Equal(0, numberStrings.NullCount);

            var nullStrings = new StringDataFrameColumn("String", Enumerable.Range(0, 10).Select(x => (string)null));
            Assert.Equal(10, nullStrings.NullCount);

            emptyStrings.Append(null);
            Assert.Equal(1, emptyStrings.NullCount);
            emptyStrings.Append("foo");
            Assert.Equal(1, emptyStrings.NullCount);
            emptyStrings[1] = "bar";
            Assert.Equal(1, emptyStrings.NullCount);
            emptyStrings[1] = null;
            Assert.Equal(2, emptyStrings.NullCount);
            emptyStrings[1] = "foo";
            Assert.Equal(1, emptyStrings.NullCount);
            emptyStrings[0] = null;
            Assert.Equal(1, emptyStrings.NullCount);

            // Values and nulls interleaved through Append must read back in the same positions.
            var mixedInts = new PrimitiveDataFrameColumn<int>("Int");
            mixedInts.Append(0);
            mixedInts.Append(1);
            mixedInts.Append(null);
            mixedInts.Append(2);
            mixedInts.Append(null);
            mixedInts.Append(3);

            Assert.Equal(0, mixedInts[0]);
            Assert.Equal(1, mixedInts[1]);
            Assert.Null(mixedInts[2]);
            Assert.Equal(2, mixedInts[3]);
            Assert.Null(mixedInts[4]);
            Assert.Equal(3, mixedInts[5]);
        }
Пример #27
0
        /// <summary>
        /// Performs exploratory data analysis (EDA) on the CSV loaded from <c>dataFile</c>:
        /// prints the date range and several previews of the frame, filters out rows whose
        /// active count is negative, restricts the frame via <c>RemoveAllColumnsExcept</c>,
        /// and builds pie, bar and choropleth charts from the remaining data.
        /// </summary>
        public void Analyze()
        {
            #region Load Dataset

            var covid19Dataframe = DataFrame.LoadCsv(dataFile);

            #endregion

            #region Data Range

            // The distinct "last update" values, sorted, give the first and last date in the data.
            var dateRangeDataFrame = covid19Dataframe.Columns[LAST_UPDATE].ValueCounts();
            var dataRange          = dateRangeDataFrame.Columns[VALUES].Sort();
            var lastElementIndex   = dataRange.Length - 1;

            var startDate = DateTime.Parse(dataRange[0].ToString()).ToShortDateString();
            var endDate   = DateTime.Parse(dataRange[lastElementIndex].ToString()).ToShortDateString(); // Last Element
            Console.WriteLine($"The data is between {startDate} and {endDate}");

            #endregion

            #region Display data

            // Default Rows
            var topDefaultRows = covid19Dataframe;
            Console.WriteLine("------- Top Default(10) Rows -------");
            topDefaultRows.PrettyPrint();

            // Top 5 Rows
            var topRows = covid19Dataframe.Head(5);
            Console.WriteLine("------- Head: Top Rows(5) -------");
            topRows.PrettyPrint();

            // Random 6 Rows
            var randomRows = covid19Dataframe.Sample(6);
            Console.WriteLine("------- Sample: Random Rows(6) -------");
            randomRows.PrettyPrint();

            // Description
            var description = covid19Dataframe.Description();
            Console.WriteLine("------- Description -------");
            description.PrettyPrint();

            // Information
            var information = covid19Dataframe.Info();
            Console.WriteLine("------- Information -------");
            information.PrettyPrint();

            #endregion

            #region Data Cleaning

            // Active = Confirmed - Deaths - Recovered

            // Filter : Gets active records with negative values
            PrimitiveDataFrameColumn<bool> invalidActiveFilter = covid19Dataframe.Columns[ACTIVE].ElementwiseLessThan(0.0);
            var invalidActiveDataFrame = covid19Dataframe.Filter(invalidActiveFilter);
            Console.WriteLine("------- Invalid Active cases - Before Removal -------");
            invalidActiveDataFrame.PrettyPrint();

            // Example of an invalid row: Active(-13) = Confirmed(10) - Deaths(51) - Recovered(0)

            // Remove invalid active cases by applying filter
            PrimitiveDataFrameColumn<bool> activeFilter = covid19Dataframe.Columns[ACTIVE].ElementwiseGreaterThanOrEqual(0.0);
            covid19Dataframe = covid19Dataframe.Filter(activeFilter);
            Console.WriteLine("------- Invalid Active cases - After Removal -------");
            covid19Dataframe.Description().PrettyPrint();

            // Remove extra columns
            string[] requiredColumns =
            {
                COUNTRY,
                LAST_UPDATE,
                CONFIRMED,
                DEATHS,
                RECOVERED,
                ACTIVE
            };

            // NOTE(review): the helper's parameter is named "excludedColumnNames" although the
            // list holds the columns to keep — confirm the helper's semantics against its definition.
            covid19Dataframe.RemoveAllColumnsExcept(excludedColumnNames: requiredColumns);
            Console.WriteLine("------- Filtered columns -------");
            covid19Dataframe.PrettyPrint();

            #endregion

            #region Visualization

            // Builds a bar series from the leading rows of an already-ranked frame.
            // Lists are used instead of sets so that two countries with the same total are both
            // kept and the x/y arrays stay aligned and in row order. The row count is capped at
            // the frame length so a short frame cannot cause an out-of-range read.
            Graph.Bar BuildTopCountriesBar(DataFrame ranked, string valueColumn)
            {
                var countryColumn = ranked.Columns[COUNTRY];
                var totalsColumn  = ranked.Columns[valueColumn];
                long take         = Math.Min(TOP_COUNT, countryColumn.Length);

                var names  = new List<string>((int)take);
                var totals = new List<long>((int)take);
                for (long index = 0; index < take; index++)
                {
                    names.Add(countryColumn[index].ToString());
                    totals.Add(Convert.ToInt64(totalsColumn[index]));
                }

                return new Graph.Bar
                {
                    x = names.ToArray(),
                    y = totals.ToArray()
                };
            }

            #region Global

            #region Confirmed Vs Deaths Vs Recovered cases

            // Gets the collection of confirmed, deaths and recovered
            var confirmed = covid19Dataframe.Columns[CONFIRMED];
            var deaths    = covid19Dataframe.Columns[DEATHS];
            var recovered = covid19Dataframe.Columns[RECOVERED];

            // Gets the sum of each collection by using the Sum method of the column
            var totalConfirmed = Convert.ToDouble(confirmed.Sum());
            var totalDeaths    = Convert.ToDouble(deaths.Sum());
            var totalRecovered = Convert.ToDouble(recovered.Sum());

            var confirmedVsDeathsVsRecoveredPlot = Chart.Plot(
                new Graph.Pie()
            {
                values = new double[] { totalConfirmed, totalDeaths, totalRecovered },
                labels = new string[] { CONFIRMED, DEATHS, RECOVERED }
            }
                );

            confirmedVsDeathsVsRecoveredPlot.WithTitle("Confirmed Vs Deaths Vs Recovered cases");

            #endregion

            #region Top 5 Countries with Confirmed cases

            // The data for the top countries is not present in the csv file.
            // To get it, the frame is grouped by country, the Confirmed column is summed
            // per group, and the result is ordered descending so the largest totals come first.
            var countryConfirmedGroup = covid19Dataframe.GroupBy(COUNTRY).Sum(CONFIRMED).OrderByDescending(CONFIRMED);

            var chart = Chart.Plot(new[] { BuildTopCountriesBar(countryConfirmedGroup, CONFIRMED) });
            chart.WithTitle("Top 5 Countries: Confirmed");
            // display(chart);

            #endregion

            #region Top 5 Countries with Deaths

            var countryDeathsGroup = covid19Dataframe.GroupBy(COUNTRY).Sum(DEATHS).OrderByDescending(DEATHS);

            // The original gathered this data but never plotted it; plot it like the other two.
            chart = Chart.Plot(new[] { BuildTopCountriesBar(countryDeathsGroup, DEATHS) });
            chart.WithTitle("Top 5 Countries: Deaths");
            // display(chart);

            #endregion

            #region Top 5 Countries with Recovered cases

            var countryRecoveredGroup = covid19Dataframe.GroupBy(COUNTRY).Sum(RECOVERED).OrderByDescending(RECOVERED);

            chart = Chart.Plot(new[] { BuildTopCountriesBar(countryRecoveredGroup, RECOVERED) });
            chart.WithTitle("Top 5 Countries : Recovered");
            // display(chart);

            #endregion

            #endregion

            #region India

            #region Confirmed Vs Deaths Vs Recovered cases

            // Filtering on Country column with INDIA as value
            PrimitiveDataFrameColumn<bool> indiaFilter = covid19Dataframe.Columns[COUNTRY].ElementwiseEquals(INDIA);
            var indiaDataFrame = covid19Dataframe.Filter(indiaFilter);

            var indiaConfirmed = indiaDataFrame.Columns[CONFIRMED];
            var indiaDeaths    = indiaDataFrame.Columns[DEATHS];
            var indiaRecovered = indiaDataFrame.Columns[RECOVERED];

            var indiaTotalConfirmed = Convert.ToDouble(indiaConfirmed.Sum());
            var indiaTotalDeaths    = Convert.ToDouble(indiaDeaths.Sum());
            var indiaTotalRecovered = Convert.ToDouble(indiaRecovered.Sum());

            var indiaConfirmedVsDeathsVsRecoveredChart = Chart.Plot(
                new Graph.Pie()
            {
                values = new double[] { indiaTotalConfirmed, indiaTotalDeaths, indiaTotalRecovered },
                labels = new string[] { CONFIRMED, DEATHS, RECOVERED }
            }
                );
            indiaConfirmedVsDeathsVsRecoveredChart.WithTitle("India: Confirmed Vs Deaths Vs Recovered cases");

            #endregion

            #endregion

            #region World Map

            // Every country with its summed confirmed count, in the ranked order computed above.
            var world               = countryConfirmedGroup;
            var worldCountryColumn  = world.Columns[COUNTRY];
            var worldConfirmedColumn = world.Columns[CONFIRMED];

            var worldCountries      = new List<string>();
            var worldConfirmedCases = new List<string>();
            for (long index = 0; index < worldCountryColumn.Length; index++)
            {
                worldCountries.Add(worldCountryColumn[index].ToString());
                worldConfirmedCases.Add(worldConfirmedColumn[index].ToString());
            }

            var worldGeoPlot = Chart.Plot(
                new Graph.Choropleth()
            {
                locations      = worldCountries.ToArray(),
                z              = worldConfirmedCases.ToArray(),
                locationmode   = "country names",
                text           = countryConfirmedGroup.Columns[COUNTRY],
                colorscale     = "active",
                hoverinfo      = COUNTRY,
                autocolorscale = true,
            });

            #endregion

            #endregion
        }
Пример #28
0
        // Builds a ten-row sample sensor DataFrame, flags rows whose temperature is above 30,
        // copies the frame into a System.Data.DataTable, and then runs a filter, a group-by
        // and a line chart over the frame.
        static void Main(string[] args)
        {
            Console.WriteLine("Hello World!");

            // The date and temperature columns start empty and are appended to;
            // the remaining columns are created with ten rows and assigned by index.
            var createdDate = new PrimitiveDataFrameColumn<DateTime>("CreatedDate");
            var temp        = new PrimitiveDataFrameColumn<float>("Temp");
            var status      = new PrimitiveDataFrameColumn<bool>("Status", 10);
            var deviceName  = new StringDataFrameColumn("DeviceName", 10);
            var actions     = new StringDataFrameColumn("Actions", 10);
            var factory     = new StringDataFrameColumn("Factory", 10);

            var rnd = new Random(Environment.TickCount);

            // One row per day offset 1..10: a future date, a random temperature in [25, 50),
            // a device name and one of two factory names.
            for (int day = 1; day <= 10; day++)
            {
                createdDate.Append(DateTime.Now.AddDays(day));
                temp.Append(rnd.Next(25, 50));
                deviceName[day - 1] = $"device-{day}";
                factory[day - 1]    = $"factory-{rnd.Next(1, 3)}";
            }

            var df = new DataFrame(createdDate, deviceName, factory, temp, status, actions);

            df.Info();

            // Status is true when the temperature is at most 30.
            for (int row = 0; row < temp.Length; row++)
            {
                status[row] = temp[row] <= 30;
            }

            // Rows with a false status get an action message written to column index 5 ("Actions").
            for (int row = 0; row < status.Length; row++)
            {
                if (status[row].Value)
                {
                    continue;
                }

                df[row, 5] = "device perlu di reset";
            }

            // Rows that received no action get a "-" placeholder.
            df["Actions"].FillNulls("-", inPlace: true);

            // Mirror the frame into a DataTable: one column per frame column (spaces stripped
            // from the name), one row per frame row, every cell stored as its string form.
            var dt = new DataTable("data sensor");

            foreach (var frameColumn in df.Columns)
            {
                string tableColumnName = dc_Name(frameColumn.Name);
                dt.Columns.Add(tableColumnName);
            }
            dt.AcceptChanges();

            foreach (DataFrameRow frameRow in df.Rows)
            {
                var tableRow   = dt.NewRow();
                int cellIndex  = 0;
                foreach (var cell in frameRow)
                {
                    tableRow[cellIndex++] = cell.ToString();
                }
                dt.Rows.Add(tableRow);
            }
            dt.AcceptChanges();

            /*
             * Formatter<DataTable>.Register((df, writer) =>
             * {
             *  var headers = new List<IHtmlContent>();
             *  headers.Add(th(i("index")));
             *  foreach (DataColumn dc in df.Columns)
             *  {
             *      headers.Add((IHtmlContent)th(dc.ColumnName));
             *  }
             *
             *  var rows = new List<List<IHtmlContent>>();
             *  var take = 20;
             *  for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)
             *  {
             *      var cells = new List<IHtmlContent>();
             *      cells.Add(td(i));
             *      DataRow obj = df.Rows[i];
             *
             *      for (int x = 0; x < df.Columns.Count;x++)
             *      {
             *          cells.Add(td(obj[x].ToString()));
             *      }
             *
             *
             *      rows.Add(cells);
             *  }
             *
             *  var t = table(
             *      thead(
             *          headers),
             *      tbody(
             *          rows.Select(
             *              r => tr(r))));
             *
             *  writer.Write(t);
             * }, "text/html");
             */

            // Rows whose action is not the "-" placeholder.
            PrimitiveDataFrameColumn<bool> boolFilter = df["Actions"].ElementwiseNotEquals("-");
            DataFrame filtered = df.Filter(boolFilter);

            // Per-factory row counts and mean temperature.
            GroupBy groupBy = df.GroupBy("Factory");

            DataFrame groupCounts  = groupBy.Count();
            DataFrame tempGroupAvg = groupBy.Mean("Temp");

            // Line chart of temperature (column 3) against creation date (column 0).
            var points = df.Rows.Select(
                g => new Tuple<DateTime, float>(Convert.ToDateTime(g[0]), Convert.ToSingle(g[3])));
            var lineChart = Chart.Line(points);

            lineChart.WithTitle("Temperature per Date");
            //display(lineChart);

            // Column names are copied with spaces removed and surrounding whitespace trimmed.
            string dc_Name(string frameColumnName) => frameColumnName.Replace(" ", "").Trim();
        }
Пример #29
0
        // Generates 2000 random three-feature points, writes them to a CSV file, clusters them
        // with DBSCAN, and prints every cluster followed by the amount and percentage of noise.
        static void Main()
        {
            #region DataFrame Code
            PrimitiveDataFrameColumn<int>    StudentID   = new PrimitiveDataFrameColumn<int>("Student ID");
            PrimitiveDataFrameColumn<double> mathComp    = new PrimitiveDataFrameColumn<double>("Mathematical Competency");
            PrimitiveDataFrameColumn<double> litComp     = new PrimitiveDataFrameColumn<double>("Literary Competency");
            PrimitiveDataFrameColumn<double> socComp     = new PrimitiveDataFrameColumn<double>("Social Studies Competency");
            PrimitiveDataFrameColumn<double> scienceComp = new PrimitiveDataFrameColumn<double>("Science Competency");
            DataFrame dataFrame = new DataFrame(StudentID, mathComp, litComp, socComp, scienceComp);
            #endregion

            #region Point Generation
            // Generates random points to be used for testing the unsupervised learning algorithm,
            // then adds them to StudentPointList for use in the DBSCAN run.
            // Alternatively, points can be read from a csv file (see the commented-out lines below).
            bool       needPoints = true;
            CsvService csvService = new CsvService();

            List<Point> StudentPointList = new List<Point>();
            if (needPoints)
            {
                Random rand = new Random();

                // Returns a value in [0.50, 1.00] in steps of 0.01.
                // Next(50, 101) yields 50..100, so the former "< 49 => 0" branch could never run
                // and has been removed.
                double RandDouble() => rand.Next(50, 101) / 100.0;

                for (int i = 0; i < 2000; i++)
                {
                    // Evaluated left to right, so the draw order matches three sequential calls.
                    double[] features = { RandDouble(), RandDouble(), RandDouble() };

                    StudentPointList.Add(new Point(i, features));
                }
                csvService.WriteListToFile(StudentPointList, @"C:\Users\Grennon\source\repos\RecommendationEngine\outfile.csv");
            }

            ////CsvService csvService = new CsvService();
            //csvService.WriteListToFile(StudentPointList, @"C:\Users\Grennon\source\repos\RecommendationEngine\outfile.csv");
            //csvService.ReadListFromFile(@"C:\Users\Grennon\source\repos\RecommendationEngine\outfile.csv");


            #endregion

            #region DBSCAN Code
            // Create an instance of the DBSCAN class; the minimum-neighbour count is chosen as
            // 2 * the number of dimensions (features) of a point.
            int    idealMinK = StudentPointList[0].featureArray.Length * 2;
            DBSCAN dbScan    = new DBSCAN(StudentPointList, inputEpsilon: 0.05, inputMinNeighbor: idealMinK);
            dbScan.Run();
            #endregion

            #region Diagnostics & Testing Relevant Output
            // Separates out the clustered lists and prints their contents before displaying the amount of noise.
            Dictionary<int, List<Point>> clusteredPoints = dbScan.ReturnClusteredPoints();

            void PrintList(int listID)
            {
                Console.WriteLine($"Cluster: {listID}");
                foreach (Point current in clusteredPoints[listID])
                {
                    Console.WriteLine($"ID: {current.StudentID} - Array: {current.ShowArray()}");
                }
            }

            // NOTE(review): assumes the dictionary keys are exactly 0..ClusterAmount-1 — confirm
            // against DBSCAN.ReturnClusteredPoints; a missing key throws KeyNotFoundException.
            for (int i = 0; i < dbScan.ClusterAmount; i++)
            {
                PrintList(i);
            }
            Console.WriteLine($"Noise Amount: {dbScan.NoiseAmount}");
            double noisePercentage = ((double)dbScan.NoiseAmount / StudentPointList.Count) * 100;
            Console.WriteLine($"Noise Percentage: {Math.Floor(noisePercentage)}%");
            #endregion

            //SimilarityCalculator similarityCalculator = new SimilarityCalculator(clusteredPoints[1], clusteredPoints);

            /*
             * List<List<Point>> listOfListOfPoints = new List<List<Point>>();
             *
             * for (double epsK = 0.1; epsK < 1.0; epsK += 0.1)
             * {
             *  for (int minK = 5; minK < 10; minK++)
             *  {
             *      DBSCAN findValsDBSCAN = new DBSCAN(StudentPointList, epsK, minK);
             *      Console.WriteLine($"Parameters: eps [{epsK}], minK [{minK}] | NOISE: {findValsDBSCAN.Noise}");
             *  }
             * }
             */
        }
        /// <summary>
        /// Predicts/forecasts confirmed cases from the time series in <c>CONFIRMED_DATASET_FILE</c>:
        /// prints previews of the data, charts confirmed cases over time, splits the data at the
        /// 80% row, fits an SSA forecasting pipeline on the training part, prints MAE/RMSE for the
        /// test part, checkpoints the model to <c>MODEL_PATH</c>, and prints and charts the forecast.
        /// </summary>
        public void Forecast()
        {
            #region Load Data

            var predictedDataFrame = DataFrame.LoadCsv(CONFIRMED_DATASET_FILE);

            #endregion

            #region Display data

            // Top 5 Rows
            var topRows = predictedDataFrame.Head(5);
            Console.WriteLine("------- Head: Top Rows(5) -------");
            topRows.PrettyPrint();

            // Bottom 5 Rows
            var bottomRows = predictedDataFrame.Tail(5);
            Console.WriteLine("------- Tail: Bottom Rows(5) -------");
            bottomRows.PrettyPrint();

            // Description
            var description = predictedDataFrame.Description();
            Console.WriteLine("------- Description -------");
            description.PrettyPrint();

            #endregion

            #region Visualization

            #region Number of Confirmed cases over Time

            // Number of confirmed cases over time
            var totalConfirmedDateColumn = predictedDataFrame.Columns[DATE_COLUMN];
            var totalConfirmedColumn     = predictedDataFrame.Columns[TOTAL_CONFIRMED_COLUMN];

            var dates = new List<DateTime>();
            var totalConfirmedCases = new List<string>();
            for (int index = 0; index < totalConfirmedDateColumn.Length; index++)
            {
                dates.Add(Convert.ToDateTime(totalConfirmedDateColumn[index]));
                totalConfirmedCases.Add(totalConfirmedColumn[index].ToString());
            }

            var title = "Number of Confirmed Cases over Time";
            var confirmedTimeGraph = new Graph.Scattergl()
            {
                x    = dates.ToArray(),
                y    = totalConfirmedCases.ToArray(),
                mode = "lines+markers"
            };

            var chart = Chart.Plot(confirmedTimeGraph);
            chart.WithTitle(title);
            // display(chart);

            #endregion

            #endregion

            #region Prediction

            #region Load Data - ML Context

            var context = new MLContext();
            var data    = context.Data.LoadFromTextFile<ConfirmedData>(CONFIRMED_DATASET_FILE, hasHeader: true, separatorChar: ',');

            #region Split dataset

            // The first 80% of the rows form the training set. The split is expressed as a bound
            // on the TotalConfirmed value found at the split row.
            var totalRows        = (int)data.GetColumn<float>("TotalConfirmed").ToList().Count;
            int numTrain         = (int)(0.8 * totalRows);
            var confirmedAtSplit = (int)data.GetColumn<float>("TotalConfirmed").ElementAt(numTrain);
            var startingDate     = data.GetColumn<DateTime>("Date").FirstOrDefault();
            var dateAtSplit      = data.GetColumn<DateTime>("Date").ElementAt(numTrain);

            IDataView trainData = context.Data.FilterRowsByColumn(data, "TotalConfirmed", upperBound: confirmedAtSplit);
            IDataView testData  = context.Data.FilterRowsByColumn(data, "TotalConfirmed", lowerBound: confirmedAtSplit);

            var totalRowsTrain = (int)trainData.GetColumn<float>("TotalConfirmed").ToList().Count;
            var totalRowsTest  = (int)testData.GetColumn<float>("TotalConfirmed").ToList().Count;

            Console.WriteLine($"Training dataset range : {startingDate.ToShortDateString()} to {dateAtSplit.ToShortDateString()}");

            #endregion

            #endregion

            #region ML Pipeline

            var pipeline = context.Forecasting.ForecastBySsa(
                nameof(ConfirmedForecast.Forecast),
                nameof(ConfirmedData.TotalConfirmed),
                WINDOW_SIZE,
                SERIES_LENGTH,
                trainSize: numTrain,
                horizon: HORIZON,
                confidenceLevel: CONFIDENCE_LEVEL,
                confidenceLowerBoundColumn: nameof(ConfirmedForecast.LowerBoundConfirmed),
                confidenceUpperBoundColumn: nameof(ConfirmedForecast.UpperBoundConfirmed));

            #endregion

            #region Train Model

            var model = pipeline.Fit(trainData);

            #endregion

            #region Evaluate

            IDataView predictions = model.Transform(testData);

            IEnumerable<float> actual =
                context.Data.CreateEnumerable<ConfirmedData>(testData, true)
                .Select(observed => observed.TotalConfirmed);

            IEnumerable<float> forecast =
                context.Data.CreateEnumerable<ConfirmedForecast>(predictions, true)
                .Select(prediction => prediction.Forecast[0]);

            // Per-row error (actual - forecast). Materialized once: both metrics below read it,
            // and re-enumerating the lazy sequence would re-run the prediction pipeline each time.
            var metrics = actual.Zip(forecast, (actualValue, forecastValue) => actualValue - forecastValue).ToList();

            var MAE  = metrics.Average(error => Math.Abs(error));               // Mean Absolute Error
            var RMSE = Math.Sqrt(metrics.Average(error => Math.Pow(error, 2))); // Root Mean Squared Error

            Console.ForegroundColor = ConsoleColor.Cyan;
            Console.WriteLine("Evaluation Metrics");
            Console.WriteLine("---------------------");
            Console.WriteLine($"Mean Absolute Error: {MAE:F3}");
            Console.WriteLine($"Root Mean Squared Error: {RMSE:F3}\n");

            #endregion

            #region Save Model

            var forecastingEngine = model.CreateTimeSeriesEngine<ConfirmedData, ConfirmedForecast>(context);
            forecastingEngine.CheckPoint(context, MODEL_PATH);

            #endregion

            #region Prediction/Forecasting - 7 days

            var forecasts = forecastingEngine.Predict();

            // Pair the first HORIZON test rows with the forecast values at the same index.
            // The lower bound is clamped at zero.
            var forecastOuputs = context.Data.CreateEnumerable<ConfirmedData>(testData, reuseRowObject: false)
                                 .Take(HORIZON)
                                 .Select((ConfirmedData confirmedData, int index) =>
            {
                float lowerEstimate = Math.Max(0, forecasts.LowerBoundConfirmed[index]);
                float estimate      = forecasts.Forecast[index];
                float upperEstimate = forecasts.UpperBoundConfirmed[index];

                return(new ForecastOutput
                {
                    ActualConfirmed = confirmedData.TotalConfirmed,
                    Date = confirmedData.Date,
                    Forecast = estimate,
                    LowerEstimate = lowerEstimate,
                    UpperEstimate = upperEstimate
                });
            });

            PrimitiveDataFrameColumn<DateTime> forecastDates        = new PrimitiveDataFrameColumn<DateTime>("Date");
            PrimitiveDataFrameColumn<float>    actualConfirmedCases = new PrimitiveDataFrameColumn<float>("ActualConfirmed");
            PrimitiveDataFrameColumn<float>    forecastCases        = new PrimitiveDataFrameColumn<float>("Forecast");
            PrimitiveDataFrameColumn<float>    lowerEstimates       = new PrimitiveDataFrameColumn<float>("LowerEstimate");
            PrimitiveDataFrameColumn<float>    upperEstimates       = new PrimitiveDataFrameColumn<float>("UpperEstimate");

            foreach (var output in forecastOuputs)
            {
                forecastDates.Append(output.Date);
                actualConfirmedCases.Append(output.ActualConfirmed);
                forecastCases.Append(output.Forecast);
                lowerEstimates.Append(output.LowerEstimate);
                upperEstimates.Append(output.UpperEstimate);
            }

            Console.WriteLine("Total Confirmed Cases Forecast");
            Console.WriteLine("---------------------");
            var forecastDataFrame = new DataFrame(forecastDates, actualConfirmedCases, lowerEstimates, forecastCases, upperEstimates);
            forecastDataFrame.PrettyPrint();

            Console.WriteLine(Environment.NewLine);
            Console.ForegroundColor = ConsoleColor.White;

            #endregion

            #region Prediction Visualization

            // Starting one day before the split date means the first forecast date
            // (start + 1 day) is the split date itself.
            var predictionStartDate = dateAtSplit.AddDays(-1);

            var newDates  = new List<DateTime>();
            var fullDates = new List<DateTime>();
            fullDates.AddRange(dates.Take(numTrain));

            var fullTotalConfirmedCases = new List<string>();
            fullTotalConfirmedCases.AddRange(totalConfirmedCases.Take(numTrain));

            for (int index = 0; index < HORIZON; index++)
            {
                var nextDate = predictionStartDate.AddDays(index + 1);
                newDates.Add(nextDate);
                fullTotalConfirmedCases.Add(forecasts.Forecast[index].ToString());
            }

            fullDates.AddRange(newDates);

            // A red vertical line at the prediction start date.
            var layout = new Layout.Layout();
            layout.shapes = new List<Graph.Shape>
            {
                new Graph.Shape
                {
                    x0   = predictionStartDate.ToShortDateString(),
                    x1   = predictionStartDate.ToShortDateString(),
                    y0   = "0",
                    y1   = "1",
                    xref = "x", // string, consistent with yref (was the char literal 'x')
                    yref = "paper",
                    line = new Graph.Line()
                    {
                        color = "red", width = 2
                    }
                }
            };

            var predictionChart = Chart.Plot(
                new[]
            {
                new Graph.Scattergl()
                {
                    x    = fullDates.ToArray(),
                    y    = fullTotalConfirmedCases.ToArray(),
                    mode = "lines+markers"
                }
            },
                layout
                );

            predictionChart.WithTitle("Number of Confirmed Cases over Time");
            // display(predictionChart);

            Graph.Scattergl[] scatters =
            {
                new Graph.Scattergl()
                {
                    x    = newDates,
                    y    = forecasts.UpperBoundConfirmed,
                    fill = "tonexty",
                    name = "Upper bound"
                },
                new Graph.Scattergl()
                {
                    x    = newDates,
                    y    = forecasts.Forecast,
                    fill = "tonexty",
                    name = "Forecast"
                },
                new Graph.Scattergl()
                {
                    x    = newDates,
                    y    = forecasts.LowerBoundConfirmed,
                    fill = "tonexty",
                    name = "Lower bound"
                }
            };


            // Size the chart that was just created; the original resized the unrelated
            // first chart ("chart") instead.
            var predictionChart2 = Chart.Plot(scatters);
            predictionChart2.Width  = 600;
            predictionChart2.Height = 600;
            // display(predictionChart2);

            #endregion

            #endregion
        }