/// <summary>
/// Extracts the "HistoricalPrices" table from the stored HTML page
/// "ariva.historical.prices.DE0007664039.html" and verifies the number of rows
/// as well as the content of the first and the last extracted row.
/// </summary>
public void GetTable()
        {
            var document = LoadDocument <IHtmlDocument>("Html", "ariva.historical.prices.DE0007664039.html");

            // Initializer members run top to bottom, i.e. in the same order as plain assignments would.
            var tableDescriptor = new PathTableDescriptor
            {
                Figure  = "HistoricalPrices",
                Path    = @"/BODY[0]/DIV[0]/DIV[1]/DIV[6]/DIV[1]/DIV[0]/DIV[0]/TABLE[0]/TBODY[0]",
                Columns =
                {
                    new FormatColumn("date", typeof(DateTime), "dd.MM.yy"),
                    new FormatColumn("open", typeof(double), "00,00"),
                    new FormatColumn("high", typeof(double), "00,00"),
                    new FormatColumn("low", typeof(double), "00,00"),
                    new FormatColumn("close", typeof(double), "00,00")
                }
            };
            tableDescriptor.SkipColumns.AddRange(5, 6);
            tableDescriptor.SkipRows.AddRange(0, 23);

            var table = DocumentProcessingFactory.CreateParser(document, tableDescriptor).ExtractTable();

            Assert.That(table.Rows.Count, Is.EqualTo(22));

            // first extracted row
            var firstRow = table.Rows[0];
            Assert.That(firstRow[0], Is.EqualTo(new DateTime(2015, 12, 30)));
            Assert.That(firstRow[1], Is.EqualTo(135.45d));
            Assert.That(firstRow[2], Is.EqualTo(135.45d));
            Assert.That(firstRow[3], Is.EqualTo(133.55d));
            Assert.That(firstRow[4], Is.EqualTo(133.75d));

            // last extracted row
            var lastRow = table.Rows[21];
            Assert.That(lastRow[0], Is.EqualTo(new DateTime(2015, 11, 27)));
            Assert.That(lastRow[1], Is.EqualTo(124.50d));
            Assert.That(lastRow[2], Is.EqualTo(125.10d));
            Assert.That(lastRow[3], Is.EqualTo(121.05d));
            Assert.That(lastRow[4], Is.EqualTo(123.85d));
        }
Пример #2
0
        /// <summary>
        /// Points <c>TestDataRoot</c> to the "TestData" folder located next to the assembly
        /// of the concrete test class and sets up the lazily created document browser.
        /// </summary>
        public TestBase()
        {
            // Assembly.Location already is a plain file system path. Round-tripping it through
            // System.Uri is not needed and may alter directories which contain URI-reserved
            // characters such as '#' or '%', so the directory is taken from the path directly.
            var assemblyDirectory = Path.GetDirectoryName(GetType().Assembly.Location);

            TestDataRoot = Path.Combine(assemblyDirectory, "TestData");

            // The browser is only created on first access.
            myBrowser = new Lazy <IDocumentBrowser>(() => DocumentProcessingFactory.CreateBrowser());
        }
Пример #3
0
        /// <summary>
        /// Navigates the browser to the location of the given data source and then tries to
        /// extract a table for the given figure descriptor. Every outcome (navigation succeeded
        /// or failed, parsing succeeded or failed) is recorded in <c>myValidationReport</c>.
        /// </summary>
        /// <param name="source">Data source whose location is navigated to.</param>
        /// <param name="figureDescriptor">Descriptor used to create the parser for the loaded document.</param>
        private void Validate(DataSource source, IFigureDescriptor figureDescriptor)
        {
            try
            {
                Browser.Navigate(DocumentType.Html, source.Location, new StockMacroResolver(SelectedStock));

                myValidationReport.NavigationSucceeded(source);
            }
            catch (Exception ex)
            {
                myValidationReport.FailedToLocateDocument(source, DescribeException(ex));

                // without a document there is nothing to parse
                return;
            }

            // The new document is automatically given to the selected FigureDescriptor ViewModel.
            // The MarkupBehavior gets automatically applied

            var parser = DocumentProcessingFactory.CreateParser(Browser.Document, figureDescriptor);

            try
            {
                var table = parser.ExtractTable();

                if (table.Rows.Count == 0)
                {
                    // an empty result is reported as failure even though no exception occurred
                    myValidationReport.FailedToParseDocument(figureDescriptor, "Unknown reason");
                }
                else
                {
                    myValidationReport.ParsingSucceeded(figureDescriptor);
                }
            }
            catch (Exception ex)
            {
                myValidationReport.FailedToParseDocument(figureDescriptor, DescribeException(ex));
            }
        }

        /// <summary>
        /// Builds the text written to the validation report for a failure: the exception message
        /// on the first line followed by one "key: value" line per entry of <c>Exception.Data</c>.
        /// </summary>
        private static string DescribeException(Exception ex)
        {
            var details = new StringBuilder();
            details.AppendLine(ex.Message);

            foreach (var key in ex.Data.Keys)
            {
                details.AppendFormat("{0}: {1}", key, ex.Data[key]);
                details.AppendLine();
            }

            return details.ToString();
        }
        /// <summary>
        /// Extracts a single price cell (column containing "Letzter", row containing "Frankfurt")
        /// from the stored HTML page "ariva.prices.DE0007664039.html" and converts the resulting
        /// table into a <c>Price</c> entity whose properties are verified afterwards.
        /// </summary>
        public void GetCellAndConvertToEntity()
        {
            var document = LoadDocument <IHtmlDocument>("Html", "ariva.prices.DE0007664039.html");

            var source = new DataSource
            {
                Vendor  = "Ariva",
                Name    = "Prices",
                Quality = 1
            };

            // Initializer members run top to bottom, i.e. in the same order as plain assignments would.
            var cellDescriptor = new PathCellDescriptor
            {
                Figure      = "Price",
                Path        = @"/BODY[0]/DIV[0]/DIV[1]/DIV[6]/DIV[1]/DIV[0]/DIV[0]/TABLE[0]/TBODY[0]",
                Column      = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "Letzter" },
                Row         = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "Frankfurt" },
                ValueFormat = new FormatColumn("value", typeof(double), "00,00")
                {
                    ExtractionPattern = new Regex(@"([0-9,\.]+)")
                },
                Currency    = "EUR"
            };

            // step 1: raw extraction
            var table = DocumentProcessingFactory.CreateParser(document, cellDescriptor).ExtractTable();

            Assert.That(table.Rows.Count, Is.EqualTo(1));
            Assert.That(table.Rows[0][0], Is.EqualTo(134.356d));

            // step 2: conversion into an entity
            var knownCurrencies = new[] { new Currency { Symbol = "EUR" } };
            var converter       = DocumentProcessingFactory.CreateConverter(cellDescriptor, source, knownCurrencies);

            var stock    = new Stock { Isin = "DE0007664039" };
            var entities = converter.Convert(table, stock);

            var price = (Price)entities.Single();

            Assert.That(price.Stock.Isin, Is.EqualTo("DE0007664039"));
            Assert.That(((DayPeriod)price.Period).Day.Date, Is.EqualTo(DateTime.Today));
            Assert.That(price.Source, Does.Contain("ariva").IgnoreCase.And.Contains("price").IgnoreCase);
            Assert.That(price.Timestamp.Date, Is.EqualTo(DateTime.Today));
            Assert.That(price.Value, Is.EqualTo(134.356d));
            Assert.That(price.Currency.Symbol, Is.EqualTo("EUR"));
        }
Пример #5
0
        /// <summary>
        /// Extracts the row containing "EPS" from the semicolon separated file "DE0005151005.csv"
        /// and verifies the ten extracted values together with their years (1997 to 2006).
        /// </summary>
        public void GetSeries()
        {
            var seriesDescriptor = new SeparatorSeriesDescriptor
            {
                Figure        = "EarningsPerShare",
                Separator     = ";",
                Orientation   = SeriesOrientation.Row,
                ValuesLocator = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "EPS" },
                TimesLocator  = new AbsolutePositionLocator { HeaderSeriesPosition = 0, SeriesPosition = 0 }
            };
            seriesDescriptor.Excludes.AddRange(0, 1);
            seriesDescriptor.ValueFormat = new FormatColumn("value", typeof(double), "000,00");
            seriesDescriptor.TimeFormat  = new FormatColumn("year", typeof(int), "000");

            var document = LoadDocument <TextDocument>("Csv", "DE0005151005.csv");
            var table    = DocumentProcessingFactory.CreateParser(document, seriesDescriptor).ExtractTable();

            // expected values, one per year starting with firstYear
            var expectedValues = new[] { 3.2d, 3.4d, 3.4d, 3.3d, 2.9d, 2.8d, 3.0d, 3.0d, 3.1d, 3.5d };
            const int firstYear = 1997;

            Assert.AreEqual(expectedValues.Length, table.Rows.Count);

            for (var row = 0; row < expectedValues.Length; ++row)
            {
                Assert.AreEqual(expectedValues[row], ( double )table.Rows[row]["value"], 0.000001d);
            }

            for (var row = 0; row < expectedValues.Length; ++row)
            {
                Assert.AreEqual(firstYear + row, ( int )table.Rows[row]["year"]);
            }
        }
        /// <summary>
        /// Extracts a single integer from the stored HTML page "ariva.overview.US0138171014.html"
        /// using the extraction pattern "WKN: (\d+)" and verifies that 850206 is returned.
        /// </summary>
        public void GetSingleValue()
        {
            var document = LoadDocument <IHtmlDocument>("Html", "ariva.overview.US0138171014.html");

            var valueDescriptor = new PathSingleValueDescriptor
            {
                Path        = @"/BODY[0]/DIV[4]/DIV[0]/DIV[3]/DIV[0]",
                ValueFormat = new ValueFormat(typeof(int), "00000000")
                {
                    ExtractionPattern = new Regex(@"WKN: (\d+)")
                }
            };

            var table = DocumentProcessingFactory.CreateParser(document, valueDescriptor).ExtractTable();

            // exactly one row holding the extracted number is expected
            Assert.AreEqual(1, table.Rows.Count);
            Assert.AreEqual(850206, table.Rows[0][0]);
        }
Пример #7
0
        /// <summary>
        /// Extracts the "HistoricalPrices" table from the semicolon separated file "Prices.csv"
        /// (column 1 and row 0 are skipped) and verifies the row count and the first date.
        /// </summary>
        public void GetTable()
        {
            // Initializer members run top to bottom, i.e. in the same order as plain calls would.
            var csvDescriptor = new CsvDescriptor
            {
                Figure      = "HistoricalPrices",
                Separator   = ";",
                SkipColumns = { 1 },
                SkipRows    = { 0 },
                Columns     =
                {
                    new FormatColumn("Date", typeof(DateTime)),
                    new FormatColumn("High", typeof(double), "000,000.00"),
                    new FormatColumn("Low", typeof(double), "000,000.00"),
                    new FormatColumn("Open", typeof(double), "000,000.00"),
                    new FormatColumn("Close", typeof(double), "000,000.00")
                }
            };

            var document = LoadDocument <TextDocument>("Csv", "Prices.csv");
            var table    = DocumentProcessingFactory.CreateParser(document, csvDescriptor).ExtractTable();

            Assert.That(table.Rows.Count, Is.EqualTo(3));

            // NOTE(review): the expected date is parsed with the current culture - presumably on
            // purpose because the "Date" column has no explicit format either; confirm.
            var firstDate = table.Rows[0]["Date"];
            Assert.That(firstDate, Is.EqualTo(DateTime.Parse("01.01.2016")));
        }
Пример #8
0
        /// <summary>
        /// Clears <c>myData</c> and refills it using the first descriptor of the selected source
        /// that matches the current figure type and can be processed without an exception:
        /// the source location is navigated to, the described element is marked in the document,
        /// and the extracted table is converted and added to <c>myData</c>.
        /// Failures are either rethrown (wrapped) or logged, depending on <c>ThrowOnError</c>.
        /// Does nothing if no document browser is available.
        /// </summary>
        private void TryFetch()
        {
            if (myDocumentBrowser == null)
            {
                return;
            }

            myData.Clear();

            var candidates = mySelectedSource.Figures
                             .Cast <IPathDescriptor>()
                             .Where(figure => figure.Figure == myFigureType.Name);

            foreach (var candidate in candidates)
            {
                try
                {
                    ILocatorMacroResolver stockResolver = new StockMacroResolver(Stock);
                    var resolver = CustomResolverCreator == null
                        ? stockResolver
                        : CustomResolverCreator(stockResolver);

                    myDocumentBrowser.Navigate(DocumentType.Html, mySelectedSource.Location, resolver);

                    var document = ( IHtmlDocument )myDocumentBrowser.Document;

                    // Highlight the element the descriptor points to so that a preview is available
                    var element = ( HtmlElementAdapter )MarkupFactory.FindElementByDescriptor(document, candidate);
                    if (element != null)
                    {
                        element.Element.ScrollIntoView(false);
                    }

                    MarkupFactory.CreateMarker(candidate).Mark(element);

                    // Extract and convert right away so that format issues show up here
                    var table  = DocumentProcessingFactory.CreateParser(document, candidate).ExtractTable();
                    var series = DocumentProcessingFactory
                                 .CreateConverter(candidate, mySelectedSource, CurrenciesLut.Currencies)
                                 .Convert(table, Stock);
                    myData.AddRange(series);

                    // this descriptor worked -> the remaining alternatives are not tried
                    break;
                }
                catch (Exception ex)
                {
                    // attach the context of the failed attempt to the exception
                    ex.Data["Figure"]            = myFigureType.Name;
                    ex.Data["DataSource.Vendor"] = mySelectedSource.Vendor;
                    ex.Data["DataSource.Name"]   = mySelectedSource.Name;
                    ex.Data["Location"]          = mySelectedSource.Location.ToString();
                    ex.Data["FigureDescriptor"]  = candidate.GetType().FullName;

                    if (ThrowOnError)
                    {
                        throw new Exception("Failed to extract data from datasource", ex);
                    }

                    // not throwing: log and continue with the next descriptor
                    myLogger.Error(ex, "Failed to fetch '{0}' from site {1}", myFigureType.Name, mySelectedSource.Name);
                }
            }
        }
        /// <summary>
        /// Extracts the row containing "Dividendenausschüttung" from the stored HTML page
        /// "ariva.fundamentals.DE0005190003.html", verifies the six extracted values and years
        /// (2001 to 2006) and then converts the table into <c>Dividend</c> entities whose
        /// properties are verified as well.
        /// </summary>
        public void GetSeriesAndConvertToEntities()
        {
            var document = LoadDocument <IHtmlDocument>("Html", "ariva.fundamentals.DE0005190003.html");

            var source = new DataSource
            {
                Vendor  = "Ariva",
                Name    = "Fundamentals",
                Quality = 1
            };

            // Initializer members run top to bottom, i.e. in the same order as plain assignments would.
            var seriesDescriptor = new PathSeriesDescriptor
            {
                Figure        = "Dividend",
                Path          = @"/BODY[0]/DIV[5]/DIV[0]/DIV[1]/TABLE[7]/TBODY[0]",
                Orientation   = SeriesOrientation.Row,
                ValuesLocator = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "Dividendenausschüttung" },
                ValueFormat   = new FormatColumn("value", typeof(double), "00,00") { InMillions = true },
                TimesLocator  = new AbsolutePositionLocator { HeaderSeriesPosition = 0, SeriesPosition = 1 },
                TimeFormat    = new FormatColumn("year", typeof(int), "00000000"),
                Excludes      = { 0 }
            };

            var table = DocumentProcessingFactory.CreateParser(document, seriesDescriptor).ExtractTable();

            // expected dividends, one per year starting with firstYear
            var expectedValues = new[] { 350000000d, 351000000d, 392000000d, 419000000d, 424000000d, 458000000d };
            const int firstYear = 2001;

            Assert.AreEqual(expectedValues.Length, table.Rows.Count);

            for (var row = 0; row < expectedValues.Length; ++row)
            {
                Assert.AreEqual(expectedValues[row], table.Rows[row][0]);
            }

            for (var row = 0; row < expectedValues.Length; ++row)
            {
                Assert.AreEqual(firstYear + row, table.Rows[row][1]);
            }

            var stock = new Stock { Isin = "DE0007664039" };
            stock.Company = new Company { Name = "Volkswagen" };
            stock.Company.Stocks.Add(stock);

            var dividends = DocumentProcessingFactory
                            .CreateConverter(seriesDescriptor, source, Enumerable.Empty <Currency>())
                            .Convert(table, stock)
                            .Cast <Dividend>()
                            .ToList();

            foreach (var entity in dividends)
            {
                Assert.That(entity.Company.Stocks.First().Isin, Is.EqualTo("DE0007664039"));
                Assert.That(entity.Period, Is.InstanceOf <YearPeriod>());
                Assert.That(entity.Source, Does.Contain("ariva").IgnoreCase.And.Contains("fundamentals").IgnoreCase);
                Assert.That(entity.Timestamp.Date, Is.EqualTo(DateTime.Today));

                // Descriptor does not provide static currency
                Assert.That(entity.Currency, Is.Null);
            }

            for (var index = 0; index < expectedValues.Length; ++index)
            {
                Assert.That(dividends[index].Period, Is.EqualTo(new YearPeriod(firstYear + index)));
                Assert.That(dividends[index].Value, Is.EqualTo(expectedValues[index]));
            }
        }