/// <summary>
/// Parses the stored Ariva historical-prices HTML page with a <c>PathTableDescriptor</c>
/// and checks the row count plus the cell values of the first and the last extracted row.
/// </summary>
public void GetTable()
{
    var document = LoadDocument<IHtmlDocument>("Html", "ariva.historical.prices.DE0007664039.html");

    var tableDescriptor = new PathTableDescriptor
    {
        Figure = "HistoricalPrices",
        Path = @"/BODY[0]/DIV[0]/DIV[1]/DIV[6]/DIV[1]/DIV[0]/DIV[0]/TABLE[0]/TBODY[0]"
    };

    // One date column followed by four price columns that all share the same number format.
    tableDescriptor.Columns.Add(new FormatColumn("date", typeof(DateTime), "dd.MM.yy"));
    foreach (var priceColumn in new[] { "open", "high", "low", "close" })
    {
        tableDescriptor.Columns.Add(new FormatColumn(priceColumn, typeof(double), "00,00"));
    }

    tableDescriptor.SkipColumns.AddRange(5, 6);
    tableDescriptor.SkipRows.AddRange(0, 23);

    var table = DocumentProcessingFactory.CreateParser(document, tableDescriptor).ExtractTable();

    Assert.That(table.Rows.Count, Is.EqualTo(22));

    // Expected cells in column order: date, open, high, low, close.
    var expectedFirstRow = new object[] { new DateTime(2015, 12, 30), 135.45d, 135.45d, 133.55d, 133.75d };
    for (var column = 0; column < expectedFirstRow.Length; column++)
    {
        Assert.That(table.Rows[0][column], Is.EqualTo(expectedFirstRow[column]));
    }

    var expectedLastRow = new object[] { new DateTime(2015, 11, 27), 124.50d, 125.10d, 121.05d, 123.85d };
    for (var column = 0; column < expectedLastRow.Length; column++)
    {
        Assert.That(table.Rows[21][column], Is.EqualTo(expectedLastRow[column]));
    }
}
/// <summary>
/// Points <c>TestDataRoot</c> at the "TestData" folder next to the assembly of the runtime type
/// and wraps creation of the document browser in a <c>Lazy</c> so it is only built on first use.
/// </summary>
public TestBase()
{
    // GetType() yields the runtime (most derived) type, so the folder is resolved from that type's assembly.
    var assemblyDirectory = Path.GetDirectoryName(GetType().Assembly.Location);
    var localDirectory = new Uri(assemblyDirectory).LocalPath;

    TestDataRoot = Path.Combine(localDirectory, "TestData");

    myBrowser = new Lazy<IDocumentBrowser>(() => DocumentProcessingFactory.CreateBrowser());
}
/// <summary>
/// Validates one figure descriptor against a data source: navigates the browser to the
/// source location, then parses the loaded document and records the outcome of each step
/// in the validation report.
/// </summary>
/// <param name="source">Data source whose <c>Location</c> is navigated to.</param>
/// <param name="figureDescriptor">Descriptor handed to the parser for the loaded document.</param>
private void Validate(DataSource source, IFigureDescriptor figureDescriptor)
{
    try
    {
        Browser.Navigate(DocumentType.Html, source.Location, new StockMacroResolver(SelectedStock));
        myValidationReport.NavigationSucceeded(source);
    }
    catch (Exception ex)
    {
        // Without a document there is nothing to parse, so report and stop here.
        myValidationReport.FailedToLocateDocument(source, DescribeFailure(ex));
        return;
    }

    // The new document is automatically given to the selected FigureDescriptor ViewModel.
    // The MarkupBehavior gets automatically applied
    var parser = DocumentProcessingFactory.CreateParser(Browser.Document, figureDescriptor);

    try
    {
        var table = parser.ExtractTable();
        if (table.Rows.Count == 0)
        {
            // Parsing did not throw but produced no rows; there is no exception to describe.
            myValidationReport.FailedToParseDocument(figureDescriptor, "Unknown reason");
        }
        else
        {
            myValidationReport.ParsingSucceeded(figureDescriptor);
        }
    }
    catch (Exception ex)
    {
        myValidationReport.FailedToParseDocument(figureDescriptor, DescribeFailure(ex));
    }
}

/// <summary>
/// Builds the report text for a failure: the exception message on the first line,
/// followed by one "key: value" line for every entry in <c>Exception.Data</c>.
/// </summary>
/// <param name="ex">The exception to describe.</param>
/// <returns>The multi-line description.</returns>
private static string DescribeFailure(Exception ex)
{
    var sb = new StringBuilder();
    sb.AppendLine(ex.Message);

    foreach (var key in ex.Data.Keys)
    {
        sb.AppendFormat("{0}: {1}", key, ex.Data[key]);
        sb.AppendLine();
    }

    return sb.ToString();
}
/// <summary>
/// Extracts a single price cell from the stored Ariva prices HTML page with a
/// <c>PathCellDescriptor</c>, converts the resulting table into entities and checks
/// the fields of the single <c>Price</c> that comes back.
/// </summary>
public void GetCellAndConvertToEntity()
{
    var document = LoadDocument<IHtmlDocument>("Html", "ariva.prices.DE0007664039.html");

    var source = new DataSource
    {
        Vendor = "Ariva",
        Name = "Prices",
        Quality = 1
    };

    // The cell is addressed by the column whose header contains "Letzter"
    // and the row whose header contains "Frankfurt".
    var cellDescriptor = new PathCellDescriptor
    {
        Figure = "Price",
        Path = @"/BODY[0]/DIV[0]/DIV[1]/DIV[6]/DIV[1]/DIV[0]/DIV[0]/TABLE[0]/TBODY[0]",
        Column = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "Letzter" },
        Row = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "Frankfurt" },
        ValueFormat = new FormatColumn("value", typeof(double), "00,00")
        {
            ExtractionPattern = new Regex(@"([0-9,\.]+)")
        },
        Currency = "EUR"
    };

    var table = DocumentProcessingFactory.CreateParser(document, cellDescriptor).ExtractTable();

    Assert.That(table.Rows.Count, Is.EqualTo(1));
    Assert.That(table.Rows[0][0], Is.EqualTo(134.356d));

    var knownCurrencies = new[] { new Currency { Symbol = "EUR" } };
    var converter = DocumentProcessingFactory.CreateConverter(cellDescriptor, source, knownCurrencies);
    var entities = converter.Convert(table, new Stock { Isin = "DE0007664039" });

    var price = (Price)entities.Single();

    Assert.That(price.Stock.Isin, Is.EqualTo("DE0007664039"));
    Assert.That(((DayPeriod)price.Period).Day.Date, Is.EqualTo(DateTime.Today));
    Assert.That(price.Source, Does.Contain("ariva").IgnoreCase.And.Contains("price").IgnoreCase);
    Assert.That(price.Timestamp.Date, Is.EqualTo(DateTime.Today));
    Assert.That(price.Value, Is.EqualTo(134.356d));
    Assert.That(price.Currency.Symbol, Is.EqualTo("EUR"));
}
/// <summary>
/// Extracts the "EPS" series from a semicolon-separated text document with a
/// <c>SeparatorSeriesDescriptor</c> and checks all ten value/year pairs.
/// </summary>
public void GetSeries()
{
    var seriesDescriptor = new SeparatorSeriesDescriptor
    {
        Figure = "EarningsPerShare",
        Separator = ";",
        Orientation = SeriesOrientation.Row,
        ValuesLocator = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "EPS" },
        TimesLocator = new AbsolutePositionLocator { HeaderSeriesPosition = 0, SeriesPosition = 0 }
    };
    seriesDescriptor.Excludes.AddRange(0, 1);
    seriesDescriptor.ValueFormat = new FormatColumn("value", typeof(double), "000,00");
    seriesDescriptor.TimeFormat = new FormatColumn("year", typeof(int), "000");

    var document = LoadDocument<TextDocument>("Csv", "DE0005151005.csv");
    var table = DocumentProcessingFactory.CreateParser(document, seriesDescriptor).ExtractTable();

    // Expected values by row; the expected years are consecutive, starting at firstYear.
    var expectedValues = new[] { 3.2d, 3.4d, 3.4d, 3.3d, 2.9d, 2.8d, 3.0d, 3.0d, 3.1d, 3.5d };
    const int firstYear = 1997;
    const double tolerance = 0.000001d;

    Assert.AreEqual(10, table.Rows.Count);

    for (var row = 0; row < expectedValues.Length; row++)
    {
        Assert.AreEqual(expectedValues[row], (double)table.Rows[row]["value"], tolerance);
    }

    for (var row = 0; row < expectedValues.Length; row++)
    {
        Assert.AreEqual(firstYear + row, (int)table.Rows[row]["year"]);
    }
}
/// <summary>
/// Extracts a single integer (captured by the "WKN: (\d+)" pattern) from the stored Ariva
/// overview HTML page with a <c>PathSingleValueDescriptor</c> and checks the parsed number.
/// </summary>
public void GetSingleValue()
{
    var document = LoadDocument<IHtmlDocument>("Html", "ariva.overview.US0138171014.html");

    var wknFormat = new ValueFormat(typeof(int), "00000000")
    {
        ExtractionPattern = new Regex(@"WKN: (\d+)")
    };

    var valueDescriptor = new PathSingleValueDescriptor
    {
        Path = @"/BODY[0]/DIV[4]/DIV[0]/DIV[3]/DIV[0]",
        ValueFormat = wknFormat
    };

    var table = DocumentProcessingFactory.CreateParser(document, valueDescriptor).ExtractTable();

    Assert.AreEqual(1, table.Rows.Count);
    Assert.AreEqual(850206, table.Rows[0][0]);
}
/// <summary>
/// Parses "Prices.csv" with a <c>CsvDescriptor</c> (semicolon separator, column 1 and row 0
/// registered as skipped) and checks the row count and the date of the first extracted row.
/// </summary>
public void GetTable()
{
    var descriptor = new CsvDescriptor();
    descriptor.Figure = "HistoricalPrices";
    descriptor.Separator = ";";
    descriptor.SkipColumns.Add(1);
    descriptor.SkipRows.Add(0);
    descriptor.Columns.Add(new FormatColumn("Date", typeof(DateTime)));
    descriptor.Columns.Add(new FormatColumn("High", typeof(double), "000,000.00"));
    descriptor.Columns.Add(new FormatColumn("Low", typeof(double), "000,000.00"));
    descriptor.Columns.Add(new FormatColumn("Open", typeof(double), "000,000.00"));
    descriptor.Columns.Add(new FormatColumn("Close", typeof(double), "000,000.00"));

    var doc = LoadDocument<TextDocument>("Csv", "Prices.csv");
    var parser = DocumentProcessingFactory.CreateParser(doc, descriptor);
    var table = parser.ExtractTable();

    Assert.That(table.Rows.Count, Is.EqualTo(3));

    // The expected date is built from components rather than DateTime.Parse("01.01.2016"):
    // Parse without a format provider uses the current thread culture (and its calendar),
    // so the expectation itself could change or fail to parse depending on the machine.
    Assert.That(table.Rows[0]["Date"], Is.EqualTo(new DateTime(2016, 1, 1)));
}
/// <summary>
/// Clears <c>myData</c> and tries each descriptor of the selected source whose figure name
/// matches <c>myFigureType</c>: navigates to the source, marks the described element, parses
/// and converts the data. Stops after the first descriptor that succeeds. A failing descriptor
/// is either rethrown wrapped in a new exception (when <c>ThrowOnError</c> is set) or logged,
/// after which the next descriptor is tried.
/// </summary>
private void TryFetch()
{
    if (myDocumentBrowser == null)
    {
        return;
    }

    myData.Clear();

    // Deferred query: the cast and the filter run while the loop below enumerates.
    var candidates =
        from IPathDescriptor figure in mySelectedSource.Figures
        where figure.Figure == myFigureType.Name
        select figure;

    foreach (var candidate in candidates)
    {
        try
        {
            ILocatorMacroResolver macroResolver = new StockMacroResolver(Stock);
            if (CustomResolverCreator != null)
            {
                macroResolver = CustomResolverCreator(macroResolver);
            }

            myDocumentBrowser.Navigate(DocumentType.Html, mySelectedSource.Location, macroResolver);

            var page = (IHtmlDocument)myDocumentBrowser.Document;

            // Mark the part of the document described by the FigureDescriptor to have a preview
            var target = (HtmlElementAdapter)MarkupFactory.FindElementByDescriptor(page, candidate);
            if (target != null)
            {
                target.Element.ScrollIntoView(false);
            }

            MarkupFactory.CreateMarker(candidate).Mark(target);

            // already extract data here to check for format issues etc
            var extracted = DocumentProcessingFactory.CreateParser(page, candidate).ExtractTable();

            var converter = DocumentProcessingFactory.CreateConverter(candidate, mySelectedSource, CurrenciesLut.Currencies);
            myData.AddRange(converter.Convert(extracted, Stock));

            // we found s.th. with this format
            // -> skip alternative formats
            break;
        }
        catch (Exception ex)
        {
            // Attach the context of the failed attempt to the exception itself.
            ex.Data["Figure"] = myFigureType.Name;
            ex.Data["DataSource.Vendor"] = mySelectedSource.Vendor;
            ex.Data["DataSource.Name"] = mySelectedSource.Name;
            ex.Data["Location"] = mySelectedSource.Location.ToString();
            ex.Data["FigureDescriptor"] = candidate.GetType().FullName;

            if (ThrowOnError)
            {
                throw new Exception("Failed to extract data from datasource", ex);
            }

            myLogger.Error(ex, "Failed to fetch '{0}' from site {1}", myFigureType.Name, mySelectedSource.Name);
        }
    }
}
/// <summary>
/// Extracts the "Dividendenausschüttung" series from the stored Ariva fundamentals HTML page
/// with a <c>PathSeriesDescriptor</c>, checks the six value/year rows, then converts the table
/// into <c>Dividend</c> entities and checks their fields.
/// </summary>
public void GetSeriesAndConvertToEntities()
{
    var document = LoadDocument<IHtmlDocument>("Html", "ariva.fundamentals.DE0005190003.html");

    var source = new DataSource
    {
        Vendor = "Ariva",
        Name = "Fundamentals",
        Quality = 1
    };

    var seriesDescriptor = new PathSeriesDescriptor
    {
        Figure = "Dividend",
        Path = @"/BODY[0]/DIV[5]/DIV[0]/DIV[1]/TABLE[7]/TBODY[0]",
        Orientation = SeriesOrientation.Row,
        ValuesLocator = new StringContainsLocator { HeaderSeriesPosition = 0, Pattern = "Dividendenausschüttung" },
        ValueFormat = new FormatColumn("value", typeof(double), "00,00") { InMillions = true },
        TimesLocator = new AbsolutePositionLocator { HeaderSeriesPosition = 0, SeriesPosition = 1 },
        TimeFormat = new FormatColumn("year", typeof(int), "00000000")
    };
    seriesDescriptor.Excludes.Add(0);

    var table = DocumentProcessingFactory.CreateParser(document, seriesDescriptor).ExtractTable();

    // Expected values by row; the expected years are consecutive, starting at firstYear.
    var expectedValues = new[] { 350000000d, 351000000d, 392000000d, 419000000d, 424000000d, 458000000d };
    const int firstYear = 2001;

    Assert.AreEqual(6, table.Rows.Count);

    for (var row = 0; row < expectedValues.Length; row++)
    {
        Assert.AreEqual(expectedValues[row], table.Rows[row][0]);
    }

    for (var row = 0; row < expectedValues.Length; row++)
    {
        Assert.AreEqual(firstYear + row, table.Rows[row][1]);
    }

    var stock = new Stock { Isin = "DE0007664039" };
    stock.Company = new Company { Name = "Volkswagen" };
    stock.Company.Stocks.Add(stock);

    var converter = DocumentProcessingFactory.CreateConverter(seriesDescriptor, source, Enumerable.Empty<Currency>());
    var dividends = converter.Convert(table, stock).Cast<Dividend>().ToList();

    foreach (var dividend in dividends)
    {
        Assert.That(dividend.Company.Stocks.First().Isin, Is.EqualTo("DE0007664039"));
        Assert.That(dividend.Period, Is.InstanceOf<YearPeriod>());
        Assert.That(dividend.Source, Does.Contain("ariva").IgnoreCase.And.Contains("fundamentals").IgnoreCase);
        Assert.That(dividend.Timestamp.Date, Is.EqualTo(DateTime.Today));

        // Descriptor does not provide static currency
        Assert.That(dividend.Currency, Is.Null);
    }

    for (var index = 0; index < expectedValues.Length; index++)
    {
        Assert.That(dividends[index].Period, Is.EqualTo(new YearPeriod(firstYear + index)));
        Assert.That(dividends[index].Value, Is.EqualTo(expectedValues[index]));
    }
}