/// <summary>
/// Fetches the table addressed by <paramref name="datumLocator"/> and wraps the outcome,
/// successful or not, in a <see cref="LocatorValidationResult"/>.
/// </summary>
/// <param name="datumLocator">
/// Locator whose <c>Parameters</c> configure the evaluator policy and whose <c>Site</c>
/// supplies the navigation and format.
/// </param>
/// <returns>
/// On success, a result holding the table selected by the <c>FirstNonNullPolicy</c>, the
/// navigation, the document location and the format. When no document is returned, a result
/// with a null table, location and format. When any step throws, a result with a null table
/// and <c>ErrorMessage</c> set to the exception's message.
/// </returns>
public LocatorValidationResult Fetch(ParameterizedDatumLocator datumLocator)
{
    // Declared outside the try block so the catch handler can report whatever
    // had already been resolved when the failure happened.
    Navigation resolvedNavigation = null;
    IDocument document = null;
    IFormat resolvedFormat = null;

    try
    {
        var policy = new EvaluatorPolicy(datumLocator.Parameters);

        resolvedNavigation = policy.GetNavigation(datumLocator.Site);
        document = myWebScrapSC.GetDocument(resolvedNavigation);
        if (document == null)
        {
            return new LocatorValidationResult(null, datumLocator, resolvedNavigation, null, null);
        }

        resolvedFormat = policy.GetFormat(datumLocator.Site);
        var extracted = document.ExtractTable(resolvedFormat);
        var table = policy.ApplyPreprocessing(extracted);

        var validator = new FirstNonNullPolicy();
        validator.Validate(datumLocator.Site, table);

        return new LocatorValidationResult(validator.ResultTable, datumLocator, resolvedNavigation, document.Location, resolvedFormat);
    }
    catch (Exception ex)
    {
        // The document may not have been loaded yet when the exception occurred.
        var location = document != null ? document.Location : null;

        return new LocatorValidationResult(null, datumLocator, resolvedNavigation, location, resolvedFormat)
        {
            ErrorMessage = ex.Message
        };
    }
}
/// <summary>
/// Loads an HTML document from the "Recognition/Html" folder below <c>TestDataRoot</c>
/// through <c>myBrowser</c>.
/// </summary>
/// <param name="name">File name (or relative path) combined with the HTML folder path.</param>
/// <returns>The <c>Content</c> of the document handle returned by the browser.</returns>
protected IHtmlDocument LoadDocument(string name)
{
    string htmlFolder = Path.Combine(TestDataRoot, "Recognition", "Html");
    string documentPath = Path.Combine(htmlFolder, name);

    var requestUrl = new NavigatorUrl(UriType.Request, documentPath);
    var navigation = new Navigation(DocumentType.Html, requestUrl);

    // The browser's result is cast to the HTML-specific handle to reach its content.
    var handle = (HtmlDocumentHandle)myBrowser.GetDocument(navigation);
    return handle.Content;
}
/// <summary>
/// Extracts a single integer value from the stored Ariva overview page using a
/// path-based format with a "WKN: (\d+)" extraction pattern, and expects exactly
/// one row containing 850206.
/// </summary>
public void WpknFromAriva()
{
    var htmlPath = OS.CombinePaths(TestDataRoot, "Recognition", "Core", "ariva.overview.US0138171014.html");
    var navigation = new Navigation(DocumentType.Html, htmlPath);
    var document = myWebScrapSC.GetDocument(navigation);

    var format = new PathSingleValueFormat("Ariva.Wpkn")
    {
        Path = @"/BODY[0]/DIV[4]/DIV[0]/DIV[3]/DIV[0]",
        ValueFormat = new ValueFormat(typeof(int), "00000000", new Regex(@"WKN: (\d+)"))
    };

    var table = document.ExtractTable(format);

    Assert.AreEqual(1, table.Rows.Count);
    Assert.AreEqual(850206, table.Rows[0][0]);
}
/// <summary>
/// Tries to fetch <paramref name="datum"/> from <paramref name="site"/>: resolves navigation
/// and format through <c>FetchPolicy</c>, extracts and preprocesses the table, and validates
/// it with <c>ResultPolicy</c>.
/// </summary>
/// <param name="site">Site whose navigation and format are passed to the fetch policy.</param>
/// <param name="datum">Name of the datum; used only for diagnostics here.</param>
/// <returns>
/// <c>true</c> when <c>ResultPolicy.Validate</c> accepts the result; <c>false</c> when the
/// document is missing, the result is rejected, or any step throws. Every failure is logged
/// as a warning.
/// </returns>
private bool Fetch(Site site, string datum)
{
    // Kept outside the try block so the failure handler can attach whatever was resolved.
    Navigation effectiveNavigation = null;
    IFormat effectiveFormat = null;

    try
    {
        effectiveNavigation = FetchPolicy.GetNavigation(site);

        var document = myWebScrapSC.GetDocument(effectiveNavigation);
        if (document == null)
        {
            throw new Exception("Failed to navigate to the document");
        }

        effectiveFormat = FetchPolicy.GetFormat(site);
        var extracted = document.ExtractTable(effectiveFormat);
        var table = FetchPolicy.ApplyPreprocessing(extracted);

        // A rejected result goes through the same logging path as every other failure.
        if (!ResultPolicy.Validate(site, table))
        {
            throw new Exception("Result not valid");
        }

        return true;
    }
    catch (Exception ex)
    {
        // Attach diagnostic context to the exception before logging it.
        ex.Data["Datum"] = datum;
        ex.Data["SiteName"] = site.Name;
        ex.Data["OriginalFormat"] = site.Format;
        ex.Data["OriginalNavigation"] = site.Navigation;
        ex.Data["ModifiedFormat"] = effectiveFormat;
        ex.Data["ModifiedNavigation"] = effectiveNavigation;

        myLogger.Warning(ex, "Failed to fetch '{0}' from site {1}", datum, site.Name);
        return false;
    }
}