Exemple #1
0
        /// <summary>
        /// Fetches the document addressed by <paramref name="datumLocator"/>, extracts its table
        /// and runs it through a <c>FirstNonNullPolicy</c> validation.
        /// </summary>
        /// <param name="datumLocator">
        /// Locator whose <c>Parameters</c> configure the <c>EvaluatorPolicy</c> and whose <c>Site</c>
        /// is used to resolve navigation and format.
        /// </param>
        /// <returns>
        /// A <c>LocatorValidationResult</c> describing the outcome. The table argument is <c>null</c> when
        /// no document could be obtained or when an exception was thrown; in the latter case
        /// <c>ErrorMessage</c> is set to the exception's message.
        /// </returns>
        public LocatorValidationResult Fetch(ParameterizedDatumLocator datumLocator)
        {
            // Declared outside the try block so the catch handler can report
            // whatever had already been resolved when the failure occurred.
            Navigation resolvedNavigation = null;
            IDocument  document           = null;
            IFormat    resolvedFormat     = null;

            try
            {
                var policy = new EvaluatorPolicy(datumLocator.Parameters);

                resolvedNavigation = policy.GetNavigation(datumLocator.Site);
                document           = myWebScrapSC.GetDocument(resolvedNavigation);

                if (document != null)
                {
                    resolvedFormat = policy.GetFormat(datumLocator.Site);

                    var extracted    = document.ExtractTable(resolvedFormat);
                    var preprocessed = policy.ApplyPreprocessing(extracted);

                    var validator = new FirstNonNullPolicy();
                    validator.Validate(datumLocator.Site, preprocessed);

                    return new LocatorValidationResult(validator.ResultTable, datumLocator, resolvedNavigation, document.Location, resolvedFormat);
                }

                // No document: report only the locator and the navigation that was attempted.
                return new LocatorValidationResult(null, datumLocator, resolvedNavigation, null, null);
            }
            catch (Exception ex)
            {
                // The document may not have been obtained yet when the exception was raised.
                var location = document != null ? document.Location : null;

                return new LocatorValidationResult(null, datumLocator, resolvedNavigation, location, resolvedFormat)
                {
                    ErrorMessage = ex.Message
                };
            }
        }
Exemple #2
0
        /// <summary>
        /// Loads an HTML document from the "Recognition/Html" folder below <c>TestDataRoot</c>.
        /// </summary>
        /// <param name="name">File name (combined with the test data folder via <c>Path.Combine</c>).</param>
        /// <returns>The <c>Content</c> of the <c>HtmlDocumentHandle</c> returned by the browser.</returns>
        protected IHtmlDocument LoadDocument(string name)
        {
            var htmlTestDataDir = Path.Combine(TestDataRoot, "Recognition", "Html");
            var documentPath    = Path.Combine(htmlTestDataDir, name);

            var requestUrl = new NavigatorUrl(UriType.Request, documentPath);
            var navigation = new Navigation(DocumentType.Html, requestUrl);

            // The browser returns a general document type; the HTML handle is needed to reach Content.
            var handle = (HtmlDocumentHandle)myBrowser.GetDocument(navigation);

            return handle.Content;
        }
Exemple #3
0
        /// <summary>
        /// Extracts a single integer value from a stored Ariva overview page using a
        /// path-based format and checks that exactly one row with the value 850206 is produced.
        /// </summary>
        public void WpknFromAriva()
        {
            // Arrange
            var htmlFile   = OS.CombinePaths(TestDataRoot, "Recognition", "Core", "ariva.overview.US0138171014.html");
            var navigation = new Navigation(DocumentType.Html, htmlFile);
            var document   = myWebScrapSC.GetDocument(navigation);

            var wpknFormat = new PathSingleValueFormat("Ariva.Wpkn")
            {
                Path        = @"/BODY[0]/DIV[4]/DIV[0]/DIV[3]/DIV[0]",
                // The regex captures the digits following "WKN: " from the addressed element's text.
                ValueFormat = new ValueFormat(typeof(int), "00000000", new Regex(@"WKN: (\d+)"))
            };

            // Act
            var extracted = document.ExtractTable(wpknFormat);

            // Assert
            Assert.AreEqual(1, extracted.Rows.Count);
            Assert.AreEqual(850206, extracted.Rows[0][0]);
        }
Exemple #4
0
        /// <summary>
        /// Tries to fetch <paramref name="datum"/> from <paramref name="site"/> and lets
        /// <c>ResultPolicy</c> validate the extracted table.
        /// </summary>
        /// <param name="site">Site whose navigation and format are resolved through <c>FetchPolicy</c>.</param>
        /// <param name="datum">Name of the datum; only used for diagnostics in the failure path.</param>
        /// <returns>
        /// <c>true</c> when <c>ResultPolicy.Validate</c> accepts the result; <c>false</c> when any step
        /// fails, in which case the failure is logged as a warning.
        /// </returns>
        private bool Fetch(Site site, string datum)
        {
            // Kept outside the try block so the failure handler can attach them to the exception.
            Navigation effectiveNavigation = null;
            IFormat    effectiveFormat     = null;

            try
            {
                effectiveNavigation = FetchPolicy.GetNavigation(site);

                var document = myWebScrapSC.GetDocument(effectiveNavigation);
                if (document == null)
                {
                    throw new Exception("Failed to navigate to the document");
                }

                effectiveFormat = FetchPolicy.GetFormat(site);

                var extracted    = document.ExtractTable(effectiveFormat);
                var preprocessed = FetchPolicy.ApplyPreprocessing(extracted);

                // A rejected result is routed through the same failure handler as any other error.
                if (!ResultPolicy.Validate(site, preprocessed))
                {
                    throw new Exception("Result not valid");
                }

                return true;
            }
            catch (Exception ex)
            {
                // Attach both the original site settings and the ones FetchPolicy produced.
                ex.Data["Datum"]              = datum;
                ex.Data["SiteName"]           = site.Name;
                ex.Data["OriginalFormat"]     = site.Format;
                ex.Data["OriginalNavigation"] = site.Navigation;
                ex.Data["ModifiedFormat"]     = effectiveFormat;
                ex.Data["ModifiedNavigation"] = effectiveNavigation;

                myLogger.Warning(ex, "Failed to fetch '{0}' from site {1}", datum, site.Name);

                return false;
            }
        }