Esempio n. 1
0
        //  public String XPathSelector { get; set; } = "";

        /// <summary>
        /// Asks <c>ExtractorTools.HtmlExtractorProvider</c> for an extractor using
        /// <c>ExtractorName</c>, then deploys <c>ExtractorCustomizationSettings</c> to it.
        /// </summary>
        /// <returns>The extractor obtained from the provider, after the customization settings were deployed.</returns>
        public IHtmlExtractor GetExtractor()
        {
            IHtmlExtractor configured = ExtractorTools.HtmlExtractorProvider.GetInstance(ExtractorName);
            configured.DeployCustomizationSettings(ExtractorCustomizationSettings);

            return configured;
        }
Esempio n. 2
0
 /// <summary>
 /// Creates the controller and stores each supplied collaborator in its matching field.
 /// </summary>
 /// <param name="htmlExtractor">Extractor stored in <c>_htmlExtractor</c>.</param>
 /// <param name="htmlTransformer">Transformer stored in <c>_htmlTransformer</c>.</param>
 /// <param name="htmlLoader">Loader stored in <c>_htmlLoader</c>.</param>
 /// <param name="reviewsDbContext">Database context stored in <c>_reviewsDbContext</c>.</param>
 public EtlController(
     IHtmlExtractor htmlExtractor,
     IHtmlTransformer htmlTransformer,
     IHtmlLoader htmlLoader,
     ReviewsDbContext reviewsDbContext)
 {
     _reviewsDbContext = reviewsDbContext;
     _htmlLoader = htmlLoader;
     _htmlTransformer = htmlTransformer;
     _htmlExtractor = htmlExtractor;
 }
 /// <summary>
 /// Creates the scraper and stores the supplied extractor in <c>_htmlExtractor</c>.
 /// </summary>
 /// <param name="htmlExtractor">Extractor the scraper keeps a reference to.</param>
 public HtmlScraper(IHtmlExtractor<T> htmlExtractor) => _htmlExtractor = htmlExtractor;
 /// <summary>
 /// Assigns a newly constructed <c>HtmlExtractor</c> to <c>_sut</c>.
 /// </summary>
 public void TestInitialize() => _sut = new HtmlExtractor();
Esempio n. 5
0
 /// <summary>
 /// Creates the processor and stores each supplied extractor in its matching field.
 /// </summary>
 /// <param name="linkExtractor">Extractor stored in <c>_linkExtractor</c>.</param>
 /// <param name="htmlExtractor">Extractor stored in <c>_htmlExtractor</c>.</param>
 /// <param name="metaExtractor">Extractor stored in <c>_metaExtractor</c>.</param>
 public TextProcessor(
     ILinkExtractor linkExtractor,
     IHtmlExtractor htmlExtractor,
     IMetaExtractor metaExtractor)
 {
     _metaExtractor = metaExtractor;
     _htmlExtractor = htmlExtractor;
     _linkExtractor = linkExtractor;
 }
        /// <summary>
        /// Aggregates <paramref name="sources"/>: compiles their descriptions, merges them in two ways
        /// and records the <c>SourceTableCase</c> values derived from those results in <c>Features</c>.
        /// </summary>
        /// <param name="sources">
        /// Source tables to aggregate. When <c>isNullOrEmpty()</c> reports true the constructor returns
        /// immediately without assigning anything, <c>name</c> and <c>Task</c> included.
        /// </param>
        /// <param name="extractor">Extractor whose <c>sourceContentAnalysis</c> produces a description for each source table.</param>
        /// <param name="task">Extraction task; stored in <c>Task</c>, with its <c>name</c> copied into <c>name</c>.</param>
        public SourceTableAggregation(List<SourceTable> sources, IHtmlExtractor extractor, TableExtractionTask task)
        {
            // NOTE(review): this early exit also skips the name/Task assignments at the bottom - confirm that is intended.
            if (sources.isNullOrEmpty())
            {
                return;
            }

            var descriptions = sources.Select(source => extractor.sourceContentAnalysis.GetDescription(source));
            aggregatedDescriptions = descriptions.CompileSourceDescription();
            aggregatedAsRows = sources.Merge(false, true);
            aggregatedAsColumns = sources.Merge(true, true);

            // Height-based features: only evaluated when the smallest source height exceeds 1.
            if (aggregatedDescriptions.sourceHeight.Minimum > 1)
            {
                Features.Add(aggregatedDescriptions.sourceHeight.Range == 0
                    ? SourceTableCase.stableHeight
                    : SourceTableCase.variableHeight);

                if (aggregatedAsRows.Height == 1)
                {
                    Features.Add(SourceTableCase.staticContent | SourceTableCase.vertically);
                    Features.Add(SourceTableCase.horizontalOrientation);
                }
            }

            // Width-based features: only evaluated when the smallest source width exceeds 1.
            if (aggregatedDescriptions.sourceWidth.Minimum > 1)
            {
                Features.Add(aggregatedDescriptions.sourceWidth.Range == 0
                    ? SourceTableCase.stableWidth
                    : SourceTableCase.variableWidth);

                if (aggregatedAsRows.Width == 1)
                {
                    Features.Add(SourceTableCase.staticContent | SourceTableCase.horizontally);
                    Features.Add(SourceTableCase.verticalOrientation);
                }
            }

            // Orientation derived from which dimension is stable and which one varies.
            if (Features.ContainsAll(SourceTableCase.stableWidth, SourceTableCase.variableHeight))
            {
                Features.Add(SourceTableCase.verticalOrientation);
            }

            if (Features.ContainsAll(SourceTableCase.variableWidth, SourceTableCase.stableHeight))
            {
                Features.Add(SourceTableCase.horizontalOrientation);
            }

            // Fallback used when no recorded feature carries the orientation flag.
            // NOTE(review): both conditions below repeat checks already made in the height/width sections
            // above. HasFlag is true only when every bit of its argument is set, so confirm how
            // SourceTableCase.orientation relates to the single-orientation values added earlier.
            var orientationRecorded = Features.Any(x => x.HasFlag(SourceTableCase.orientation));
            if (!orientationRecorded)
            {
                if (aggregatedDescriptions.sourceHeight.Minimum > 1 && aggregatedAsRows.Height == 1)
                {
                    Features.Add(SourceTableCase.horizontalOrientation);
                }

                if (aggregatedDescriptions.sourceWidth.Minimum > 1 && aggregatedAsRows.Width == 1)
                {
                    Features.Add(SourceTableCase.verticalOrientation);
                }
            }

            name = task.name;
            Task = task;
        }
Esempio n. 7
0
 /// <summary>
 /// Creates a wrapper that stores the extractor and exposes the given address and markup
 /// through <c>Url</c> and <c>Html</c>.
 /// </summary>
 /// <param name="url">Value assigned to <c>Url</c>.</param>
 /// <param name="html">Value assigned to <c>Html</c>.</param>
 /// <param name="htmlExtractor">Extractor stored in <c>_htmlExtractor</c>.</param>
 public HtmlWrapper(
     Uri url,
     string html,
     IHtmlExtractor htmlExtractor)
 {
     // Assignment order is kept as-is: the Url/Html setters are not visible from here.
     this._htmlExtractor = htmlExtractor;
     this.Url = url;
     this.Html = html;
 }
Esempio n. 8
0
 /// <summary>
 /// Assigns <c>_htmlExtractor</c> a new <c>HtmlExtractor</c> constructed from <c>folder</c> and <c>file</c>.
 /// </summary>
 public TestHtmlExtractor() => _htmlExtractor = new HtmlExtractor(folder, file);
 /// <summary>
 /// Builds an <c>HtmlWrapper</c> from <c>_uri</c>, <c>_html</c> and the given extractor.
 /// </summary>
 /// <param name="extractor">Extractor passed to the wrapper's constructor.</param>
 /// <returns>The newly constructed wrapper.</returns>
 private HtmlWrapper CreateInstance(IHtmlExtractor extractor)
 {
     return new HtmlWrapper(_uri, _html, extractor);
 }
 /// <summary>
 /// Creates the parser and stores the supplied extractor in <c>_htmlExtractor</c>.
 /// </summary>
 /// <param name="htmlExtractor">Extractor to store; must not be null.</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="htmlExtractor"/> is null.</exception>
 public HtmlParser(IHtmlExtractor htmlExtractor)
 {
     if (htmlExtractor is null)
     {
         throw new ArgumentNullException(nameof(htmlExtractor));
     }

     _htmlExtractor = htmlExtractor;
 }