// public String XPathSelector { get; set; } = ""; public IHtmlExtractor GetExtractor() { IHtmlExtractor extractor = ExtractorTools.HtmlExtractorProvider.GetInstance(ExtractorName); extractor.DeployCustomizationSettings(ExtractorCustomizationSettings); return(extractor); }
/// <summary>
/// Creates the controller and keeps references to the extractor, transformer,
/// loader and reviews database context it was given.
/// </summary>
/// <param name="htmlExtractor">Extractor stored in <c>_htmlExtractor</c>.</param>
/// <param name="htmlTransformer">Transformer stored in <c>_htmlTransformer</c>.</param>
/// <param name="htmlLoader">Loader stored in <c>_htmlLoader</c>.</param>
/// <param name="reviewsDbContext">Database context stored in <c>_reviewsDbContext</c>.</param>
public EtlController(
    IHtmlExtractor htmlExtractor,
    IHtmlTransformer htmlTransformer,
    IHtmlLoader htmlLoader,
    ReviewsDbContext reviewsDbContext)
{
    // No validation is performed here; arguments are stored exactly as received.
    _htmlExtractor = htmlExtractor;
    _htmlTransformer = htmlTransformer;
    _htmlLoader = htmlLoader;
    _reviewsDbContext = reviewsDbContext;
}
/// <summary>
/// Creates the scraper around the supplied extractor.
/// </summary>
/// <param name="htmlExtractor">Extractor stored in <c>_htmlExtractor</c>; not validated.</param>
public HtmlScraper(IHtmlExtractor<T> htmlExtractor) =>
    _htmlExtractor = htmlExtractor;
/// <summary>
/// Replaces <c>_sut</c> with a newly constructed <c>HtmlExtractor</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably invoked by the test framework before each test;
/// the attribute that would confirm this is not visible here.
/// </remarks>
public void TestInitialize() => _sut = new HtmlExtractor();
/// <summary>
/// Creates the processor and keeps references to the three extractors it was given.
/// </summary>
/// <param name="linkExtractor">Stored in <c>_linkExtractor</c>.</param>
/// <param name="htmlExtractor">Stored in <c>_htmlExtractor</c>.</param>
/// <param name="metaExtractor">Stored in <c>_metaExtractor</c>.</param>
public TextProcessor(
    ILinkExtractor linkExtractor,
    IHtmlExtractor htmlExtractor,
    IMetaExtractor metaExtractor)
{
    // Arguments are stored as-is; no null checks are applied.
    _linkExtractor = linkExtractor;
    _htmlExtractor = htmlExtractor;
    _metaExtractor = metaExtractor;
}
/// <summary>
/// Aggregates a set of source tables and records, in <c>Features</c>, which
/// <c>SourceTableCase</c> values apply to the set (stable/variable height and
/// width, static content, and orientation).
/// </summary>
/// <param name="sources">
/// Source tables to aggregate. When <c>sources.isNullOrEmpty()</c> reports true the
/// constructor returns immediately and assigns nothing, including <c>name</c> and
/// <c>Task</c>.
/// </param>
/// <param name="extractor">
/// Provides <c>sourceContentAnalysis</c>, whose <c>GetDescription</c> is called once
/// per source table.
/// </param>
/// <param name="task">
/// Extraction task; its <c>name</c> is copied into <c>name</c> and the task itself is
/// stored in <c>Task</c>.
/// </param>
public SourceTableAggregation(List<SourceTable> sources, IHtmlExtractor extractor, TableExtractionTask task)
{
    if (sources.isNullOrEmpty())
    {
        // NOTE(review): name and Task stay unassigned on this path - confirm that is intended.
        return;
    }

    aggregatedDescriptions = sources
        .Select(source => extractor.sourceContentAnalysis.GetDescription(source))
        .CompileSourceDescription();
    aggregatedAsRows = sources.Merge(false, true);
    aggregatedAsColumns = sources.Merge(true, true);

    // Height: only classified when every source reports a height above one.
    if (aggregatedDescriptions.sourceHeight.Minimum > 1)
    {
        Features.Add(aggregatedDescriptions.sourceHeight.Range == 0
            ? SourceTableCase.stableHeight
            : SourceTableCase.variableHeight);

        if (aggregatedAsRows.Height == 1)
        {
            Features.Add(SourceTableCase.staticContent | SourceTableCase.vertically);
            Features.Add(SourceTableCase.horizontalOrientation);
        }
    }

    // Width: only classified when every source reports a width above one.
    if (aggregatedDescriptions.sourceWidth.Minimum > 1)
    {
        Features.Add(aggregatedDescriptions.sourceWidth.Range == 0
            ? SourceTableCase.stableWidth
            : SourceTableCase.variableWidth);

        // NOTE(review): this tests aggregatedAsRows.Width, while aggregatedAsColumns is
        // never read in this constructor - confirm the row-wise merge is the intended one.
        if (aggregatedAsRows.Width == 1)
        {
            Features.Add(SourceTableCase.staticContent | SourceTableCase.horizontally);
            Features.Add(SourceTableCase.verticalOrientation);
        }
    }

    // One stable dimension combined with one variable dimension implies an orientation.
    if (Features.ContainsAll(SourceTableCase.stableWidth, SourceTableCase.variableHeight))
    {
        Features.Add(SourceTableCase.verticalOrientation);
    }

    if (Features.ContainsAll(SourceTableCase.variableWidth, SourceTableCase.stableHeight))
    {
        Features.Add(SourceTableCase.horizontalOrientation);
    }

    // Fallback used only when no recorded feature carries the orientation flag.
    // An earlier variant keyed the first check on the variableHeight feature; it was disabled.
    bool orientationRecorded = Features.Any(feature => feature.HasFlag(SourceTableCase.orientation));
    if (!orientationRecorded)
    {
        if (aggregatedDescriptions.sourceHeight.Minimum > 1 && aggregatedAsRows.Height == 1)
        {
            Features.Add(SourceTableCase.horizontalOrientation);
        }

        if (aggregatedDescriptions.sourceWidth.Minimum > 1 && aggregatedAsRows.Width == 1)
        {
            Features.Add(SourceTableCase.verticalOrientation);
        }
    }

    name = task.name;
    Task = task;
}
/// <summary>
/// Creates a wrapper for a page's address and markup together with the extractor
/// to use on it.
/// </summary>
/// <param name="url">Value assigned to <c>Url</c>.</param>
/// <param name="html">Value assigned to <c>Html</c>.</param>
/// <param name="htmlExtractor">Extractor stored in <c>_htmlExtractor</c>.</param>
public HtmlWrapper(Uri url, string html, IHtmlExtractor htmlExtractor)
{
    // The extractor is assigned before Url and Html; keep this order in case
    // those members have setters that rely on it.
    _htmlExtractor = htmlExtractor;

    Url = url;
    Html = html;
}
/// <summary>
/// Builds an <c>HtmlExtractor</c> from <c>folder</c> and <c>file</c> and stores it in
/// <c>_htmlExtractor</c>.
/// </summary>
/// <remarks>
/// <c>folder</c> and <c>file</c> are declared outside this constructor.
/// </remarks>
public TestHtmlExtractor()
{
    var extractorUnderTest = new HtmlExtractor(folder, file);
    _htmlExtractor = extractorUnderTest;
}
/// <summary>
/// Creates an <c>HtmlWrapper</c> from <c>_uri</c>, <c>_html</c> and the given extractor.
/// </summary>
/// <param name="extractor">Extractor passed through to the wrapper's constructor.</param>
/// <returns>A new <c>HtmlWrapper</c> instance.</returns>
private HtmlWrapper CreateInstance(IHtmlExtractor extractor)
{
    return new HtmlWrapper(_uri, _html, extractor);
}
/// <summary>
/// Creates the parser around the supplied extractor.
/// </summary>
/// <param name="htmlExtractor">Extractor stored in <c>_htmlExtractor</c>; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="htmlExtractor"/> is null.
/// </exception>
public HtmlParser(IHtmlExtractor htmlExtractor)
{
    if (htmlExtractor is null)
    {
        throw new ArgumentNullException(nameof(htmlExtractor));
    }

    _htmlExtractor = htmlExtractor;
}