/// <summary>
/// Initializes a new instance of the <see cref="WebBrowserDocument"/> class,
/// wiring up the DOM modification pipeline, the element classifier, the zone
/// tree builder and the document factory used by this instance.
/// </summary>
/// <param name="url">Value stored in the <c>_url</c> field.</param>
/// <param name="msDoc">MSHTML document stored in the <c>_msDoc</c> field.</param>
public WebBrowserDocument(string url, IHTMLDocument2 msDoc)
{
    // Plain state first: constructor arguments and empty/initial bookkeeping.
    _url = url;
    _msDoc = msDoc;
    _highlights = new List<HtmlHighlight>();
    _domModificationsApplied = false;

    // DOM modifications, in the order they are listed here.
    // NOTE(review): ReplaceFreeTextNodes appears twice, once before and once
    // after ReplacePreservedWhitespace - presumably intentional; confirm.
    _domModifications = new IDomModification[]
    {
        new RemoveOrphanElements(),
        new RemoveUnknownElements(),
        new RemoveCommentElements(),
        new RemoveEmptyFreeTextNodes(),
        new ReplaceFreeTextNodes(),
        new ReplacePreservedWhitespace(),
        new ReplaceFreeTextNodes()
    };

    // The classifier and the zone tree builder receive the same seven
    // element types, in the same order.
    _elementClassifier = new PreZoningClassification(
        HtmlElementType.SignificantBlock,
        HtmlElementType.SignificantInline,
        HtmlElementType.SignificantLinebreak,
        HtmlElementType.SignificantInvisible,
        HtmlElementType.BreakDown,
        HtmlElementType.Aname,
        HtmlElementType.Hidden);

    _zoneTreeBuilder = ZoneTreeBuilder.Create(
        HtmlElementType.SignificantBlock,
        HtmlElementType.SignificantInline,
        HtmlElementType.SignificantLinebreak,
        HtmlElementType.SignificantInvisible,
        HtmlElementType.BreakDown,
        HtmlElementType.Aname,
        HtmlElementType.Hidden);

    // The document factory is built on the Internet Explorer default style lookup.
    var ieDefaultStyles = DefaultStyleLookup.CreateForInternetExplorer();
    _documentFactory = new MsHtmlDocumentFactory(ieDefaultStyles);
}
/// <summary>
/// Initializes a new instance of the <see cref="AppServices"/> class,
/// creating the serializers, the element classifier, the zone and column
/// tree builders, and the three article content labelers held by it.
/// </summary>
public AppServices()
{
    // One Internet Explorer default style lookup is shared by both serializers.
    var ieDefaultStyles = DefaultStyleLookup.CreateForInternetExplorer();

    // Serialization result validation is switched off for the HTML serializer.
    bool validateResult = false;
    _htmlDocumentSerializer = new HtmlDocumentSerializer(ieDefaultStyles, validateResult);
    _zoneTreeSerializer = new ZoneTreeSerializer(ieDefaultStyles);

    // The classifier and the zone tree builder receive the same seven
    // element types, in the same order.
    _elementClassifier = new PreZoningClassification(
        HtmlElementType.SignificantBlock,
        HtmlElementType.SignificantInline,
        HtmlElementType.SignificantLinebreak,
        HtmlElementType.SignificantInvisible,
        HtmlElementType.BreakDown,
        HtmlElementType.Aname,
        HtmlElementType.Hidden);

    _zoneTreeBuilder = ZoneTreeBuilder.Create(
        HtmlElementType.SignificantBlock,
        HtmlElementType.SignificantInline,
        HtmlElementType.SignificantLinebreak,
        HtmlElementType.SignificantInvisible,
        HtmlElementType.BreakDown,
        HtmlElementType.Aname,
        HtmlElementType.Hidden);

    _columnTreeBuilder = ColumnTreeBuilder.Create();

    // Layout-analysis labeler: the only labeler given a natural language processor.
    var languageProcessor = new OpenNaturalLanguageProcessor();
    _layoutAnalysisArticleContentLabeler = new ArticleContentLabeler(
        languageProcessor,
        ZoneLabel.Paragraph,
        ZoneLabel.ArticleContent,
        ZoneFeature.Common_Tokens);

    // Semantic-tag labelers: one for the ARTICLE tag and one for the MAIN tag,
    // both constructed with ZoneLabel.ArticleContent.
    _articleTagArticleContentLabeler = new SemanticTagArticleContentLabeler(
        Html.Tags.ARTICLE,
        ZoneLabel.ArticleContent);
    _mainTagArticleContentLabeler = new SemanticTagArticleContentLabeler(
        Html.Tags.MAIN,
        ZoneLabel.ArticleContent);
}