/// <summary>
/// Initializes a new instance of the <see cref="AppServices"/> class.
/// </summary>
/// <remarks>
/// Wires up, in this order: the HTML document and zone tree serializers (both sharing
/// one style lookup created for Internet Explorer), the pre-zoning element classifier,
/// the zone tree and column tree builders, and three article-content labelers
/// (one driven by a natural language processor, one keyed on the ARTICLE tag and
/// one keyed on the MAIN tag).
/// </remarks>
public AppServices()
{
    // Both serializers are built from the same style lookup instance.
    var ieStyleLookup = DefaultStyleLookup.CreateForInternetExplorer();
    const bool validateSerialization = false;

    _htmlDocumentSerializer = new HtmlDocumentSerializer(ieStyleLookup, validateSerialization);
    _zoneTreeSerializer = new ZoneTreeSerializer(ieStyleLookup);

    // The classifier and the zone tree builder receive the same element types,
    // in the same order.
    _elementClassifier = new PreZoningClassification(
        HtmlElementType.SignificantBlock,
        HtmlElementType.SignificantInline,
        HtmlElementType.SignificantLinebreak,
        HtmlElementType.SignificantInvisible,
        HtmlElementType.BreakDown,
        HtmlElementType.Aname,
        HtmlElementType.Hidden);

    _zoneTreeBuilder = ZoneTreeBuilder.Create(
        HtmlElementType.SignificantBlock,
        HtmlElementType.SignificantInline,
        HtmlElementType.SignificantLinebreak,
        HtmlElementType.SignificantInvisible,
        HtmlElementType.BreakDown,
        HtmlElementType.Aname,
        HtmlElementType.Hidden);

    _columnTreeBuilder = ColumnTreeBuilder.Create();

    // Only the layout-analysis labeler needs the language processor.
    var languageProcessor = new OpenNaturalLanguageProcessor();
    _layoutAnalysisArticleContentLabeler = new ArticleContentLabeler(
        languageProcessor,
        ZoneLabel.Paragraph,
        ZoneLabel.ArticleContent,
        ZoneFeature.Common_Tokens);

    // The two semantic-tag labelers differ only in the tag they look for.
    _articleTagArticleContentLabeler = new SemanticTagArticleContentLabeler(
        Html.Tags.ARTICLE,
        ZoneLabel.ArticleContent);
    _mainTagArticleContentLabeler = new SemanticTagArticleContentLabeler(
        Html.Tags.MAIN,
        ZoneLabel.ArticleContent);
}
/// <summary>
/// Creates the article-content labeling algorithm that corresponds to the given name.
/// </summary>
/// <param name="algName">
/// Name of the algorithm to create. Recognized values are <c>LAYOUT_ANALYSIS_ALG</c>,
/// <c>ARTICLE_SEMANTIC_TAG_ALG</c> and <c>MAIN_SEMANTIC_TAG_ALG</c>.
/// </param>
/// <returns>A newly constructed labeler for the requested algorithm.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="algName"/> matches none of the recognized names
/// (this includes <c>null</c>, which falls through to the default branch).
/// </exception>
private static Algorithm<ZoneTree> CreateLabeler(string algName)
{
    switch (algName)
    {
        case LAYOUT_ANALYSIS_ALG:
            return new ZoneTreeArticleContentLabeler(
                _naturalLanguageProcessor,
                ZoneLabel.Paragraph,
                ARTICLE_CONTENT_LABEL,
                TOKENS_FEATURE_NAME);

        case ARTICLE_SEMANTIC_TAG_ALG:
            return new SemanticTagArticleContentLabeler(Html.Tags.ARTICLE, ARTICLE_CONTENT_LABEL);

        case MAIN_SEMANTIC_TAG_ALG:
            return new SemanticTagArticleContentLabeler(Html.Tags.MAIN, ARTICLE_CONTENT_LABEL);

        default:
            // The second constructor argument is the *parameter name*, not its value;
            // the rejected value is reported in the message instead.
            throw new ArgumentException("Algorithm name not recognized: " + algName, "algName");
    }
}