コード例 #1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="AppServices"/> class,
        /// constructing the serializers, element classifier, tree builders and
        /// article content labelers held by this instance.
        /// </summary>
        public AppServices()
        {
            // A single Internet Explorer default style lookup is shared by both serializers.
            var ieStyleLookup = DefaultStyleLookup.CreateForInternetExplorer();

            // The HTML document serializer is created with result validation switched off.
            const bool validateResult = false;

            _htmlDocumentSerializer = new HtmlDocumentSerializer(ieStyleLookup, validateResult);
            _zoneTreeSerializer = new ZoneTreeSerializer(ieStyleLookup);

            // The pre-zoning classifier and the zone tree builder receive the same
            // element types, in the same order.
            _elementClassifier = new PreZoningClassification(
                HtmlElementType.SignificantBlock,
                HtmlElementType.SignificantInline,
                HtmlElementType.SignificantLinebreak,
                HtmlElementType.SignificantInvisible,
                HtmlElementType.BreakDown,
                HtmlElementType.Aname,
                HtmlElementType.Hidden);

            _zoneTreeBuilder = ZoneTreeBuilder.Create(
                HtmlElementType.SignificantBlock,
                HtmlElementType.SignificantInline,
                HtmlElementType.SignificantLinebreak,
                HtmlElementType.SignificantInvisible,
                HtmlElementType.BreakDown,
                HtmlElementType.Aname,
                HtmlElementType.Hidden);

            _columnTreeBuilder = ColumnTreeBuilder.Create();

            // Only the layout-analysis labeler takes a natural language processor.
            var nlp = new OpenNaturalLanguageProcessor();

            _layoutAnalysisArticleContentLabeler = new ArticleContentLabeler(
                nlp,
                ZoneLabel.Paragraph,
                ZoneLabel.ArticleContent,
                ZoneFeature.Common_Tokens);

            // The two semantic-tag labelers differ only in the HTML tag they are given.
            _articleTagArticleContentLabeler = new SemanticTagArticleContentLabeler(
                Html.Tags.ARTICLE,
                ZoneLabel.ArticleContent);

            _mainTagArticleContentLabeler = new SemanticTagArticleContentLabeler(
                Html.Tags.MAIN,
                ZoneLabel.ArticleContent);
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: raear/html-zoning
        /// <summary>
        /// Creates the article content labeling algorithm selected by <paramref name="algName"/>.
        /// </summary>
        /// <param name="algName">
        /// One of <c>LAYOUT_ANALYSIS_ALG</c>, <c>ARTICLE_SEMANTIC_TAG_ALG</c> or
        /// <c>MAIN_SEMANTIC_TAG_ALG</c>.
        /// </param>
        /// <returns>A newly constructed labeler for the requested algorithm.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="algName"/> does not match any of the known algorithm names.
        /// </exception>
        private static Algorithm<ZoneTree> CreateLabeler(string algName)
        {
            switch (algName)
            {
                case LAYOUT_ANALYSIS_ALG:
                    return new ZoneTreeArticleContentLabeler(
                        _naturalLanguageProcessor,
                        ZoneLabel.Paragraph,
                        ARTICLE_CONTENT_LABEL,
                        TOKENS_FEATURE_NAME);

                case ARTICLE_SEMANTIC_TAG_ALG:
                    return new SemanticTagArticleContentLabeler(Html.Tags.ARTICLE, ARTICLE_CONTENT_LABEL);

                case MAIN_SEMANTIC_TAG_ALG:
                    return new SemanticTagArticleContentLabeler(Html.Tags.MAIN, ARTICLE_CONTENT_LABEL);

                default:
                    // The second constructor argument of ArgumentException is the parameter
                    // NAME, not its value; the rejected value is reported in the message.
                    throw new ArgumentException(
                        "Algorithm name not recognized: " + algName,
                        nameof(algName));
            }
        }