Example #1
0
 /// <summary>
 /// Creates the parser and stores the three collaborators it is given.
 /// </summary>
 /// <param name="htmlDocumentFactory">Stored in <c>_htmlDocumentFactory</c>.</param>
 /// <param name="metaKeywordsTagParser">Stored in <c>_metaKeywordsTagParser</c>.</param>
 /// <param name="bodyParser">Stored in <c>_bodyParser</c>.</param>
 public HtmlParser(
     IHtmlDocumentFactory htmlDocumentFactory,
     IMetaKeywordsTagParser metaKeywordsTagParser,
     IBodyParser bodyParser)
 {
     // The arguments are stored exactly as passed; no validation happens here.
     _bodyParser = bodyParser;
     _metaKeywordsTagParser = metaKeywordsTagParser;
     _htmlDocumentFactory = htmlDocumentFactory;
 }
Example #2
0
 /// <summary>
 /// Passes every child of <paramref name="node"/> to <c>ConvertHtmlToText</c>,
 /// forwarding the same writer and document factory.
 /// </summary>
 /// <param name="node">Node whose <c>ChildNodes</c> are enumerated.</param>
 /// <param name="outText">Writer handed on to each conversion call.</param>
 /// <param name="documentFactory">Document factory handed on to each conversion call.</param>
 private static void ConvertContentTo(IHtmlNode node, TextWriter outText, IHtmlDocumentFactory documentFactory)
 {
     // Children are processed in the order ChildNodes yields them.
     foreach (var child in node.ChildNodes)
     {
         ConvertHtmlToText(child, outText, documentFactory);
     }
 }
Example #3
0
        /// <summary>
        /// Creates the scraper and stores the collaborators it is given.
        /// </summary>
        /// <param name="htmlDocumentFactory">Stored in the <c>htmlDocumentFactory</c> field.</param>
        /// <param name="urlBuilder">Stored in the <c>wordReferenceURLBuilder</c> field.</param>
        /// <param name="caretAnalyser">Stored in the <c>caretAnalyser</c> field.</param>
        public WordReferenceScraper(
            IHtmlDocumentFactory htmlDocumentFactory,
            IWordReferenceURLBuilder urlBuilder,
            ICaretAnalyser caretAnalyser)
        {
            // "this." is required where a field shares its name with the parameter.
            this.caretAnalyser = caretAnalyser;
            this.wordReferenceURLBuilder = urlBuilder;
            this.htmlDocumentFactory = htmlDocumentFactory;
        }
Example #4
0
        /// <summary>
        /// Initializes the parser, storing the given factory in <c>HtmlDocumentFactory</c>.
        /// </summary>
        /// <param name="htmlDocumentFactory">Factory to store; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="htmlDocumentFactory"/> is null.
        /// </exception>
        protected BasicHtmlParser(IHtmlDocumentFactory htmlDocumentFactory)
        {
            // Validate and assign in one step; nothing is assigned when the argument is null.
            this.HtmlDocumentFactory = htmlDocumentFactory
                ?? throw new ArgumentNullException(nameof(htmlDocumentFactory));
        }
Example #5
0
        /// <summary>
        /// Initializes the parser: forwards the document factory to the base constructor
        /// and stores the encoder in <c>HttpEncoder</c>.
        /// </summary>
        /// <param name="htmlDocumentFactory">Passed unchanged to the base constructor.</param>
        /// <param name="httpEncoder">Encoder to store; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="httpEncoder"/> is null.
        /// </exception>
        public HtmlParser(IHtmlDocumentFactory htmlDocumentFactory, IHttpEncoder httpEncoder)
            : base(htmlDocumentFactory)
        {
            // The base constructor has already run by the time this check executes.
            this.HttpEncoder = httpEncoder
                ?? throw new ArgumentNullException(nameof(httpEncoder));
        }
Example #6
0
        /// <summary>
        /// Initialization action: runs the base initialization, then resolves the HTML document
        /// factory, the YouTube id service and the engine link parser from the module provider.
        /// </summary>
        /// <param name="moduleProvider">Module provider.</param>
        /// <returns><c>Nothing.Value</c> after all three dependencies have been stored.</returns>
        /// <exception cref="ModuleNotFoundException">
        /// One of the queries returned null; the exception carries the requested interface type.
        /// </exception>
        protected override async ValueTask<Nothing> OnInitialize(IModuleProvider moduleProvider)
        {
            await base.OnInitialize(moduleProvider);

            // Each dependency is awaited in turn; a field is only assigned once its value is known to be non-null.
            var documentFactory = await moduleProvider.QueryModuleAsync<IHtmlDocumentFactory>();
            if (documentFactory is null)
            {
                throw new ModuleNotFoundException(typeof(IHtmlDocumentFactory));
            }
            _htmlDocumentFactory = documentFactory;

            var youtubeIdService = await moduleProvider.QueryModuleAsync<IYoutubeIdService>();
            if (youtubeIdService is null)
            {
                throw new ModuleNotFoundException(typeof(IYoutubeIdService));
            }
            _youtubeIdService = youtubeIdService;

            var linkParser = await moduleProvider.QueryEngineCapabilityAsync<IEngineLinkParser>(MakabaConstants.MakabaEngineId);
            if (linkParser is null)
            {
                throw new ModuleNotFoundException(typeof(IEngineLinkParser));
            }
            _linkParser = linkParser;

            return Nothing.Value;
        }
Example #7
0
        /// <summary>
        /// Converts HTML to text: writes the textual content of <paramref name="node"/> and,
        /// recursively, of its children to <paramref name="outText"/>.
        /// </summary>
        /// <param name="node">
        /// Node to convert. Comment nodes produce no output. Text nodes are written de-entitized,
        /// unless their parent is a <c>script</c> or <c>style</c> element, the document factory
        /// reports the text as an overlapped closing element, or the text is only whitespace.
        /// Any other node writes a line break when its name is <c>p</c> and then converts its children.
        /// </param>
        /// <param name="outText">Writer that receives the produced text.</param>
        /// <param name="documentFactory">Document factory.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="documentFactory"/>, <paramref name="node"/> or <paramref name="outText"/> is null.
        /// </exception>
        public static void ConvertHtmlToText(IHtmlNode node, TextWriter outText, IHtmlDocumentFactory documentFactory)
        {
            if (documentFactory == null)
            {
                throw new ArgumentNullException(nameof(documentFactory));
            }

            // Fail fast with a descriptive exception instead of a NullReferenceException
            // further down (node.Name / outText.Write).
            if (node == null)
            {
                throw new ArgumentNullException(nameof(node));
            }

            if (outText == null)
            {
                throw new ArgumentNullException(nameof(outText));
            }

            switch (node)
            {
            case IHtmlCommentNode _:
                // don't output comments
                break;

            case IHtmlTextNode tn:
                // script and style must not be output; a text node without a parent
                // cannot be inside either of them, so it is treated as ordinary text
                var parentName = tn.ParentNode?.Name;
                if ((parentName == "script") || (parentName == "style"))
                {
                    break;
                }

                // get text
                var html = tn.Text;

                // is it in fact a special closing node output as text?
                if (documentFactory.IsOverlappedClosingElement(html))
                {
                    break;
                }

                // check the text is meaningful and not a bunch of whitespaces
                // (same result as Trim().Length > 0, without allocating and without failing on null)
                if (!string.IsNullOrWhiteSpace(html))
                {
                    outText.Write(documentFactory.DeEntitize(html));
                }
                break;

            default:
                if (node.Name == "p")
                {
                    // treat paragraphs as crlf
                    outText.WriteLine();
                }

                if (node.HasChildNodes)
                {
                    ConvertContentTo(node, outText, documentFactory);
                }
                break;
            }
        }
Example #8
0
        /// <summary>
        /// Initialization action: runs the base initialization, then resolves the HTML parser,
        /// the HTML document factory, the posts parser and the thread preview parser from the
        /// module provider.
        /// </summary>
        /// <param name="moduleProvider">Module provider.</param>
        /// <returns><c>Nothing.Value</c> after all four dependencies have been stored.</returns>
        /// <exception cref="ModuleNotFoundException">
        /// One of the queries returned null; the exception carries the requested interface type.
        /// </exception>
        protected override async ValueTask<Nothing> OnInitialize(IModuleProvider moduleProvider)
        {
            await base.OnInitialize(moduleProvider);

            // Each dependency is awaited in turn; a field is only assigned once its value is known to be non-null.
            var htmlParser = await moduleProvider.QueryEngineCapabilityAsync<IHtmlParser>(MakabaConstants.MakabaEngineId);
            if (htmlParser is null)
            {
                throw new ModuleNotFoundException(typeof(IHtmlParser));
            }
            _htmlParser = htmlParser;

            var documentFactory = await moduleProvider.QueryModuleAsync<IHtmlDocumentFactory>();
            if (documentFactory is null)
            {
                throw new ModuleNotFoundException(typeof(IHtmlDocumentFactory));
            }
            _htmlDocumentFactory = documentFactory;

            var postsParser = await moduleProvider.FindNetworkDtoParserAsync<BoardPost2WithParentLink, IBoardPost>();
            if (postsParser is null)
            {
                throw new ModuleNotFoundException(typeof(INetworkDtoParser<BoardPost2WithParentLink, IBoardPost>));
            }
            _postsParser = postsParser;

            var threadPreviewParser = await moduleProvider.FindNetworkDtoParserAsync<ThreadPreviewData, IThreadPreviewPostCollection>();
            if (threadPreviewParser is null)
            {
                throw new ModuleNotFoundException(typeof(INetworkDtoParser<ThreadPreviewData, IThreadPreviewPostCollection>));
            }
            _threadPreviewParser = threadPreviewParser;

            return Nothing.Value;
        }
        /// <summary>
        /// Creates the initializer after verifying that every dependency is present.
        /// </summary>
        /// <param name="htmlInvestigator">Stored in <c>_htmlInvestigator</c>; must not be null.</param>
        /// <param name="htmlDocumentFactory">Stored in <c>_htmlDocumentFactory</c>; must not be null.</param>
        /// <param name="htmlTransformingContext">Stored in <c>_htmlTransformingContext</c>; must not be null.</param>
        /// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
        public DefaultHtmlTransformingInitializer(IHtmlInvestigator htmlInvestigator, IHtmlDocumentFactory htmlDocumentFactory, IHtmlTransformingContext htmlTransformingContext)
        {
            // nameof keeps the reported parameter names in sync with the signature under renames;
            // the checks run in the same order as before, so the first null argument reported is unchanged.
            if (htmlInvestigator == null)
            {
                throw new ArgumentNullException(nameof(htmlInvestigator));
            }

            if (htmlDocumentFactory == null)
            {
                throw new ArgumentNullException(nameof(htmlDocumentFactory));
            }

            if (htmlTransformingContext == null)
            {
                throw new ArgumentNullException(nameof(htmlTransformingContext));
            }

            this._htmlDocumentFactory = htmlDocumentFactory;
            this._htmlInvestigator = htmlInvestigator;
            this._htmlTransformingContext = htmlTransformingContext;
        }
 /// <summary>
 /// Creates the parser; the document factory is handed straight to the base constructor.
 /// </summary>
 /// <param name="htmlDocumentFactory">Passed unchanged to the base constructor.</param>
 public TagBuilderParser(IHtmlDocumentFactory htmlDocumentFactory)
     : base(htmlDocumentFactory)
 {
     // Nothing to initialize beyond what the base constructor does.
 }
Example #11
0
 /// <summary>
 /// Creates the parser and stores the given factory in <c>_htmlDocumentFactory</c>.
 /// </summary>
 /// <param name="htmlDocumentFactory">Stored as passed; no validation is performed.</param>
 public ResultParser(IHtmlDocumentFactory htmlDocumentFactory)
     => _htmlDocumentFactory = htmlDocumentFactory;