/// <summary>
/// Creates an <see cref="HtmlParser"/> and stores the supplied collaborators in its private fields.
/// </summary>
/// <param name="htmlDocumentFactory">Document factory stored in <c>_htmlDocumentFactory</c>.</param>
/// <param name="metaKeywordsTagParser">Meta-keywords tag parser stored in <c>_metaKeywordsTagParser</c>.</param>
/// <param name="bodyParser">Body parser stored in <c>_bodyParser</c>.</param>
/// <remarks>The arguments are stored as given; no validation is performed here.</remarks>
public HtmlParser(
    IHtmlDocumentFactory htmlDocumentFactory,
    IMetaKeywordsTagParser metaKeywordsTagParser,
    IBodyParser bodyParser)
{
    _bodyParser = bodyParser;
    _metaKeywordsTagParser = metaKeywordsTagParser;
    _htmlDocumentFactory = htmlDocumentFactory;
}
/// <summary>
/// Passes every child of <paramref name="node"/> to <c>ConvertHtmlToText</c>, in enumeration order.
/// </summary>
/// <param name="node">Node whose <c>ChildNodes</c> are enumerated.</param>
/// <param name="outText">Writer handed through to <c>ConvertHtmlToText</c>.</param>
/// <param name="documentFactory">Document factory handed through to <c>ConvertHtmlToText</c>.</param>
private static void ConvertContentTo(IHtmlNode node, TextWriter outText, IHtmlDocumentFactory documentFactory)
{
    foreach (var child in node.ChildNodes)
    {
        ConvertHtmlToText(child, outText, documentFactory);
    }
}
/// <summary>
/// Creates a <see cref="WordReferenceScraper"/> and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="htmlDocumentFactory">Document factory stored in <c>htmlDocumentFactory</c>.</param>
/// <param name="urlBuilder">URL builder stored in <c>wordReferenceURLBuilder</c>.</param>
/// <param name="caretAnalyser">Caret analyser stored in <c>caretAnalyser</c>.</param>
/// <remarks>The arguments are stored as given; no validation is performed here.</remarks>
public WordReferenceScraper(
    IHtmlDocumentFactory htmlDocumentFactory,
    IWordReferenceURLBuilder urlBuilder,
    ICaretAnalyser caretAnalyser)
{
    this.caretAnalyser = caretAnalyser;
    this.wordReferenceURLBuilder = urlBuilder;
    this.htmlDocumentFactory = htmlDocumentFactory;
}
/// <summary>
/// Initializes the base parser with the document factory it will expose through <c>HtmlDocumentFactory</c>.
/// </summary>
/// <param name="htmlDocumentFactory">Document factory to store; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="htmlDocumentFactory"/> is null.</exception>
protected BasicHtmlParser(IHtmlDocumentFactory htmlDocumentFactory)
{
    // Validate and assign in one step; nothing is stored when the argument is null.
    this.HtmlDocumentFactory = htmlDocumentFactory
        ?? throw new ArgumentNullException(nameof(htmlDocumentFactory));
}
/// <summary>
/// Creates an <see cref="HtmlParser"/>: the document factory is forwarded to the base constructor
/// and the HTTP encoder is stored in <c>HttpEncoder</c>.
/// </summary>
/// <param name="htmlDocumentFactory">Document factory passed to the base constructor.</param>
/// <param name="httpEncoder">HTTP encoder to store; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="httpEncoder"/> is null.</exception>
public HtmlParser(IHtmlDocumentFactory htmlDocumentFactory, IHttpEncoder httpEncoder)
    : base(htmlDocumentFactory)
{
    // Validate and assign in one step; nothing is stored when the argument is null.
    this.HttpEncoder = httpEncoder
        ?? throw new ArgumentNullException(nameof(httpEncoder));
}
/// <summary>
/// Initialization action: runs the base initialization, then resolves the document factory,
/// the YouTube id service and the engine link parser and stores them in fields.
/// </summary>
/// <param name="moduleProvider">Module provider.</param>
/// <returns><c>Nothing.Value</c> once every dependency has been resolved.</returns>
/// <exception cref="ModuleNotFoundException">A query to <paramref name="moduleProvider"/> produced null.</exception>
protected override async ValueTask<Nothing> OnInitialize(IModuleProvider moduleProvider)
{
    await base.OnInitialize(moduleProvider);

    // Each dependency is resolved, checked and stored before the next one is requested,
    // so fields resolved before a failure keep their values.
    var documentFactory = await moduleProvider.QueryModuleAsync<IHtmlDocumentFactory>();
    if (documentFactory is null)
    {
        throw new ModuleNotFoundException(typeof(IHtmlDocumentFactory));
    }
    _htmlDocumentFactory = documentFactory;

    var youtubeIdService = await moduleProvider.QueryModuleAsync<IYoutubeIdService>();
    if (youtubeIdService is null)
    {
        throw new ModuleNotFoundException(typeof(IYoutubeIdService));
    }
    _youtubeIdService = youtubeIdService;

    var linkParser = await moduleProvider.QueryEngineCapabilityAsync<IEngineLinkParser>(MakabaConstants.MakabaEngineId);
    if (linkParser is null)
    {
        throw new ModuleNotFoundException(typeof(IEngineLinkParser));
    }
    _linkParser = linkParser;

    return Nothing.Value;
}
/// <summary>
/// Converts HTML to text: writes the text found in <paramref name="node"/> and its descendants
/// to <paramref name="outText"/>.
/// </summary>
/// <param name="node">
/// Node to convert. Comment nodes produce no output; text nodes whose parent is named
/// <c>script</c> or <c>style</c> are skipped; any other node is processed through its children.
/// </param>
/// <param name="outText">Writer that receives the text.</param>
/// <param name="documentFactory">
/// Document factory, used to detect overlapped closing elements and to de-entitize text.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="documentFactory"/> or <paramref name="node"/> is null.
/// </exception>
public static void ConvertHtmlToText(IHtmlNode node, TextWriter outText, IHtmlDocumentFactory documentFactory)
{
    if (documentFactory == null)
    {
        throw new ArgumentNullException(nameof(documentFactory));
    }

    // A null node would otherwise reach the default branch and fail on node.Name.
    if (node == null)
    {
        throw new ArgumentNullException(nameof(node));
    }

    switch (node)
    {
        case IHtmlCommentNode _:
            // Comments are never written.
            break;

        case IHtmlTextNode tn:
            // Script and style content must not be written. A text node without a parent
            // is treated as not being inside either element.
            var parentName = tn.ParentNode?.Name;
            if ((parentName == "script") || (parentName == "style"))
            {
                break;
            }

            var html = tn.Text;

            // Is it in fact a special closing node output as text?
            if (documentFactory.IsOverlappedClosingElement(html))
            {
                break;
            }

            // Only write text that is more than whitespace.
            if (!string.IsNullOrWhiteSpace(html))
            {
                outText.Write(documentFactory.DeEntitize(html));
            }

            break;

        default:
            // A paragraph starts on a new line (the writer's own line terminator is used).
            if (node.Name == "p")
            {
                outText.WriteLine();
            }

            if (node.HasChildNodes)
            {
                ConvertContentTo(node, outText, documentFactory);
            }

            break;
    }
}
/// <summary>
/// Initialization action: runs the base initialization, then resolves the HTML parser,
/// the document factory, the posts parser and the thread preview parser and stores them in fields.
/// </summary>
/// <param name="moduleProvider">Module provider.</param>
/// <returns><c>Nothing.Value</c> once every dependency has been resolved.</returns>
/// <exception cref="ModuleNotFoundException">A query to <paramref name="moduleProvider"/> produced null.</exception>
protected override async ValueTask<Nothing> OnInitialize(IModuleProvider moduleProvider)
{
    await base.OnInitialize(moduleProvider);

    // Each dependency is resolved, checked and stored before the next one is requested,
    // so fields resolved before a failure keep their values.
    var htmlParser = await moduleProvider.QueryEngineCapabilityAsync<IHtmlParser>(MakabaConstants.MakabaEngineId);
    if (htmlParser is null)
    {
        throw new ModuleNotFoundException(typeof(IHtmlParser));
    }
    _htmlParser = htmlParser;

    var documentFactory = await moduleProvider.QueryModuleAsync<IHtmlDocumentFactory>();
    if (documentFactory is null)
    {
        throw new ModuleNotFoundException(typeof(IHtmlDocumentFactory));
    }
    _htmlDocumentFactory = documentFactory;

    var postsParser = await moduleProvider.FindNetworkDtoParserAsync<BoardPost2WithParentLink, IBoardPost>();
    if (postsParser is null)
    {
        throw new ModuleNotFoundException(typeof(INetworkDtoParser<BoardPost2WithParentLink, IBoardPost>));
    }
    _postsParser = postsParser;

    var threadPreviewParser = await moduleProvider.FindNetworkDtoParserAsync<ThreadPreviewData, IThreadPreviewPostCollection>();
    if (threadPreviewParser is null)
    {
        throw new ModuleNotFoundException(typeof(INetworkDtoParser<ThreadPreviewData, IThreadPreviewPostCollection>));
    }
    _threadPreviewParser = threadPreviewParser;

    return Nothing.Value;
}
/// <summary>
/// Creates a <see cref="DefaultHtmlTransformingInitializer"/> from its three collaborators.
/// </summary>
/// <param name="htmlInvestigator">HTML investigator to store; must not be null.</param>
/// <param name="htmlDocumentFactory">Document factory to store; must not be null.</param>
/// <param name="htmlTransformingContext">Transforming context to store; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Any argument is null. Arguments are checked in declaration order, before anything is stored.
/// </exception>
public DefaultHtmlTransformingInitializer(IHtmlInvestigator htmlInvestigator, IHtmlDocumentFactory htmlDocumentFactory, IHtmlTransformingContext htmlTransformingContext)
{
    // nameof keeps the reported parameter names in sync with the signature.
    if (htmlInvestigator == null)
    {
        throw new ArgumentNullException(nameof(htmlInvestigator));
    }

    if (htmlDocumentFactory == null)
    {
        throw new ArgumentNullException(nameof(htmlDocumentFactory));
    }

    if (htmlTransformingContext == null)
    {
        throw new ArgumentNullException(nameof(htmlTransformingContext));
    }

    this._htmlDocumentFactory = htmlDocumentFactory;
    this._htmlInvestigator = htmlInvestigator;
    this._htmlTransformingContext = htmlTransformingContext;
}
/// <summary>
/// Creates a <see cref="TagBuilderParser"/>; the document factory is forwarded unchanged
/// to the base constructor and nothing else is done here.
/// </summary>
/// <param name="htmlDocumentFactory">Document factory passed to the base constructor.</param>
public TagBuilderParser(IHtmlDocumentFactory htmlDocumentFactory)
    : base(htmlDocumentFactory)
{
}
/// <summary>
/// Creates a <see cref="ResultParser"/> and stores the document factory in <c>_htmlDocumentFactory</c>.
/// </summary>
/// <param name="htmlDocumentFactory">Document factory to store; it is not validated here.</param>
public ResultParser(IHtmlDocumentFactory htmlDocumentFactory)
    => _htmlDocumentFactory = htmlDocumentFactory;