/// <summary>
/// Creates an HtmlParserResult object.
/// </summary>
/// <param name="parser">The parser that produced this parse result.</param>
/// <param name="domProvider">The HTML DOM provider.</param>
/// <param name="provider">The parser provider.</param>
/// <param name="virtualPath">The virtual path of the document.</param>
public HtmlParserResult(
  IHtmlParser parser,
  IHtmlDomProvider domProvider,
  IHtmlParserProvider provider,
  string virtualPath )
{
  // Each argument is stored unchanged in the member of the same name.
  this.Parser = parser;
  this.DomProvider = domProvider;
  this.Provider = provider;
  this.VirtualPath = virtualPath;
}
/// <summary>
/// Constructor - ensures that a source statement file and a parser are passed.
/// </summary>
/// <param name="sourceStatementFile">source statement file name including path</param>
/// <param name="parser">the particular tidy-tool</param>
/// <param name="statementManagerDateTime">contains the server date to be used in the header</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="sourceStatementFile"/> is null or empty,
/// or when <paramref name="parser"/> is null.
/// </exception>
public SmileStatementManager(string sourceStatementFile, IHtmlParser parser, IDateTime statementManagerDateTime)
{
    // nameof keeps the reported parameter name in sync with the signature on rename.
    if (string.IsNullOrEmpty(sourceStatementFile))
    {
        throw new ArgumentNullException(nameof(sourceStatementFile), @"No source statement file specified");
    }

    if (parser == null)
    {
        throw new ArgumentNullException(nameof(parser), @"parser is null");
    }

    // statementManagerDateTime is stored without validation.
    _sourceStatementFilePath = sourceStatementFile;
    _smileParser = parser;
    _statementManagerDateTime = statementManagerDateTime;
}
/// <summary>
/// Tries to advance to the next page. Evaluates <c>_nextPageXPath</c> on the parser,
/// takes the first match (or the default value when there is none), extracts its href
/// value and, if <c>CanMoveNext</c> accepts it, records it as visited and makes it the current page.
/// </summary>
/// <param name="parser">Parser on which the next-page XPath is evaluated.</param>
/// <returns>true when the current page URL was advanced; otherwise false.</returns>
public bool MoveNext(IHtmlParser parser)
{
    var candidateNode = parser.EvaluateXPath(_nextPageXPath).FirstOrDefault();
    var candidateUrl = ExtractHrefAttributeValue(candidateNode);

    if (!CanMoveNext(candidateUrl))
    {
        return false;
    }

    _visitedUniqueUrls.Add(candidateUrl);
    CurrentPageUrl = candidateUrl;
    return true;
}
/// <summary>
/// Returns the fragment parser set up with the head or tail injection content when such
/// content exists and has not been flagged as injected; otherwise returns
/// <paramref name="documentParser"/> unchanged.
/// </summary>
/// <param name="head">true to consider the head injection, false to consider the tail injection.</param>
/// <param name="documentParser">The document parser; remembered in <c>this.documentParser</c> when a fragment parser is returned.</param>
/// <returns>The fragment parser when an injection is started, otherwise <paramref name="documentParser"/>.</returns>
public IHtmlParser Push(bool head, IHtmlParser documentParser)
{
    if (head)
    {
        if (injectHead == null || headInjected)
        {
            return documentParser;
        }

        this.documentParser = documentParser;

        // Reuse the existing fragment parser when there is one; create it on first use.
        if (fragmentParser != null)
        {
            fragmentParser.Initialize(
                injectHead,
                (injectionFormat == HeaderFooterFormat.Text));
        }
        else
        {
            fragmentParser = new HtmlParser(
                new ConverterBufferInput(injectHead, progressMonitor),
                false,
                (injectionFormat == HeaderFooterFormat.Text),
                64,
                8,
                testBoundaryConditions);
        }

        injectingHead = true;
        return fragmentParser;
    }

    // Tail request while the head injection is still outstanding: assert and mark the head as injected.
    if (injectHead != null && !headInjected)
    {
        InternalDebug.Assert(false);
        headInjected = true;
    }

    if (injectTail == null || tailInjected)
    {
        return documentParser;
    }

    this.documentParser = documentParser;

    if (fragmentParser != null)
    {
        fragmentParser.Initialize(
            injectTail,
            (injectionFormat == HeaderFooterFormat.Text));
    }
    else
    {
        fragmentParser = new HtmlParser(
            new ConverterBufferInput(injectTail, progressMonitor),
            false,
            (injectionFormat == HeaderFooterFormat.Text),
            64,
            8,
            testBoundaryConditions);
    }

    injectingHead = false;
    return fragmentParser;
}
/// <summary>
/// Parses <paramref name="html"/> by forwarding to the two-argument <c>Parse</c>
/// overload with <c>null</c> as the second argument.
/// </summary>
/// <param name="parser">The parser to delegate to.</param>
/// <param name="html">The HTML text to parse.</param>
/// <returns>The result of the underlying <c>Parse</c> call.</returns>
public static ParsedHtml Parse(this IHtmlParser parser, string html)
{
    return parser.Parse(html, null);
}
/// <summary>
/// Initializes the command with newly created <c>FileSystem</c>, <c>SpectreEnvironment</c>
/// and <c>HtmlParser</c> instances.
/// </summary>
public EmojiGeneratorCommand()
{
    this._fileSystem = new FileSystem();
    this._environment = new SpectreEnvironment();
    this._parser = new HtmlParser();
}
/// <summary>
/// Initializes the parser by resolving the <c>IHtmlParser</c> service from
/// <c>_angleSharpContext</c>.
/// </summary>
public BoohooSearchResultsParser()
{
    var htmlParser = _angleSharpContext.GetService<IHtmlParser>();
    _angleSharpHtmlParser = htmlParser;
}
/// <summary>
/// Parses <paramref name="htmlContent"/> with <paramref name="parser"/>, passing the
/// value produced by <c>CreateDocumentUri</c> for <paramref name="virtualPath"/>.
/// </summary>
/// <param name="parser">The HTML parser to use.</param>
/// <param name="htmlContent">The HTML text to parse.</param>
/// <param name="virtualPath">The virtual path handed to <c>CreateDocumentUri</c>.</param>
/// <returns>The parsed document.</returns>
private static IHtmlDocument ParseDocument(IHtmlParser parser, string htmlContent, string virtualPath)
{
    var documentUri = CreateDocumentUri(virtualPath);
    return parser.Parse(htmlContent, documentUri);
}
/// <summary>
/// Parses an HTML document; depending on the circumstances this method caches the document model.
/// </summary>
/// <param name="contentResult">The document load result.</param>
/// <param name="parser">The HTML parser.</param>
/// <returns>The HTML document object.</returns>
public static IHtmlDocument ParseDocument( HtmlContentResult contentResult, IHtmlParser parser )
{
  var domProvider = parser.DomProvider;

  // Caching needs both a cache key and a DOM provider; without them just parse.
  if ( contentResult.CacheKey == null || domProvider == null )
  {
    return ParseDocument( parser, contentResult.Content, contentResult.VirtualPath );
  }

  var dependencyKey = contentResult.CacheKey;
  var cacheKey = string.Format( CultureInfo.InvariantCulture, DocumentCacheKey, contentResult.VirtualPath );

  var documentFactory = Cache.Get( cacheKey ) as Func<IHtmlDomProvider, IHtmlDocument>;
  if ( documentFactory != null )
  {
    return documentFactory( domProvider );
  }

  WebServiceLocator.GetTraceService().Trace( TraceLevel.Info, "Jumony Web", "Document cache missed" );

  var document = ParseDocument( parser, contentResult.Content, contentResult.VirtualPath );

  // The document must be compiled synchronously, otherwise the document object may be modified.
  documentFactory = document.Compile();

  // Warm-up can run asynchronously; the factory is stored in the cache only after warming up.
  Action warmUpAndStore = delegate
  {
    documentFactory( domProvider );
    Cache.Insert( cacheKey, documentFactory, new CacheDependency( new string[0], new[] { dependencyKey } ), CacheItemPriority.High );
  };

  // Start the warm-up immediately on another thread.
  // NOTE(review): EndInvoke is never called here, so a failure inside the warm-up is not observed - confirm this is acceptable.
  warmUpAndStore.BeginInvoke( null, null );

  return document;
}
/////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Creates a document that uses a newly constructed <c>CHtmlParser</c>.
/// </summary>
public CHtmlDocument()
{
    this.m_parser = new CHtmlParser();
}
/// <summary>
/// Parses the string asynchronously, passing <see cref="CancellationToken.None"/>
/// to the cancellable overload.
/// </summary>
/// <param name="parser">The parser to delegate to.</param>
/// <param name="source">The source text to parse.</param>
/// <returns>The task returned by the cancellable overload.</returns>
public static Task<IHtmlDocument> ParseDocumentAsync(this IHtmlParser parser, String source) =>
    parser.ParseDocumentAsync(source, CancellationToken.None);
/// <summary>
/// Initializes the converter with the HTML parser it will expose through <c>Parser</c>.
/// </summary>
/// <param name="parser">The parser stored in <c>Parser</c>.</param>
public RichTextContentConverter(IHtmlParser parser)
{
    this.Parser = parser;
}
/// <summary>
/// Initializes the service with its HTTP client, application settings and HTML parser.
/// </summary>
/// <param name="client">Stored in <c>_client</c>.</param>
/// <param name="appSettings">Stored in <c>_appSettings</c>.</param>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
public EbayTradingService(
    HttpClient client,
    IOptions<AppSettings> appSettings,
    IHtmlParser htmlParser)
{
    this._htmlParser = htmlParser;
    this._appSettings = appSettings;
    this._client = client;
}
/// <summary>
/// Initializes the crawler with the HTML parser and page downloader it will use.
/// </summary>
/// <param name="htmlParser">Stored in <c>htmlParser</c>.</param>
/// <param name="pageDownloader">Stored in <c>pageDownloader</c>.</param>
public SinglePageCrawler(
    IHtmlParser htmlParser,
    IPageDownloader pageDownloader)
{
    this.pageDownloader = pageDownloader;
    this.htmlParser = htmlParser;
}
/// <summary>
/// Requests <c>text/html</c> and attaches a reader that reads the response content
/// as a string and parses it with <paramref name="parser"/>, passing the request URL.
/// </summary>
/// <param name="query">The HTTP query to extend.</param>
/// <param name="parser">The parser applied to the fetched content.</param>
/// <returns>The observable produced by <c>WithReader</c>.</returns>
public static IObservable<HttpFetch<ParsedHtml>> Html(this IHttpObservable query, IHtmlParser parser)
{
    return query
        .Accept(MediaTypeNames.Text.Html)
        .WithReader(async fetch =>
        {
            var html = await fetch.Content.ReadAsStringAsync()
                                  .DontContinueOnCapturedContext();
            return parser.Parse(html, fetch.RequestUrl);
        });
}
/// <summary>
/// Initializes the provider with its logger and HTML parser, and sets up a lazily
/// created <see cref="HttpClient"/> obtained from <paramref name="httpClientFactory"/>
/// under the name <c>HttpClientName</c>.
/// </summary>
/// <param name="logger">Stored in <c>Logger</c>.</param>
/// <param name="httpClientFactory">Factory used to create the client on first access of <c>LazyClient</c>.</param>
/// <param name="htmlParser">Stored in <c>HtmlParser</c>.</param>
protected RawHtmlSearchProvider(
    ILogger<TInstance> logger,
    IHttpClientFactory httpClientFactory,
    IHtmlParser htmlParser)
{
    // The factory is not called here; it runs when the Lazy value is first requested.
    Func<HttpClient> createClient = () => httpClientFactory.CreateClient(HttpClientName);

    Logger = logger;
    HtmlParser = htmlParser;
    LazyClient = new Lazy<HttpClient>(createClient);
}
/// <summary>
/// Initializes the provider by forwarding all arguments to the base constructor.
/// </summary>
/// <param name="logger">Forwarded to the base constructor.</param>
/// <param name="httpClientFactory">Forwarded to the base constructor.</param>
/// <param name="htmlParser">Forwarded to the base constructor.</param>
public BingSearchProvider(
    ILogger<BingSearchProvider> logger,
    IHttpClientFactory httpClientFactory,
    IHtmlParser htmlParser)
    : base(logger, httpClientFactory, htmlParser)
{
    // No additional work in this constructor body.
}
/// <summary>
/// Runs the parameterless constructor, then stores the file handler and HTML parser.
/// </summary>
/// <param name="fileHandler">Stored in <c>_fileHandler</c>.</param>
/// <param name="parser">Stored in <c>_parser</c>.</param>
public DialogVm(IFileHandler fileHandler, IHtmlParser parser)
    : this()
{
    this._parser = parser;
    this._fileHandler = fileHandler;
}
/// <summary>
/// Parses the stream asynchronously, passing <see cref="CancellationToken.None"/>
/// to the cancellable overload.
/// </summary>
/// <param name="parser">The parser to delegate to.</param>
/// <param name="source">The stream to parse.</param>
/// <returns>The task returned by the cancellable overload.</returns>
public static Task<IHtmlDocument> ParseDocumentAsync(this IHtmlParser parser, Stream source)
{
    return parser.ParseDocumentAsync(source, CancellationToken.None);
}
/////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Creates a document that uses a newly constructed <c>CHtmlParser</c> and loads
/// the given HTML through <c>LoadHtml</c>.
/// </summary>
/// <param name="html">The HTML to load; checked for null with a debug assertion only.</param>
public CHtmlDocument(string html)
{
    System.Diagnostics.Debug.Assert(html != null);

    this.m_parser = new CHtmlParser();
    this.LoadHtml(html);
}
/// <summary>
/// Parses the head from the stream asynchronously, passing
/// <see cref="CancellationToken.None"/> to the cancellable overload.
/// </summary>
/// <param name="parser">The parser to delegate to.</param>
/// <param name="source">The stream to parse.</param>
/// <returns>The task returned by the cancellable overload.</returns>
public static Task<IHtmlHeadElement?> ParseHeadAsync(this IHtmlParser parser, Stream source)
{
    return parser.ParseHeadAsync(source, CancellationToken.None);
}
/// <summary>
/// Initializes the discovery component with its request service and HTML parser.
/// </summary>
/// <param name="requestService">Stored in <c>_requestService</c>.</param>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
public ProviderDiscovery(
    IRequestService requestService,
    IHtmlParser htmlParser)
{
    this._htmlParser = htmlParser;
    this._requestService = requestService;
}
/// <summary>
/// Populates the given document asynchronously, passing
/// <see cref="CancellationToken.None"/> to the cancellable overload.
/// </summary>
/// <param name="parser">The parser to delegate to.</param>
/// <param name="document">The document to populate.</param>
/// <returns>The task returned by the cancellable overload.</returns>
public static Task<IDocument> ParseDocumentAsync(this IHtmlParser parser, IDocument document)
{
    return parser.ParseDocumentAsync(document, CancellationToken.None);
}
/// <summary>
/// Scrapes the HTML content held by the given parser, evaluating each XPath
/// definition and collecting the results by field name.
/// </summary>
/// <param name="xpathDefinitions">key-value pairs of field name and xpath location</param>
/// <param name="parser">html parser holding the html to parse</param>
/// <returns>key-value pairs of field name and nodes</returns>
private IEnumerable<KeyValuePair<string, IEnumerable<ScrappedHtmlNode>>> ScrapeFields(IEnumerable<KeyValuePair<string, string>> xpathDefinitions, IHtmlParser parser)
{
    // The result is materialized into a dictionary, so field names must be unique.
    return xpathDefinitions.ToDictionary(
        definition => definition.Key,
        definition => parser.EvaluateXPath(definition.Value));
}
/// <summary>
/// Initializes the service with its web client and HTML parser.
/// </summary>
/// <param name="webClient">Stored in <c>_webClient</c>.</param>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
public StatsService(
    IWebClient webClient,
    IHtmlParser htmlParser)
{
    this._htmlParser = htmlParser;
    this._webClient = webClient;
}
/// <summary>
/// Initializes the provider, passing two imgur URL pattern strings to the base
/// constructor and storing the HTML parser and request service.
/// </summary>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
/// <param name="requestService">Stored in <c>_requestService</c>.</param>
public ImgurProvider(IHtmlParser htmlParser, IRequestService requestService)
    : base(
        "https?://imgur\\.com/(gallery/)?([0-9a-zA-Z]+)",
        "https?://i\\.imgur\\.com/([0-9a-zA-Z]+)")
{
    this._requestService = requestService;
    this._htmlParser = htmlParser;
}
/// <summary>
/// Always reports that there is no next page; <paramref name="parser"/> is not used.
/// </summary>
/// <param name="parser">Ignored.</param>
/// <returns>Always false.</returns>
public bool MoveNext(IHtmlParser parser) => false;
/// <summary>
/// Initializes the instance with its data getter, data saver and HTML parser.
/// </summary>
/// <param name="dataGetter">Stored in <c>_dataGetter</c>.</param>
/// <param name="dataSaver">Stored in <c>_dataSaver</c>.</param>
/// <param name="parser">Stored in <c>_parser</c>.</param>
public Stocker5000(
    IDataGetter dataGetter,
    IDataSaver dataSaver,
    IHtmlParser parser)
{
    this._parser = parser;
    this._dataSaver = dataSaver;
    this._dataGetter = dataGetter;
}
/// <summary>
/// Sets the item value resolver to a <c>CssInnerHtmlResolver&lt;string&gt;</c> built
/// from the given CSS selection and parser.
/// </summary>
/// <param name="cssSelection">The CSS selection passed to the resolver.</param>
/// <param name="parser">The parser passed to the resolver; defaults to null.</param>
public void InnerHtml(string cssSelection, IHtmlParser parser = null)
{
    var resolver = new CssInnerHtmlResolver<string>(cssSelection, parser);
    this.itemValueResolver = resolver;
}
/// <summary>
/// Creates a <c>DelegatingHtmlParser</c> whose parse calls are routed to
/// <paramref name="impl"/>, which receives the wrapped parser, the HTML and the base URL.
/// </summary>
/// <param name="parser">The parser handed to <paramref name="impl"/> on every call.</param>
/// <param name="impl">The function that performs the parse.</param>
/// <returns>The new delegating parser.</returns>
public static IHtmlParser Wrap(this IHtmlParser parser, Func<IHtmlParser, string, Uri, ParsedHtml> impl)
{
    return new DelegatingHtmlParser((html, baseUrl) => impl(parser, html, baseUrl));
}
/// <summary>
/// Initializes the provider with its HTTP client, HTML parser and the value of the
/// supplied options.
/// </summary>
/// <param name="http">Stored in <c>_http</c>.</param>
/// <param name="parser">Stored in <c>_parser</c>.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is stored in <c>_options</c>.</param>
public GithubMdContentProvider(
    HttpClient http,
    IHtmlParser parser,
    IOptions<ContentOptions> options)
{
    this._http = http;
    this._parser = parser;
    this._options = options.Value;
}
/// <summary>
/// Initializes the instance with its web request sender and HTML parser.
/// </summary>
/// <param name="webRequestSender">Stored in <c>_webRequestSender</c>.</param>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
public FindSmokeBall(
    IWebRequestSender webRequestSender,
    IHtmlParser htmlParser)
{
    this._htmlParser = htmlParser;
    this._webRequestSender = webRequestSender;
}
/// <summary>
/// Initializes the feed parser with the HTML parser it will use.
/// </summary>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
public FeedParser(IHtmlParser htmlParser)
{
    this._htmlParser = htmlParser;
}
/// <summary>
/// Initializes the service with its HTML parser, HTTP client and repository.
/// </summary>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
/// <param name="httpClient">Stored in <c>_httpClient</c>.</param>
/// <param name="repository">Stored in <c>_repository</c>.</param>
public WordDictionaryService(
    IHtmlParser htmlParser,
    HttpClient httpClient,
    IWordDictionaryRepository repository)
{
    this._htmlParser = htmlParser;
    this._httpClient = httpClient;
    this._repository = repository;
}
/// <summary>
/// Initializes the wrapper with its web client and HTML parser.
/// </summary>
/// <param name="webClient">Stored in <c>_webClient</c>.</param>
/// <param name="parser">Stored in <c>_parser</c>.</param>
public Wrapper(
    IWebClient webClient,
    IHtmlParser parser)
{
    this._parser = parser;
    this._webClient = webClient;
}
/////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Creates a document that uses the supplied parser and loads the given HTML
/// through <c>LoadHtml</c>.
/// </summary>
/// <param name="html">The HTML to load; checked for null with a debug assertion only.</param>
/// <param name="parser">The parser to use; checked for null with a debug assertion only.</param>
public CHtmlDocument(string html, IHtmlParser parser)
{
    System.Diagnostics.Debug.Assert(html != null);
    System.Diagnostics.Debug.Assert(parser != null);

    this.m_parser = parser;
    this.LoadHtml(html);
}
/// <summary>
/// Replaces <c>_parser</c> with a newly created <c>HtmlParser</c>.
/// </summary>
public void Setup() => _parser = new HtmlParser();
/////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Creates a document that uses the supplied parser.
/// </summary>
/// <param name="parser">The parser to use; checked for null with a debug assertion only.</param>
public CHtmlDocument(IHtmlParser parser)
{
    System.Diagnostics.Debug.Assert(parser != null);

    this.m_parser = parser;
}
/// <summary>
/// Initializes the merger with a new <c>MergerToBundle</c> for the base class and
/// appends two processing steps: inline code processing, then paragraph splitting.
/// </summary>
/// <param name="htmlParser">The parser handed to the <c>ParagraphSplitter</c> step.</param>
public DefaultMerger(IHtmlParser htmlParser)
    : base(new MergerToBundle())
{
    // Steps are appended in this order.
    var inlineCodeStep = new InlineCodeProcessing();
    ProcessingSteps.AddLast(inlineCodeStep);

    var paragraphStep = new ParagraphSplitter(htmlParser);
    ProcessingSteps.AddLast(paragraphStep);
}
/// <summary>
/// Handles the start of a parse: passes the URI of the parser's link to
/// <c>CurrentParseUrl</c>, then updates that link in the link repository.
/// </summary>
/// <param name="parser">The parser whose <c>Link</c> is being processed.</param>
private void OnParseStarted(IHtmlParser parser)
{
    var startedUri = parser.Link.Uri;
    CurrentParseUrl(startedUri);

    RepositoryContainer.LinkRepository.Update(parser.Link);
}
/// <summary>
/// Parses <paramref name="htmlContent"/> with <paramref name="parser"/>, passing the
/// value produced by <c>CreateDocumentUri</c> for <paramref name="virtualPath"/>.
/// </summary>
/// <param name="parser">The HTML parser to use.</param>
/// <param name="htmlContent">The HTML text to parse.</param>
/// <param name="virtualPath">The virtual path handed to <c>CreateDocumentUri</c>.</param>
/// <returns>The parsed document.</returns>
private static IHtmlDocument ParseDocument( IHtmlParser parser, string htmlContent, string virtualPath )
{
  var documentUri = CreateDocumentUri( virtualPath );
  return parser.Parse( htmlContent, documentUri );
}
/// <summary>
/// Explicit implementation of <c>IHtmlParserProvider.ReleaseParser</c>.
/// This implementation performs no work.
/// </summary>
/// <param name="parser">The parser being released; not used.</param>
void IHtmlParserProvider.ReleaseParser( IHtmlParser parser )
{
  // Nothing to do.
}
/// <summary>
/// Creates the <c>HtmlParserV1</c> stored in <c>_parser</c>, giving it a new
/// <c>DateTimeProvider</c>.
/// </summary>
public HtmlParserV1Tests()
{
    var dateTimeProvider = new DateTimeProvider();
    _parser = new HtmlParserV1(dateTimeProvider);
}
/// <summary>
/// Adds <paramref name="parser"/> to <c>_parserDictionary</c> under <paramref name="key"/>.
/// </summary>
/// <param name="key">The key to register the parser under.</param>
/// <param name="parser">The parser to register.</param>
public void RegisterHtmlParser(HtmlParserKey key, IHtmlParser parser) =>
    _parserDictionary.Add(key, parser);
/// <summary>
/// Initializes the factory with its HTML parser, web page downloader and keywords parser.
/// </summary>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
/// <param name="webPageDownloader">Stored in <c>_webPageDownloader</c>.</param>
/// <param name="keywordsParser">Stored in <c>_keywordsParser</c>.</param>
public ArticleFactory(
    IHtmlParser htmlParser,
    IWebPageDownloader webPageDownloader,
    KeywordsParser keywordsParser)
{
    this._keywordsParser = keywordsParser;
    this._webPageDownloader = webPageDownloader;
    this._htmlParser = htmlParser;
}
/// <summary>
/// Initializes the handler with the HTML parser it will use.
/// </summary>
/// <param name="htmlParser">Stored in <c>_htmlParser</c>.</param>
public ParseLinkFromResponse_Cmd_Handler(IHtmlParser htmlParser)
{
    this._htmlParser = htmlParser;
}
/// <summary>
/// Creates an <c>HtmlAgilityParser</c> over the given HTML content and returns it
/// through <paramref name="parser"/>.
/// </summary>
/// <param name="parser">Receives the newly created parser.</param>
/// <param name="htmlContent">The HTML content passed to the parser's constructor.</param>
private void InitParser(out IHtmlParser parser, string htmlContent)
{
    var agilityParser = new HtmlAgilityParser(htmlContent);
    parser = agilityParser;
}