/// <summary>
 /// Creates an HtmlParserResult object.
 /// </summary>
 /// <param name="parser">The parser that produced this parse result.</param>
 /// <param name="domProvider">The HTML DOM provider.</param>
 /// <param name="provider">The parser provider.</param>
 /// <param name="virtualPath">The virtual path of the document.</param>
 public HtmlParserResult( IHtmlParser parser, IHtmlDomProvider domProvider, IHtmlParserProvider provider, string virtualPath )
 {
     // Store every constructor argument on the matching member, unchanged.
     this.Parser = parser;
     this.DomProvider = domProvider;
     this.Provider = provider;
     this.VirtualPath = virtualPath;
 }
 /// <summary>
 /// Creates the statement manager after checking that a source statement file and a parser were supplied.
 /// </summary>
 /// <param name="sourceStatementFile">Source statement file name including path; must not be null or empty.</param>
 /// <param name="parser">The particular tidy-tool; must not be null.</param>
 /// <param name="statementManagerDateTime">Contains the server date to be used in the header (stored without validation).</param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="sourceStatementFile"/> is null or empty, or when <paramref name="parser"/> is null.
 /// </exception>
 public SmileStatementManager(string sourceStatementFile, IHtmlParser parser, IDateTime statementManagerDateTime)
 {
     // Guard 1: the statement file path is mandatory.
     if (string.IsNullOrEmpty(sourceStatementFile))
     {
         throw new ArgumentNullException("sourceStatementFile", @"No source statement file specified");
     }

     // Guard 2: a parser is mandatory.
     if (parser == null)
     {
         throw new ArgumentNullException("parser", @"parser is null");
     }

     // All guards passed: keep the collaborators.
     this._sourceStatementFilePath = sourceStatementFile;
     this._smileParser = parser;
     this._statementManagerDateTime = statementManagerDateTime;
 }
 /// <summary>
 /// Tries to advance to the next page referenced by the first node matching the next-page XPath.
 /// </summary>
 /// <param name="parser">Parser used to evaluate the next-page XPath.</param>
 /// <returns>
 /// <c>true</c> when <c>CanMoveNext</c> accepts the extracted URL, in which case the URL is recorded
 /// as visited and becomes <c>CurrentPageUrl</c>; otherwise <c>false</c>.
 /// </returns>
 public bool MoveNext(IHtmlParser parser)
 {
     // The first matching node (or the default value when nothing matches) is handed to the href extractor.
     var candidateUrl = ExtractHrefAttributeValue(parser.EvaluateXPath(_nextPageXPath).FirstOrDefault());

     if (!CanMoveNext(candidateUrl))
     {
         return false;
     }

     _visitedUniqueUrls.Add(candidateUrl);
     CurrentPageUrl = candidateUrl;
     return true;
 }
Exemple #4
0
        /// <summary>
        /// Chooses the parser to use next: the fragment parser when a head or tail injection
        /// is still pending, otherwise the supplied document parser.
        /// </summary>
        /// <param name="head"><c>true</c> to consider the head injection; <c>false</c> to consider the tail injection.</param>
        /// <param name="documentParser">The document parser; remembered when an injection starts and returned when none is pending.</param>
        /// <returns>The fragment parser primed with the injected content, or <paramref name="documentParser"/>.</returns>
        public IHtmlParser Push(bool head, IHtmlParser documentParser)
        {
            // A head injection is pending when head content exists and has not been injected yet.
            bool headPending = injectHead != null && !headInjected;

            if (head)
            {
                if (!headPending)
                {
                    return documentParser;
                }

                this.documentParser = documentParser;

                bool headIsText = (injectionFormat == HeaderFooterFormat.Text);

                // Create the fragment parser on first use; afterwards re-initialize the existing one.
                if (fragmentParser == null)
                {
                    fragmentParser = new HtmlParser(
                        new ConverterBufferInput(injectHead, progressMonitor),
                        false,
                        headIsText,
                        64,
                        8,
                        testBoundaryConditions);
                }
                else
                {
                    fragmentParser.Initialize(injectHead, headIsText);
                }

                injectingHead = true;
                return fragmentParser;
            }

            if (headPending)
            {
                // Reaching the tail while the head is still pending is asserted against;
                // the head is then marked as injected.
                InternalDebug.Assert(false);
                headInjected = true;
            }

            bool tailPending = injectTail != null && !tailInjected;
            if (!tailPending)
            {
                return documentParser;
            }

            this.documentParser = documentParser;

            bool tailIsText = (injectionFormat == HeaderFooterFormat.Text);

            if (fragmentParser == null)
            {
                fragmentParser = new HtmlParser(
                    new ConverterBufferInput(injectTail, progressMonitor),
                    false,
                    tailIsText,
                    64,
                    8,
                    testBoundaryConditions);
            }
            else
            {
                fragmentParser.Initialize(injectTail, tailIsText);
            }

            injectingHead = false;
            return fragmentParser;
        }
Exemple #5
0
 /// <summary>
 /// Parses the given HTML by forwarding to the two-argument <c>Parse</c> with <c>null</c> as the second argument.
 /// </summary>
 /// <param name="parser">The parser to use.</param>
 /// <param name="html">The HTML text to parse.</param>
 /// <returns>The result of the two-argument <c>Parse</c> call.</returns>
 public static ParsedHtml Parse(this IHtmlParser parser, string html)
 {
     return parser.Parse(html, null);
 }
 /// <summary>
 /// Creates the command with freshly constructed file system, environment and HTML parser instances.
 /// </summary>
 public EmojiGeneratorCommand()
 {
     // Instances are constructed left to right, in the same order as the fields are listed.
     (_fileSystem, _environment, _parser) =
         (new FileSystem(), new SpectreEnvironment(), new HtmlParser());
 }
 /// <summary>
 /// Creates the parser, obtaining an <c>IHtmlParser</c> service from <c>_angleSharpContext</c>.
 /// </summary>
 public BoohooSearchResultsParser() =>
     _angleSharpHtmlParser = _angleSharpContext.GetService<IHtmlParser>();
Exemple #8
0
 /// <summary>
 /// Parses HTML content with the given parser, using a document URI built from the virtual path.
 /// </summary>
 /// <param name="parser">The parser to use.</param>
 /// <param name="htmlContent">The HTML text to parse.</param>
 /// <param name="virtualPath">Virtual path passed to <c>CreateDocumentUri</c>.</param>
 /// <returns>The parsed document.</returns>
 private static IHtmlDocument ParseDocument(IHtmlParser parser, string htmlContent, string virtualPath)
 {
     var documentUri = CreateDocumentUri(virtualPath);
     return parser.Parse(htmlContent, documentUri);
 }
Exemple #9
0
    /// <summary>
    /// Parses an HTML document; depending on the circumstances the document model is cached.
    /// </summary>
    /// <param name="contentResult">The document load result.</param>
    /// <param name="parser">The HTML parser.</param>
    /// <returns>The HTML document object.</returns>
    public static IHtmlDocument ParseDocument( HtmlContentResult contentResult, IHtmlParser parser )
    {
      var domProvider = parser.DomProvider;

      // Caching is only possible when the content carries a cache key and the parser exposes a DOM provider.
      var cacheable = contentResult.CacheKey != null && domProvider != null;
      if ( !cacheable )
      {
        return ParseDocument( parser, contentResult.Content, contentResult.VirtualPath );
      }

      var dependencyKey = contentResult.CacheKey;
      var documentCacheKey = string.Format( CultureInfo.InvariantCulture, DocumentCacheKey, contentResult.VirtualPath );

      // Cache hit: the cached entry is a factory that builds the document for a DOM provider.
      var cachedFactory = Cache.Get( documentCacheKey ) as Func<IHtmlDomProvider, IHtmlDocument>;
      if ( cachedFactory != null )
      {
        return cachedFactory( domProvider );
      }

      WebServiceLocator.GetTraceService().Trace( TraceLevel.Info, "Jumony Web", "Document cache missed" );

      var document = ParseDocument( parser, contentResult.Content, contentResult.VirtualPath );

      // The document must be compiled synchronously, otherwise the document object might get modified.
      Func<IHtmlDomProvider, IHtmlDocument> compiledFactory = document.Compile();

      // Warm-up may run asynchronously; the factory is put into the cache only after it has been warmed up.
      Action warmUpAndCache = delegate
      {
        compiledFactory( domProvider );
        Cache.Insert( documentCacheKey, compiledFactory, new CacheDependency( new string[0], new[] { dependencyKey } ), CacheItemPriority.High );
      };

      // Start the warm-up right away via the delegate's asynchronous invocation.
      warmUpAndCache.BeginInvoke( null, null );

      return document;
    }
 /////////////////////////////////////////////////////////////////////////////////
 /// <summary>
 /// Creates a document that uses a newly constructed <c>CHtmlParser</c>.
 /// </summary>
 public CHtmlDocument()
 {
     var defaultParser = new CHtmlParser();
     this.m_parser = defaultParser;
 }
 /// <summary>
 /// Parses the string asynchronously, forwarding to the cancellable overload
 /// with <see cref="CancellationToken.None"/>.
 /// </summary>
 /// <param name="parser">The parser to use.</param>
 /// <param name="source">The source text to parse.</param>
 /// <returns>The task returned by the cancellable overload.</returns>
 public static Task<IHtmlDocument> ParseDocumentAsync(this IHtmlParser parser, String source) =>
     parser.ParseDocumentAsync(source, CancellationToken.None);
Exemple #12
0
 /// <summary>
 /// Creates the converter and stores the supplied HTML parser in <c>Parser</c>.
 /// </summary>
 /// <param name="parser">The HTML parser to keep.</param>
 public RichTextContentConverter(IHtmlParser parser) => Parser = parser;
Exemple #13
0
 /// <summary>
 /// Creates the service and stores its collaborators.
 /// </summary>
 /// <param name="client">HTTP client kept by the service.</param>
 /// <param name="appSettings">Application settings options kept by the service.</param>
 /// <param name="htmlParser">HTML parser kept by the service.</param>
 public EbayTradingService(HttpClient client, IOptions<AppSettings> appSettings, IHtmlParser htmlParser)
 {
     (_client, _appSettings, _htmlParser) = (client, appSettings, htmlParser);
 }
 /// <summary>
 /// Creates the crawler and stores the parser and downloader it was given.
 /// </summary>
 /// <param name="htmlParser">HTML parser kept by the crawler.</param>
 /// <param name="pageDownloader">Page downloader kept by the crawler.</param>
 public SinglePageCrawler(IHtmlParser htmlParser, IPageDownloader pageDownloader)
 {
     (this.htmlParser, this.pageDownloader) = (htmlParser, pageDownloader);
 }
Exemple #15
0
 /// <summary>
 /// Calls <c>Accept</c> with <c>MediaTypeNames.Text.Html</c> on the query and attaches a reader that
 /// parses each fetched body with the supplied parser.
 /// </summary>
 /// <param name="query">The HTTP query to extend.</param>
 /// <param name="parser">Parser applied to the fetched content, together with the fetch's request URL.</param>
 /// <returns>An observable of fetches carrying the parsed HTML.</returns>
 public static IObservable<HttpFetch<ParsedHtml>> Html(this IHttpObservable query, IHtmlParser parser)
 {
     var htmlQuery = query.Accept(MediaTypeNames.Text.Html);

     return htmlQuery.WithReader(async fetch =>
     {
         // Read the body as a string first, then parse it against the request URL.
         var markup = await fetch.Content.ReadAsStringAsync().DontContinueOnCapturedContext();
         return parser.Parse(markup, fetch.RequestUrl);
     });
 }
Exemple #16
0
 /// <summary>
 /// Initializes the provider with its logger and HTML parser, and prepares a lazily created HTTP client.
 /// </summary>
 /// <param name="logger">Logger stored in <c>Logger</c>.</param>
 /// <param name="httpClientFactory">Factory asked for a client named <c>HttpClientName</c> when the lazy value is first needed.</param>
 /// <param name="htmlParser">Parser stored in <c>HtmlParser</c>.</param>
 protected RawHtmlSearchProvider(ILogger<TInstance> logger, IHttpClientFactory httpClientFactory, IHtmlParser htmlParser)
 {
     Logger     = logger;
     HtmlParser = htmlParser;

     // The factory is not called here; the delegate runs only when the Lazy value is requested.
     Func<HttpClient> createClient = () => httpClientFactory.CreateClient(HttpClientName);
     LazyClient = new Lazy<HttpClient>(createClient);
 }
Exemple #17
0
 /// <summary>
 /// Creates the provider by passing all arguments to the base constructor.
 /// </summary>
 /// <param name="logger">Logger forwarded to the base class.</param>
 /// <param name="httpClientFactory">HTTP client factory forwarded to the base class.</param>
 /// <param name="htmlParser">HTML parser forwarded to the base class.</param>
 public BingSearchProvider(
     ILogger<BingSearchProvider> logger,
     IHttpClientFactory httpClientFactory,
     IHtmlParser htmlParser)
     : base(logger, httpClientFactory, htmlParser)
 {
     // Nothing to do beyond the base constructor call.
 }
 /// <summary>
 /// Creates the view model: runs the parameterless constructor first, then stores the file handler and parser.
 /// </summary>
 /// <param name="fileHandler">File handler kept by the view model.</param>
 /// <param name="parser">HTML parser kept by the view model.</param>
 public DialogVm(IFileHandler fileHandler, IHtmlParser parser)
     : this()
 {
     (_fileHandler, _parser) = (fileHandler, parser);
 }
 /// <summary>
 /// Parses the stream asynchronously, forwarding to the cancellable overload
 /// with <see cref="CancellationToken.None"/>.
 /// </summary>
 /// <param name="parser">The parser to use.</param>
 /// <param name="source">The stream to parse.</param>
 /// <returns>The task returned by the cancellable overload.</returns>
 public static Task<IHtmlDocument> ParseDocumentAsync(this IHtmlParser parser, Stream source)
 {
     return parser.ParseDocumentAsync(source, CancellationToken.None);
 }
 /////////////////////////////////////////////////////////////////////////////////
 /// <summary>
 /// Creates a document with a newly constructed <c>CHtmlParser</c> and loads the given HTML.
 /// </summary>
 /// <param name="html">HTML text passed to <c>LoadHtml</c>; asserted non-null via <c>Debug.Assert</c>.</param>
 public CHtmlDocument(string html)
 {
     System.Diagnostics.Debug.Assert(html != null);

     var defaultParser = new CHtmlParser();
     this.m_parser = defaultParser;

     this.LoadHtml(html);
 }
 /// <summary>
 /// Parses the head from the stream asynchronously, forwarding to the cancellable overload
 /// with <see cref="CancellationToken.None"/>.
 /// </summary>
 /// <param name="parser">The parser to use.</param>
 /// <param name="source">The stream to parse.</param>
 /// <returns>The task returned by the cancellable overload.</returns>
 public static Task<IHtmlHeadElement?> ParseHeadAsync(this IHtmlParser parser, Stream source)
 {
     return parser.ParseHeadAsync(source, CancellationToken.None);
 }
Exemple #22
0
 /// <summary>
 /// Creates the discovery component and stores its collaborators.
 /// </summary>
 /// <param name="requestService">Request service kept by the instance.</param>
 /// <param name="htmlParser">HTML parser kept by the instance.</param>
 public ProviderDiscovery(IRequestService requestService, IHtmlParser htmlParser)
 {
     (_requestService, _htmlParser) = (requestService, htmlParser);
 }
 /// <summary>
 /// Populates the given document asynchronously, forwarding to the cancellable overload
 /// with <see cref="CancellationToken.None"/>.
 /// </summary>
 /// <param name="parser">The parser to use.</param>
 /// <param name="document">The document to populate.</param>
 /// <returns>The task returned by the cancellable overload.</returns>
 public static Task<IDocument> ParseDocumentAsync(this IHtmlParser parser, IDocument document)
 {
     return parser.ParseDocumentAsync(document, CancellationToken.None);
 }
 /// <summary>
 /// Scrapes the HTML content held by the given parser, evaluating one XPath expression per field.
 /// </summary>
 /// <param name="xpathDefinitions">Key-value pairs of field name and XPath location.</param>
 /// <param name="parser">HTML parser holding the HTML to evaluate the expressions against.</param>
 /// <returns>
 /// Key-value pairs of field name and the nodes the parser returned for that field's XPath.
 /// The result is built eagerly as a dictionary keyed by field name.
 /// </returns>
 private IEnumerable<KeyValuePair<string, IEnumerable<ScrappedHtmlNode>>> ScrapeFields(IEnumerable<KeyValuePair<string, string>> xpathDefinitions, IHtmlParser parser)
 {
     // One dictionary entry per definition: field name -> XPath evaluation result.
     var nodesByField = xpathDefinitions.ToDictionary(
         definition => definition.Key,
         definition => parser.EvaluateXPath(definition.Value));

     return nodesByField;
 }
 /// <summary>
 /// Creates the service and stores its collaborators.
 /// </summary>
 /// <param name="webClient">Web client kept by the service.</param>
 /// <param name="htmlParser">HTML parser kept by the service.</param>
 public StatsService(IWebClient webClient, IHtmlParser htmlParser)
 {
     (_webClient, _htmlParser) = (webClient, htmlParser);
 }
Exemple #26
0
 /// <summary>
 /// Creates the provider, passing two imgur URL patterns to the base constructor and storing its collaborators.
 /// </summary>
 /// <param name="htmlParser">HTML parser kept by the provider.</param>
 /// <param name="requestService">Request service kept by the provider.</param>
 public ImgurProvider(IHtmlParser htmlParser, IRequestService requestService)
     : base(
         "https?://imgur\\.com/(gallery/)?([0-9a-zA-Z]+)",
         "https?://i\\.imgur\\.com/([0-9a-zA-Z]+)")
 {
     (_htmlParser, _requestService) = (htmlParser, requestService);
 }
 /// <summary>
 /// Always reports that there is no next page; the parser argument is not used.
 /// </summary>
 /// <param name="parser">Ignored.</param>
 /// <returns>Always <c>false</c>.</returns>
 public bool MoveNext(IHtmlParser parser) => false;
Exemple #28
0
 /// <summary>
 /// Creates the instance and stores its collaborators.
 /// </summary>
 /// <param name="dataGetter">Data getter kept by the instance.</param>
 /// <param name="dataSaver">Data saver kept by the instance.</param>
 /// <param name="parser">HTML parser kept by the instance.</param>
 public Stocker5000(IDataGetter dataGetter, IDataSaver dataSaver, IHtmlParser parser)
 {
     (_dataGetter, _dataSaver, _parser) = (dataGetter, dataSaver, parser);
 }
 /// <summary>
 /// Sets the item value resolver to a <c>CssInnerHtmlResolver</c> of <c>string</c> built from the given selection.
 /// </summary>
 /// <param name="cssSelection">CSS selection passed to the resolver.</param>
 /// <param name="parser">Optional parser passed to the resolver; <c>null</c> by default.</param>
 public void InnerHtml(string cssSelection, IHtmlParser parser = null)
 {
     var innerHtmlResolver = new CssInnerHtmlResolver<string>(cssSelection, parser);
     this.itemValueResolver = innerHtmlResolver;
 }
Exemple #30
0
 /// <summary>
 /// Wraps a parser in a <c>DelegatingHtmlParser</c> whose parse calls are routed through <paramref name="impl"/>.
 /// </summary>
 /// <param name="parser">The parser handed to <paramref name="impl"/> as its first argument.</param>
 /// <param name="impl">Function receiving the wrapped parser, the HTML and the base URL, and producing the parsed HTML.</param>
 /// <returns>The new delegating parser.</returns>
 public static IHtmlParser Wrap(this IHtmlParser parser, Func<IHtmlParser, string, Uri, ParsedHtml> impl)
 {
     return new DelegatingHtmlParser((html, baseUrl) => impl(parser, html, baseUrl));
 }
Exemple #31
0
 /// <summary>
 /// Creates the provider, storing the HTTP client, the parser and the unwrapped options value.
 /// </summary>
 /// <param name="http">HTTP client kept by the provider.</param>
 /// <param name="parser">HTML parser kept by the provider.</param>
 /// <param name="options">Options wrapper; its <c>Value</c> is what gets stored.</param>
 public GithubMdContentProvider(HttpClient http, IHtmlParser parser, IOptions<ContentOptions> options)
 {
     (_http, _parser, _options) = (http, parser, options.Value);
 }
Exemple #32
0
 /// <summary>
 /// Creates the instance and stores its collaborators.
 /// </summary>
 /// <param name="webRequestSender">Web request sender kept by the instance.</param>
 /// <param name="htmlParser">HTML parser kept by the instance.</param>
 public FindSmokeBall(IWebRequestSender webRequestSender, IHtmlParser htmlParser)
 {
     (_webRequestSender, _htmlParser) = (webRequestSender, htmlParser);
 }
Exemple #33
0
 /// <summary>
 /// Creates the feed parser and stores the supplied HTML parser.
 /// </summary>
 /// <param name="htmlParser">HTML parser kept by the instance.</param>
 public FeedParser(IHtmlParser htmlParser) => _htmlParser = htmlParser;
 /// <summary>
 /// Creates the service and stores its collaborators.
 /// </summary>
 /// <param name="htmlParser">HTML parser kept by the service.</param>
 /// <param name="httpClient">HTTP client kept by the service.</param>
 /// <param name="repository">Word dictionary repository kept by the service.</param>
 public WordDictionaryService(IHtmlParser htmlParser, HttpClient httpClient, IWordDictionaryRepository repository)
 {
     (_httpClient, _htmlParser, _repository) = (httpClient, htmlParser, repository);
 }
 /// <summary>
 /// Creates the wrapper and stores its collaborators.
 /// </summary>
 /// <param name="webClient">Web client kept by the wrapper.</param>
 /// <param name="parser">HTML parser kept by the wrapper.</param>
 public Wrapper(IWebClient webClient, IHtmlParser parser)
 {
     (_webClient, _parser) = (webClient, parser);
 }
 /////////////////////////////////////////////////////////////////////////////////
 /// <summary>
 /// Creates a document that uses the supplied parser and loads the given HTML.
 /// </summary>
 /// <param name="html">HTML text passed to <c>LoadHtml</c>; asserted non-null via <c>Debug.Assert</c>.</param>
 /// <param name="parser">Parser stored by the document; asserted non-null via <c>Debug.Assert</c>.</param>
 public CHtmlDocument(string html, IHtmlParser parser)
 {
     // Assertions run in the same order as the parameters are declared.
     System.Diagnostics.Debug.Assert(html != null);
     System.Diagnostics.Debug.Assert(parser != null);

     this.m_parser = parser;
     this.LoadHtml(html);
 }
Exemple #37
0
 /// <summary>
 /// Assigns a newly constructed <c>HtmlParser</c> to <c>_parser</c>.
 /// </summary>
 public void Setup() => _parser = new HtmlParser();
 /////////////////////////////////////////////////////////////////////////////////
 /// <summary>
 /// Creates a document that uses the supplied parser.
 /// </summary>
 /// <param name="parser">Parser stored by the document; asserted non-null via <c>Debug.Assert</c>.</param>
 public CHtmlDocument(IHtmlParser parser)
 {
     System.Diagnostics.Debug.Assert(parser != null);

     this.m_parser = parser;
 }
Exemple #39
0
 /// <summary>
 /// Creates the merger on top of a <c>MergerToBundle</c> and appends two processing steps:
 /// inline code processing first, then a paragraph splitter using the supplied parser.
 /// </summary>
 /// <param name="htmlParser">HTML parser handed to the paragraph splitter.</param>
 public DefaultMerger(IHtmlParser htmlParser)
     : base(new MergerToBundle())
 {
     // Steps are appended in execution order; keep inline code processing ahead of the splitter.
     var inlineCodeStep = new InlineCodeProcessing();
     ProcessingSteps.AddLast(inlineCodeStep);

     var paragraphStep = new ParagraphSplitter(htmlParser);
     ProcessingSteps.AddLast(paragraphStep);
 }
Exemple #40
0
 /// <summary>
 /// Handles the start of a parse: reports the URI of the parser's link through <c>CurrentParseUrl</c>
 /// and then updates that link in the link repository.
 /// </summary>
 /// <param name="parser">The parser whose <c>Link</c> is being processed.</param>
 private void OnParseStarted(IHtmlParser parser)
 {
     // Report the URL first, then pass the same link to the repository's Update.
     CurrentParseUrl(parser.Link.Uri);
     RepositoryContainer.LinkRepository.Update(parser.Link);
 }
 /// <summary>
 /// Creates the crawler and stores the parser and downloader it was given.
 /// </summary>
 /// <param name="htmlParser">HTML parser kept by the crawler.</param>
 /// <param name="pageDownloader">Page downloader kept by the crawler.</param>
 public SinglePageCrawler(IHtmlParser htmlParser, IPageDownloader pageDownloader)
 {
     (this.htmlParser, this.pageDownloader) = (htmlParser, pageDownloader);
 }
Exemple #42
0
 /// <summary>
 /// Parses HTML content with the given parser, using a document URI built from the virtual path.
 /// </summary>
 /// <param name="parser">The parser to use.</param>
 /// <param name="htmlContent">The HTML text to parse.</param>
 /// <param name="virtualPath">Virtual path passed to <c>CreateDocumentUri</c>.</param>
 /// <returns>The parsed document.</returns>
 private static IHtmlDocument ParseDocument( IHtmlParser parser, string htmlContent, string virtualPath )
 {
   var documentUri = CreateDocumentUri( virtualPath );
   return parser.Parse( htmlContent, documentUri );
 }
Exemple #43
0
 /// <summary>
 /// Explicit implementation of <c>IHtmlParserProvider.ReleaseParser</c>; the body is empty, so no work is performed.
 /// </summary>
 /// <param name="parser">The parser being released; not used.</param>
 void IHtmlParserProvider.ReleaseParser( IHtmlParser parser )
 {
 }
Exemple #44
0
 /// <summary>
 /// Creates the test class with an <c>HtmlParserV1</c> built on a new <c>DateTimeProvider</c>.
 /// </summary>
 public HtmlParserV1Tests()
 {
     var dateTimeProvider = new DateTimeProvider();
     _parser = new HtmlParserV1(dateTimeProvider);
 }
Exemple #45
0
 /// <summary>
 /// Assigns a newly constructed <c>HtmlParser</c> to <c>_parser</c>.
 /// </summary>
 public void Setup() => _parser = new HtmlParser();
 /// <summary>
 /// Registers a parser under the given key by adding the pair to the parser dictionary.
 /// </summary>
 /// <param name="key">Key to register the parser under.</param>
 /// <param name="parser">Parser to register.</param>
 public void RegisterHtmlParser(HtmlParserKey key, IHtmlParser parser) =>
     _parserDictionary.Add(key, parser);
Exemple #47
0
 /// <summary>
 /// Creates the factory and stores its collaborators.
 /// </summary>
 /// <param name="htmlParser">HTML parser kept by the factory.</param>
 /// <param name="webPageDownloader">Web page downloader kept by the factory.</param>
 /// <param name="keywordsParser">Keywords parser kept by the factory.</param>
 public ArticleFactory(IHtmlParser htmlParser, IWebPageDownloader webPageDownloader, KeywordsParser keywordsParser)
 {
     (_htmlParser, _webPageDownloader, _keywordsParser) = (htmlParser, webPageDownloader, keywordsParser);
 }
Exemple #48
0
 /// <summary>
 /// Creates the handler and stores the supplied HTML parser.
 /// </summary>
 /// <param name="htmlParser">HTML parser kept by the handler.</param>
 public ParseLinkFromResponse_Cmd_Handler(IHtmlParser htmlParser) => _htmlParser = htmlParser;
 /// <summary>
 /// Produces a new <c>HtmlAgilityParser</c> for the given HTML content.
 /// </summary>
 /// <param name="parser">Receives the newly created parser.</param>
 /// <param name="htmlContent">HTML content passed to the parser's constructor.</param>
 private void InitParser(out IHtmlParser parser, string htmlContent) =>
     parser = new HtmlAgilityParser(htmlContent);