Beispiel #1
0
 /// <summary>
 /// Downloads http://www.126.com as a string, traces progress before and after
 /// the download, and returns the length of the downloaded text.
 /// </summary>
 /// <param name="data">Not read by this method.</param>
 /// <param name="serialNumber">Number used as the prefix of the trace messages.</param>
 /// <returns>The number of characters in the downloaded string.</returns>
 private async Task<int> Save(PageResult data, int serialNumber)
 {
     Trace.WriteLine(serialNumber + ".2");

     string result;
     // Dispose the per-call client so its handler and connections are released.
     using (var client = new HttpClient { Timeout = new TimeSpan(0, 0, 20) })
     {
         result = await client.GetStringAsync("http://www.126.com");
     }

     // Substring(0, 20) throws when the body has fewer than 20 characters,
     // so clamp the preview length to what is actually available.
     var preview = result.Substring(0, Math.Min(20, result.Length));
     Trace.WriteLine(serialNumber + ".3, length:" + result.Length + ",126-html:" + preview);
     return result.Length;
 }
        /// <summary>
        /// Turns a downloaded response into a <c>PageResult</c> for the product channel.
        /// </summary>
        /// <param name="request">The request that produced <paramref name="response"/>.</param>
        /// <param name="response">The downloaded response whose content is parsed as HTML.</param>
        /// <returns>
        /// The value of <c>PageResult.EmptyResponse</c> when the response content is null,
        /// empty or whitespace; otherwise a result carrying the new requests found in the
        /// page and, only when the request's URL type is <c>UrlType.Extracting</c>, the
        /// extracted fields (<c>Data</c> is null for any other URL type).
        /// </returns>
        public PageResult Analyze(Request request, Response response)
        {
            var document = new HtmlDocument();
            if (string.IsNullOrWhiteSpace(response.Content))
            {
                return PageResult.EmptyResponse(Site.Topic, request, response, Channel.Product);
            }

            document.LoadHtml(response.Content);
            var discovered = FindNewRequest(document, request, @"[\s\S]*", "/Product/Details/");//todo: regexPattern

            List<ResultField> fields = null;
            if (request.UrlType == UrlType.Extracting)
            {
                // Start from the XPath-selected fields, then append the fixed metadata fields.
                fields = XpathSelect(document, _fieldXPaths);
                fields.Add(new ResultField { Name = "Uri", Value = request.Url });
                fields.Add(new ResultField { Name = "SiteName", Value = Site.Name });
                fields.Add(new ResultField { Name = "SiteDomain", Value = Site.Domain });
                fields.Add(new ResultField { Name = "ElapsedSecond", Value = response.MillisecondTime.ToString() });
                fields.Add(new ResultField { Name = "Downloader", Value = response.Downloader });
                fields.Add(new ResultField { Name = "CommentCount", Value = "0" });
            }

            return new PageResult
            {
                Request = request,
                Response = response,
                NewRequests = discovered,
                Channel = Channel.Product,
                Data = fields,
                Topic = Site.Topic
            };
        }
Beispiel #3
0
 /// <summary>
 /// Atomically increments the result counter and passes the result to every
 /// module in <c>_modules</c>.
 /// </summary>
 /// <param name="data">The page result handed to each module.</param>
 public void Inject(PageResult data)
 {
     Interlocked.Increment(ref _resultTotal);

     foreach (IResultPipelineModule pipelineModule in _modules)
     {
         // The value returned by ProcessAsync is discarded; this method does not
         // wait for the module's work.
         pipelineModule.ProcessAsync(data);
     }
 }
Beispiel #4
0
 /// <summary>
 /// Passes the result to every module in <c>_modules</c>, together with a
 /// serial number that starts at 1 and increases by one per module.
 /// </summary>
 /// <param name="data">The page result handed to each module.</param>
 public void Inject(PageResult data)
 {
     var serial = 0;
     foreach (IPipelineModule pipelineModule in _modules)
     {
         // Pre-increment so the first module receives 1. The returned value is discarded.
         pipelineModule.ProcessAsync(data, ++serial);
     }
 }
Beispiel #5
0
 /// <summary>
 /// Appends one UTF-8 line describing the result (URL, status code and number
 /// of new requests) to <c>page_result.txt</c> in the application's base directory.
 /// The file write runs inside <c>Task.Run</c>.
 /// </summary>
 /// <param name="data">The page result to log; its <c>NewRequests</c> may be null.</param>
 /// <returns>A task that completes once the line has been appended.</returns>
 public async Task ProcessAsync(PageResult data)
 {
     await Task.Run(() =>
     {
         // A null NewRequests collection is reported as zero new requests.
         var newReqCount = data.NewRequests == null ? 0 : data.NewRequests.Count();

         // Path.Combine inserts a directory separator only when the base directory lacks one,
         // so the file always ends up inside the base directory.
         var logPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "page_result.txt");

         var line = String.Format("Url:{0}, StatusCode:{1}, New Request Count:{2}", data.Request.Url, data.Response.HttpStatusCode, newReqCount);
         File.AppendAllLines(logPath, new[] { line }, Encoding.UTF8);
     });
 }
Beispiel #6
0
 /// <summary>
 /// Writes a "<paramref name="serialNumber"/>.1" trace message and then hands
 /// the work to <c>Save</c>.
 /// </summary>
 /// <param name="data">The page result forwarded to <c>Save</c>.</param>
 /// <param name="serialNumber">Number used in the trace message and forwarded to <c>Save</c>.</param>
 /// <returns>The task returned by <c>Save</c>, not awaited here.</returns>
 public Task<int> ProcessAsync(PageResult data, int serialNumber)
 {
     Trace.WriteLine(serialNumber + ".1");

     Task<int> pendingSave = Save(data, serialNumber);
     return pendingSave;
 }
Beispiel #7
0
 /// <summary>
 /// Writes a trace message, then awaits a <c>Task.Run</c> call whose delegate
 /// only reads <c>result.Data</c>; the value read is not used.
 /// </summary>
 /// <param name="result">The page result whose <c>Data</c> is read.</param>
 /// <returns>A task that completes when the queued delegate has run.</returns>
 public async Task ProcessAsync(PageResult result)
 {
     Trace.WriteLine("处理PageResult");

     var readData = Task.Run(() => result.Data);
     await readData;
 }