Пример #1
0
 /// <summary>
 /// Initializes the <c>_resultItems</c> and <c>_spider</c> fields: the result
 /// items carry a single "content" entry and a request for a fixed URL.
 /// </summary>
 public void Before()
 {
     ResultItems items = new ResultItems();
     items.AddOrUpdateResultItem("content", "爬虫工具");
     items.Request = new Request("http://www.baidu.com", 1, null);

     _resultItems = items;
     _spider = new DefaultSpider();
 }
 //[MethodImplAttribute(MethodImplOptions.Synchronized)]
 /// <summary>
 /// Looks up the entry stored under the full name of <c>_type</c> and, when one
 /// is present, hands it to <c>_classPipeline</c>.
 /// </summary>
 /// <param name="resultItems">Result items to read the entry from.</param>
 /// <param name="task">Task passed through to the class pipeline.</param>
 public virtual void Process(ResultItems resultItems, ITask task)
 {
     // Kept as dynamic so the Process overload is still chosen at runtime.
     dynamic extracted = resultItems.Get(_type.FullName);
     if (extracted == null)
     {
         return;
     }

     _classPipeline.Process(extracted, task);
 }
Пример #3
0
 /// <summary>
 /// Serializes all entries of <paramref name="resultItems"/> to JSON and writes
 /// them to a file named after the MD5 of the request URL, under
 /// <c>BasePath/&lt;task.Identify&gt;/</c>.
 /// </summary>
 /// <param name="resultItems">Result items whose contents are written.</param>
 /// <param name="task">Task whose <c>Identify</c> value names the sub-directory.</param>
 /// <exception cref="IOException">Logged as a warning and rethrown when the write fails.</exception>
 public void Process(ResultItems resultItems, ITask task)
 {
     string path = BasePath + "/" + task.Identify + "/";
     try
     {
         FileInfo file = GetFile(path + Encrypt.Md5Encrypt(resultItems.Request.Url) + ".json");

         // FileMode.Create truncates an existing file. OpenWrite() would keep the old
         // length, leaving stale trailing bytes (and invalid JSON) when the new
         // content is shorter than the previous one.
         using (FileStream stream = file.Open(FileMode.Create, FileAccess.Write, FileShare.None))
         using (StreamWriter printWriter = new StreamWriter(stream, Encoding.UTF8))
         {
             printWriter.WriteLine(JsonConvert.SerializeObject(resultItems.GetAll()));
         }
     }
     catch (IOException e)
     {
         _logger.Warn("write file error", e);
         throw;
     }
 }
        /// <summary>
        /// Feeds a three-entry <c>ResultItems</c> through the collector pipeline and
        /// checks that every collected item exposes exactly those three entries.
        /// </summary>
        public void TestCollectorPipeline()
        {
            ResultItems resultItems = new ResultItems();
            resultItems.AddOrUpdateResultItem("a", "a");
            resultItems.AddOrUpdateResultItem("b", "b");
            resultItems.AddOrUpdateResultItem("c", "c");
            _resultItemsCollectorPipeline.Process(resultItems, null);
            foreach (var result in _resultItemsCollectorPipeline.GetCollected())
            {
                // Direct cast: an unexpected element type fails here with a clear
                // InvalidCastException instead of a later NullReferenceException.
                ResultItems items = (ResultItems)result;

                // Assert.AreEqual takes (expected, actual); this order keeps failure
                // messages accurate.
                Assert.AreEqual(3, items.Results.Count);
                Assert.AreEqual("a", items.Results["a"]);
                Assert.AreEqual("b", items.Results["b"]);
                Assert.AreEqual("c", items.Results["c"]);
            }
        }
Пример #5
0
 /// <summary>
 /// Writes the request URL and the "html" entry of <paramref name="resultItems"/>
 /// to an <c>.html</c> file named after the MD5 of the request URL, inside a
 /// directory derived from <c>BasePath</c> and <c>task.Identify</c>.
 /// </summary>
 /// <param name="resultItems">Result items providing the request and the "html" entry.</param>
 /// <param name="task">Task whose <c>Identify</c> value names the sub-directory.</param>
 /// <remarks>An <see cref="IOException"/> is logged as a warning and not rethrown.</remarks>
 public void Process(ResultItems resultItems, ITask task)
 {
     string path = BasePath + PathSeperator + task.Identify + PathSeperator;
     try
     {
         FileInfo fileInfo = GetFile(path + Encrypt.Md5Encrypt(resultItems.Request.Url) + ".html");

         // FileMode.Create truncates an existing file. OpenWrite() would leave the
         // tail of a longer previous page behind when the same URL is written again.
         using (FileStream stream = fileInfo.Open(FileMode.Create, FileAccess.Write, FileShare.None))
         using (StreamWriter writer = new StreamWriter(stream, Encoding.UTF8))
         {
             writer.WriteLine("url:\t" + resultItems.Request.Url);
             writer.WriteLine("html:\t" + resultItems.Get("html"));
         }
     }
     catch (IOException e)
     {
         // Best effort: a failed write is reported but does not abort the caller.
         _logger.Warn("write file error", e);
     }
 }
Пример #6
0
        /// <summary>
        /// For every registered page-model type, looks up the entry stored under the
        /// type's full name and forwards it to the pipeline registered for that type.
        /// When the type's <c>ExtractBy</c> attribute has <c>Multi</c> set, the entry
        /// is treated as a list and each element is forwarded separately.
        /// </summary>
        /// <param name="resultItems">Result items to read the extracted entries from.</param>
        /// <param name="task">Task passed through to each page-model pipeline.</param>
        public void Process(ResultItems resultItems, ITask task)
        {
            foreach (var registration in _pageModelPipelines)
            {
                object extracted = resultItems.Get(registration.Key.FullName);
                if (extracted == null)
                {
                    continue;
                }

                Attribute annotation = registration.Key.GetCustomAttribute(typeof(ExtractBy), false);
                bool isMulti = annotation != null && ((ExtractBy)annotation).Multi;

                if (isMulti)
                {
                    IList<object> elements = (List<object>)extracted;
                    foreach (object element in elements)
                    {
                        registration.Value.Process(element, task);
                    }
                }
                else
                {
                    registration.Value.Process(extracted, task);
                }
            }
        }
Пример #7
0
 /// <summary>
 /// Runs each sub-pipeline that matches the request of
 /// <paramref name="resultItems"/>, in order, and stops after the first one whose
 /// result is not <c>MatchOther.Yes</c>.
 /// </summary>
 /// <param name="resultItems">Result items handed to every matching sub-pipeline.</param>
 /// <param name="task">Task handed to every matching sub-pipeline.</param>
 public void Process(ResultItems resultItems, ITask task)
 {
     // Explicit loop instead of a side-effecting LINQ query inside an empty `if`:
     // same evaluation order and short-circuiting, but the intent is visible.
     foreach (var subPipeline in _subPipelines)
     {
         if (!subPipeline.Match(resultItems.Request))
         {
             continue;
         }

         if (subPipeline.ProcessResult(resultItems, task) != MatchOther.Yes)
         {
             break;
         }
     }
 }
Пример #8
0
 /// <summary>
 /// Prints every entry of <paramref name="resultItems"/> to the console, one
 /// <c>key:value</c> pair per line.
 /// </summary>
 /// <param name="resultItems">Result items whose <c>Results</c> are printed.</param>
 /// <param name="spider">Not used by this method.</param>
 public void Process(ResultItems resultItems, ISpider spider)
 {
     foreach (var pair in resultItems.Results)
     {
         var line = $"{pair.Key}:{pair.Value}";
         Console.WriteLine(line);
     }
 }
Пример #9
0
 /// <summary>
 /// Processes the given result items for the given task; the implementation is
 /// supplied by each concrete subclass.
 /// </summary>
 /// <param name="resultItems">Result items to process.</param>
 /// <param name="task">Task the result items belong to.</param>
 /// <returns>
 /// A <c>MatchOther</c> value. NOTE(review): presumably tells the caller whether
 /// further pipelines should still run; confirm against the callers.
 /// </returns>
 public abstract MatchOther ProcessResult(ResultItems resultItems, ITask task);
Пример #10
0
 /// <summary>
 /// Stores an extracted value by forwarding it to
 /// <c>ResultItems.AddOrUpdateResultItem</c>.
 /// </summary>
 /// <param name="key">Key under which the value is stored.</param>
 /// <param name="field">Value to store.</param>
 public void AddResultItem(string key, dynamic field)
     => ResultItems.AddOrUpdateResultItem(key, field);
Пример #11
0
 /// <summary>
 /// Writes the request URL and the "html" result item of
 /// <paramref name="resultItems"/> to an <c>.html</c> file named after the MD5 of
 /// the request URL, inside a directory derived from <c>BasePath</c> and
 /// <c>spider.Identity</c>.
 /// </summary>
 /// <param name="resultItems">Result items providing the request and the "html" entry.</param>
 /// <param name="spider">Spider whose <c>Identity</c> names the sub-directory and whose logger reports failures.</param>
 /// <remarks>An <see cref="IOException"/> is logged as a warning and not rethrown.</remarks>
 public void Process(ResultItems resultItems, ISpider spider)
 {
     string path = BasePath + PathSeperator + spider.Identity + PathSeperator;
     try
     {
         FileInfo fileInfo = PrepareFile(path + Encrypt.Md5Encrypt(resultItems.Request.Url.ToString()) + ".html");

         // FileMode.Create truncates an existing file. OpenWrite() would leave the
         // tail of a longer previous page behind when the same URL is written again.
         using (FileStream stream = fileInfo.Open(FileMode.Create, FileAccess.Write, FileShare.None))
         using (StreamWriter writer = new StreamWriter(stream, Encoding.UTF8))
         {
             writer.WriteLine("url:\t" + resultItems.Request.Url);
             writer.WriteLine("html:\t" + resultItems.GetResultItem("html"));
         }
     }
     catch (IOException e)
     {
         // Best effort: a failed write is reported but does not abort the caller.
         spider.Logger.Warn("Write file error.", e);
     }
 }