/// <summary>
/// Test setup: creates a <see cref="ResultItems"/> holding one "content" entry and an
/// attached request, then creates the spider instance used by the tests.
/// </summary>
public void Before()
{
    // Build the fixture in a local first, then publish it to the field.
    var items = new ResultItems();
    items.AddOrUpdateResultItem("content", "爬虫工具");
    items.Request = new Request("http://www.baidu.com", 1, null);
    _resultItems = items;

    _spider = new DefaultSpider();
}
// Disabled in the original source: [MethodImplAttribute(MethodImplOptions.Synchronized)]
/// <summary>
/// Looks up the result stored under the full name of <c>_type</c> and, when one is present,
/// forwards it to <c>_classPipeline</c>.
/// </summary>
/// <param name="resultItems">Results to look the model up in.</param>
/// <param name="task">Task passed through to the wrapped pipeline.</param>
public virtual void Process(ResultItems resultItems, ITask task)
{
    string modelKey = _type.FullName;
    dynamic model = resultItems.Get(modelKey);

    // Only forward when something was stored under this key.
    if (model != null)
    {
        _classPipeline.Process(model, task);
    }
}
/// <summary>
/// Serializes all results to JSON and writes them to
/// <c>BasePath/{task.Identify}/{md5 of the request URL}.json</c>.
/// </summary>
/// <param name="resultItems">Results to persist; the request URL determines the file name.</param>
/// <param name="task">Task whose <c>Identify</c> value names the output sub-directory.</param>
/// <exception cref="IOException">Logged as a warning and rethrown when writing fails.</exception>
public void Process(ResultItems resultItems, ITask task)
{
    string path = BasePath + "/" + task.Identify + "/";
    try
    {
        FileInfo file = GetFile(path + Encrypt.Md5Encrypt(resultItems.Request.Url) + ".json");

        // Serialize before touching the file so a serialization failure cannot leave
        // behind an emptied file.
        string json = JsonConvert.SerializeObject(resultItems.GetAll());

        // FileMode.Create truncates an existing file. OpenWrite() does not, so writing a
        // shorter document over a longer one used to leave stale trailing bytes behind,
        // producing invalid JSON.
        using (FileStream stream = file.Open(FileMode.Create, FileAccess.Write))
        using (StreamWriter printWriter = new StreamWriter(stream, Encoding.UTF8))
        {
            printWriter.WriteLine(json);
        }
    }
    catch (IOException e)
    {
        _logger.Warn("write file error", e);
        throw;
    }
}
/// <summary>
/// Pushes a <see cref="ResultItems"/> with three entries through the collector pipeline and
/// checks that every collected item still exposes those three entries.
/// </summary>
public void TestCollectorPipeline()
{
    ResultItems resultItems = new ResultItems();
    resultItems.AddOrUpdateResultItem("a", "a");
    resultItems.AddOrUpdateResultItem("b", "b");
    resultItems.AddOrUpdateResultItem("c", "c");

    _resultItemsCollectorPipeline.Process(resultItems, null);

    foreach (var result in _resultItemsCollectorPipeline.GetCollected())
    {
        // Direct cast instead of `as`: a wrong element type now fails with a descriptive
        // InvalidCastException rather than a NullReferenceException on the next line.
        ResultItems items = (ResultItems)result;

        // Assert.AreEqual takes (expected, actual); the original had them swapped, which
        // makes failure messages misleading.
        Assert.AreEqual(3, items.Results.Count);
        Assert.AreEqual("a", items.Results["a"]);
        Assert.AreEqual("b", items.Results["b"]);
        Assert.AreEqual("c", items.Results["c"]);
    }
}
/// <summary>
/// Writes the request URL and the "html" result to
/// <c>BasePath/{task.Identify}/{md5 of the request URL}.html</c>.
/// </summary>
/// <param name="resultItems">Results to persist; the request URL determines the file name.</param>
/// <param name="task">Task whose <c>Identify</c> value names the output sub-directory.</param>
/// <remarks>
/// Writing is best-effort: an <see cref="IOException"/> is logged as a warning and not rethrown.
/// </remarks>
public void Process(ResultItems resultItems, ITask task)
{
    string path = BasePath + PathSeperator + task.Identify + PathSeperator;
    try
    {
        FileInfo fileInfo = GetFile(path + Encrypt.Md5Encrypt(resultItems.Request.Url) + ".html");

        // FileMode.Create truncates an existing file. OpenWrite() does not, so re-crawling
        // a page whose content got shorter used to leave the tail of the old content in place.
        using (FileStream stream = fileInfo.Open(FileMode.Create, FileAccess.Write))
        using (StreamWriter writer = new StreamWriter(stream, Encoding.UTF8))
        {
            writer.WriteLine("url:\t" + resultItems.Request.Url);
            writer.WriteLine("html:\t" + resultItems.Get("html"));
        }
    }
    catch (IOException e)
    {
        // Deliberately swallowed after logging so one failed write does not stop the crawl.
        _logger.Warn("write file error", e);
    }
}
/// <summary>
/// Dispatches each extracted page model to the pipeline registered for its type.
/// </summary>
/// <param name="resultItems">Results holding the models, keyed by the model type's full name.</param>
/// <param name="task">Task passed through to every page-model pipeline.</param>
/// <remarks>
/// When the model type carries an <c>ExtractBy</c> attribute with <c>Multi</c> set, the stored
/// value is treated as a sequence and each element is processed separately; otherwise the
/// stored value is processed as a single model.
/// </remarks>
public void Process(ResultItems resultItems, ITask task)
{
    foreach (var entry in _pageModelPipelines)
    {
        object model = resultItems.Get(entry.Key.FullName);
        if (model == null)
        {
            // Nothing was extracted for this model type.
            continue;
        }

        Attribute annotation = entry.Key.GetCustomAttribute(typeof(ExtractBy), false);
        bool isMulti = annotation != null && ((ExtractBy)annotation).Multi;

        if (!isMulti)
        {
            entry.Value.Process(model, task);
            continue;
        }

        // Accept any sequence of reference-type elements (List<object>, arrays, List<TModel>, ...)
        // rather than only the exact List<object> type the original hard cast required.
        foreach (object item in (IEnumerable<object>)model)
        {
            entry.Value.Process(item, task);
        }
    }
}
/// <summary>
/// Runs the sub-pipelines, in order, that match the request of <paramref name="resultItems"/>.
/// Processing stops as soon as one of them returns anything other than
/// <c>MatchOther.Yes</c>.
/// </summary>
/// <param name="resultItems">Results handed to every matching sub-pipeline.</param>
/// <param name="task">Task passed through to every matching sub-pipeline.</param>
public void Process(ResultItems resultItems, ITask task)
{
    foreach (var subPipeline in _subPipelines)
    {
        if (!subPipeline.Match(resultItems.Request))
        {
            continue;
        }

        MatchOther outcome = subPipeline.ProcessResult(resultItems, task);
        if (outcome != MatchOther.Yes)
        {
            // Same short-circuit the lazy Any() query gave: later sub-pipelines are not run.
            return;
        }
    }
}
/// <summary>
/// Prints every result entry to the console as <c>key:value</c>, one entry per line.
/// </summary>
/// <param name="resultItems">Results whose entries are printed.</param>
/// <param name="spider">Not used by this pipeline.</param>
public void Process(ResultItems resultItems, ISpider spider)
{
    foreach (var pair in resultItems.Results)
    {
        string line = $"{pair.Key}:{pair.Value}";
        Console.WriteLine(line);
    }
}
/// <summary>
/// Processes the given results for the given task and reports a <see cref="MatchOther"/> outcome.
/// </summary>
/// <param name="resultItems">The results to process.</param>
/// <param name="task">The task supplied by the caller.</param>
/// <returns>
/// A <see cref="MatchOther"/> value. NOTE(review): presumably this tells the caller whether
/// further sub-pipelines may still handle the same results; confirm against the caller.
/// </returns>
public abstract MatchOther ProcessResult(ResultItems resultItems, ITask task);
/// <summary>
/// Stores an extracted value under <paramref name="key"/> by delegating to
/// <c>ResultItems.AddOrUpdateResultItem</c>.
/// </summary>
/// <param name="key">Name under which the value is stored.</param>
/// <param name="field">The extracted value to store.</param>
public void AddResultItem(string key, dynamic field)
    => ResultItems.AddOrUpdateResultItem(key, field);
/// <summary>
/// Writes the request URL and the "html" result to
/// <c>BasePath/{spider.Identity}/{md5 of the request URL}.html</c>.
/// </summary>
/// <param name="resultItems">Results to persist; the request URL determines the file name.</param>
/// <param name="spider">Spider whose <c>Identity</c> names the output sub-directory and whose logger receives warnings.</param>
/// <remarks>
/// Writing is best-effort: an <see cref="IOException"/> is logged as a warning and not rethrown.
/// </remarks>
public void Process(ResultItems resultItems, ISpider spider)
{
    string path = BasePath + PathSeperator + spider.Identity + PathSeperator;
    try
    {
        FileInfo fileInfo = PrepareFile(path + Encrypt.Md5Encrypt(resultItems.Request.Url.ToString()) + ".html");

        // FileMode.Create truncates an existing file. OpenWrite() does not, so re-crawling
        // a page whose content got shorter used to leave the tail of the old content in place.
        using (FileStream stream = fileInfo.Open(FileMode.Create, FileAccess.Write))
        using (StreamWriter writer = new StreamWriter(stream, Encoding.UTF8))
        {
            writer.WriteLine("url:\t" + resultItems.Request.Url);
            writer.WriteLine("html:\t" + resultItems.GetResultItem("html"));
        }
    }
    catch (IOException e)
    {
        // Deliberately swallowed after logging so one failed write does not stop the crawl.
        spider.Logger.Warn("Write file error.", e);
    }
}