// Requests the bbslogin page with id=guest to collect cookies, then runs a
// WebCrawlerSourceNode over the PPPerson board page with those cookies and
// prints each entity the monitor receives to the console.
public void CrawlerTest()
{
    CookieContainer cookieContainer = new CookieContainer();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(@"https://bbs.sjtu.edu.cn/bbslogin?id=guest");
    request.ProtocolVersion = HttpVersion.Version10;
    request.ContentType = "application/x-www-form-urlencoded";
    request.UserAgent = @"Mozilla/5.0";
    // HttpWebResponse.Cookies is only populated when the request carries a
    // CookieContainer; without this assignment the collection below is empty.
    request.CookieContainer = cookieContainer;

    // Dispose the response so the underlying connection is released.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        cookieContainer.Add(response.Cookies);
    }

    string url = @"https://bbs.sjtu.edu.cn/bbsdoc?board=PPPerson";

    // Select every table row after the first; the first cell of each row becomes column "ID".
    var model = new XMLEntityModel(@".//tr[position() > 1]");
    model.AddXMLColumn("ID", @"./td[1]");

    WebCrawlerSourceNode crawler = new WebCrawlerSourceNode(new string[] { url }, model, cookieContainer, @"bbs.sjtu.edu.cn");

    PipelineTask.Create(crawler)
        .AddMonitor((entity) => { Console.WriteLine(entity); })
        .Start();
}
// Reads SimpleSourceT line by line, splits the default column on tabs, and
// writes the result to TemplateFileOutput using the "##col1## dddd ##col2##" template.
public void FromFileToTemplateFile()
{
    var source = new SingleLineFileSourceNode(SimpleSourceT);

    var pipeline = PipelineTask.Create(source)
        .Spilt(Entity.DefaultColumn, separator: "\t")
        .ToTemplateFile(TemplateFileOutput, "##col1## dddd ##col2##");
    pipeline.Start();

    // NOTE(review): the pipeline writes TemplateFileOutput, but this compares
    // SimpleSource with SimpleFileOutput, which this test does not write —
    // confirm the intended comparison.
    var filesMatch = TestHelper.CompareTwoFile(SimpleSource, SimpleFileOutput);
    Assert.IsTrue(filesMatch);
}
// Reads SimpleSourceT, splits the default column on tabs, writes the entities
// through a TextFileConsumer to SimpleFileOutput, and asserts that
// TestHelper.CompareTwoFile reports SimpleSource and SimpleFileOutput as matching.
public void SpiltByT()
{
    var source = new SingleLineFileSourceNode(SimpleSourceT);
    var consumer = new TextFileConsumer(SimpleFileOutput);

    var pipeline = PipelineTask.Create(source)
        .Spilt(Entity.DefaultColumn, separator: "\t")
        .To(consumer);
    pipeline.Start();

    var filesMatch = TestHelper.CompareTwoFile(SimpleSource, SimpleFileOutput);
    Assert.IsTrue(filesMatch);
}
// Reads SimpleSourceT, splits the default column on tabs, adds a "Template"
// column built from a template string, writes the result to TemplateFileOutput,
// and compares that file with SampleTemplateFileOutput.
public void AddTemplateColumn()
{
    var source = new SingleLineFileSourceNode(SimpleSourceT);

    // NOTE(review): the template ends with "##col2" without a closing "##" —
    // confirm this is intentional and matches SampleTemplateFileOutput.
    var pipeline = PipelineTask.Create(source)
        .Spilt(Entity.DefaultColumn, separator: "\t")
        .AddTemplateColumn("Template", "##col1## ##col2")
        .ToTextFile(TemplateFileOutput);
    pipeline.Start();

    var filesMatch = TestHelper.CompareTwoFile(SampleTemplateFileOutput, TemplateFileOutput);
    Assert.IsTrue(filesMatch);
}
// Attaches a monitor to a pipeline over SimpleSource and asserts that the
// monitor callback ran exactly twice.
public void MonitorConsumer()
{
    int invocations = 0;
    var source = new SingleLineFileSourceNode(SimpleSource);

    var pipeline = PipelineTask.Create(source)
        .AddMonitor((sender, args) => { invocations += 1; });
    pipeline.Start();

    Assert.AreEqual(2, invocations);
}
// Round-trips SimpleSource through two pipelines: the first splits the default
// column and writes to SimpleFileOutput via ToFile; the second reads that file
// with FileSourceNode and writes it as text to SimpleSourceT. The test passes
// when SimpleSource and SimpleSourceT compare as matching.
public void FileSerilization()
{
    // Stage 1: SimpleSource -> SimpleFileOutput.
    var writeStage = PipelineTask.Create(new SingleLineFileSourceNode(SimpleSource))
        .Spilt(Entity.DefaultColumn)
        .ToFile(SimpleFileOutput);
    writeStage.Start();

    // Stage 2: SimpleFileOutput -> SimpleSourceT.
    // NOTE(review): this overwrites SimpleSourceT, which other tests in this
    // file read as input — confirm that is intended.
    var readStage = PipelineTask.Create(new FileSourceNode(SimpleFileOutput))
        .ToTextFile(SimpleSourceT);
    readStage.Start();

    var filesMatch = TestHelper.CompareTwoFile(SimpleSource, SimpleSourceT);
    Assert.IsTrue(filesMatch);
}
// Runs a pipeline over a WebSourceNode for http://www.bing.com and asserts that
// the monitor fired once and captured a non-null string from the default column.
public void WebSource()
{
    int invocations = 0;
    string body = null;
    var source = new WebSourceNode(@"http://www.bing.com");

    var pipeline = PipelineTask.Create(source)
        .AddMonitor((sender, args) =>
        {
            invocations += 1;
            body = args.CurrentEntity.GetValue<string>(Entity.DefaultColumn);
        });
    pipeline.Start();

    Assert.AreEqual(1, invocations);
    Assert.IsNotNull(body);
}
// Checks that Convert replaces each entity: the monitor before the conversion
// expects two value keys, the monitor after it expects none because the
// converter returns a fresh Entity.
public void Convert()
{
    var source = new SingleLineFileSourceNode(SimpleSource);

    var pipeline = PipelineTask.Create(source)
        .Spilt(Entity.DefaultColumn)
        .AddMonitor((sender, args) =>
        {
            int keyCount = args.CurrentEntity.Values.Keys.Count;
            Assert.AreEqual(2, keyCount);
        })
        .Convert((entity) =>
        {
            return new Entity();
        })
        .AddMonitor((sender, args) =>
        {
            int keyCount = args.CurrentEntity.Values.Keys.Count;
            Assert.AreEqual(0, keyCount);
        });

    pipeline.Start();
}
// Checks that Extend can add a value to each entity: the monitor before the
// extension expects two value keys, the monitor after it expects three once
// the callback has set "a" to an empty string.
public void Extend()
{
    var source = new SingleLineFileSourceNode(SimpleSource);

    var pipeline = PipelineTask.Create(source)
        .Spilt(Entity.DefaultColumn)
        .AddMonitor((sender, args) =>
        {
            int keyCount = args.CurrentEntity.Values.Keys.Count;
            Assert.AreEqual(2, keyCount);
        })
        .Extend((entity) =>
        {
            entity.SetValue("a", "");
        })
        .AddMonitor((sender, args) =>
        {
            int keyCount = args.CurrentEntity.Values.Keys.Count;
            Assert.AreEqual(3, keyCount);
        });

    pipeline.Start();
}
// Sends the "SimpleAzureSource" CSV through ToAzureTable using the `info`
// settings, then reads back through an AzureTableSourceNode built from the same
// `info` and asserts that the monitor saw five entities.
public void AzureTableTest()
{
    // Upload stage; the monitor prints one empty line per callback.
    var upload = PipelineTask.FromCsvFile("SimpleAzureSource")
        .AddMonitor((entity) => { Console.WriteLine(); })
        .ToAzureTable(info, "##col1##", "##col1####col2##");
    upload.Start();

    // Read-back stage; count every entity the monitor receives.
    int rowsSeen = 0;
    var download = PipelineTask.Create(new AzureTableSourceNode(info))
        .AddMonitor((entity) => { rowsSeen += 1; });
    download.Start();

    Assert.AreEqual(5, rowsSeen);
}
// Counts monitor callbacks on either side of a Filter whose predicate always
// returns true, expecting one callback before the filter and none after it.
// NOTE(review): MonitorConsumer expects two entities from SimpleSource, while
// this test expects one before the filter — confirm the Filter semantics and
// the expected counts.
public void Filter()
{
    int seenBefore = 0;
    int seenAfter = 0;
    var source = new SingleLineFileSourceNode(SimpleSource);

    var pipeline = PipelineTask.Create(source)
        .Spilt(Entity.DefaultColumn)
        .AddMonitor((sender, args) => { seenBefore += 1; })
        .Filter((entity) =>
        {
            return true;
        })
        .AddMonitor((sender, args) => { seenAfter += 1; });
    pipeline.Start();

    Assert.AreEqual(1, seenBefore);
    Assert.AreEqual(0, seenAfter);
}