/// <summary>
/// Runs a pipeline created by <c>PipelineTask.FromJsonFile("Course", ...)</c> into a
/// text file at <c>Output</c>, then asserts that <c>TestHelper.CompareTwoFile</c>
/// returns true for <c>SampleJsonOutput</c> and <c>Output</c>.
/// </summary>
public void JsonBasic()
{
    // Entity model rooted at ".//Results" with two columns: Name and Desc.
    var entityModel = new XMLEntityModel(@".//Results");
    entityModel.AddXMLColumn("Name", "./Name");
    entityModel.AddXMLColumn("Desc", "./Desc");

    // Build the source stage first, then attach the text-file sink and run.
    var jsonSource = PipelineTask.FromJsonFile("Course", entityModel);
    jsonSource.ToTextFile(Output).Start();

    bool outputMatchesSample = TestHelper.CompareTwoFile(SampleJsonOutput, Output);
    Assert.IsTrue(outputMatchesSample);
}
/// <summary>
/// Runs a pipeline created by <c>PipelineTask.FromXmlFile(XmlSource, ...)</c> into a
/// text file at <c>Output</c>, then asserts that <c>TestHelper.CompareTwoFile</c>
/// returns true for <c>SampleXMLOutput</c> and <c>Output</c>.
/// </summary>
public void XmlBasic()
{
    // Entity model rooted at ".//Entity" with two columns: col1 and col2.
    var entityModel = new XMLEntityModel(@".//Entity");
    entityModel.AddXMLColumn("col1", "./col1");
    entityModel.AddXMLColumn("col2", "./col2");

    // Build the source stage first, then attach the text-file sink and run.
    var xmlSource = PipelineTask.FromXmlFile(XmlSource, entityModel);
    xmlSource.ToTextFile(Output).Start();

    bool outputMatchesSample = TestHelper.CompareTwoFile(SampleXMLOutput, Output);
    Assert.IsTrue(outputMatchesSample);
}
/// <summary>
/// Issues a guest-login request to bbs.sjtu.edu.cn, collects the cookies of that
/// response into a <see cref="CookieContainer"/>, and then starts a pipeline over a
/// <c>WebCrawlerSourceNode</c> built with that container. Each entity handed to the
/// monitor callback is written to the console. The test contains no assertions.
/// </summary>
public void CrawlerTest()
{
    CookieContainer cookieContainer = new CookieContainer();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(@"https://bbs.sjtu.edu.cn/bbslogin?id=guest");
    request.ProtocolVersion = HttpVersion.Version10;
    request.ContentType = "application/x-www-form-urlencoded";
    request.UserAgent = @"Mozilla/5.0";

    // HttpWebResponse.Cookies is only populated when the request carries a
    // CookieContainer; without this assignment the login cookies are never captured.
    request.CookieContainer = cookieContainer;

    // Dispose the response so the underlying connection is released.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        cookieContainer.Add(response.Cookies);
    }

    string url = @"https://bbs.sjtu.edu.cn/bbsdoc?board=PPPerson";

    // One entity per table row after the first; the single column "ID" is the first cell.
    var model = new XMLEntityModel(@".//tr[position() > 1]");
    model.AddXMLColumn("ID", @"./td[1]");

    WebCrawlerSourceNode crawler = new WebCrawlerSourceNode(
        new string[] { url },
        model,
        cookieContainer,
        @"bbs.sjtu.edu.cn");

    PipelineTask.Create(crawler)
        .AddMonitor((entity) => { Console.WriteLine(entity); })
        .Start();
}
/// <summary>
/// Runs a pipeline that starts from <c>PipelineTask.FromWeb</c> on a Wikipedia URL,
/// applies <c>ParseHtml</c> with a two-column model ("GB", "Province"), and writes to
/// a text file at <c>Output</c>. Afterwards asserts that
/// <c>TestHelper.CompareTwoFile</c> returns true for <c>Province</c> and <c>Output</c>.
/// </summary>
public void HtmlBasic()
{
    // Rows of the "wikitable sortable" table that carry no attributes.
    XMLEntityModel model = new XMLEntityModel(@"//table[@class='wikitable sortable']/tr[not(@*)]");
    model.AddXMLColumn("GB", "./td[1]");
    model.AddXMLColumn("Province", "./td[3]");

    PipelineTask.FromWeb("http://en.wikipedia.org/wiki/China_provinces")
        .ParseHtml(model)
        // The monitor callback ignores its argument and writes an empty line.
        .AddMonitor((entity) => { Console.WriteLine(); })
        .ToTextFile(Output, model)
        .Start();

    // The comparison result was previously discarded, so a mismatch could never
    // fail the test; assert on it like the other tests in this file do.
    Assert.IsTrue(TestHelper.CompareTwoFile(Province, Output));
}