/// <summary>
/// Calls <c>SelectSingleNode</c> with <paramref name="xpath"/> on every node in
/// <c>HtmlNodes</c> and collects each non-null result into a new <c>XHtmlContent</c>.
/// </summary>
/// <param name="xpath">XPath expression handed unchanged to <c>SelectSingleNode</c>.</param>
/// <returns>A new <c>XHtmlContent</c> holding the selected nodes (possibly none).</returns>
public IContentSource Crawl(string xpath)
{
    var result = new XHtmlContent();

    foreach (var current in HtmlNodes)
    {
        var match = current.SelectSingleNode(xpath);
        if (match == null)
        {
            // Nothing was selected under this node; move on to the next one.
            continue;
        }

        result.HtmlNodes.Add(match);
    }

    return result;
}
/// <summary>
/// Calls <c>SelectNodes</c> with <paramref name="xpath"/> on every node in
/// <c>HtmlNodes</c> and collects all returned nodes into a new <c>XHtmlContent</c>.
/// </summary>
/// <param name="xpath">XPath expression handed unchanged to <c>SelectNodes</c>.</param>
/// <returns>A new <c>XHtmlContent</c> holding every selected node (possibly none).</returns>
public IContentSource CrawlList(string xpath)
{
    var result = new XHtmlContent();

    foreach (var current in HtmlNodes)
    {
        var matches = current.SelectNodes(xpath);
        if (matches == null)
        {
            // SelectNodes returned null for this node, so there is nothing to copy.
            continue;
        }

        foreach (var match in matches)
        {
            result.HtmlNodes.Add(match);
        }
    }

    return result;
}
/// <summary>
/// Builds a new <c>XHtmlContent</c> from the per-node results of
/// <c>SelectSingleNode(xpath)</c>, skipping nodes for which the call returns null.
/// </summary>
/// <param name="xpath">XPath expression evaluated against each entry of <c>HtmlNodes</c>.</param>
/// <returns>The newly created content containing the selected nodes.</returns>
public IContentSource Crawl(string xpath)
{
    var collected = new XHtmlContent();

    foreach (var source in HtmlNodes)
    {
        var picked = source.SelectSingleNode(xpath);

        // Only keep nodes that were actually found.
        if (picked == null)
        {
            continue;
        }

        collected.HtmlNodes.Add(picked);
    }

    return collected;
}
/// <summary>
/// Builds a new <c>XHtmlContent</c> from all nodes returned by
/// <c>SelectNodes(xpath)</c> for each entry of <c>HtmlNodes</c>; a null result
/// for an entry contributes nothing.
/// </summary>
/// <param name="xpath">XPath expression evaluated against each entry of <c>HtmlNodes</c>.</param>
/// <returns>The newly created content containing every selected node.</returns>
public IContentSource CrawlList(string xpath)
{
    var collected = new XHtmlContent();

    foreach (var source in HtmlNodes)
    {
        var pickedNodes = source.SelectNodes(xpath);

        // A null collection means this entry produced no selection.
        if (pickedNodes == null)
        {
            continue;
        }

        foreach (var picked in pickedNodes)
        {
            collected.HtmlNodes.Add(picked);
        }
    }

    return collected;
}
/// <summary>
/// Crawls <paramref name="content"/> using this instance's <c>XPath</c>.
/// </summary>
/// <param name="content">
/// Source to crawl. If its runtime type is not exactly <c>XmlContent</c> or
/// <c>XHtmlContent</c>, it is replaced by a new <c>XHtmlContent</c> loaded from
/// the original's <c>ToString()</c> text.
/// </param>
/// <param name="asList">
/// When true the content's <c>CrawlList</c> is invoked; otherwise its <c>Crawl</c>.
/// </param>
/// <returns>
/// The crawl result, or <c>GetDefault("Null content")</c> when either the input
/// or the crawl result is null.
/// </returns>
internal override IContentSource Crawl(IContentSource content, bool asList)
{
    if (content == null)
    {
        return GetDefault("Null content");
    }

    // The checks are on the exact runtime type, so derived types do not count.
    var isXml = content.GetType() == typeof(XmlContent);
    var isXHtml = content.GetType() == typeof(XHtmlContent);

    if (!isXml && !isXHtml)
    {
        // convert to XHTML
        var rawText = content.ToString();
        content = new XHtmlContent();
        content.LoadRaw(rawText);
        isXHtml = true;
    }

    if (isXHtml)
    {
        var html = (XHtmlContent)content;
        if (asList)
        {
            content = html.CrawlList(XPath);
        }
        else
        {
            content = html.Crawl(XPath);
        }
    }
    else
    {
        // Last case: the only remaining possibility is XmlContent.
        var xml = (XmlContent)content;
        if (asList)
        {
            content = xml.CrawlList(XPath);
        }
        else
        {
            content = xml.Crawl(XPath);
        }
    }

    if (content == null)
    {
        return GetDefault("Null content");
    }

    return content;
}
// Loads "data-types.html", crawls it into a TestDateTypesModel and checks every
// typed property of the model against its expected value.
public void TestCrawlDataTypes()
{
    // Arrange
    var page = new XHtmlContent();
    page.LoadFromFile("data-types.html");

    // Act
    var model = new NetCrawler(page).Crawl<TestDateTypesModel>();

    // Assert: integral and boolean members
    Assert.AreEqual(123, model.IntData);
    Assert.AreEqual(true, model.Bool1Data);
    Assert.AreEqual(true, model.Bool2Data);

    // Assert: fractional members
    Assert.AreEqual(123.123, model.FloatData);
    Assert.AreEqual(123.123, model.DecimalData);
    Assert.AreEqual(123.123, model.DoubleData);

    // Assert: enum and date members (only the date part is compared)
    Assert.AreEqual(eTestDateTypesModelEnum.EnumValue2, model.EnumData);
    Assert.AreEqual(new DateTime(2012, 6, 2), model.DateTimeData.Date);
}
// Loads "product-list.html", crawls it as a list of TestAppStoreProductSummary
// and checks that 15 items were produced.
public void TestCrawlList()
{
    // Arrange
    var page = new XHtmlContent();
    page.LoadFromFile("product-list.html");

    // Act
    var products = new NetCrawler(page).CrawlList<TestAppStoreProductSummary>();

    // Assert
    Assert.AreEqual(15, products.Count);

    // NOTE(review): the disabled checks below refer to "quickTimeProduct", which
    // is not declared in this method; they would need adapting before re-enabling.
    //Assert.IsTrue(!string.IsNullOrEmpty(quickTimeProduct.Description));
    //Assert.IsTrue(quickTimeProduct.IconUrl.IndexOf("http://store.storeimages.cdn-apple.com/6270") == 0);
}
// Loads "product-info.html", crawls it into a TestAppStoreProduct and checks the
// title, that a description is present, and that the icon URL begins with the
// expected prefix.
public void TestCrawSimpleStructure()
{
    // Arrange
    var page = new XHtmlContent();
    page.LoadFromFile("product-info.html");

    // Act
    var product = new NetCrawler(page).Crawl<TestAppStoreProduct>();

    // Assert (each condition is evaluated right before its own assertion)
    Assert.AreEqual("QuickTime 7 Pro for Windows", product.Title);

    var hasDescription = !string.IsNullOrEmpty(product.Description);
    Assert.IsTrue(hasDescription);

    var prefixPosition = product.IconUrl.IndexOf("http://store.storeimages.cdn-apple.com/6270");
    Assert.IsTrue(prefixPosition == 0);
}