/// <summary>
/// Smoke test: invokes <c>SpiderRrxf.ParseProduct</c> with a fixed rrxf.cn
/// URL (<c>lookcp.php?cpid=5041</c>). No assertions are made on the outcome.
/// </summary>
public void ParseProductTest()
{
    // TODO: initialize to an appropriate value
    SpiderRrxf spider = new SpiderRrxf();

    // TODO: initialize to an appropriate value
    ATag productLink = new ATag
    {
        Link = "http://rrxf.cn/product/lookcp.php?cpid=5041"
    };

    spider.ParseProduct(productLink);
}
/// <summary>
/// Smoke test: invokes <c>SpiderRrxf.ParseProducts</c> with a fixed rrxf.cn
/// URL (<c>index.php?cplm=-252-</c>). No assertions are made on the outcome.
/// </summary>
public void ParseProductsTest()
{
    // TODO: initialize to an appropriate value
    SpiderRrxf spider = new SpiderRrxf();

    // TODO: initialize to an appropriate value
    ATag listingLink = new ATag
    {
        Link = "http://rrxf.cn/product/index.php?cplm=-252-"
    };

    spider.ParseProducts(listingLink);
}
/// <summary>
/// Invokes <c>SpiderRrxf.ParseProduct</c> with a fixed www.rrxf.cn URL
/// (<c>lookcp.php?cpid=4163</c>) and then marks the test inconclusive,
/// because the method under test returns nothing to verify.
/// </summary>
public void ParseProductTest()
{
    // TODO: initialize to an appropriate value
    SpiderRrxf spider = new SpiderRrxf();

    // TODO: initialize to an appropriate value
    ATag productLink = new ATag
    {
        Link = "http://www.rrxf.cn/product/lookcp.php?cpid=4163"
    };

    spider.ParseProduct(productLink);

    // Message reads: "A method that does not return a value cannot be verified."
    Assert.Inconclusive("无法验证不返回值的方法。");
}
/// <summary>
/// Invokes <c>SpiderRrxf.ParseProducts</c> with a fixed www.rrxf.cn keyword
/// search URL and then marks the test inconclusive, because the method under
/// test returns nothing to verify.
/// </summary>
public void ParseProductsTest()
{
    // TODO: initialize to an appropriate value
    SpiderRrxf spider = new SpiderRrxf();

    // TODO: initialize to an appropriate value
    ATag searchLink = new ATag
    {
        Link = "http://www.rrxf.cn/product/index.php?keyword=荷叶边蕾丝性感吊带睡裙"
    };

    spider.ParseProducts(searchLink);

    // Message reads: "A method that does not return a value cannot be verified."
    Assert.Inconclusive("无法验证不返回值的方法。");
}
/// <summary>
/// Calls <c>Get1010Jobs.GetJobInfoParser</c> with a fixed 1010jz.com URL and
/// writes the returned job's <c>title</c> and <c>poster_email</c> to the
/// console. No assertions are made on the result.
/// </summary>
public void GetJobInfoParserTest()
{
    // TODO: initialize to an appropriate value
    Category catalog = new Category();

    // TODO: initialize to an appropriate value
    Get1010Jobs target = new Get1010Jobs(catalog);

    // TODO: initialize to an appropriate value
    string url = "http://sh.1010jz.com/html/shanghai/011_1522688.html";

    Job actual = target.GetJobInfoParser(url);

    Console.WriteLine("title:{0}", actual.title);
    Console.WriteLine("email:{0}", actual.poster_email);
}
/// <summary>
/// Builds a <c>LinkData</c> from the given tag and the current page data,
/// then appends it to the page's <c>m_Outlinks</c> collection.
/// </summary>
/// <param name="obTag">Anchor tag to record as a link of the current page.</param>
private void ProcessLinkTag(ATag obTag)
{
    LinkData outlink = new LinkData(m_obPageData, obTag);

    m_obPageData.m_Outlinks.Add(outlink);
}
/// <summary>
/// Creates a link record for a page and populates it from an anchor tag.
/// Chains to the single-argument constructor with <paramref name="obPage"/>,
/// then hands <paramref name="obTag"/> to <c>ConvertFromLinkTag</c>.
/// </summary>
/// <param name="obPage">Page data forwarded to the single-argument constructor.</param>
/// <param name="obTag">Anchor tag whose values are copied into this instance.</param>
public LinkData(PageData obPage, ATag obTag)
    : this(obPage)
{
    ConvertFromLinkTag(obTag);
}
/// <summary>
/// Copies the text, URL and link classification of an anchor tag into this
/// instance, after letting the base class process the tag.
/// </summary>
/// <param name="obTag">Anchor tag to read from; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="obTag"/> is null.
/// </exception>
private void ConvertFromLinkTag(ATag obTag)
{
    if (obTag == null)
    {
        throw new ArgumentNullException("obTag", "Null ATag object specified");
    }

    base.ConvertFromTag(obTag);

    m_strText = obTag.LinkText;
    m_strUrl = obTag.Link;

    // Default classification; overridden below for mail, FTP and IRC links.
    m_LinkType = LinkType.Outlink;

    if (obTag.MailLink)
    {
        m_LinkType = LinkType.MailLink;
        return;
    }

    if (obTag.FTPLink)
    {
        m_LinkType = LinkType.Ftp;
        return;
    }

    if (obTag.IRCLink)
    {
        // IRC links have no dedicated category here and are filed under Other.
        m_LinkType = LinkType.Other;
    }
}
/// <summary>
/// Processes a product listing page: obtains its HTML through
/// <c>GetHtml</c>, selects every node whose <c>class</c> attribute is
/// <c>photoyi</c>, turns each into a product link via
/// <c>ParseProductUrl</c>, prints that link and passes it to
/// <c>ParseProduct</c>.
/// </summary>
/// <param name="a">
/// Tag whose <c>Link</c> addresses the listing page; any <c>"../"</c> in it
/// is replaced with <c>"http://rrxf.cn/"</c> before the page is requested.
/// </param>
public void ParseProducts(ATag a)
{
    string pageUrl = a.Link.Replace("../", "http://rrxf.cn/");
    string pageHtml = GetHtml(pageUrl);

    Parser htmlParser = new Parser(new Lexer(pageHtml));
    NodeList photoNodes = htmlParser.Parse(new HasAttributeFilter("class", "photoyi"));

    if (photoNodes == null)
    {
        return;
    }

    // The count is read once up front, exactly as many entries are visited.
    for (int index = 0, total = photoNodes.Count; index < total; index++)
    {
        string nodeHtml = photoNodes[index].ToHtml();
        ATag productLink = ParseProductUrl(nodeHtml);

        Console.WriteLine(productLink.Link);
        ParseProduct(productLink);
    }
}
/// <summary>
/// Processes a single product page: obtains its HTML through
/// <c>GetHtml</c>, extracts the nodes whose <c>id</c> attribute is
/// <c>productyou</c>, and hands them to the title, show-photo, demo-photo
/// and description parsers. It then scans the nested nodes whose
/// <c>class</c> attribute is <c>cph</c> and passes the first child of each
/// one whose text starts with "颜色" (color) to <c>ParseProductColors</c>.
/// </summary>
/// <param name="a">Tag whose <c>Link</c> addresses the product page.</param>
public void ParseProduct(ATag a)
{
    string html = GetHtml(a.Link);
    Parser parser = new Parser(new Lexer(html));

    NodeFilter productArea = new HasAttributeFilter("id", "productyou");
    NodeList nodes = parser.ExtractAllNodesThatMatch(productArea);

    ParseProductTitle(nodes);
    ParseProductShowPhoto(nodes);
    ParseProductDemoPhoto(nodes);
    ParsePorductDescribe(nodes);

    NodeFilter productAttributes = new HasAttributeFilter("class", "cph");
    NodeList productAttributeNodes = nodes.ExtractAllNodesThatMatch(productAttributes, true);

    int length = productAttributeNodes.Count;
    for (int i = 0; i < length; i++)
    {
        // An attribute node without children has no label to inspect; skip it
        // instead of failing on the [0] lookup.
        NodeList children = productAttributeNodes[i].Children;
        if (children != null && children.Count > 0)
        {
            INode label = children[0];
            string labelText = label.ToPlainTextString();

            // "颜色" means "color"; leading whitespace before it is tolerated.
            if (Regex.IsMatch(labelText, @"^\s{0,}颜色", RegexOptions.IgnoreCase))
            {
                ParseProductColors(label);
            }
        }

        // One blank console line is emitted per attribute node.
        Console.WriteLine();
    }
}