/// <summary>
/// Locates a product-detail link in the supplied HTML and rewrites its
/// <c>Link</c> into an absolute URL.
/// </summary>
/// <param name="html">HTML markup to scan.</param>
/// <returns>
/// The first node returned by the parser for the <c>lookcp.php?cpid=</c> link
/// pattern, with <c>http://rrxf.cn/product/</c> prepended to its <c>Link</c>;
/// <c>null</c> when the markup contains no such anchor.
/// </returns>
public ATag ParseProductUrl(string html)
{
    Parser parser = new Parser(new Lexer(html));
    NodeFilter filter = new LinkRegexFilter(@"lookcp\.php\?cpid\=\d{0,}");
    NodeList matches = parser.Parse(filter);

    // Nothing matched: report "not found" instead of dereferencing a null anchor.
    if (matches == null || matches.Count == 0)
    {
        return null;
    }

    // The 'as' cast yields null if the matched node is not an anchor tag.
    ATag a = matches[0] as ATag;
    if (a == null)
    {
        return null;
    }

    // The matched link is relative; the anchor is modified in place.
    a.Link = "http://rrxf.cn/product/" + a.Link;
    return a;
}
/// <summary>
/// Downloads the page at <c>urlBase</c>, parses it as gb2312 and stores every
/// link matching the <c>s=&lt;digits&gt;#J_FilterTabBar</c> pattern in
/// <c>_linkResult</c>.
/// </summary>
public void InitPage()
{
    Parser pageParser = new Parser(new Lexer(GetHtml(urlBase)));
    pageParser.Encoding = "gb2312";

    NodeFilter tabBarLinks = new LinkRegexFilter(@"s\=\d+\#J_FilterTabBar");
    _linkResult = pageParser.Parse(tabBarLinks);
}
/// <summary>
/// Extracts the catalogue links found directly under the first element whose
/// <c>class</c> attribute is <c>fenlei_list</c>.
/// </summary>
/// <param name="html">HTML markup to scan.</param>
/// <returns>
/// The anchors of the catalogue entries, in the order the entries appear in the
/// container's child list. Never <c>null</c> and never contains <c>null</c>
/// items; empty when the container or matching entries are missing.
/// </returns>
public List<ATag> ParseCatelog(string html)
{
    List<ATag> atags = new List<ATag>();

    Parser parser = new Parser(new Lexer(html));
    NodeList navNodes = parser.Parse(new HasAttributeFilter("class", "fenlei_list"));

    // No navigation container on the page: nothing to extract.
    if (navNodes == null || navNodes.Count == 0)
    {
        return atags;
    }

    INode navRoot = navNodes[0];
    if (navRoot == null || navRoot.Children == null)
    {
        return atags;
    }

    // Keep the link filter separate from its HasChildFilter wrapper so it can be
    // reused below to locate the anchor inside each accepted entry.
    NodeFilter linkFilter = new LinkRegexFilter(@"^\.\./product/index\.php\?cplm\=\-\d\d\d\-$");
    NodeFilter hasLinkChild = new HasChildFilter(linkFilter);

    NodeList catelogNodes = navRoot.Children.ExtractAllNodesThatMatch(hasLinkChild);
    if (catelogNodes == null)
    {
        return atags;
    }

    int length = catelogNodes.Count;
    for (int i = 0; i < length; i++)
    {
        NodeList children = catelogNodes[i].Children;
        if (children == null || children.Count == 0)
        {
            continue;
        }

        // Usual case: the anchor is the entry's first child.
        ATag a = children[0] as ATag;
        if (a == null)
        {
            // The first child is something else (e.g. a text node); fall back to
            // the child that satisfies the link filter.
            NodeList links = children.ExtractAllNodesThatMatch(linkFilter);
            if (links != null && links.Count > 0)
            {
                a = links[0] as ATag;
            }
        }

        // Never hand a null entry back to the caller.
        if (a != null)
        {
            atags.Add(a);
        }
    }

    return atags;
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, parses it as gb2312 and appends
/// to <c>ItemLink</c> every node that is both a link matching the
/// <c>http://item.taobao.com/item.htm?id=&lt;digits&gt;</c> pattern and carries
/// the <c>class</c> attribute value <c>EventCanSelect</c>.
/// </summary>
/// <param name="url">Address of the page to download and scan.</param>
public void GetLinkForPage(string url)
{
    Parser pageParser = new Parser(new Lexer(GetHtml(url)));
    pageParser.Encoding = "gb2312";

    // A node must satisfy both filters to be collected.
    NodeFilter itemUrlFilter = new LinkRegexFilter(@"^http\://item\.taobao\.com/item\.htm\?id\=\d+$");
    NodeFilter selectableFilter = new HasAttributeFilter("class", "EventCanSelect");
    NodeList matches = pageParser.Parse(new AndFilter(itemUrlFilter, selectableFilter));

    int total = matches.Count;
    int index = 0;
    while (index < total)
    {
        ItemLink.Add(matches[index]);
        index++;
    }
}