/// <summary>
/// Locates the first node whose class attribute is "Picture220", rewrites the
/// "../../" prefix of its image URL to "http://rrxf.cn/", prints the resulting
/// URL and passes it to <c>DownloadPicture</c>.
/// </summary>
/// <param name="nodes">Node list to search; matching is recursive.</param>
private void ParseProductShowPhoto(NodeList nodes)
{
    NodeFilter showFilter = new HasAttributeFilter("class", "Picture220");
    NodeList showNodes = nodes.ExtractAllNodesThatMatch(showFilter, true);

    // The element may be absent, or the class may sit on a tag that is not an
    // image; in either case the "as" cast yields null, so bail out instead of
    // dereferencing it.
    ImageTag showTag = showNodes.Size() > 0 ? showNodes[0] as ImageTag : null;
    if (showTag == null || showTag.ImageURL == null)
    {
        Console.Error.WriteLine("ParseProductShowPhoto: no image with class 'Picture220' found; skipping.");
        return;
    }

    // The absolute URL is written back onto the tag itself, exactly as before.
    showTag.ImageURL = showTag.ImageURL.Replace("../../", "http://rrxf.cn/");
    Console.WriteLine(showTag.ImageURL);
    DownloadPicture(showTag.ImageURL);
}
/// <summary>
/// Collects every node whose class attribute is "Picture40" (searching
/// recursively) and hands the resulting list to <c>DownloadPictures</c>.
/// </summary>
/// <param name="nodes">Node list to search.</param>
private void ParseProductDemoPhoto(NodeList nodes)
{
    DownloadPictures(
        nodes.ExtractAllNodesThatMatch(new HasAttributeFilter("class", "Picture40"), true));
}
/// <summary>
/// Prints the plain text of the first node whose class attribute is
/// "prouductx". Does nothing except emit a diagnostic when no such node exists.
/// </summary>
/// <param name="nodes">Node list to search; matching is recursive.</param>
private static void ParseProductTitle(NodeList nodes)
{
    NodeFilter titleFilter = new HasAttributeFilter("class", "prouductx");
    NodeList titleNodes = nodes.ExtractAllNodesThatMatch(titleFilter, true);

    // Guard against pages that do not contain the title element, which
    // previously caused a failure when the first entry was dereferenced.
    if (titleNodes.Size() == 0 || titleNodes[0] == null)
    {
        Console.Error.WriteLine("ParseProductTitle: no node with class 'prouductx' found; skipping.");
        return;
    }

    Console.WriteLine(titleNodes[0].ToPlainTextString());
}
/// <summary>
/// Extracts the nodes with class "miao", downloads every image found inside
/// them, then prints the area's HTML after rewriting image references:
/// the rrxf.cn host prefix becomes <c>pictureURL + "/"</c>, "pic/" and
/// "bigpic/" path segments become "pic_" / "bigpic_", and every "-" becomes "_".
/// </summary>
/// <param name="nodes">Node list to search; matching is recursive.</param>
private void ParsePorductDescribe(NodeList nodes)
{
    NodeList descriptionArea =
        nodes.ExtractAllNodesThatMatch(new HasAttributeFilter("class", "miao"), true);
    NodeList embeddedImages =
        descriptionArea.ExtractAllNodesThatMatch(new NodeClassFilter(typeof(ImageTag)), true);

    // Images are downloaded before the HTML is serialized, same order as before.
    DownloadPictures(embeddedImages);

    string rawHtml = descriptionArea.AsHtml();

    // Step 1: swap the remote host (with or without "www.") for the local picture base.
    string rehosted = Regex.Replace(
        rawHtml, @"http\://(www\.|)rrxf\.cn/", pictureURL + "/", RegexOptions.IgnoreCase);

    // Step 2: fold the "pic/" and "bigpic/" directory segments into a file-name prefix.
    string flattened = Regex.Replace(
        rehosted, @"(pic|bigpic)/", "$1_", RegexOptions.IgnoreCase);

    // Step 3: replace every hyphen in the markup with an underscore.
    // NOTE(review): this applies to the whole HTML string, not only to image
    // file names — confirm that hyphens elsewhere (e.g. inline styles) are meant to change.
    string description = flattened.Replace("-", "_");

    Console.WriteLine(description);
}
/// <summary>
/// Returns every image tag found (recursively) in <paramref name="result"/>
/// except those whose <c>src</c> attribute equals one of the excluded icon URLs.
/// </summary>
/// <remarks>
/// The previous implementation chained XOR filters. For image tags that is
/// equivalent to "image and not excluded", but XOR also accepted any
/// non-image node whose <c>src</c> matched an excluded URL. The filter is now
/// an explicit AND / NOT composition, so only image tags can be returned.
/// </remarks>
/// <param name="result">Node list to search.</param>
/// <returns>The matching image nodes, in traversal order.</returns>
public NodeList GetPicturesForDetailHtml(NodeList result)
{
    string[] excludedSources = new string[]
    {
        "http://a.tbcdn.cn/sys/common/icon/btn/add_to_share.png",
        "http://img04.taobaocdn.com/tps/i4/T1qU4sXiXxXXXXXXXX-114-25.png"
    };

    // Start from "is an image" and narrow it once per excluded URL.
    NodeFilter wanted = new NodeClassFilter(typeof(ImageTag));
    foreach (string source in excludedSources)
    {
        wanted = new AndFilter(wanted, new NotFilter(new HasAttributeFilter("src", source)));
    }

    return result.ExtractAllNodesThatMatch(wanted, true);
}