Beispiel #1
0
 /// <summary>
 /// Looks up an attribute on an element through the element's attribute indexer.
 /// </summary>
 /// <param name="el">Element to read from; may be null.</param>
 /// <param name="name">Attribute name passed to the indexer.</param>
 /// <returns>
 /// Null when <paramref name="el"/> is null; otherwise whatever
 /// <c>el.Attributes[name]</c> yields.
 /// </returns>
 public static string GetAttr(NSoup.Nodes.Element el, string name)
 {
     // Without an element there is nothing to look up.
     if (el == null)
     {
         return null;
     }

     string value = el.Attributes[name];
     return value;
 }
Beispiel #2
0
        /// <summary>
        /// Runs a CSS selector query against an element and returns the matches as a list.
        /// </summary>
        /// <param name="element">Element the query is evaluated on.</param>
        /// <param name="cssQuery">CSS selector passed to <c>element.Select</c>.</param>
        /// <returns>A new list holding a copy of the matched elements.</returns>
        public static List<NSoup.Nodes.Element> Select(NSoup.Nodes.Element element, string cssQuery)
        {
            NSoup.Select.Elements matches = element.Select(cssQuery);

            // Copy into a standalone List so callers get a plain generic collection.
            return new List<NSoup.Nodes.Element>(matches.ToArray());
        }
Beispiel #3
0
        /// <summary>
        /// Requests a Google image search page for the given text and extracts image URLs
        /// from the markup of the result page.
        /// </summary>
        /// <param name="question">Search text; commas are removed and the rest is URL-escaped.</param>
        /// <param name="userAgent">User-Agent value sent with the request.</param>
        /// <returns>
        /// The extracted image URLs. The list is empty when nothing matched, and holds whatever
        /// was collected so far when an exception occurred (the exception is stored in <c>Error</c>).
        /// </returns>
        public List <String> FindImages(String question, String userAgent)
        {
            List <String> imagesList = new List <String>();

            try
            {
                // Escape the search text so characters such as '&', '#' or spaces cannot
                // break or truncate the query string.
                String googleUrl = "https://www.google.com/search?tbm=isch&q=" + Uri.EscapeDataString(question.Replace(",", ""));

                NSoup.Nodes.Document htmlDoc = NSoupClient.Connect(googleUrl).UserAgent(userAgent).Timeout(10 * 1000).Get();
                //Handling correctly auto redirects...
                checkForRedirectsOnHTMLDocument(ref htmlDoc, userAgent);

                // Everything between "ou&quot;:&quot;" and "&quot;"; built once, reused for every result.
                Regex imageUrlRegex = new Regex("ou&quot;:&quot;(.*?)&quot;");

                // div with class="y yi" containing div with class="rg_di rg_bx rg_el ivg-i"
                NSoup.Select.Elements div_with_images = htmlDoc.Select("div.y.yi div.rg_di.rg_bx.rg_el.ivg-i");
                foreach (NSoup.Nodes.Element div_with_image in div_with_images)
                {
                    NSoup.Select.Elements metaDivs = div_with_image.Select("div.rg_meta");
                    if (metaDivs.Count == 0)
                    {
                        // A tile without metadata must not abort the whole scan.
                        continue;
                    }

                    NSoup.Nodes.Element rg_meta_div = metaDivs.First();
                    if (rg_meta_div == null)
                    {
                        continue;
                    }

                    Match match = imageUrlRegex.Match(rg_meta_div.ToString());
                    if (match.Success && match.Groups[1].Value != String.Empty)
                    {
                        imagesList.Add(match.Groups[1].Value);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: remember the failure and return what was gathered so far.
                this.Error = ex;
            }

            return(imagesList);
        }
Beispiel #4
0
        /// <summary>
        /// Click handler: passes the trimmed text of <c>txtUrl</c> through
        /// <c>ContentHandle.ClearTag</c>, <c>BodyElement</c> and <c>GenerateElement</c>,
        /// then shows the result in <c>webContent</c>.
        /// </summary>
        private void btnContent_Click(object sender, EventArgs e)
        {
            ContentHandle contentHandle = new ContentHandle();

            // ClearTag receives the trimmed text box value; its output feeds BodyElement.
            string cleaned = contentHandle.ClearTag(txtUrl.Text.Trim());
            NSoup.Nodes.Element body = contentHandle.BodyElement(cleaned);

            webContent.DocumentText = contentHandle.GenerateElement(body);
        }
Beispiel #5
0
        /// <summary>
        /// Parses the users who visited the home page and saves those whose address
        /// contains "广州".
        /// </summary>
        /// <remarks>
        /// Logs in, requests <c>HomePageUrl</c>, and reads the <c>li</c> entries inside the
        /// element with id <c>show_style_01</c>. Entries that lack the expected markup or
        /// whose age cannot be parsed are skipped rather than aborting the whole run.
        /// </remarks>
        public void VisitParser()
        {
            this.Login();
            Result result   = this.Request(this.HomePageUrl);
            string pageHtml = result.Msg;

            if (string.IsNullOrWhiteSpace(pageHtml))
            {
                return;
            }

            NSoup.Nodes.Document doc         = NSoup.NSoupClient.Parse(pageHtml);
            NSoup.Nodes.Element  wrapElement = doc.GetElementById("show_style_01");
            if (wrapElement == null)
            {
                // The visitor container is not on the page; nothing to parse.
                return;
            }

            NSoup.Select.Elements userElementNodes = wrapElement.GetElementsByTag("li");
            if (userElementNodes == null || userElementNodes.Count == 0)
            {
                return;
            }

            // Process in reverse order so the newest entry comes last.
            IEnumerable <NSoup.Nodes.Element> userElements = userElementNodes.Reverse();
            foreach (NSoup.Nodes.Element userElement in userElements)
            {
                NSoup.Nodes.Element picElement      = GetElementFirst(userElement.GetElementsByClass("pic"));
                NSoup.Nodes.Element nameElement     = GetElementFirst(userElement.GetElementsByClass("user_name"));
                NSoup.Nodes.Element userInfoElement = GetElementFirst(userElement.GetElementsByClass("user_info"));
                if (picElement == null || userInfoElement == null)
                {
                    // Incomplete entry: skip it instead of failing on a null element.
                    continue;
                }

                string   userName = nameElement == null ? "" : nameElement.Child(0).Text();
                string   homePage = UriHelper.RemoveParams(nameElement == null ? "" : nameElement.Child(0).Attr("href"));
                string   pic      = picElement.Child(0).Child(0).Attr("src");
                string[] userInfo = StringHelper.SplitWhiteSpace(userInfoElement.Child(0).Text());
                if (userInfo == null || userInfo.Length == 0)
                {
                    continue;
                }

                int age;
                if (!int.TryParse(userInfo[0].Replace("岁", ""), out age))
                {
                    // The first token is not a number; do not store a bogus age.
                    continue;
                }

                string addr     = userInfo.Length > 1 ? userInfo[1] : string.Empty;
                string userCode = homePage.Substring(homePage.LastIndexOf('/') + 1);

                if (addr.Contains("广州") && !string.IsNullOrWhiteSpace(userCode))
                {
                    // Reuse the stored record when one exists, otherwise start a new one.
                    FateUserInfo user = FateUserInfoManager.GetUser(userCode);
                    if (user == null)
                    {
                        user = new FateUserInfo()
                        {
                            CreateTime = DateTime.Now
                        };
                    }
                    user.ModifyTime   = DateTime.Now;
                    user.UserCode     = userCode;
                    user.Address      = addr;
                    user.Age          = age;
                    user.HeadFileName = pic;
                    user.UserName     = userName;

                    FateUserInfoManager.SaveOrUpdateUser(user);
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Parses the text link in an H3: returns the part of the element's text that
        /// follows the first ':'.
        /// </summary>
        /// <param name="el">Element whose text is examined.</param>
        /// <returns>
        /// The text after the first ':'; an empty string when the text is blank or
        /// contains no ':'.
        /// </returns>
        private string GetContentLink(NSoup.Nodes.Element el)
        {
            string text = el.Text();
            if (string.IsNullOrWhiteSpace(text))
            {
                return string.Empty;
            }

            // NOTE(review): both alternatives of this pattern are the same character;
            // possibly one was meant to be a different colon variant - confirm.
            string[] parts = Regex.Split(text, ":|:");
            if (parts.Length <= 1)
            {
                return string.Empty;
            }

            // Drop the label before the first separator and re-join the remainder.
            return string.Join(":", parts.Skip(1));
        }
Beispiel #7
0
        /// <summary>
        /// Splits an element's inner HTML on <c>br</c> tags and returns the segments
        /// that <c>IsDownloadLink</c> accepts.
        /// </summary>
        /// <param name="el">Element whose inner HTML is examined.</param>
        /// <returns>
        /// The accepted segments in their original order; an empty list when the HTML
        /// is empty or contains no <c>br</c> separator.
        /// </returns>
        private List <string> GetSpanContentLink(NSoup.Nodes.Element el)
        {
            string html = el.Html();
            if (string.IsNullOrEmpty(html))
            {
                return new List <string>();
            }

            string[] segments = Regex.Split(html, "<br>|<br />|<br/>|<br >", RegexOptions.IgnoreCase);
            if (segments.Length <= 1)
            {
                // No separator found: a single unsplit segment is not inspected.
                return new List <string>();
            }

            return segments.Where(segment => IsDownloadLink(segment)).ToList();
        }
Beispiel #8
0
 /// <summary>
 /// Returns the text of an element, tolerating a null element.
 /// </summary>
 /// <param name="el">Element to read; may be null.</param>
 /// <returns>Null when <paramref name="el"/> is null; otherwise <c>el.Text()</c>.</returns>
 public static string GetText(NSoup.Nodes.Element el)
 {
     if (el == null)
     {
         return null;
     }

     return el.Text();
 }
Beispiel #9
0
 /// <summary>
 /// Returns the trimmed <c>href</c> attribute of an element.
 /// </summary>
 /// <param name="el">Element to read; may be null.</param>
 /// <returns>
 /// The trimmed attribute value, or an empty string when <paramref name="el"/> is
 /// null or the attribute lookup yields null.
 /// </returns>
 private string GetHref(NSoup.Nodes.Element el)
 {
     // A missing element is treated like a missing attribute instead of throwing.
     if (el == null)
     {
         return string.Empty;
     }

     return (el.Attr("href") ?? string.Empty).Trim();
 }