Exemple #1
0
        public Objects.Post GetPostDetail(string link)
        {
            Objects.Post p = new Objects.Post();
            HtmlAgilityPack.HtmlDocument detailpage = new HtmlAgilityPack.HtmlDocument();
            detailpage.LoadUri(link);

            int startLine = detailpage.DocumentNode.SelectNodes("//h2").First().Line;
            int endLine = detailpage.DocumentNode.ChildNodes.Where(x => x.NodeType == HtmlNodeType.Comment && x.InnerHtml.Contains("END CLTAGS")).First().Line;

            StringBuilder sb = new StringBuilder();
            foreach (HtmlNode c in detailpage.DocumentNode.ChildNodes.Where(x => x.Line >= startLine && x.Line <= endLine))
                sb.Append(c.OuterHtml);
            p.Content = sb.ToString();

            if (detailpage.DocumentNode.SelectNodes("//table").Where(x => x.Attributes.Where(y => y.Name == "summary").Count() > 0 && x.Attributes["summary"].Value.ToString() == "craigslist hosted images").Count() > 0)
            {
                foreach (HtmlNode n in detailpage.DocumentNode.SelectNodes("//table").Where(x => x.Attributes.Where(y => y.Name == "summary").Count() > 0 && x.Attributes["summary"].Value.ToString() == "craigslist hosted images").First().DescendantNodes().Where(x => x.Name == "img"))
                {
                    Objects.PostImage pi = new Objects.PostImage();
                    pi.Link = n.Attributes["src"].Value.ToString();
                    p.PostImages.Add(pi);
                }
            }

            return p;
        }
Exemple #2
0
        public List<Objects.Post> GetPosts(string sitename, string category)
        {
            List<Objects.Post> posts = new List<Objects.Post>();

            HtmlAgilityPack.HtmlDocument page = new HtmlAgilityPack.HtmlDocument();
            page.LoadUri("http://" + sitename + ".craigslist.org/" + category);
            foreach (HtmlNode tempPost in page.DocumentNode.SelectNodes("//p"))
            {
                if (tempPost.DescendantNodes().Where(x => x.Name == "a").Count() > 0)
                {
                    Objects.Post p = new Objects.Post();

                    HtmlNode a = tempPost.DescendantNodes().Where(x => x.Name == "a").First();
                    p.Title = a.InnerText;
                    p.Link = a.Attributes["href"].Value.ToString();

                    string price = tempPost.InnerText.Split('-')[1];
                    if (price.Contains('$'))
                        p.Price = price.Split('(')[0];
                       // p.Price = Regex.Replace(price, "[^.0-9]", "");

                    if (tempPost.DescendantNodes().Where(x => x.Name == "font").Count() > 0)
                    {
                        string l = tempPost.DescendantNodes().Where(x => x.Name == "font").First().InnerText;
                        l = l.Substring(2, l.Length - 3);
                        if (l != "")
                            p.Location = l;
                    }

                    DateTime d = DateTime.Today;
                    if (page.DocumentNode.SelectNodes("//h4").Where(x => x.Attributes.Where(y => y.Name == "class").Count() > 0 && x.Attributes["class"].Value.ToString() == "ban" && x.Line <= tempPost.Line).Count() > 0)
                    {
                        string t1 = page.DocumentNode.SelectNodes("//h4").Where(x => x.Attributes.Where(y => y.Name == "class").Count() > 0 && x.Attributes["class"].Value.ToString() == "ban" && x.Line <= tempPost.Line).Last().InnerText;
                        d = DateTime.Parse(t1.Split(' ')[1] + "/" + t1.Split(' ')[2] + "/" + DateTime.Today.Year.ToString());
                    }

                    p.Date = d;

                    if (tempPost.DescendantNodes().Where(x => x.Name == "span" && x.Attributes.Where(y => y.Name == "class").Count() > 0 && x.Attributes["class"].Value.ToString() == "p" && (x.InnerText == "pic" || x.InnerText == "img")).Count() > 0)
                    {
                        HtmlAgilityPack.HtmlDocument detailpage = new HtmlAgilityPack.HtmlDocument();
                        detailpage.LoadUri(p.Link);

                        int startLine = detailpage.DocumentNode.SelectNodes("//h2").First().Line;
                        int endLine = detailpage.DocumentNode.ChildNodes.Where(x => x.NodeType == HtmlNodeType.Comment && x.InnerHtml.Contains("END CLTAGS")).First().Line;

                        StringBuilder sb = new StringBuilder();
                        foreach (HtmlNode c in detailpage.DocumentNode.ChildNodes.Where(x => x.Line >= startLine && x.Line <= endLine))
                            sb.Append(c.OuterHtml);
                        p.Content = sb.ToString();

                        if (detailpage.DocumentNode.SelectNodes("//table").Where(x => x.Attributes.Where(y => y.Name == "summary").Count() > 0 && x.Attributes["summary"].Value.ToString() == "craigslist hosted images").Count() > 0)
                        {
                            foreach (HtmlNode n in detailpage.DocumentNode.SelectNodes("//table").Where(x => x.Attributes.Where(y => y.Name == "summary").Count() > 0 && x.Attributes["summary"].Value.ToString() == "craigslist hosted images").First().DescendantNodes().Where(x => x.Name == "img"))
                            {
                                Objects.PostImage pi = new Objects.PostImage();
                                pi.Link = n.Attributes["src"].Value.ToString();
                                p.PostImages.Add(pi);
                            }
                        }
                    }

                    posts.Add(p);
                }
            }

            return posts;
        }