Exemplo n.º 1
0
        /// <summary>
        /// Downloads the page behind <paramref name="result"/> and scrapes it into a <see cref="Post"/>.
        /// </summary>
        /// <param name="result">
        /// Search hit whose <c>Link</c> is downloaded and whose <c>Title</c> is used in debug output.
        /// </param>
        /// <returns>
        /// A post created from <paramref name="result"/> with its user name, user mail,
        /// property list and description filled in from the page.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when the <c>ContainerMain</c> element, the <c>content-border</c> node or the
        /// <c>lbcContainer</c> node cannot be found in the downloaded document.
        /// </exception>
        public Post GetSinglePost(SearchResult result)
        {
            Post post = new Post(result);
            var document = RetrieveDocumentAsync(result.Link);

            // GetElementbyId yields null when the id is missing; report that explicitly
            // instead of letting the lookup below fail with a NullReferenceException.
            var body = document.GetElementbyId("ContainerMain");
            if (body == null)
            {
                throw new Exception("Couldn't find #ContainerMain");
            }

            var content = body.ChildNodes.FirstOrDefault(node => node.Attributes.Contains("class") &&
                                                                 node.Attributes["class"].Value.Contains("content-border"));
            if (content == null)
            {
                throw new Exception("Couldn't find any div with a .content-border class");
            }

            // HAP parses whitespace between tags as #text nodes, so ChildNodes[0] is a #text node
            // and the element we are interested in sits at index 1.
            content = content.ChildNodes[1].ChildNodes.FirstOrDefault(node => node.Attributes.Contains("class") &&
                                                                              node.Attributes["class"].Value.Contains("lbcContainer"));
            if (content == null)
            {
                throw new Exception("Couldn't find .lbcContainer");
            }

            // -- User info: the same link node carries the name (text) and the mail (href).
            var userLink = content.ChildNodes[1].ChildNodes[3].ChildNodes[1];
            post.UserName = StripLBCSpaces(userLink.InnerText);
            post.UserMail = StripLBCSpaces(userLink.Attributes["href"].Value);

            // -- Images
            // When the post has no image, the image container is replaced by an empty div,
            // which shifts the position of the blocks that follow it by one element.
            var details = content.ChildNodes[5];
            int currentChild;
            if (String.IsNullOrWhiteSpace(StripLBCSpaces(details.ChildNodes[1].InnerHtml)))
            {
                currentChild = 3;
                Debug.WriteLine("No images on node :" + result.Title);
            }
            else
            {
                currentChild = 5;
            }

            // -- Properties: every <tr> contributes one (label, value) pair.
            var infos = details.ChildNodes[currentChild].ChildNodes[1];
            foreach (var row in infos.ChildNodes.Where(node => node.Name.Equals("tr")))
            {
                post.Properties.Add(new Tuple<string, string>(StripLBCSpaces(row.ChildNodes[1].InnerText),
                                                              StripLBCSpaces(row.ChildNodes[3].InnerText)));
            }

            // -- Description: located two siblings after the properties block.
            currentChild += 2;
            post.Description = StripLBCSpaces(details.ChildNodes[currentChild].ChildNodes[3].InnerText);

            return post;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads <paramref name="uri"/> and parses the response body into an <see cref="HtmlDocument"/>.
        /// </summary>
        /// <remarks>
        /// Despite the <c>Async</c> suffix, this method is synchronous: it blocks until the
        /// whole response has been read.
        /// </remarks>
        /// <param name="uri">Address of the page to download.</param>
        /// <returns>The parsed document, loaded with <c>OptionAutoCloseOnEnd</c> enabled.</returns>
        private HtmlDocument RetrieveDocumentAsync(string uri)
        {
            Debug.WriteLine("Retrieving " + uri + "...");
            var connection = (HttpWebRequest)HttpWebRequest.Create(uri);

            HtmlDocument doc = new HtmlDocument();
            doc.OptionAutoCloseOnEnd = true;

            // Dispose the response as well as the reader so the underlying connection is released.
            using (var response = connection.GetResponse())
            using (StreamReader sr = new StreamReader(response.GetResponseStream()))
            {
                // NOTE(review): StreamReader decodes as UTF-8 unless a byte-order mark says otherwise;
                // the charset declared by the response is not consulted.
                doc.LoadHtml(sr.ReadToEnd());
            }

            Debug.WriteLine("Done.");
            return doc;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Fetches the page that <paramref name="result"/> links to and extracts its details into a <see cref="Post"/>.
        /// </summary>
        /// <param name="result">
        /// Search hit whose <c>Link</c> is downloaded and whose <c>Title</c> is used in debug output.
        /// </param>
        /// <returns>
        /// A post created from <paramref name="result"/> with its user name, user mail,
        /// property list and description filled in from the page.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when the <c>ContainerMain</c> element, the <c>content-border</c> node or the
        /// <c>lbcContainer</c> node cannot be found in the downloaded document.
        /// </exception>
        public Post GetSinglePost(SearchResult result)
        {
            Post post     = new Post(result);
            var  document = RetrieveDocumentAsync(result.Link);

            // A missing id makes GetElementbyId return null; fail with a clear message
            // rather than with a NullReferenceException on the next statement.
            var body = document.GetElementbyId("ContainerMain");

            if (body == null)
            {
                throw new Exception("Couldn't find #ContainerMain");
            }

            var content = body.ChildNodes.FirstOrDefault(node => node.Attributes.Contains("class") &&
                                                         node.Attributes["class"].Value.Contains("content-border"));

            if (content == null)
            {
                throw new Exception("Couldn't find any div with a .content-border class");
            }

            // HAP parses whitespace between tags as #text nodes, so ChildNodes[0] is a #text node
            // and the element we are interested in sits at index 1.
            content = content.ChildNodes[1].ChildNodes.FirstOrDefault(node => node.Attributes.Contains("class") &&
                                                                      node.Attributes["class"].Value.Contains("lbcContainer"));

            if (content == null)
            {
                throw new Exception("Couldn't find .lbcContainer");
            }

            // -- User info: one link node provides both the name (text) and the mail (href).
            var userLink = content.ChildNodes[1].ChildNodes[3].ChildNodes[1];
            post.UserName = StripLBCSpaces(userLink.InnerText);
            post.UserMail = StripLBCSpaces(userLink.Attributes["href"].Value);

            // -- Images
            // Without images the image container is replaced by an empty div, so the
            // blocks that follow it are found one element earlier.
            var details = content.ChildNodes[5];
            int currentChild;

            if (String.IsNullOrWhiteSpace(StripLBCSpaces(details.ChildNodes[1].InnerHtml)))
            {
                currentChild = 3;
                Debug.WriteLine("No images on node :" + result.Title);
            }
            else
            {
                currentChild = 5;
            }

            // -- Properties: each <tr> holds a label cell and a value cell.
            var infos = details.ChildNodes[currentChild].ChildNodes[1];

            foreach (var node in infos.ChildNodes)
            {
                if (!node.Name.Equals("tr"))
                {
                    continue;
                }

                post.Properties.Add(new Tuple<string, string>(StripLBCSpaces(node.ChildNodes[1].InnerText),
                                                              StripLBCSpaces(node.ChildNodes[3].InnerText)));
            }

            // -- Description: located two siblings after the properties block.
            currentChild    += 2;
            post.Description = StripLBCSpaces(details.ChildNodes[currentChild].ChildNodes[3].InnerText);

            return post;
        }