Ejemplo n.º 1
0
        /// <summary>
        /// Fetches one page of a publisher's book listing, adds the book identifiers found on it
        /// to <paramref name="publisher"/>, and queues any further listing pages that have not
        /// been visited or queued yet.
        /// </summary>
        /// <param name="publisher">Publisher whose <c>Books</c> collection receives the identifiers.</param>
        /// <param name="url">Absolute URL of the listing page to fetch.</param>
        /// <param name="visitedLinks">URLs already fetched; <paramref name="url"/> is appended to it.</param>
        /// <param name="linksToVisit">
        /// Pending listing URLs; <paramref name="url"/> is removed from it and newly discovered
        /// pagination links are appended.
        /// </param>
        private void AddBooksToPublisher(Publisher publisher, string url, List<string> visitedLinks, List<string> linksToVisit)
        {
            // Bookkeeping happens before the download so the URL leaves the pending list
            // even when the request fails.
            visitedLinks.Add(url);
            linksToVisit.Remove(url);

            int httpCode = 0;
            string content = GetContent(url, ref httpCode);

            // Only parse successful responses; a failed request has nothing useful to load.
            if (httpCode != 200 || content == null)
            {
                return;
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(content);

            HtmlNodeCollection books = doc.DocumentNode.SelectNodes("//div[@id='conteudo']//a[not(@class='l11')]");
            if (books == null)
            {
                // SelectNodes returned null: no matching anchors on this page.
                return;
            }

            foreach (HtmlNode link in books)
            {
                // GetAttributeValue avoids a NullReferenceException on anchors without an href.
                string href = link.GetAttributeValue("href", string.Empty);
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                // Ordinal comparison: this is a URL path, not culture-sensitive text.
                if (href.StartsWith("/editora/livros/", System.StringComparison.Ordinal))
                {
                    // Pagination link: queue it unless it was already seen or queued.
                    string pageUrl = "http://www.skoob.com.br" + href;
                    if (!visitedLinks.Contains(pageUrl) && !linksToVisit.Contains(pageUrl))
                    {
                        linksToVisit.Add(pageUrl);
                    }
                }
                else
                {
                    // Strip the "/livro/" segment and keep the text before the first '-'.
                    // NOTE(review): every non-pagination anchor is treated as a book link;
                    // confirm the XPath above cannot match other kinds of links.
                    string bookId = href.Replace("/livro/", "").Split('-')[0];
                    publisher.Books.Add(bookId);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// For each publisher id in the supplied <see cref="Parameters"/>, downloads the publisher
        /// page, extracts name, description, image, authors, blog, site and network links, walks the
        /// paginated book listing, then saves the serialized publisher and its image.
        /// Ids whose page does not return HTTP 200 are skipped.
        /// </summary>
        /// <param name="info">
        /// A <see cref="Parameters"/> instance passed as <see cref="object"/>; it is cast directly,
        /// so any other type throws <c>InvalidCastException</c>.
        /// NOTE(review): presumably typed as object so the method can serve as a thread or
        /// work-item callback; confirm against the caller.
        /// </param>
        public void ParsePublishers(object info)
        {
            JavaScriptSerializer oSerializer = new JavaScriptSerializer();
            Parameters parameters = (Parameters)info;

            foreach (int id in parameters.Ids)
            {
                // Reset per id: the status is passed by ref, so a value left over from the
                // previous iteration must never be mistaken for this request's result.
                int httpCode = 0;
                string publisherUrl = string.Format(baseUrl, "editora", id);
                string content = GetContent(publisherUrl, ref httpCode);

                if (httpCode != 200 || content == null)
                {
                    continue;
                }

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(content);

                Publisher publisher = new Publisher();
                publisher.Id = id;

                HtmlNode name = doc.DocumentNode.SelectSingleNode("//h1[@class='titulo_editora']");
                publisher.Name = name != null ? name.InnerText : string.Empty;

                HtmlNode description = doc.DocumentNode.SelectSingleNode("//div[@id='historico']");
                publisher.Description = description != null ? description.InnerHtml : string.Empty;

                // The image src is prefixed with the site root; a missing src leaves Image empty.
                publisher.Image = string.Empty;
                HtmlNode image = doc.DocumentNode.SelectSingleNode("//div[@id='menu']//img");
                if (image != null)
                {
                    string imageSrc = image.GetAttributeValue("src", string.Empty);
                    if (!string.IsNullOrEmpty(imageSrc))
                    {
                        publisher.Image = "http://www.skoob.com.br" + imageSrc;
                    }
                }

                HtmlNodeCollection authors = doc.DocumentNode.SelectNodes("//div[@class='viewport']//ul[@class='overview']//li//img");
                if (authors != null)
                {
                    foreach (HtmlNode authorImage in authors)
                    {
                        // The author identifier is the second-to-last path segment of the image src.
                        string[] parts = authorImage.GetAttributeValue("src", string.Empty).Split('/');
                        if (parts.Length >= 2)
                        {
                            publisher.Authors.Add(parts[parts.Length - 2]);
                        }
                    }
                }

                HtmlNode blog = doc.DocumentNode.SelectSingleNode("//a[text()='Blog']");
                publisher.Blog = blog != null ? blog.GetAttributeValue("href", string.Empty) : string.Empty;

                HtmlNode site = doc.DocumentNode.SelectSingleNode("//a[text()='Site oficial']");
                publisher.Site = site != null ? site.GetAttributeValue("href", string.Empty) : string.Empty;

                // Anchors that wrap an <img class='logoredes'> are handed to AddNetworkLink.
                HtmlNodeCollection networks = doc.DocumentNode.SelectNodes("//a[child::img[@class='logoredes']]");
                if (networks != null)
                {
                    foreach (HtmlNode networkLink in networks)
                    {
                        string networkHref = networkLink.GetAttributeValue("href", string.Empty);
                        if (!string.IsNullOrEmpty(networkHref))
                        {
                            publisher.AddNetworkLink(networkHref);
                        }
                    }
                }

                // Crawl the book listing starting at page 1. Each AddBooksToPublisher call removes
                // the URL it processes from linksToVisit, so the loop below terminates once no
                // unseen pagination links remain.
                List<string> visitedLinks = new List<string>();
                List<string> linksToVisit = new List<string>();
                string firstBooksPage = string.Concat(string.Format(baseUrl, "editora/livros", id), "/mpage:1");
                AddBooksToPublisher(publisher, firstBooksPage, visitedLinks, linksToVisit);
                while (linksToVisit.Count > 0)
                {
                    AddBooksToPublisher(publisher, linksToVisit[0], visitedLinks, linksToVisit);
                }

                CustomSave("Publishers", oSerializer.Serialize(publisher), 0, 100, id);
                // NOTE(review): publisher.Image may be empty here; confirm SaveImage tolerates that.
                SaveImage(publisher.Image, 0, 100, id, "Publishers");
            }
        }