/// <summary>
/// Stores one scraped label/value pair on the matching property of <paramref name="book"/>.
/// </summary>
/// <param name="book">The book being populated; it is modified in place.</param>
/// <param name="field">
/// The label as it appears on the page ("Edição", "Editora", "ISBN", "Ano", "Páginas" or
/// "Tradutor"). Any other label leaves the book untouched.
/// </param>
/// <param name="value">The raw text captured for that label.</param>
/// <returns>The same <paramref name="book"/> instance that was passed in.</returns>
private Book SetBookData(Book book, string field, string value)
{
    if (field == "Edição")
    {
        book.Edition = SafeConvert.ToInt(value);
    }
    else if (field == "Editora")
    {
        book.Publisher = value;
    }
    else if (field == "ISBN")
    {
        // A single ISBN wrapper supplies both the 10- and the 13-digit properties.
        ISBN parsedIsbn = new ISBN(value);
        book.ISBN10 = parsedIsbn.ISBN10;
        book.ISBN13 = parsedIsbn.ISBN13;
    }
    else if (field == "Ano")
    {
        book.Year = SafeConvert.ToInt(value);
    }
    else if (field == "Páginas")
    {
        book.Pages = SafeConvert.ToInt(value);
    }
    else if (field == "Tradutor")
    {
        book.Translators = value;
    }

    return book;
}
// Captures "Label:</span> value<br>" pairs inside an edition's inner HTML:
// group 1 is the label, group 2 the value. Built once instead of once per edition.
private static readonly Regex EditionFieldPattern = new Regex(@"(\w+):<\/span>\s([\w\s,]+)<br>", RegexOptions.IgnoreCase);

// Image path the site uses when an edition has no cover; it is stored as an empty cover.
private const string MissingCoverPath = "/img/geral/semcapa_m.gif";

/// <summary>
/// Downloads the "livro/edicoes" page for every id in the requested range, builds one
/// <see cref="Book"/> per edition found on the page and hands the serialized list to CustomSave.
/// </summary>
/// <param name="info">
/// Must be a <see cref="Parameters"/> instance; its StartId and EndId bound the (inclusive)
/// range of ids to process.
/// </param>
/// <exception cref="System.InvalidCastException">
/// <paramref name="info"/> is not a <see cref="Parameters"/> instance.
/// </exception>
public void ParseBooks(object info)
{
    JavaScriptSerializer serializer = new JavaScriptSerializer();
    Parameters parameters = (Parameters)info;

    for (int id = parameters.StartId; id <= parameters.EndId; id++)
    {
        // Fresh status per request: a 200 left over from the previous id must not be
        // mistaken for this request's result should GetContent leave the value unassigned.
        int httpCode = 0;
        string url = string.Format(baseUrl, "livro/edicoes", id);
        string content = GetContent(url, ref httpCode);

        // Only parse when the request succeeded; failed requests are skipped without saving,
        // and a missing body is never passed to LoadHtml (which rejects null).
        if (httpCode != 200 || content == null)
        {
            continue;
        }

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(content);

        // Title, subtitle and author are shared by every edition listed on the page.
        string bookTitle = ReadInnerTextOrEmpty(doc, "//div[preceding-sibling::div[@id='menubusca']]//div[position()=1]//h1");
        string bookSubTitle = ReadInnerTextOrEmpty(doc, "//div[preceding-sibling::div[@id='menubusca']]//div[position()=1]//h2");
        string bookAuthor = ReadInnerTextOrEmpty(doc, "//div[preceding-sibling::div[@id='menubusca']]//div[position()=1]//a[@class='l11']");

        List<Book> books = new List<Book>();
        HtmlNodeCollection editions = doc.DocumentNode.SelectNodes("//div[preceding-sibling::div[@id='menubusca']]//div[position()=3]//div[@style='float:left; font-size:11px; font-family:arial; margin:10px 8px 0px 0px; width:250px; border:red 0px solid; line-height:18px;']");
        if (editions != null)
        {
            foreach (HtmlNode edition in editions)
            {
                books.Add(BuildBookFromEdition(edition, id, bookTitle, bookSubTitle, bookAuthor));
            }
        }

        // A successfully fetched page is always saved, even when it lists no editions.
        CustomSave("Books", serializer.Serialize(books), parameters.StartId, parameters.EndId, id);
    }
}

// Returns the inner text of the first node matching xpath, or an empty string when none matches.
private static string ReadInnerTextOrEmpty(HtmlDocument doc, string xpath)
{
    HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath);
    return node != null ? node.InnerText : string.Empty;
}

// Builds a Book from one edition node: page-level data, the cover URL and every labelled field.
private Book BuildBookFromEdition(HtmlNode edition, int id, string title, string subTitle, string author)
{
    string cover = string.Empty;
    HtmlNode img = edition.SelectSingleNode(".//img");
    if (img != null)
    {
        // GetAttributeValue falls back to the default when the <img> has no src attribute,
        // where indexing Attributes["src"] would yield null and fail on .Value.
        string src = img.GetAttributeValue("src", string.Empty);
        if (src != MissingCoverPath)
        {
            cover = src;
        }
    }

    Book book = new Book();
    book.Id = id;
    book.Title = title;
    book.Author = author;
    book.SubTitle = subTitle;
    book.Cover = cover;

    // Every item returned by Matches is a successful match with exactly three groups
    // (whole match, label, value), so no per-match validation is needed.
    foreach (Match field in EditionFieldPattern.Matches(edition.InnerHtml))
    {
        SetBookData(book, field.Groups[1].Value, field.Groups[2].Value);
    }

    return book;
}