// Scans the subtree of "node" for anchors that wrap at least one img element and
// appends one NovelImage per such anchor to novel.Images.
//   node  - root of the markup to scan.
//   novel - receives the created images.
// An anchor carrying the "nsfw" class yields an adult image typed as Event;
// every other anchor yields a non-adult image typed as Sample.
void ParseImagesContent(HtmlNode node, Novel novel)
{
    var screenshotLinks = node.Descendants("a")
                              .Where(anchor => anchor.Descendants("img").Any());

    foreach (var link in screenshotLinks)
    {
        var markedAdult = link.HasClass("nsfw");

        // The anchor's href is used as the image location.
        var screenshot = new NovelImage();
        screenshot.UrlPath = link.Attributes["href"].Value;
        screenshot.IsAdult = markedAdult;
        screenshot.ImageType = markedAdult ? NovelImageType.Event : NovelImageType.Sample;

        novel.Images.Add(screenshot);
    }
}
// Parses every div with the "chardetails" class under "node" into a NovelCharacterInfo
// (character image, English/Japanese name, sex, body measurements) and appends it to
// novel.CharacterInfos. Character images are additionally appended to novel.Images.
//   node  - root of the markup to scan.
//   novel - receives the parsed character infos and images.
// A character block without a details table (or with an empty table) no longer aborts
// the whole parse with a NullReferenceException: the character is still recorded with
// whatever could be extracted.
void ParseCharactersContent(HtmlNode node, Novel novel)
{
    foreach (var characterNode in node.Descendants("div").Where(w => w.HasClass("chardetails")))
    {
        var character = new Character();
        var novelCharacterInfo = new NovelCharacterInfo();
        novelCharacterInfo.Character = character;

        // Character image: the first img inside the block, if any.
        var imageNode = characterNode.Descendants("img").FirstOrDefault();
        if (imageNode != null)
        {
            var image = new NovelImage()
            {
                ImageType = NovelImageType.Character,
                UrlPath = imageNode.Attributes["src"].Value
            };
            novelCharacterInfo.Image = image;
            novel.Images.Add(image);
        }

        // FirstOrDefault returns null when the block contains no table, so guard before use.
        var detailsTable = characterNode.Descendants("table").FirstOrDefault();
        if (detailsTable != null)
        {
            // Parse Name: the table's first child holds the names and the sex marker.
            var headerWithName = detailsTable.FirstChild;
            if (headerWithName != null)
            {
                var engNameNode = headerWithName.Descendants("a").FirstOrDefault();
                if (engNameNode != null)
                {
                    character.EngName = engNameNode.InnerText;
                }

                var japNameNode = headerWithName.Descendants("b").FirstOrDefault();
                if (japNameNode != null)
                {
                    character.JapName = japNameNode.InnerText;
                }

                var sexNode = headerWithName.Descendants("acronym").FirstOrDefault();
                if (sexNode != null)
                {
                    character.Sex = GetSex(sexNode.Attributes["title"].Value);
                }
            }

            // Parse Sizes: the value is the node right after the cell whose text is "Measurements".
            var measureRow = detailsTable.Descendants("td").FirstOrDefault(w => w.InnerText == "Measurements");
            var measureNode = measureRow != null ? measureRow.NextSibling : null;
            if (measureNode != null)
            {
                var sizes = new Sizes();
                var match = Regex.Match(measureNode.InnerText, MeasurePattern);
                if (match.Success)
                {
                    sizes.Bust = match.Groups[1].Value.ToNullableInt();
                    sizes.Waist = match.Groups[2].Value.ToNullableInt();
                    sizes.Hip = match.Groups[3].Value.ToNullableInt();
                }

                // Sizes is assigned even when the pattern does not match (all values left unset).
                character.Sizes = sizes;
            }
        }

        novel.CharacterInfos.Add(novelCharacterInfo);
    }
}
// Appends one adult Event image to novel.Images for every img element found under
// "imagesTable". Does nothing when "imagesTable" is null.
//   imagesTable - container whose img descendants are collected; may be null.
//   novel       - receives the created images.
void ParseImages(HtmlNode imagesTable, Novel novel)
{
    if (imagesTable != null)
    {
        // Lazy projection: each image is created right before it is added.
        var eventImages = imagesTable.Descendants("img").Select(imgTag => new NovelImage()
        {
            ImageType = NovelImageType.Event,
            UrlPath = imgTag.GetchuImgValue(),
            IsAdult = true
        });

        foreach (var eventImage in eventImages)
        {
            novel.Images.Add(eventImage);
        }
    }
}
// Loads the page at "url" and builds a Novel from it: Japanese title, title image,
// company, release date, artist, tags, characters and images.
//   url - address passed to HtmlHelper.LoadDocumentWithEncoding.
// Returns the populated Novel.
// Throws Exception (formatted with ErrorMessagePattern) when the page has no table with
// id "soft_table". Other mandatory elements (title, info table, company, release date)
// are fetched with First(), which throws InvalidOperationException when they are absent.
public Novel ParsePage(string url)
{
    var novel = new Novel();
    var document = HtmlHelper.LoadDocumentWithEncoding(url);

    // FirstOrDefault rather than First: First throws on no match, which made the
    // null check below unreachable and hid the descriptive error message.
    var upperContainer = document.DocumentNode.Descendants("table").FirstOrDefault(w => w.HasId("soft_table"));
    if (upperContainer == null)
    {
        throw new Exception(String.Format(ErrorMessagePattern, url, "no upper content"));
    }

    var upperHeader = upperContainer.Descendants("tr").First();

    //TITLE
    var titleHeader = upperHeader.Descendants().First(w => w.HasId("soft-title"));
    novel.JapName = titleHeader.InnerText;

    //TITLE IMAGE
    var titleImageNode = upperHeader.Descendants("img").First().ParentNode;
    if (titleImageNode.Name == "a")//if "td" -> no image
    {
        var image = new NovelImage()
        {
            ImageType = NovelImageType.Title,
            UrlPath = titleImageNode.Attributes["href"].Value
        };
        novel.Images.Add(image);
    }

    //INFO TABLE
    var infoTable = upperHeader.NextSibling("tr").Descendants("table").First();

    var companyRow = infoTable.Descendants("td").First(w => CompanyTitles.Any(t => w.InnerText.Contains(t)));
    var companyName = companyRow.NextSibling("td").Descendants("a").First().InnerText;
    var company = new Company() { JapName = companyName };
    novel.Companies.Add(company);

    var releaseDateRow = infoTable.Descendants("td").First(w => w.InnerText.Contains(DateTitle));
    // NOTE(review): this row uses the NextSibling property while the other rows use
    // NextSibling("td") - confirm this is intentional.
    // NOTE(review): DateTime.Parse uses the current culture - confirm the page's date
    // format parses the same way on every machine this runs on.
    var releaseDate = DateTime.Parse(releaseDateRow.NextSibling.Descendants("a").First().InnerText);
    novel.ReleaseDate = releaseDate;

    // Artist is optional.
    var artistRow = infoTable.Descendants("td").FirstOrDefault(w => w.InnerText.Contains(ArtistTitle));
    if (artistRow != null)
    {
        var artistName = artistRow.NextSibling("td").Descendants("a").First().InnerText;
        var artist = new Artist() { JapName = artistName };
        novel.Artists.Add(artist);
    }

    // Tags are optional: the cell text, minus GenreLinkText, is split on the '、' separator.
    var tagsRow = infoTable.Descendants("td").FirstOrDefault(w => w.InnerText.Contains(TagsTitle));
    if (tagsRow != null)
    {
        var tagsContainer = tagsRow.NextSibling("td");
        var tagValues = tagsContainer.InnerText
            .Replace(GenreLinkText, "")
            .Split('、')
            .Where(w => !String.IsNullOrEmpty(w));

        foreach (var tagValue in tagValues)
        {
            novel.Tags.Add(new Tag() { TagType = 1, TagValue = tagValue });
        }
    }

    //CHARACTERS
    // The character table is located by its width attribute ("96%").
    var characterTable = document.DocumentNode.Descendants("table").FirstOrDefault(w => w.Attr("width") == "96%");
    if (characterTable != null)
    {
        foreach (var characterNode in characterTable.Descendants("tr"))
        {
            ParseCharacterNode(characterNode, novel);
        }
    }

    //IMAGES
    // Every "tabletitle" div whose text contains one of _imageTableHeaders is followed
    // by a div holding the images; ParseImages ignores a null container.
    var storyTableHeaders = document.DocumentNode.Descendants("div")
        .Where(w => w.HasClass("tabletitle") && _imageTableHeaders.Any(header => w.InnerText.Contains(header)));
    foreach (var storyTableHeader in storyTableHeaders)
    {
        ParseImages(storyTableHeader.NextSibling("div"), novel);
    }

    return novel;
}
// Turns one row of the character table into a NovelCharacterInfo and appends it to
// novel.CharacterInfos. The row's character image and detailed image, when present,
// are also appended to novel.Images.
//   characterNode - the row to parse.
//   novel         - receives the character info and images.
// Rows with fewer than two td descendants are skipped (the original marks them as borders).
void ParseCharacterNode(HtmlNode characterNode, Novel novel)
{
    var cellCount = characterNode.Descendants("td").Count();
    if (cellCount < 2)
    {
        return; // border
    }

    var person = new Character();
    var entry = new NovelCharacterInfo();
    entry.Character = person;

    // IMAGE: first cell of the row.
    var portraitCell = characterNode.Descendants("td").First();
    var portraitTag = portraitCell.Descendants("img").FirstOrDefault();
    if (portraitTag != null)
    {
        var portrait = new NovelImage();
        portrait.ImageType = NovelImageType.Character;
        portrait.UrlPath = portraitTag.GetchuImgValue();

        entry.Image = portrait;
        novel.Images.Add(portrait);
    }

    // INFO: the cell following the image cell.
    var infoCell = portraitCell.NextSibling("td");
    ParseCharacterInfo(infoCell, entry);

    // DETAILED IMAGE: optional cell following the info cell.
    var detailedCell = infoCell.NextSibling("td");
    var detailedTag = detailedCell == null
        ? null
        : detailedCell.Descendants("img").FirstOrDefault();
    if (detailedTag != null)
    {
        var detailedPortrait = new NovelImage();
        detailedPortrait.ImageType = NovelImageType.DetailedCharacter;
        detailedPortrait.UrlPath = detailedTag.GetchuImgValue();

        entry.DetailedImage = detailedPortrait;
        novel.Images.Add(detailedPortrait);
    }

    novel.CharacterInfos.Add(entry);
}