Example #1
0
        /// <summary>
        /// Loads the main, staff and character pages for the given index, collects
        /// their data into a single <see cref="Novel"/> and saves it through
        /// <c>NovelManager.SaveNovel</c>.
        /// </summary>
        /// <param name="index">Value substituted into the three page URL patterns.</param>
        /// <exception cref="IndexOutOfRangeException">
        /// The main page contains no div with the "mainbox" class.
        /// </exception>
        public void ProcessIndex(int index)
        {
            var result = new Novel();
            var web = new HtmlWeb();

            // --- main page: info box, releases, screenshots ---
            var page = web.Load(String.Format(MainUrlPattern, index));
            var boxes = page.DocumentNode.Descendants("div").Where(div => div.HasClass("mainbox")).ToArray();

            // The first "mainbox" carries the general information.
            ParseMainContent(boxes[0], result);

            var releases = page.DocumentNode.Descendants("div").FirstOrDefault(div => div.HasClass("releases"));
            if (releases != null)
            {
                ParseReleasesContent(releases, result);
            }

            var screenshots = page.DocumentNode.Descendants("div").FirstOrDefault(div => div.HasId("screenshots"));
            if (screenshots != null)
            {
                ParseImagesContent(screenshots, result);
            }

            // --- staff page: only the artists are extracted ---
            page = web.Load(String.Format(StaffPattern, index));
            var staff = page.DocumentNode.Descendants("div").FirstOrDefault(div => div.HasClass("staff") && div.NotContainsClass("cast"));
            if (staff != null)
            {
                ParseStaffContent(staff, result);
            }

            // --- characters page: every "mainbox" after the first one is parsed ---
            page = web.Load(String.Format(CharacterPattern, index));
            boxes = page.DocumentNode.Descendants("div").Where(div => div.HasClass("mainbox")).ToArray();
            foreach (var characterBox in boxes.Skip(1))
            {
                ParseCharactersContent(characterBox, result);
            }

            // --- persist ---
            using (var ctx = new VNContext("VNConnectionString"))
            {
                NovelManager.SaveNovel(result, ctx);
                Logs.Debug($@"Novel {index} finished");
            }

            Console.WriteLine(index + @" finished");
        }
Example #2
0
 /// <summary>
 /// Adds the artists listed in the staff block to <paramref name="novel"/>.
 /// </summary>
 /// <remarks>
 /// Starts at the row whose cell text is exactly "Artist" and keeps going through
 /// the following <c>tr</c> siblings for as long as their first child has empty
 /// text. Artists are de-duplicated by their English name.
 /// </remarks>
 /// <param name="node">Staff block to search.</param>
 /// <param name="novel">Novel that receives the artists.</param>
 void ParseStaffContent(HtmlNode node, Novel novel)
 {
     var artistCell = node.Descendants("td").FirstOrDefault(w => w.InnerText == "Artist");
     if (artistCell == null)
     {
         return;
     }

     // Walk row by row. The loop always either advances or returns, so a row
     // without any links can no longer spin forever.
     var row = artistCell.ParentNode;
     while (row != null)
     {
         foreach (var artistNode in row.Descendants("a"))
         {
             var engName = artistNode.InnerText;
             // A link without a title attribute yields a null Japanese name instead of throwing.
             var japName = artistNode.Attributes["title"]?.Value;
             if (novel.Artists.All(w => w.EngName != engName))
             {
                 novel.Artists.Add(new Artist() { EngName = engName, JapName = japName });
             }
         }

         // Only a directly following <tr> whose first child has no text continues the list.
         var next = row.NextSibling;
         if (next == null || next.Name != "tr" || !String.IsNullOrEmpty(next.FirstChild?.InnerText))
         {
             return;
         }

         row = next;
     }
 }
Example #3
0
        /// <summary>
        /// Fills <paramref name="novel"/> with the names, title image and companies
        /// found in the main info box.
        /// </summary>
        /// <param name="node">Main info box of the page.</param>
        /// <param name="novel">Novel to populate.</param>
        void ParseMainContent(HtmlNode node, Novel novel)
        {
            var engName = node.Descendants("h1").FirstOrDefault();
            if (engName != null)
            {
                novel.EngName = engName.InnerText;
            }

            var japName = node.Descendants("h2").FirstOrDefault();
            if (japName != null)
            {
                novel.JapName = japName.InnerText;
            }

            // Looked up defensively: a box without a "vnimg" element, without an <img>
            // inside it, or without a src attribute is skipped instead of throwing.
            var imageSrc = node.Descendants().FirstOrDefault(w => w.HasClass("vnimg"))
                ?.Descendants("img").FirstOrDefault()
                ?.Attributes["src"]?.Value;
            if (!String.IsNullOrEmpty(imageSrc))
            {
                // NOTE(review): the src value is stored in LocalPath here; confirm that UrlPath is not the intended target.
                novel.Images.Add(new NovelImage() { LocalPath = imageSrc, ImageType = NovelImageType.Title });
            }

            AddCompaniesFromLabelledCell(node, novel, "Developer");
            AddCompaniesFromLabelledCell(node, novel, "Publishers");
        }

        /// <summary>
        /// Finds the cell whose text equals <paramref name="label"/> and adds every
        /// link in its next sibling as a company, skipping English names that are
        /// already present.
        /// </summary>
        private void AddCompaniesFromLabelledCell(HtmlNode node, Novel novel, string label)
        {
            var labelCell = node.Descendants("td").FirstOrDefault(w => w.InnerText == label);
            var valueCell = labelCell?.NextSibling;
            if (valueCell == null)
            {
                return;
            }

            foreach (var link in valueCell.Descendants("a"))
            {
                var companyName = link.InnerText;
                if (novel.Companies.All(w => w.EngName != companyName))
                {
                    novel.Companies.Add(new Company() { EngName = companyName });
                }
            }
        }
Example #4
0
 /// <summary>
 /// Sets the novel's release date to the earliest date that can be parsed from
 /// the table cells of the releases block.
 /// </summary>
 /// <param name="node">Releases block to scan.</param>
 /// <param name="novel">Novel whose release date is assigned.</param>
 void ParseReleasesContent(HtmlNode node, Novel novel)
 {
     // Remains default(DateTime) when no cell text parses as a date.
     var earliest = default(DateTime);
     var anyParsed = false;

     foreach (var cell in node.Descendants("td"))
     {
         DateTime parsed;
         if (!DateTime.TryParse(cell.InnerText, out parsed))
         {
             continue;
         }

         if (!anyParsed || parsed < earliest)
         {
             earliest = parsed;
             anyParsed = true;
         }
     }

     novel.ReleaseDate = earliest;
 }
Example #5
0
 /// <summary>
 /// Adds one image to <paramref name="novel"/> for every link in the block that
 /// wraps an <c>img</c> element, using the link's href as the image URL.
 /// Links carrying the "nsfw" class are flagged as adult and typed as Event;
 /// all others are typed as Sample.
 /// </summary>
 void ParseImagesContent(HtmlNode node, Novel novel)
 {
     var screenshotLinks = node.Descendants("a").Where(link => link.Descendants("img").Any());

     foreach (var link in screenshotLinks)
     {
         var nsfw = link.HasClass("nsfw");

         NovelImageType kind;
         if (nsfw)
         {
             kind = NovelImageType.Event;
         }
         else
         {
             kind = NovelImageType.Sample;
         }

         novel.Images.Add(new NovelImage()
         {
             UrlPath = link.Attributes["href"].Value,
             IsAdult = nsfw,
             ImageType = kind
         });
     }
 }
Example #6
0
        /// <summary>
        /// Reads every "chardetails" block under <paramref name="node"/> and adds the
        /// character it describes (image, names, sex, measurements) to
        /// <paramref name="novel"/>.
        /// </summary>
        /// <remarks>
        /// A block without a details table is skipped entirely: nothing is added to
        /// the novel for it.
        /// </remarks>
        /// <param name="node">Container holding the character blocks.</param>
        /// <param name="novel">Novel that receives characters and their images.</param>
        void ParseCharactersContent(HtmlNode node, Novel novel)
        {
            foreach (var characterNode in node.Descendants("div").Where(w => w.HasClass("chardetails")))
            {
                // Checked first so that a malformed block is skipped before anything
                // (including its image) has been added to the novel.
                var detailsTable = characterNode.Descendants("table").FirstOrDefault();
                if (detailsTable == null)
                {
                    continue;
                }

                Character character = new Character();
                NovelCharacterInfo novelCharacterInfo = new NovelCharacterInfo();
                novelCharacterInfo.Character = character;

                var imageNode = characterNode.Descendants("img").FirstOrDefault();
                if (imageNode != null)
                {
                    var image = new NovelImage() { ImageType = NovelImageType.Character, UrlPath = imageNode.Attributes["src"].Value };
                    novelCharacterInfo.Image = image;
                    novel.Images.Add(image);
                }

                //Parse Name
                // The first child of the table is expected to hold the names and the sex marker.
                var headerWithName = detailsTable.FirstChild;
                if (headerWithName != null)
                {
                    var engNameNode = headerWithName.Descendants("a").FirstOrDefault();
                    if (engNameNode != null)
                    {
                        character.EngName = engNameNode.InnerText;
                    }

                    var japNameNode = headerWithName.Descendants("b").FirstOrDefault();
                    if (japNameNode != null)
                    {
                        character.JapName = japNameNode.InnerText;
                    }

                    // The sex is read from the acronym's title attribute; it is left unset when either is missing.
                    var sexTitle = headerWithName.Descendants("acronym").FirstOrDefault()?.Attributes["title"]?.Value;
                    if (sexTitle != null)
                    {
                        character.Sex = GetSex(sexTitle);
                    }
                }

                //Parse Sizes
                var measureRow = detailsTable.Descendants("td").FirstOrDefault(w => w.InnerText == "Measurements");
                var measureNode = measureRow?.NextSibling;
                if (measureNode != null)
                {
                    // An empty Sizes object is still assigned when the text does not match the pattern.
                    var sizes = new Sizes();
                    var match = Regex.Match(measureNode.InnerText, MeasurePattern);
                    if (match.Success)
                    {
                        sizes.Bust = match.Groups[1].Value.ToNullableInt();
                        sizes.Waist = match.Groups[2].Value.ToNullableInt();
                        sizes.Hip = match.Groups[3].Value.ToNullableInt();
                    }
                    character.Sizes = sizes;
                }

                novel.CharacterInfos.Add(novelCharacterInfo);
            }
        }
Example #7
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> and builds a <see cref="Novel"/>
        /// from its title, title image, info table (company, release date, artist,
        /// tags), character table and image sections.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The novel populated from the page.</returns>
        /// <exception cref="Exception">
        /// The page contains no table with id "soft_table".
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// One of the other required elements (looked up with <c>First</c>) is missing.
        /// </exception>
        public Novel ParsePage(string url)
        {
            var novel = new Novel();
            var document = HtmlHelper.LoadDocumentWithEncoding(url);

            // FirstOrDefault rather than First: First never returns null, so the
            // descriptive error below could not be reached.
            var upperContainer = document.DocumentNode.Descendants("table").FirstOrDefault(w => w.HasId("soft_table"));
            if (upperContainer == null) throw new Exception(String.Format(ErrorMessagePattern, url, "no upper content"));

            var upperHeader = upperContainer.Descendants("tr").First();
            //TITLE
            var titleHeader = upperHeader.Descendants().First(w => w.HasId("soft-title"));
            novel.JapName = titleHeader.InnerText;

            //TITLE IMAGE
            var titleImageNode = upperHeader.Descendants("img").First().ParentNode;
            if (titleImageNode.Name == "a")//if "td" -> no image
            {
                var image = new NovelImage() { ImageType = NovelImageType.Title, UrlPath = titleImageNode.Attributes["href"].Value };
                novel.Images.Add(image);
            }

            //INFO TABLE
            var infoTable = upperHeader.NextSibling("tr").Descendants("table").First();

            var companyRow = infoTable.Descendants("td").First(w => CompanyTitles.Any(t => w.InnerText.Contains(t)));
            var companyName = companyRow.NextSibling("td").Descendants("a").First().InnerText;
            var company = new Company() { JapName = companyName };
            novel.Companies.Add(company);

            var releaseDateRow = infoTable.Descendants("td").First(w => w.InnerText.Contains(DateTitle));
            // NOTE(review): this uses the NextSibling property while the neighbouring
            // lookups use the NextSibling("td") helper - confirm that is intentional.
            var releaseDate = DateTime.Parse(releaseDateRow.NextSibling.Descendants("a").First().InnerText);
            novel.ReleaseDate = releaseDate;

            var artistRow = infoTable.Descendants("td").FirstOrDefault(w => w.InnerText.Contains(ArtistTitle));
            if (artistRow != null)
            {
                var artistName = artistRow.NextSibling("td").Descendants("a").First().InnerText;
                var artist = new Artist() { JapName = artistName };
                novel.Artists.Add(artist);
            }

            var tagsRow = infoTable.Descendants("td").FirstOrDefault(w => w.InnerText.Contains(TagsTitle));
            if (tagsRow != null)
            {
                // The cell text is a '、'-separated list once the genre link text is removed.
                var tagsContainer = tagsRow.NextSibling("td");
                var tagValues = tagsContainer.InnerText.Replace(GenreLinkText, "").Split('、').Where(w => !String.IsNullOrEmpty(w));
                foreach (var tagValue in tagValues)
                {
                    novel.Tags.Add(new Tag() { TagType = 1, TagValue = tagValue });
                }
            }

            //CHARACTERS
            var characterTable = document.DocumentNode.Descendants("table").FirstOrDefault(w => w.Attr("width") == "96%");
            if (characterTable != null)
            {
                foreach (var characterNode in characterTable.Descendants("tr"))
                {
                    ParseCharacterNode(characterNode, novel);
                }
            }

            //IMAGES
            var storyTableHeaders = document.DocumentNode.Descendants("div").Where(w => w.HasClass("tabletitle") && _imageTableHeaders.Any(header => w.InnerText.Contains(header)));
            foreach (var storyTableHeader in storyTableHeaders)
            {
                ParseImages(storyTableHeader.NextSibling("div"), novel);
            }
            return novel;
        }
Example #8
0
 /// <summary>
 /// Adds every <c>img</c> found under <paramref name="imagesTable"/> to the novel
 /// as an adult Event image. Does nothing when <paramref name="imagesTable"/> is null.
 /// </summary>
 void ParseImages(HtmlNode imagesTable, Novel novel)
 {
     if (imagesTable != null)
     {
         foreach (var tag in imagesTable.Descendants("img"))
         {
             novel.Images.Add(new NovelImage()
             {
                 ImageType = NovelImageType.Event,
                 UrlPath = tag.GetchuImgValue(),
                 IsAdult = true
             });
         }
     }
 }
Example #9
0
        /// <summary>
        /// Turns one row of the character table into a character entry with its
        /// image, info and optional detailed image, and adds it to
        /// <paramref name="novel"/>. Rows with fewer than two cells are treated as
        /// borders and ignored.
        /// </summary>
        void ParseCharacterNode(HtmlNode characterNode, Novel novel)
        {
            var cells = characterNode.Descendants("td");
            if (cells.Count() < 2)
            {
                // Border row: nothing to parse.
                return;
            }

            var parsedCharacter = new Character();
            var entry = new NovelCharacterInfo();
            entry.Character = parsedCharacter;

            // First cell: character image, if the cell has one.
            var portraitCell = cells.First();
            var portraitTag = portraitCell.Descendants("img").FirstOrDefault();
            if (portraitTag != null)
            {
                var portrait = new NovelImage() { ImageType = NovelImageType.Character, UrlPath = portraitTag.GetchuImgValue() };
                entry.Image = portrait;
                novel.Images.Add(portrait);
            }

            // Next cell: character info.
            var infoCell = portraitCell.NextSibling("td");
            ParseCharacterInfo(infoCell, entry);

            // Cell after that, when present: detailed image.
            var detailedTag = infoCell.NextSibling("td")?.Descendants("img").FirstOrDefault();
            if (detailedTag != null)
            {
                var detailed = new NovelImage() { ImageType = NovelImageType.DetailedCharacter, UrlPath = detailedTag.GetchuImgValue() };
                entry.DetailedImage = detailed;
                novel.Images.Add(detailed);
            }

            novel.CharacterInfos.Add(entry);
        }