/// <summary>
/// Reads artist names, release year and vocalist names from the "albumstatus" rows
/// and stores whatever could be parsed on <paramref name="data"/>.
/// Fields whose row or child elements are missing are left untouched.
/// </summary>
/// <param name="data">Album data contract that receives the parsed values.</param>
/// <param name="infoBox">Node used as the context for the status row query.</param>
private void ParseInfoBox(ImportedAlbumDataContract data, HtmlNode infoBox)
{
    // NOTE(review): an XPath starting with "//" is evaluated from the document root rather than
    // from infoBox. Kept unchanged to preserve which rows are found - confirm this is intended.
    var statusRows = infoBox.SelectNodes("//p[@class='albumstatus']");

    var artistRow = GetInfoElem(statusRows, "Artist");
    if (artistRow != null)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        var links = artistRow.SelectNodes("a");
        if (links != null)
        {
            data.ArtistNames = links.Select(l => l.InnerText).ToArray();
        }
    }

    var releaseDateRow = GetInfoElem(statusRows, "Release");
    if (releaseDateRow != null)
    {
        var dateText = releaseDateRow.Element("#text");
        DateTime releaseDate;

        // Invariant culture: the page uses a fixed "yyyy.MM.dd" format, so the parse must not
        // depend on the calendar of the machine's current culture.
        if (dateText != null
            && DateTime.TryParseExact(dateText.InnerText, "yyyy.MM.dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out releaseDate))
        {
            data.ReleaseYear = releaseDate.Year;
        }
    }

    var charaRow = GetInfoElem(statusRows, "Characters");
    if (charaRow != null)
    {
        var charaImgs = charaRow.SelectNodes("a/img");
        if (charaImgs != null)
        {
            data.VocalistNames = charaImgs
                .Select(img => img.Attributes["src"])
                .Where(src => src != null) // skip images without a src attribute
                .Select(src => GetVocalistName(src.Value))
                .Where(name => name != null)
                .ToArray();
        }
    }
}
/// <summary>
/// Test fixture initialisation: loads the "KarenT_SystemindParadox.htm" resource document,
/// runs it through a <c>KarenTAlbumImporter</c> built with a stub picture downloader,
/// and keeps the imported album and its data in fields.
/// </summary>
public void SetUp()
{
    _karenTDoc = ResourceHelper.ReadHtmlDocument("KarenT_SystemindParadox.htm");

    var pictureDownloader = new PictureDownloaderStub();
    _importer = new KarenTAlbumImporter(pictureDownloader);

    _importedAlbum = _importer.GetAlbumData(_karenTDoc, "http://");
    _importedData = _importedAlbum.Data;
}
/// <summary>
/// Builds an album contract from a parsed album page: title, description, info box fields,
/// track list and cover picture. Every part is optional; elements that are not found are skipped.
/// </summary>
/// <param name="doc">Parsed HTML document of the album page.</param>
/// <param name="url">Value stored as the contract's <c>SourceUrl</c>.</param>
/// <returns>
/// A new <see cref="MikuDbAlbumContract"/> wrapping the imported data. <c>CoverPicture</c> stays null
/// when no cover image element with a non-empty <c>src</c> attribute is found.
/// </returns>
public MikuDbAlbumContract GetAlbumData(HtmlDocument doc, string url)
{
    var data = new ImportedAlbumDataContract();

    var titleElem = doc.DocumentNode.SelectSingleNode("//div[@class = 'pgtitle_in']/h1/span");
    if (titleElem != null)
    {
        data.Title = HtmlEntity.DeEntitize(titleElem.InnerText);
    }

    var mainPanel = doc.GetElementbyId("main_ref");
    if (mainPanel != null)
    {
        var descBox = mainPanel.SelectSingleNode("p[@class = 'overview']");
        if (descBox != null)
        {
            data.Description = descBox.InnerText;
        }

        var infoBox = mainPanel.SelectSingleNode("div[1]");
        if (infoBox != null)
        {
            ParseInfoBox(data, infoBox);
        }

        var tracklistElem = mainPanel.SelectSingleNode("div[@class = 'songlistbox']");
        if (tracklistElem != null)
        {
            ParseTracklist(data, tracklistElem);
        }
    }

    PictureDataContract coverPic = null;
    var coverElem = doc.DocumentNode.SelectSingleNode("//div[@id = 'sub_ref']/div[@class = 'artwork']/div/a/img");
    if (coverElem != null)
    {
        // The attribute indexer yields null for a missing attribute; an <img> without src
        // must not abort the whole import.
        var srcAttr = coverElem.Attributes["src"];
        if (srcAttr != null && !string.IsNullOrEmpty(srcAttr.Value))
        {
            // The src value is appended to the site root to form the download address.
            coverPic = DownloadCoverPicture("https://karent.jp" + srcAttr.Value);
        }
    }

    return new MikuDbAlbumContract(data) { CoverPicture = coverPic, SourceUrl = url };
}
/// <summary>
/// Reads <paramref name="input"/> line by line and assembles an album from it.
/// Lines are handed to the row parser's <c>Configure</c> until it reports being configured;
/// every later line is parsed as one track. The album title (default "Unknown") and release year
/// are taken from the rows that carry a non-empty album name / non-zero year, with later rows
/// overwriting earlier ones.
/// </summary>
/// <param name="input">Stream containing the album file; it is read through a <see cref="StreamReader"/> that is disposed on exit.</param>
/// <returns>An album contract whose tracks are ordered by track number and whose artist and vocalist names are the distinct names across all tracks.</returns>
public MikuDbAlbumContract Parse(Stream input)
{
    var parsedTracks = new List<ImportedAlbumTrack>();
    var rowParser = new DataRowParser();
    var album = new ImportedAlbumDataContract { Title = "Unknown" };

    using (var lineReader = new StreamReader(input))
    {
        for (var line = lineReader.ReadLine(); line != null; line = lineReader.ReadLine())
        {
            if (!rowParser.IsConfigured)
            {
                rowParser.Configure(line);
                continue;
            }

            var fields = new DataRow(rowParser, line);

            var rowAlbumName = fields.GetString(AlbumFileField.Album, string.Empty);
            if (!string.Equals(rowAlbumName, string.Empty))
            {
                album.Title = rowAlbumName;
            }

            var rowYear = fields.GetIntOrDefault(AlbumFileField.Year, 0);
            if (rowYear != 0)
            {
                album.ReleaseYear = rowYear;
            }

            // Track position is 1-based: one past the number of tracks collected so far.
            parsedTracks.Add(ParseTrack(fields, parsedTracks.Count + 1));
        }
    }

    album.ArtistNames = (from track in parsedTracks from name in track.ArtistNames select name).Distinct().ToArray();
    album.VocalistNames = (from track in parsedTracks from name in track.VocalistNames select name).Distinct().ToArray();
    album.Tracks = (from track in parsedTracks orderby track.TrackNum select track).ToArray();

    return new MikuDbAlbumContract(album);
}
/// <summary>
/// Collects the tracks listed as "song" divs and stores them on <paramref name="data"/>.
/// Each song's position (1-based) and link text are passed to <c>ParseTrackRow</c>;
/// songs without a link, and rows for which <c>ParseTrackRow</c> returns null, are skipped.
/// </summary>
/// <param name="data">Album data contract whose <c>Tracks</c> array is assigned (empty when no songs are found).</param>
/// <param name="tracklistElem">Node used as the context for the song query.</param>
private void ParseTracklist(ImportedAlbumDataContract data, HtmlNode tracklistElem)
{
    // NOTE(review): an XPath starting with "//" is evaluated from the document root rather than
    // from tracklistElem. Kept unchanged to preserve which nodes are found - confirm this is intended.
    var songElems = tracklistElem.SelectNodes("//div[@class = 'song']");
    var tracks = new List<ImportedAlbumTrack>();

    // SelectNodes yields null (not an empty collection) when nothing matches.
    if (songElems != null)
    {
        for (int i = 1; i <= songElems.Count; ++i)
        {
            var songLink = songElems[i - 1].Element("a");
            if (songLink == null)
            {
                // No link means no title text to parse; the position index still advances.
                continue;
            }

            var track = ParseTrackRow(i, songLink.InnerText);
            if (track != null)
            {
                tracks.Add(track);
            }
        }
    }

    data.Tracks = tracks.ToArray();
}
/// <summary>
/// Builds an album contract from a parsed post page: title from the post heading, cover picture
/// from the first table cell, info box fields from the second cell, and the track list from the
/// row located by <c>FindTracklistRow</c>. Parts that are not found are skipped.
/// </summary>
/// <param name="doc">Parsed HTML document of the post.</param>
/// <param name="url">Value stored as the contract's <c>SourceUrl</c>.</param>
/// <returns>
/// A new <see cref="MikuDbAlbumContract"/>. <c>Title</c> is an empty string when no heading is found;
/// <c>CoverPicture</c> stays null when no cover image with a non-empty <c>src</c> attribute is found.
/// </returns>
private MikuDbAlbumContract GetAlbumData(HtmlDocument doc, string url)
{
    var data = new ImportedAlbumDataContract();

    string title = string.Empty;
    var titleElem = doc.DocumentNode.SelectSingleNode(".//h2[@class='posttitle']/a");
    if (titleElem != null)
    {
        title = HtmlEntity.DeEntitize(titleElem.InnerText);
    }

    PictureDataContract coverPicture = null;
    var coverPicLink = doc.DocumentNode.SelectSingleNode(".//div[@class='postcontent']/table/tr[1]/td[1]/a/img");
    if (coverPicLink != null)
    {
        // The attribute indexer yields null for a missing attribute; an <img> without src
        // must not abort the whole import.
        var srcAttr = coverPicLink.Attributes["src"];
        if (srcAttr != null && !string.IsNullOrEmpty(srcAttr.Value))
        {
            coverPicture = DownloadCoverPicture(srcAttr.Value);
        }
    }

    var infoBox = doc.DocumentNode.SelectSingleNode(".//div[@class='postcontent']/table/tr[1]/td[2]");
    if (infoBox != null)
    {
        ParseInfoBox(data, infoBox);
    }

    // The node after the info box's parent row is passed to FindTracklistRow (null when there is no info box).
    var nodeAfterInfoRow = infoBox != null ? infoBox.ParentNode.NextSibling : null;
    var trackListRow = FindTracklistRow(doc, nodeAfterInfoRow);
    if (trackListRow != null)
    {
        ParseTrackList(data, trackListRow);
    }

    return new MikuDbAlbumContract
    {
        Title = title,
        Data = data,
        CoverPicture = coverPicture,
        SourceUrl = url
    };
}
/// <summary>
/// Parses a track list from the text of <paramref name="cell"/>, one track per line in the form
/// "&lt;number&gt;. &lt;title&gt;". Lines without a dot after the first character, or whose prefix
/// is not an integer, are ignored. The disc number starts at 1 and is incremented every time
/// track number 1 appears again after at least one track has been collected.
/// </summary>
/// <param name="data">Album data contract whose <c>Tracks</c> array is assigned.</param>
/// <param name="cell">Node whose inner text holds the track list.</param>
private void ParseTrackList(ImportedAlbumDataContract data, HtmlNode cell)
{
    var lines = cell.InnerText.Split(new[] { "<br>", "<br />", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    int discNum = 1;
    var tracks = new List<ImportedAlbumTrack>();

    foreach (var line in lines)
    {
        var dotPos = line.IndexOf('.');
        if (dotPos <= 0)
        {
            continue;
        }

        if (!int.TryParse(line.Substring(0, dotPos), out int trackNum))
        {
            continue;
        }

        // Numbering restarting at 1 marks the beginning of the next disc.
        if (trackNum == 1 && tracks.Any())
        {
            discNum++;
        }

        // Trim AFTER removing the "(lyrics)" marker; trimming first left the whitespace that
        // separated the title from the marker at the end of the title.
        var trackTitle = line.Substring(dotPos + 1).Replace("(lyrics)", string.Empty).Trim();

        tracks.Add(new ImportedAlbumTrack
        {
            DiscNum = discNum,
            Title = HtmlEntity.DeEntitize(trackTitle),
            TrackNum = trackNum
        });
    }

    data.Tracks = tracks.ToArray();
}
/// <summary>
/// Parses "Label: value" rows from the HTML of <paramref name="infoBox"/> and stores the recognised
/// fields (artists, vocals, circle, year) on <paramref name="data"/>. Rows are split on line breaks,
/// stripped of markup, de-entitized, and unwrapped from surrounding double quotes before matching.
/// </summary>
/// <param name="data">Album data contract that receives the parsed values.</param>
/// <param name="infoBox">Node whose inner HTML holds the info rows.</param>
private void ParseInfoBox(ImportedAlbumDataContract data, HtmlNode infoBox)
{
    // Text after a fixed-width label prefix; empty when the row is too short to contain a value
    // (a bare Substring would throw ArgumentOutOfRangeException there).
    Func<string, int, string> valueAfter =
        (line, offset) => line.Length > offset ? line.Substring(offset) : string.Empty;

    var rows = infoBox.InnerHtml.Split(new[] { "<br>", "<br />", "\n" }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var row in rows)
    {
        var stripped = HtmlEntity.DeEntitize(StripHtml(row));

        // Length check: a row consisting of a single quote character both starts and ends with '"',
        // and unwrapping it would ask for a substring of negative length.
        if (stripped.Length >= 2 && stripped.StartsWith("\"") && stripped.EndsWith("\""))
        {
            stripped = stripped.Substring(1, stripped.Length - 2).Trim();
        }

        if (LineMatch(stripped, "Artist") || LineMatch(stripped, "Artists"))
        {
            data.ArtistNames = ParseArtists(valueAfter(stripped, 8));
        }
        else if (LineMatch(stripped, "Vocals"))
        {
            data.VocalistNames = ParseArtists(valueAfter(stripped, 8));
        }
        else if (LineMatch(stripped, "Circle"))
        {
            data.CircleName = ParseArtist(valueAfter(stripped, 8));
        }
        else if (LineMatch(stripped, "Year"))
        {
            int year;
            if (int.TryParse(valueAfter(stripped, 6), out year))
            {
                data.ReleaseYear = year;
            }
        }
    }
}