/// <summary>
/// Downloads the composition detail page at <paramref name="url"/> and copies the data found
/// in its "Allgemeine Angaben zum Werk:" and "Sätze:" sections onto <paramref name="composition"/>.
/// </summary>
/// <remarks>
/// Plain text fields (dedication, premiere, dates, occasion, comment) are only written when they
/// are still <c>null</c>. Movements are only added for movement numbers the composition does not
/// have yet. Rows with fewer than two <c>td</c> cells are skipped.
/// </remarks>
/// <param name="url">Address of the detail page; also stored on a new link if the composition has no klassika.info link yet.</param>
/// <param name="composition">The composition to enrich with the scraped data.</param>
/// <param name="classicalMusicContext">Context used to look up and register keys and instrumentations.</param>
/// <returns>A task that completes when the page has been downloaded and processed.</returns>
public async static Task ScrapeCompositionDetailPageAsync(string url, Composition composition, ClassicalMusicContext classicalMusicContext)
{
    // Attach a klassika.info link unless the composition already carries one.
    var compositionWebpage = composition.Links.FirstOrDefault(l => l.Url != null && l.Url.Contains("klassika.info"));
    if (compositionWebpage == null)
    {
        compositionWebpage = new Link();
        compositionWebpage.Compositions.Add(composition);
        compositionWebpage.Url = url;
        composition.Links.Add(compositionWebpage);
    }

    // WebClient is disposable; release it as soon as the download has finished.
    string htmlSource;
    using (var webClient = new WebClient())
    {
        htmlSource = await webClient.DownloadStringTaskAsync(new Uri(url));
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(htmlSource);

    // Heading text is entity-decoded before comparing, just like the row labels below.
    var generalInformationHeader = htmlDocument.DocumentNode
        .Descendants("h2")
        .FirstOrDefault(n => HtmlEntity.DeEntitize(n.InnerText) == "Allgemeine Angaben zum Werk:");

    // The table is taken from two siblings after the heading (presumably a whitespace text node
    // sits in between - verify against the page markup). Null when the section is absent.
    var generalInformationTable = generalInformationHeader?.NextSibling?.NextSibling;
    if (generalInformationTable != null)
    {
        foreach (var tableRow in generalInformationTable.Descendants("tr"))
        {
            var tableDatas = tableRow.Elements("td").ToArray();
            if (tableDatas.Length < 2)
            {
                // Not a label/value row; nothing to read.
                continue;
            }

            var header = HtmlEntity.DeEntitize(tableDatas[0].InnerText);
            var value = HtmlEntity.DeEntitize(tableDatas[1].InnerText);

            ApplyCompositionGeneralInformation(header, value, composition, classicalMusicContext);
        }
    }

    var movementsHeader = htmlDocument.DocumentNode
        .Descendants("h2")
        .FirstOrDefault(n => HtmlEntity.DeEntitize(n.InnerText) == "Sätze:");

    var movementTable = movementsHeader?.NextSibling?.NextSibling;
    if (movementTable != null)
    {
        // 1-based position of the current row among all rows of the table; used as the
        // movement number when the first cell does not contain a parsable number.
        var rowPosition = 0;

        foreach (var tableRow in movementTable.Descendants("tr"))
        {
            rowPosition++;

            var tableDatas = tableRow.Elements("td").ToArray();
            if (tableDatas.Length < 2)
            {
                continue;
            }

            var movementNumberText = HtmlEntity.DeEntitize(tableDatas[0].InnerText).Replace(". Satz:", "");

            short movementNumber;
            if (!short.TryParse(
                    movementNumberText,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out movementNumber))
            {
                movementNumber = Convert.ToInt16(rowPosition);
            }

            var movementName = HtmlEntity.DeEntitize(tableDatas[1].InnerText);

            // Only add movements whose number is not present on the composition yet.
            var movement = composition.Movements.FirstOrDefault(m => m.Number == movementNumber);
            if (movement == null)
            {
                movement = new Movement();
                movement.Name = movementName;
                movement.Composition = composition;
                movement.Number = movementNumber;
                composition.Movements.Add(movement);
            }
        }
    }
}

// Maps one label/value row of the general information table onto the composition.
private static void ApplyCompositionGeneralInformation(string header, string value, Composition composition, ClassicalMusicContext classicalMusicContext)
{
    switch (header)
    {
        case "Tonart:":
            var key = classicalMusicContext.Keys.FirstOrDefault(k => k.Name == value);
            if (key == null)
            {
                // NOTE(review): a newly created key replaces any key already set on the
                // composition, unlike the "only when null" handling of the other fields.
                key = new Key();
                key.Name = value;
                key.Compositions.Add(composition);
                composition.Key = key;
                classicalMusicContext.Keys.Add(key);
            }

            if (composition.Key == null)
            {
                composition.Key = key;
            }

            break;

        case "Widmung:":
            if (composition.Dedication == null)
            {
                composition.Dedication = value;
            }

            break;

        case "Besetzung:":
            var instrumentation = classicalMusicContext.Instrumentations.FirstOrDefault(i => i.Name == value);
            if (instrumentation == null)
            {
                // NOTE(review): same as for keys - a newly created instrumentation is always assigned.
                instrumentation = new Instrumentation();
                instrumentation.Name = value;
                instrumentation.Compositions.Add(composition);
                composition.Instrumentation = instrumentation;
                classicalMusicContext.Instrumentations.Add(instrumentation);
            }

            if (composition.Instrumentation == null)
            {
                composition.Instrumentation = instrumentation;
            }

            break;

        case "Uraufführung:":
            if (composition.Premiere == null)
            {
                composition.Premiere = value;
            }

            break;

        case "Entstehungszeit:":
            if (composition.Dates == null)
            {
                composition.Dates = value;
            }

            break;

        case "Anlass:":
            if (composition.Occasion == null)
            {
                composition.Occasion = value;
            }

            break;

        case "Bemerkung:":
            if (composition.Comment == null)
            {
                composition.Comment = value;
            }

            break;
    }
}
/// <summary>
/// Makes sure <paramref name="composer"/> has a klassika.info link and then scrapes the
/// composer's alphabetical works list via <c>ScrapeCompositionsPageAsync</c>.
/// </summary>
/// <param name="url">Composer detail page address; its second-to-last '/'-separated segment is used as the composer key.</param>
/// <param name="composer">The composer whose link and compositions are processed.</param>
/// <param name="classicalMusicContext">Context forwarded to the compositions scraper.</param>
/// <param name="progress">Optional progress sink forwarded to the compositions scraper.</param>
/// <param name="cancellationToken">Optional cancellation token forwarded to the compositions scraper.</param>
/// <returns>A task that completes when the works list has been processed, or immediately when the url has fewer than two segments.</returns>
public async static Task ScrapeComposerDetailPageAsync(string url, Composer composer, ClassicalMusicContext classicalMusicContext, IProgress<double> progress = null, CancellationToken? cancellationToken = null)
{
    // Register the page as a link on the composer if no klassika.info link exists yet.
    var alreadyLinked = composer.Links.Any(link => link.Url.Contains("klassika.info"));
    if (!alreadyLinked)
    {
        var klassikaLink = new Link();
        klassikaLink.Composers.Add(composer);
        klassikaLink.Url = url;
        composer.Links.Add(klassikaLink);
    }

    // The composer key is the path segment just before the last one.
    var segments = url.Split('/');
    if (segments.Length >= 2)
    {
        var composerKey = segments[segments.Length - 2];
        var worksListUrl = $"http://www.klassika.info/Komponisten/{composerKey}/wv_abc.html";

        await ScrapeCompositionsPageAsync(worksListUrl, composer, classicalMusicContext, progress, cancellationToken);
    }
}