/// <summary>
/// Fetches the page at <c>PanicIndexUrl()</c>, processes every show directory that
/// <c>ShowDirMatcher</c> finds in it, and then rebuilds the artist's shows and years.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source configuration; not read by this implementation.</param>
/// <param name="ctx">Job context used for logging and the progress bar; may be null.</param>
/// <returns>The statistics object that was passed to every <c>ProcessShow</c> call.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    var stats = new ImportStats();

    await PreloadData(artist);

    var contents = await FetchUrl(PanicIndexUrl(), ctx);

    // Run the matcher once and reuse the result for both the count and the loop.
    var matches = ShowDirMatcher.Matches(contents);

    ctx?.WriteLine($"Check {matches.Count} subdirectories");
    var prog = ctx?.WriteProgressBar();

    var counter = 1;
    foreach (Match match in matches)
    {
        var panicDate = match.Groups[1].Value;
        var panicRecLetter = match.Groups[2].Value;

        // 27-Jul-2016 19:14
        var panicUpdatedAt = DateTime.ParseExact(match.Groups[3].Value.Trim(), "dd-MMM-yyyy HH:mm", CultureInfo.InvariantCulture);

        await ProcessShow(stats, artist, panicDate, panicRecLetter, panicUpdatedAt, ctx);

        // prog is null whenever ctx is null, so it needs the same null-conditional
        // access as the logging calls.
        prog?.SetValue(100.0 * counter / matches.Count);
        counter++;
    }

    await RebuildShows(artist);
    await RebuildYears(artist);

    return stats;
}
/// <summary>
/// Requests setlist pages for the artist one after another, accumulating the stats of each
/// page, until a page result's first item is false. Afterwards updates tour dates (when the
/// artist has the tours feature) and rebuilds shows and years.
/// </summary>
/// <param name="artist">Artist whose setlists are imported.</param>
/// <param name="src">Upstream source configuration; not read by this implementation.</param>
/// <param name="ctx">Job context used for the progress bar; may be null.</param>
/// <returns>The accumulated import statistics.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    var totals = ImportStats.None;

    await PreloadData(artist);

    var progressBar = ctx?.WriteProgressBar();

    var pageNumber = 1;
    var keepGoing = true;
    while (keepGoing)
    {
        var response = await this.http.GetAsync(SetlistUrlForArtist(artist, pageNumber));
        Tuple<bool, ImportStats> pageResult = await ProcessSetlistPage(artist, response, ctx, progressBar);

        // max 10 per second
        await Task.Delay(100);

        pageNumber++;
        totals += pageResult.Item2;

        keepGoing = pageResult != null && pageResult.Item1;
    }

    if (artist.features.tours)
    {
        await UpdateTourStartEndDates(artist);
    }

    // refresh the derived show and year records
    await RebuildShows(artist);
    await RebuildYears(artist);

    return totals;
}
/// <summary>
/// Imports pages from <c>UrlForArtist</c> starting at page 1 until <c>ImportPage</c> returns
/// false, then updates tour start/end dates and rebuilds shows and years.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source used to build the page URL.</param>
/// <param name="ctx">Job context used for logging; may be null.</param>
/// <returns>The statistics object that was passed to every <c>ImportPage</c> call.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    var stats = new ImportStats();

    await PreloadData(artist);

    for (uint page = 1; ; page++)
    {
        ctx?.WriteLine($"Requesting page #{page}");

        var response = await http.GetAsync(UrlForArtist(src, page));
        var hasMore = await ImportPage(artist, stats, ctx, response);
        if (!hasMore)
        {
            break;
        }

        // short pause between consecutive page requests
        await Task.Delay(100);
    }

    ctx?.WriteLine("Updating tour start/end dates");
    await UpdateTourStartEndDates(artist);

    // refresh the derived show and year records
    await RebuildShows(artist);
    await RebuildYears(artist);

    return stats;
}
/// <summary>
/// Builds the links attached to a source: always an archive.org details link, plus a
/// setlist.fm attribution link when the artist has an upstream source with id 6.
/// </summary>
/// <param name="artist">Artist whose upstream sources decide whether the setlist.fm link is added.</param>
/// <param name="dbSource">Source the links belong to.</param>
/// <param name="src">Upstream source whose id is recorded on the archive.org link.</param>
/// <returns>The links for the source.</returns>
IEnumerable<Link> LinksForSource(Artist artist, Source dbSource, ArtistUpstreamSource src)
{
    var archiveLink = new Link
    {
        source_id = dbSource.id,
        for_ratings = true,
        for_source = true,
        for_reviews = true,
        upstream_source_id = src.upstream_source_id,
        url = "https://archive.org/details/" + dbSource.upstream_identifier,
        label = "View on archive.org"
    };

    var result = new List<Link> { archiveLink };

    var usesSetlistFm = artist.upstream_sources.Any(upstream => upstream.upstream_source_id == 6 /* setlist.fm */);
    if (!usesSetlistFm)
    {
        return result;
    }

    var setlistFmLink = new Link()
    {
        source_id = dbSource.id,
        for_ratings = false,
        for_source = false,
        for_reviews = false,
        upstream_source_id = 6 /* setlist.fm */,
        url = "https://www.setlist.fm/",
        label = "Setlist Information from setlist.fm"
    };
    result.Add(setlistFmLink);

    return result;
}
/// <summary>
/// Runs the import stages in order (eras, tours, songs, venues, shows), summing the stats
/// of each stage, and then rebuilds shows and years.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source; only forwarded to the shows stage.</param>
/// <param name="ctx">Job context used for logging; may be null.</param>
/// <returns>The summed statistics of all stages.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    await PreloadData(artist);

    var stats = new ImportStats();

    ctx?.WriteLine("Processing Eras");
    var eraStats = await ProcessEras(artist, ctx);
    stats += eraStats;

    ctx?.WriteLine("Processing Tours");
    var tourStats = await ProcessTours(artist, ctx);
    stats += tourStats;

    ctx?.WriteLine("Processing Songs");
    var songStats = await ProcessSongs(artist, ctx);
    stats += songStats;

    ctx?.WriteLine("Processing Venues");
    var venueStats = await ProcessVenues(artist, ctx);
    stats += venueStats;

    ctx?.WriteLine("Processing Shows");
    var showStats = await ProcessShows(artist, src, ctx);
    stats += showStats;

    ctx?.WriteLine("Rebuilding");
    await RebuildShows(artist);
    await RebuildYears(artist);

    return stats;
}
/// <summary>
/// Downloads the JSON listing of show page files, fetches and processes every page whose
/// identifier is not already in <c>existingSetlistShows</c>, then updates tour dates (when
/// the artist has the tours feature) and rebuilds shows and years.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source used to build the listing and page URLs.</param>
/// <param name="ctx">Job context used for logging and the progress bar; may be null.</param>
/// <returns>The statistics object that was passed to every <c>ProcessPage</c> call.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    var stats = new ImportStats();

    await PreloadData(artist);

    var resp = await http.GetAsync(ShowPagesListingUrl(src));
    var showFilesResponse = await resp.Content.ReadAsStringAsync();
    var showFiles = JsonConvert.DeserializeObject<List<string>>(showFilesResponse);

    // The first 10 characters of the file name are used as the date; everything before the
    // last ".html" is the identifier.
    var files = showFiles
        .Select(f =>
        {
            var fileName = Path.GetFileName(f);

            return new FileMetaObject
            {
                DisplayDate = fileName.Substring(0, 10),
                Date = DateTime.Parse(fileName.Substring(0, 10)),
                FilePath = f,
                Identifier = fileName.Remove(fileName.LastIndexOf(".html", StringComparison.OrdinalIgnoreCase))
            };
        })
        .ToList();

    ctx?.WriteLine($"Checking {files.Count} html files");
    var prog = ctx?.WriteProgressBar();

    await files.AsyncForEachWithProgress(prog, async f =>
    {
        // Pages that already have a setlist show are not downloaded again.
        if (existingSetlistShows.ContainsKey(f.Identifier))
        {
            return;
        }

        var url = ShowPageUrl(src, f.FilePath);
        var pageResp = await http.GetAsync(url);
        var pageContents = await pageResp.Content.ReadAsStringAsync();

        // Fall back to "now" when the response carries no Last-Modified header.
        await ProcessPage(
            stats,
            artist,
            f,
            pageContents,
            pageResp.Content.Headers.LastModified?.UtcDateTime ?? DateTime.UtcNow,
            ctx
        );
    });

    if (artist.features.tours)
    {
        await UpdateTourStartEndDates(artist);
    }

    // ctx may be null (every other use in this method is null-conditional), so this call
    // must be guarded too.
    ctx?.WriteLine("Rebuilding shows and years");

    // update shows
    await RebuildShows(artist);

    // update years
    await RebuildYears(artist);

    return stats;
}
/// <summary>
/// Downloads the track list from <c>PanicIndexUrl()</c>, groups the tracks by show date and
/// then by source name, processes each source as a show, and finally rebuilds shows and years.
/// A failure while processing one source is logged and does not stop the import.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source forwarded to <c>ProcessShow</c>.</param>
/// <param name="ctx">Job context used for logging and the progress bar; may be null.</param>
/// <returns>The statistics object that was passed to every <c>ProcessShow</c> call.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    var stats = new ImportStats();

    await PreloadData(artist);

    var json = await FetchUrl(PanicIndexUrl(), ctx);
    var allTracks = JsonConvert.DeserializeObject<List<PanicStream.PanicStreamTrack>>(json);

    // tracks without a source name cannot be assigned to a source
    var namedTracks = allTracks.Where(track => track.SourceName != null);
    var byDate = namedTracks.GroupBy(track => track.ShowDate);

    var tracksByShow = byDate
        .Select(dateGroup => new
        {
            ShowDate = dateGroup.Key,
            Sources = dateGroup
                .GroupBy(track => track.SourceName)
                .Select(sourceGroup => new
                {
                    SourceName = sourceGroup.Key,
                    Tracks = sourceGroup.ToList()
                })
        })
        .ToList();

    ctx?.WriteLine($"Found {tracksByShow.Count} shows");
    var progressBar = ctx?.WriteProgressBar();

    await tracksByShow.AsyncForEachWithProgress(progressBar, async showGroup =>
    {
        foreach (var source in showGroup.Sources)
        {
            try
            {
                await ProcessShow(stats, artist, src, showGroup.ShowDate, source.SourceName, source.Tracks, ctx);
            }
            catch (Exception e)
            {
                // log everything needed to reproduce the failure and move on to the next source
                ctx?.WriteLine("EXCEPTION: " + e.Message);
                ctx?.WriteLine("Source name: " + source.SourceName);
                ctx?.WriteLine(e.ToString());
                ctx?.WriteLine(JsonConvert.SerializeObject(source));
            }
        }
    });

    ctx?.WriteLine("Rebuilding shows...");
    await RebuildShows(artist);

    ctx?.WriteLine("Rebuilding years...");
    await RebuildYears(artist);

    return stats;
}
/// <summary>
/// Loads all sources of the artist ordered by display date, runs <c>ProcessSource</c> on
/// each of them, and then rebuilds shows and years.
/// </summary>
/// <param name="artist">Artist whose sources are processed.</param>
/// <param name="src">Upstream source forwarded to <c>ProcessSource</c>.</param>
/// <param name="ctx">Job context used for logging and the progress bar; may be null.</param>
/// <returns>The accumulated statistics of all processed sources.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    var stats = new ImportStats();

    var allSources = await _sourceService.AllForArtist(artist);
    var shows = allSources
        .OrderBy(source => source.display_date)
        .ToList();

    var progressBar = ctx?.WriteProgressBar();
    ctx?.WriteLine($"Processing {shows.Count} shows");

    await shows.ForEachAsync(
        async dbSource =>
        {
            stats += await ProcessSource(artist, src, dbSource, ctx);
        },
        progressBar,
        1
    );

    ctx?.WriteLine("Rebuilding...");
    await RebuildShows(artist);
    await RebuildYears(artist);

    return stats;
}
/// <summary>
/// Processes every ".html" file found in <c>ShowFilesDirectory</c> as a show page and then,
/// when the artist has the tours feature, updates tour start/end dates.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source configuration; not read by this implementation.</param>
/// <param name="ctx">Job context used for logging and the progress bar; may be null.</param>
/// <returns>The statistics object that was passed to every <c>ProcessPage</c> call.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    // The first 10 characters of the file name are used as the date; everything before the
    // last ".html" is the identifier.
    FileMetaObject DescribeFile(string path)
    {
        var fileName = Path.GetFileName(path);
        var datePart = fileName.Substring(0, 10);
        var extensionStart = fileName.LastIndexOf(".html", StringComparison.OrdinalIgnoreCase);

        return new FileMetaObject
        {
            DisplayDate = datePart,
            Date = DateTime.Parse(datePart),
            FilePath = path,
            Identifier = fileName.Remove(extensionStart)
        };
    }

    var stats = new ImportStats();

    await PreloadData(artist);

    var htmlPaths = Directory.EnumerateFiles(ShowFilesDirectory)
        .Where(path => path.EndsWith(".html", StringComparison.OrdinalIgnoreCase));
    var files = htmlPaths.Select(DescribeFile).ToList();

    ctx?.WriteLine($"Checking {files.Count} html files");
    var progressBar = ctx?.WriteProgressBar();

    await files.AsyncForEachWithProgress(progressBar, async file =>
    {
        var pageContents = File.ReadAllText(file.FilePath);
        await ProcessPage(stats, artist, file, pageContents, ctx);
    });

    if (artist.features.tours)
    {
        await UpdateTourStartEndDates(artist);
    }

    return stats;
}
/// <summary>
/// Imports data for the artist by delegating to <c>ImportSpecificShowDataForArtist</c> with
/// a null show identifier.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source forwarded to the delegate.</param>
/// <param name="ctx">Job context forwarded to the delegate.</param>
/// <returns>The statistics returned by the delegate.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    const string noSpecificShow = null;

    var stats = await ImportSpecificShowDataForArtist(artist, src, noSpecificShow, ctx);
    return stats;
}
/// <summary>
/// Imports data for <paramref name="artist"/> from the upstream source <paramref name="src"/>.
/// </summary>
/// <param name="artist">Artist to import data for.</param>
/// <param name="src">Upstream source configuration for the artist.</param>
/// <param name="ctx">Job context for logging/progress. NOTE(review): presumably may be null — confirm against callers.</param>
/// <returns>A task producing the <see cref="ImportStats"/> of the import run.</returns>
public abstract Task <ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx);
/// <summary>
/// Preloads the artist's data, requests the search URL for the artist and hands the response
/// to <c>ProcessIdentifiers</c> together with the requested show identifier.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source used to build the search URL.</param>
/// <param name="showIdentifier">Identifier forwarded to <c>ProcessIdentifiers</c>.</param>
/// <param name="ctx">Job context used for logging; may be null.</param>
/// <returns>The statistics returned by <c>ProcessIdentifiers</c>.</returns>
public override async Task<ImportStats> ImportSpecificShowDataForArtist(Artist artist, ArtistUpstreamSource src, string showIdentifier, PerformContext ctx)
{
    await PreloadData(artist);

    var url = SearchUrlForArtist(artist, src);
    ctx?.WriteLine($"All shows URL: {url}");

    var searchResponse = await this.http.GetAsync(url);
    var stats = await ProcessIdentifiers(artist, searchResponse, src, showIdentifier, ctx);

    return stats;
}
/// <summary>
/// Refreshes one source from phish.net: updates the rating fields when the vote count
/// changed, replaces reviews and description when the review count changed, saves the
/// source if anything changed, and adds the phish.net link.
/// </summary>
/// <param name="artist">Artist the source belongs to; not read by this method.</param>
/// <param name="src">Upstream source whose id is recorded on the link.</param>
/// <param name="dbSource">Source to refresh; modified in place.</param>
/// <param name="ctx">Job context forwarded to the phish.net helpers.</param>
/// <returns>Statistics for this source.</returns>
private async Task<ImportStats> ProcessSource(Artist artist, ArtistUpstreamSource src, Source dbSource, PerformContext ctx)
{
    var stats = new ImportStats();

    var scraped = await ScrapePhishNetForSource(dbSource, ctx);

    var needsSave = false;

    if (dbSource.num_ratings != scraped.RatingVotesCast)
    {
        dbSource.num_ratings = scraped.RatingVotesCast;
        dbSource.avg_rating = scraped.RatingAverage * 2.0;

        needsSave = true;
    }

    if (dbSource.num_reviews != scraped.NumberOfReviewsWritten)
    {
        // start both API requests before waiting so they run concurrently
        var reviewsTask = GetPhishNetApiReviews(dbSource, ctx);
        var setlistTask = GetPhishNetApiSetlist(dbSource, ctx);

        await Task.WhenAll(reviewsTask, setlistTask);

        var apiReviews = await reviewsTask;
        var dbReviews = apiReviews
            .Select(rev => new SourceReview()
            {
                rating = null,
                title = null,
                review = rev.review,
                author = rev.author,
                updated_at = DateTimeOffset.FromUnixTimeSeconds(rev.tstamp).UtcDateTime
            })
            .ToList();

        var apiSetlist = await setlistTask;

        dbSource.num_reviews = dbReviews.Count;
        dbSource.description = apiSetlist.setlistnotes + "\n\n\n" + apiSetlist.setlistdata;

        needsSave = true;

        await ReplaceSourceReviews(stats, dbSource, dbReviews);
    }

    if (needsSave)
    {
        await _sourceService.Save(dbSource);
        stats.Updated++;
    }

    var phishNetLink = new Link
    {
        source_id = dbSource.id,
        for_ratings = true,
        for_source = false,
        for_reviews = true,
        upstream_source_id = src.upstream_source_id,
        url = PhishNetUrlForSource(dbSource),
        label = "View on phish.net"
    };

    stats.Created += (await linkService.AddLinksForSource(dbSource, new[] { phishNetLink })).Count();

    return stats;
}
/// <summary>
/// Saves the source for a show, creates one set per distinct set identifier found in the
/// show's tracks, inserts all tracks into their sets, and then processes the matching
/// setlist show.
/// </summary>
/// <param name="stats">Statistics object updated with the number of created tracks.</param>
/// <param name="artist">Artist the show belongs to.</param>
/// <param name="fullShow">Upstream show including its tags and tracks.</param>
/// <param name="src">Upstream source forwarded to <c>ProcessSetlistShow</c>.</param>
/// <param name="dbSource">Source to save; replaced by the saved instance.</param>
/// <param name="ctx">Job context; not read by this method.</param>
/// <returns>The saved source.</returns>
private async Task<Source> ProcessShow(ImportStats stats, Artist artist, PhishinShow fullShow, ArtistUpstreamSource src, Source dbSource, PerformContext ctx)
{
    dbSource.has_jamcharts = fullShow.tags.Any(tag => tag.name == "Jamcharts");
    dbSource = await _sourceService.Save(dbSource);

    // one SourceSet per distinct set identifier, keyed by that identifier
    var sets = new Dictionary<string, SourceSet>();
    foreach (var track in fullShow.tracks)
    {
        if (sets.GetValue(track.set) != null)
        {
            continue;
        }

        var newSet = new SourceSet()
        {
            source_id = dbSource.id,
            index = SetIndexForIdentifier(track.set),
            name = track.set_name,
            is_encore = track.set[0] == 'E',
            updated_at = dbSource.updated_at
        };

        // this needs to be set after loading from the db
        newSet.tracks = new List<SourceTrack>();

        sets[track.set] = newSet;
    }

    var savedSets = await _sourceSetService.UpdateAll(dbSource, sets.Values);
    var setsByIndex = savedSets
        .GroupBy(savedSet => savedSet.index)
        .ToDictionary(group => group.Key, group => group.Single());

    // the saved sets start with fresh, empty track lists
    foreach (var savedSet in setsByIndex.Values)
    {
        savedSet.tracks = new List<SourceTrack>();
    }

    foreach (var track in fullShow.tracks)
    {
        var owningSet = setsByIndex[SetIndexForIdentifier(track.set)];

        var dbTrack = new SourceTrack()
        {
            source_set_id = owningSet.id,
            source_id = dbSource.id,
            title = track.title,
            duration = track.duration / 1000,
            track_position = track.position,
            slug = SlugifyTrack(track.title),
            mp3_url = track.mp3.Replace("http:", "https:"),
            updated_at = dbSource.updated_at,
            artist_id = artist.id
        };

        owningSet.tracks.Add(dbTrack);
    }

    stats.Created += (await _sourceTrackService.InsertAll(dbSource, setsByIndex.SelectMany(entry => entry.Value.tracks))).Count();

    await ProcessSetlistShow(stats, fullShow, artist, src, dbSource, sets);

    ResetTrackSlugCounts();

    return dbSource;
}
/// <summary>
/// Builds the URL of a single show page file for the given upstream source.
/// </summary>
/// <param name="src">Upstream source whose identifier forms part of the path.</param>
/// <param name="filename">File name of the show page, appended as-is.</param>
/// <returns>The show page URL.</returns>
string ShowPageUrl(ArtistUpstreamSource src, string filename)
    => $"https://phish.alecgorge.com/relisten/{src.upstream_identifier}/show_pages/{filename}";
/// <summary>
/// Performs no import work; returns an already-completed task holding a new, empty
/// <see cref="ImportStats"/>.
/// </summary>
/// <param name="artist">Not read.</param>
/// <param name="src">Not read.</param>
/// <param name="showIdentifier">Not read.</param>
/// <param name="ctx">Not read.</param>
/// <returns>A completed task with new statistics.</returns>
public override Task<ImportStats> ImportSpecificShowDataForArtist(Artist artist, ArtistUpstreamSource src, string showIdentifier, PerformContext ctx)
{
    var emptyStats = new ImportStats();
    return Task.FromResult(emptyStats);
}
/// <summary>
/// Imports or updates a single archive.org item: resolves the venue (for artists with
/// per-source venues), saves the source, replaces its reviews, adds its links, and
/// recreates its single set and the tracks built from the item's VBR MP3 files.
/// </summary>
/// <param name="artist">Artist the item belongs to.</param>
/// <param name="dbSource">Existing source for the item, or null when the item is new.</param>
/// <param name="searchDoc">Search result describing the item.</param>
/// <param name="detailsRoot">Metadata document of the item (files, reviews, metadata).</param>
/// <param name="upstreamSrc">Upstream source used when building the links.</param>
/// <param name="properDisplayDate">Display date forwarded to <c>CreateSourceForMetadata</c>.</param>
/// <param name="ctx">Job context used for logging; may be null.</param>
/// <returns>Statistics for this item.</returns>
/// <exception cref="NoVBRMp3FilesException">The item has no file with format "VBR MP3".</exception>
private async Task<ImportStats> ImportSingleIdentifier(
    Artist artist,
    Source dbSource,
    Relisten.Vendor.ArchiveOrg.SearchDoc searchDoc,
    Relisten.Vendor.ArchiveOrg.Metadata.RootObject detailsRoot,
    ArtistUpstreamSource upstreamSrc,
    string properDisplayDate,
    PerformContext ctx
)
{
    var stats = new ImportStats();

    var isUpdate = dbSource != null;

    var meta = detailsRoot.metadata;

    var mp3Files = detailsRoot.files?.Where(file => file?.format == "VBR MP3");
    var flacFiles = detailsRoot.files?.Where(file => file?.format == "Flac" || file?.format == "24bit Flac");

    if (mp3Files == null || !mp3Files.Any())
    {
        ctx?.WriteLine("\tNo VBR MP3 files found for {0}", searchDoc.identifier);

        throw new NoVBRMp3FilesException();
    }

    var dbReviews = detailsRoot.reviews == null
        ? new List<SourceReview>()
        : detailsRoot.reviews.Select(rev =>
        {
            return new SourceReview()
            {
                rating = rev.stars * 2, // scale to out of 10
                title = rev.reviewtitle,
                review = rev.reviewbody?.Replace("Â", "") ?? "",
                author = rev.reviewer,
                updated_at = rev.reviewdate
            };
        }).ToList();

    // Only artists with per-source venues get a venue; for everyone else this stays null.
    Venue dbVenue = null;
    if (artist.features.per_source_venues)
    {
        var venueName = String.IsNullOrEmpty(meta.venue) ? meta.coverage : meta.venue;

        if (String.IsNullOrEmpty(venueName))
        {
            venueName = "Unknown Venue";
        }

        var venueUpstreamId = venueName + (String.IsNullOrEmpty(meta.coverage) ? "blank coverage" : meta.coverage);

        dbVenue = await _venueService.ForUpstreamIdentifier(artist, venueUpstreamId);

        if (dbVenue == null)
        {
            dbVenue = await _venueService.Save(new Venue()
            {
                artist_id = artist.id,
                name = venueName,
                location = String.IsNullOrEmpty(meta.coverage) ? "Unknown Location" : meta.coverage,
                upstream_identifier = venueUpstreamId,
                slug = Slugify(venueName),
                updated_at = searchDoc._iguana_updated_at
            });
        }
    }

    if (isUpdate)
    {
        var src = CreateSourceForMetadata(artist, detailsRoot, searchDoc, properDisplayDate);
        src.id = dbSource.id;

        // dbVenue is null for artists without per-source venues; dereferencing it
        // unconditionally made every update for such artists throw.
        if (dbVenue != null)
        {
            src.venue_id = dbVenue.id;
        }

        dbSource = await _sourceService.Save(src);
        dbSource.venue = dbVenue;

        stats.Updated++;
        stats.Created += (await ReplaceSourceReviews(dbSource, dbReviews)).Count();
    }
    else
    {
        dbSource = await _sourceService.Save(CreateSourceForMetadata(artist, detailsRoot, searchDoc, properDisplayDate, dbVenue));
        stats.Created++;

        existingSources[dbSource.upstream_identifier] = dbSource;

        stats.Created += (await ReplaceSourceReviews(dbSource, dbReviews)).Count();
    }

    stats.Created += (await linkService.AddLinksForSource(dbSource, LinksForSource(artist, dbSource, upstreamSrc))).Count();

    var dbSet = (await _sourceSetService.UpdateAll(dbSource, new[] { CreateSetForSource(dbSource) })).First();
    stats.Created++;

    // flacFiles cannot be null here: it is null only when detailsRoot.files is null, and
    // that case already threw above. When several FLAC files share a name the first wins.
    var flacTracksByName = flacFiles.GroupBy(f => f.name).ToDictionary(g => g.Key, g => g.First());

    var dbTracks = CreateSourceTracksForFiles(artist, dbSource, meta, mp3Files, flacTracksByName, dbSet);

    stats.Created += (await _sourceTrackService.InsertAll(dbSource, dbTracks)).Count();

    ResetTrackSlugCounts();

    return stats;
}
/// <summary>
/// Builds the archive.org advanced-search URL for the collection named by the upstream
/// source's identifier, requesting JSON output.
/// </summary>
/// <param name="artist">Not read by this method.</param>
/// <param name="src">Upstream source whose identifier is used as the collection name.</param>
/// <returns>The search URL.</returns>
private string SearchUrlForArtist(Artist artist, ArtistUpstreamSource src)
    => $"http://archive.org/advancedsearch.php?q=collection%3A{src.upstream_identifier}&fl%5B%5D=date&fl%5B%5D=identifier&fl%5B%5D=year&fl%5B%5D=oai_updatedate&sort%5B%5D=year+asc&sort%5B%5D=&sort%5B%5D=&rows=9999999&page=1&output=json&save=yes";
/// <summary>
/// Preloads the artist's data, requests the search URL for the artist and hands the response
/// to <c>ProcessIdentifiers</c>.
/// </summary>
/// <param name="artist">Artist whose data is imported.</param>
/// <param name="src">Upstream source used to build the search URL.</param>
/// <param name="ctx">Job context forwarded to <c>ProcessIdentifiers</c>.</param>
/// <returns>The statistics returned by <c>ProcessIdentifiers</c>.</returns>
public override async Task<ImportStats> ImportDataForArtist(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    await PreloadData(artist);

    var searchUrl = SearchUrlForArtist(artist, src);
    var searchResponse = await this.http.GetAsync(searchUrl);

    var stats = await ProcessIdentifiers(artist, searchResponse, ctx);
    return stats;
}
/// <summary>
/// Imports data for <paramref name="artist"/> from the upstream source <paramref name="src"/>,
/// given a show identifier.
/// </summary>
/// <param name="artist">Artist to import data for.</param>
/// <param name="src">Upstream source configuration for the artist.</param>
/// <param name="showIdentifier">Upstream identifier of a show. NOTE(review): presumably null means "not restricted to one show" — confirm against the implementations.</param>
/// <param name="ctx">Job context for logging/progress. NOTE(review): presumably may be null — confirm against callers.</param>
/// <returns>A task producing the <see cref="ImportStats"/> of the import run.</returns>
public abstract Task <ImportStats> ImportSpecificShowDataForArtist(Artist artist, ArtistUpstreamSource src, string showIdentifier, PerformContext ctx);
/// <summary>
/// Builds the phantasytour.com shows API URL for one page of the band identified by the
/// upstream source. The upstream identifier must parse as an unsigned integer.
/// </summary>
/// <param name="src">Upstream source whose identifier is the numeric band id.</param>
/// <param name="page">Page number to request.</param>
/// <returns>The page URL.</returns>
string UrlForArtist(ArtistUpstreamSource src, uint page)
{
    var bandId = uint.Parse(src.upstream_identifier);

    return $"https://www.phantasytour.com/api/bands/{bandId}/shows?pageSize={ITEMS_PER_PAGE}&page={page}";
}
/// <summary>
/// Pages through the "shows" API and, for every show, creates the source when it is unknown
/// or refreshes it when the upstream copy is newer. Each show is handled inside its own
/// transaction scope; a failing show is logged and the import continues.
/// </summary>
/// <param name="artist">Artist the shows belong to.</param>
/// <param name="src">Upstream source recorded on created links and forwarded to <c>ProcessShow</c>.</param>
/// <param name="ctx">Job context used for logging and the progress bar; may be null.</param>
/// <returns>Statistics of created and updated records.</returns>
public async Task<ImportStats> ProcessShows(Artist artist, ArtistUpstreamSource src, PerformContext ctx)
{
    var stats = new ImportStats();

    // provisional page count; replaced by the real total after each response
    var totalPages = 80;

    var progressBar = ctx?.WriteProgressBar();

    var showsPerPage = 20;

    var pageNumber = 1;
    while (pageNumber <= totalPages)
    {
        var response = await PhishinApiRequest<IEnumerable<PhishinShow>>("shows", ctx, "date", per_page: showsPerPage, page: pageNumber);
        totalPages = response.total_pages;

        var pageShows = response.data.ToList();

        for (var position = 0; position < pageShows.Count; position++)
        {
            var show = pageShows[position];

            try
            {
                await ImportShow(show);
            }
            catch (Exception e)
            {
                ctx?.WriteLine($"Error processing show (but continuing): {show.date} (id: {show.id})");
                ctx?.LogException(e);
            }

            progressBar?.SetValue(100.0 * ((pageNumber - 1) * showsPerPage + position + 1) / response.total_entries);
        }

        pageNumber++;
    }

    return stats;

    // Creates or refreshes the source of one show inside a transaction scope.
    async Task ImportShow(PhishinShow show)
    {
        using (var scope = new TransactionScope(TransactionScopeAsyncFlowOption.Enabled))
        {
            var known = existingSources.GetValue(show.id.ToString());

            if (known == null)
            {
                var freshSource = new Source()
                {
                    updated_at = show.updated_at,
                    artist_id = artist.id,
                    venue_id = existingVenues[show.venue.id.ToString()].id,
                    display_date = show.date,
                    upstream_identifier = show.id.ToString(),
                    is_soundboard = show.sbd,
                    is_remaster = show.remastered,
                    description = "",
                    taper_notes = show.taper_notes
                };

                known = await ProcessShow(stats, artist, show, src, freshSource, ctx);

                existingSources[known.upstream_identifier] = known;

                stats.Created++;

                stats.Created += (await linkService.AddLinksForSource(known, new[]
                {
                    new Link
                    {
                        source_id = known.id,
                        for_ratings = false,
                        for_source = true,
                        for_reviews = false,
                        upstream_source_id = src.upstream_source_id,
                        url = $"http://phish.in/{known.display_date}",
                        label = "View on phish.in"
                    }
                })).Count();
            }
            else if (show.updated_at > known.updated_at)
            {
                // upstream copy is newer: overwrite the stored fields and re-import
                known.updated_at = show.updated_at;
                known.venue_id = existingVenues[show.venue.id.ToString()].id;
                known.display_date = show.date;
                known.upstream_identifier = show.id.ToString();
                known.is_soundboard = show.sbd;
                known.is_remaster = show.remastered;
                known.description = "";
                known.taper_notes = show.taper_notes;

                known = await ProcessShow(stats, artist, show, src, known, ctx);

                existingSources[known.upstream_identifier] = known;

                stats.Updated++;
            }

            scope.Complete();
        }
    }
}
/// <summary>
/// Creates the setlist show for an upstream show when none exists for its date, or refreshes
/// it when the upstream show is newer; in both cases the show's song plays are updated
/// afterwards. Does nothing when the stored setlist show is up to date.
/// </summary>
/// <param name="stats">Statistics object updated with created/updated counts.</param>
/// <param name="show">Upstream show.</param>
/// <param name="artist">Artist the show belongs to.</param>
/// <param name="src">Not read by this method.</param>
/// <param name="dbSource">Source whose <c>updated_at</c> is copied onto the setlist show.</param>
/// <param name="sets">Not read by this method.</param>
private async Task ProcessSetlistShow(ImportStats stats, PhishinShow show, Artist artist, ArtistUpstreamSource src, Source dbSource, IDictionary<string, SourceSet> sets)
{
    var dbShow = existingSetlistShows.GetValue(show.date);

    if (dbShow == null)
    {
        var newShow = new SetlistShow()
        {
            artist_id = artist.id,
            upstream_identifier = show.date,
            date = DateTime.Parse(show.date),
            venue_id = existingVenues[show.venue.id.ToString()].id,
            tour_id = existingTours[show.tour_id.ToString()].id,
            // years without a mapping fall back to the "1983-1987" era
            era_id = yearToEraMapping.GetValue(show.date.Substring(0, 4), yearToEraMapping["1983-1987"]).id,
            updated_at = dbSource.updated_at
        };

        dbShow = await _setlistShowService.Save(newShow);

        stats.Created++;
    }
    else if (show.updated_at > dbShow.updated_at)
    {
        dbShow.date = DateTime.Parse(show.date);
        dbShow.venue_id = existingVenues[show.venue.id.ToString()].id;
        dbShow.tour_id = existingTours[show.tour_id.ToString()].id;
        // years without a mapping fall back to the "1983-1987" era
        dbShow.era_id = yearToEraMapping.GetValue(show.date.Substring(0, 4), yearToEraMapping["1983-1987"]).id;
        dbShow.updated_at = dbSource.updated_at;

        dbShow = await _setlistShowService.Save(dbShow);

        stats.Updated++;
    }
    else
    {
        // stored setlist show is current; song plays stay untouched
        return;
    }

    // Collect the known songs of all tracks, keeping the first occurrence per upstream id.
    var knownSongs = show.tracks
        .SelectMany(track => track.song_ids.Select(songId => existingSetlistSongs.GetValue(songId.ToString())))
        .Where(song => song != null);

    var dbSongs = knownSongs
        .GroupBy(song => song.upstream_identifier)
        .Select(sameSong => sameSong.First())
        .ToList();

    stats += await _setlistShowService.UpdateSongPlays(dbShow, dbSongs);
}
/// <summary>
/// Builds the URL of the JSON listing of show page files for the given upstream source.
/// </summary>
/// <param name="src">Upstream source whose identifier forms part of the path.</param>
/// <returns>The listing URL.</returns>
string ShowPagesListingUrl(ArtistUpstreamSource src)
    => $"https://phish.alecgorge.com/relisten/{src.upstream_identifier}/show_pages.json";
/// <summary>
/// Imports one source (a named group of tracks for a show date): saves the source, adds the
/// panicstream.com link for new sources, writes the single default set, and inserts one
/// track per upstream file ordered by file name.
/// </summary>
/// <param name="stats">Statistics object updated with created/updated counts.</param>
/// <param name="artist">Artist the source belongs to.</param>
/// <param name="upstreamSrc">Upstream source whose id is recorded on the link.</param>
/// <param name="showDate">Display date of the show.</param>
/// <param name="sourceName">Name of the source; used as the upstream identifier.</param>
/// <param name="sourceTracks">Upstream tracks of the source.</param>
/// <param name="ctx">Job context; not read by this method.</param>
private async Task ProcessShow(ImportStats stats, Artist artist, ArtistUpstreamSource upstreamSrc, string showDate, string sourceName, IList<PanicStream.PanicStreamTrack> sourceTracks, PerformContext ctx)
{
    var upstreamId = sourceName;
    var dbSource = existingSources.GetValue(upstreamId);

    // The newest modification time among the tracks is the source's upstream timestamp.
    // NOTE(review): Max throws InvalidOperationException when no track has a parsed
    // modification time; the caller in this file logs exceptions per source — confirm that
    // is the intended handling.
    var panicUpdatedAt = sourceTracks
        .Where(t => t.System.ParsedModificationTime.HasValue)
        .Max(t => t.System.ParsedModificationTime.Value);

    // Skip only when the stored copy is at least as new as the upstream one. The previous
    // test (<=) was inverted: it skipped every source whose upstream copy was newer, so
    // changed sources were never re-imported.
    if (dbSource != null && dbSource.updated_at >= panicUpdatedAt)
    {
        return;
    }

    var isUpdate = dbSource != null;

    var src = new Source
    {
        artist_id = artist.id,
        display_date = showDate,
        is_soundboard = false,
        is_remaster = false,
        has_jamcharts = false,
        avg_rating = 0,
        num_reviews = 0,
        avg_rating_weighted = 0,
        upstream_identifier = upstreamId,
        taper_notes = "",
        updated_at = panicUpdatedAt
    };

    if (isUpdate)
    {
        src.id = dbSource.id;
    }

    dbSource = await _sourceService.Save(src);

    existingSources[dbSource.upstream_identifier] = dbSource;

    if (isUpdate)
    {
        stats.Updated++;
    }
    else
    {
        stats.Created++;

        stats.Created += (await linkService.AddLinksForSource(dbSource, new[]
        {
            new Link
            {
                source_id = dbSource.id,
                for_ratings = false,
                for_source = true,
                for_reviews = false,
                upstream_source_id = upstreamSrc.upstream_source_id,
                url = $"https://www.panicstream.com/vault/widespread-panic/{dbSource.display_date.Substring(0, 4)}-streams/",
                label = "View show page on panicstream.com"
            }
        })).Count();
    }

    var dbSet = await _sourceSetService.Update(dbSource, new SourceSet
    {
        source_id = dbSource.id,
        index = 0,
        is_encore = false,
        name = "Default Set",
        updated_at = panicUpdatedAt
    });
    stats.Created++;

    // Start slug generation for this source from a clean state.
    ResetTrackSlugCounts();

    // Materialize the tracks exactly once. The projection has side effects (it advances
    // trackIndex and calls SlugifyTrack), so leaving it deferred re-ran it on every
    // enumeration: once for InsertAll and once more for the count.
    var trackIndex = 0;
    var mp3s = sourceTracks
        .OrderBy(t => t.FileName)
        .Select(t =>
        {
            var trackName = t.FileName
                .Replace(".mp3", "")
                .Replace(".MP3", "")
                .Replace(".M4A", "")
                .Replace(".m4a", "")
                .Trim();

            // Strip a "wsp<date>d<disc>t<track>." prefix or a leading track number with an
            // optional dash and optional surrounding spaces. The pattern is written on one
            // line; a line break inside the verbatim literal would become part of the regex.
            var cleanedTrackName = Regex.Replace(trackName, @"(wsp[0-9-]+d\d+t\d+\.)|(^\d+ ?-? ?)", "").Trim();

            // Keep the uncleaned name when cleaning would leave nothing.
            if (cleanedTrackName.Length != 0)
            {
                trackName = cleanedTrackName;
            }

            trackIndex++;

            return new SourceTrack
            {
                source_id = dbSource.id,
                source_set_id = dbSet.id,
                track_position = trackIndex,
                duration = (int?)t.Composite?.CalculatedDuration.TotalSeconds ?? 0,
                title = trackName,
                slug = SlugifyTrack(trackName),
                mp3_url = t.AbsoluteUrl(_configuration["PANIC_KEY"]),
                updated_at = panicUpdatedAt,
                artist_id = artist.id
            };
        })
        .ToList();

    await _sourceTrackService.InsertAll(dbSource, mp3s);
    stats.Created += mp3s.Count;
}
/// <summary>
/// Walks the archive.org search results and imports every item that is the targeted show,
/// is not yet in <c>existingSources</c>, or has review information older than the item's
/// index date. Afterwards removes sources that are no longer in the search results (or no
/// longer have MP3 files) and rebuilds shows and years.
/// </summary>
/// <param name="artist">Artist the items belong to.</param>
/// <param name="res">HTTP response containing the search result JSON.</param>
/// <param name="src">Upstream source forwarded to <c>ImportSingleIdentifier</c>.</param>
/// <param name="showIdentifier">When not null, only the item with this identifier is considered for import.</param>
/// <param name="ctx">Job context used for logging and the progress bar; may be null.</param>
/// <returns>Statistics of created, updated and removed records.</returns>
private async Task<ImportStats> ProcessIdentifiers(Artist artist, HttpResponseMessage res, ArtistUpstreamSource src, string showIdentifier, PerformContext ctx)
{
    var stats = new ImportStats();

    var json = await res.Content.ReadAsStringAsync();
    var root = JsonConvert.DeserializeObject<Relisten.Vendor.ArchiveOrg.SearchRootObject>(
        json.Replace("\"0000-01-01T00:00:00Z\"", "null") /* serious...wtf archive */,
        new Relisten.Vendor.ArchiveOrg.TolerantArchiveDateTimeConverter()
    );

    ctx?.WriteLine($"Checking {root.response.docs.Count} archive.org results");

    var progressBar = ctx?.WriteProgressBar();

    var identifiersWithoutMP3s = new HashSet<string>();

    await root.response.docs.AsyncForEachWithProgress(progressBar, async doc =>
    {
        try
        {
            var isTargetedShow = doc.identifier == showIdentifier;

            // a targeted import ignores every other item
            if (showIdentifier != null && !isTargetedShow)
            {
                return;
            }

            var dbShow = existingSources.GetValue(doc.identifier);
            var reviewInformation = existingSourceReviewInformation.GetValue(doc.identifier);

            var isNew = dbShow == null;
            var reviewsAreStale = reviewInformation != null
                && doc._iguana_index_date > reviewInformation.review_max_updated_at;

            if (!(isTargetedShow || isNew || reviewsAreStale))
            {
                return;
            }

            ctx?.WriteLine("Pulling https://archive.org/metadata/{0}", doc.identifier);

            var detailRes = await http.GetAsync(DetailsUrlForIdentifier(doc.identifier));
            var detailsJson = await detailRes.Content.ReadAsStringAsync();
            var detailsRoot = JsonConvert.DeserializeObject<Relisten.Vendor.ArchiveOrg.Metadata.RootObject>(
                detailsJson,
                new Vendor.ArchiveOrg.TolerantStringConverter()
            );

            if (detailsRoot.is_dark == true)
            {
                ctx?.WriteLine("\tis_dark == true, skipping...");
                return;
            }

            var properDate = FixDisplayDate(detailsRoot.metadata);

            if (properDate == null)
            {
                ctx?.WriteLine("\tSkipped {0} because it has an invalid, unrecoverable date: {1}", doc.identifier, detailsRoot.metadata.date);
                return;
            }

            using (var scope = new TransactionScope(TransactionScopeAsyncFlowOption.Enabled))
            {
                try
                {
                    stats += await ImportSingleIdentifier(artist, dbShow, doc, detailsRoot, src, properDate, ctx);
                }
                catch (NoVBRMp3FilesException)
                {
                    // remembered so the item is treated as gone when deciding what to remove
                    identifiersWithoutMP3s.Add(doc.identifier);
                }

                scope.Complete();
            }
        }
        catch (Exception e)
        {
            ctx?.WriteLine($"Error processing {doc.identifier}:");
            ctx?.LogException(e);

            var telementry = new TelemetryClient();

            telementry.TrackException(e, new Dictionary<string, string>
            {
                { "upstream_identifier", doc.identifier }
            });
        }
    });

    // we want to keep all the shows from this import--aside from ones that no longer have MP3s
    var showsToKeep = root.response.docs
        .Select(d => d.identifier)
        .Except(identifiersWithoutMP3s);

    // find sources that no longer exist
    var deletedSourceUpstreamIdentifiers = existingSources
        .Select(entry => entry.Key)
        .Except(showsToKeep)
        .ToList();

    ctx?.WriteLine($"Removing {deletedSourceUpstreamIdentifiers.Count} sources " +
        $"that are in the database but no longer on Archive.org: {string.Join(',', deletedSourceUpstreamIdentifiers)}");
    stats.Removed += await _sourceService.RemoveSourcesWithUpstreamIdentifiers(deletedSourceUpstreamIdentifiers);

    ctx?.WriteLine("Rebuilding shows...");
    await RebuildShows(artist);
    ctx?.WriteLine("--> rebuilt shows!");

    ctx?.WriteLine("Rebuilding years...");
    await RebuildYears(artist);
    ctx?.WriteLine("--> rebuilt years!");

    return stats;
}