/// <summary>
/// Fetches the streams currently offered by the configured stream info sources and writes a csv that maps
/// each (service, anime name, URL) to a MAL anime id. MAL anime ids (or n/a) already present in the input csv
/// are carried over; streams with no matching input row get a blank MAL id column for a human operator to fill in.
/// If <c>args.InputFile</c> is null, the csv is created fresh, as if the input csv were empty.
/// </summary>
/// <param name="args">
/// Command line arguments. <c>InputFile</c>, <c>OutputFile</c> and <c>GeckoDriverDirectory</c> are read here,
/// and the whole object is passed on to <c>GetStreamInfoSources</c>.
/// </param>
/// <exception cref="TimeoutException">
/// Getting the streams was canceled. The fetch runs under a cancellation source with a 5 minute time limit.
/// </exception>
static void CreateCsv(CommandLineArgs args)
{
    string inputFile = args.InputFile;
    string outputFile = args.OutputFile;

    // Read in existing mapping from the input csv. If no input csv was specified, treat it as an empty csv.
    List<CsvRow> inputCsvRows = inputFile == null ? new List<CsvRow>() : LoadCsv(inputFile);

    // Index existing csv rows by the streaming service and the anime name used by the streaming service.
    Dictionary<StreamingService, Dictionary<string, List<CsvRow>>> rowsByServiceAndAnime =
        GroupByServiceAndAnime(inputCsvRows, row => row.Service, row => row.AnimeName);

    List<AnimeStreamInfo> streams = new List<AnimeStreamInfo>();

    using (WebClient webClient = new WebClient())
    // Use Firefox driver with headless Firefox for Funimation to get around Incapsula.
    // Use Firefox and not Chrome because javascript cannot be disabled with headless Chrome at this time.
    // Javascript slows the process down and is not needed currently.
    using (FirefoxDriverWebClient funimationWebClient = new FirefoxDriverWebClient(args.GeckoDriverDirectory))
    {
        List<IAnimeStreamInfoSource> streamInfoSources = GetStreamInfoSources(args, webClient, funimationWebClient);

        using (CancellationTokenSource cancellation = new CancellationTokenSource(TimeSpan.FromMinutes(5)))
        {
            CancellableAsyncFunc<ICollection<AnimeStreamInfo>>[] streamFuncs = streamInfoSources
                .Select(source => new CancellableAsyncFunc<ICollection<AnimeStreamInfo>>(
                    () => source.GetAnimeStreamInfoAsync(cancellation.Token), cancellation))
                .ToArray();

            CancellableTask<ICollection<AnimeStreamInfo>>[] streamTasks =
                AsyncUtils.StartTasksEnsureExceptionsWrapped(streamFuncs);

            try
            {
                AsyncUtils.WhenAllCancelOnFirstExceptionDontWaitForCancellations(streamTasks).GetAwaiter().GetResult();
            }
            catch (OperationCanceledException ex)
            {
                // TimeoutException derives from Exception, so callers catching Exception still see this.
                throw new TimeoutException("Getting streams timed out.", ex);
            }

            // Only reached when the wait above did not throw, so the results are read after the tasks were awaited.
            foreach (CancellableTask<ICollection<AnimeStreamInfo>> streamTask in streamTasks)
            {
                streams.AddRange(streamTask.Task.Result);
            }
        }
    }

    // Index the streams we just got the same way, so the Amazon matching logic can count URLs per (service, title).
    Dictionary<StreamingService, Dictionary<string, List<AnimeStreamInfo>>> streamsByServiceAndAnime =
        GroupByServiceAndAnime(streams, stream => stream.Service, stream => stream.AnimeName);

    Console.WriteLine("Writing out csv.");

    // Write a new csv mapping to the output file. If MAL anime ids or n/a was present in the input file for a certain
    // streaming service/anime name/URL combination, use them. Otherwise, leave the MAL anime id column blank
    // for a human operator to fill in.
    using (FileStream outputStream = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.Read))
    using (StreamWriter output = new StreamWriter(outputStream, Encoding.UTF8))
    {
        string header = "Service,Anime,URL,MAL ID (or n/a)";
        output.Write(header); // Newline gets written by the first row's data

        // NOTE(review): the string sort keys use the default string comparer, so the row order can depend on
        // the current culture. Left as is so that existing csv ordering does not change.
        foreach (AnimeStreamInfo streamInfo in streams
            .OrderBy(stream => stream.Service.ToString())
            .ThenBy(stream => stream.AnimeName)
            .ThenBy(stream => stream.Url))
        {
            List<CsvRow> existingCsvRows =
                FindExistingCsvRows(streamInfo, rowsByServiceAndAnime, streamsByServiceAndAnime);

            // The first three columns are the same for every row written for this stream.
            string quotedService = QuoteForCsv(streamInfo.Service.ToString());
            string quotedAnimeName = QuoteForCsv(streamInfo.AnimeName);
            string quotedUrl = QuoteForCsv(streamInfo.Url);

            if (existingCsvRows.Count > 0)
            {
                foreach (CsvRow existingRow in existingCsvRows)
                {
                    // not WriteLine() - this should be \r\n regardless of what platform this is run on per the CSV RFC
                    // Header row did not write a newline, so there won't be a blank line between header and first row.
                    output.Write("\r\n");
                    output.Write("{0},{1},{2},{3}", quotedService, quotedAnimeName, quotedUrl,
                        existingRow.MalAnimeId.ToString());
                }
            }
            else
            {
                output.Write("\r\n");

                // Notice the comma at the end - leave MAL anime id blank for a human operator to fill in.
                output.Write("{0},{1},{2},", quotedService, quotedAnimeName, quotedUrl);
            }
        }
    }
}

/// <summary>
/// Builds a two-level index of <paramref name="items"/>: first by streaming service, then by anime name.
/// Items that share both keys are kept in the order in which they were enumerated.
/// </summary>
/// <param name="items">Items to index.</param>
/// <param name="getService">Returns the streaming service of an item.</param>
/// <param name="getAnimeName">Returns the anime name of an item, as used by the streaming service.</param>
/// <returns>The items grouped by service and then by anime name.</returns>
static Dictionary<StreamingService, Dictionary<string, List<T>>> GroupByServiceAndAnime<T>(
    IEnumerable<T> items, Func<T, StreamingService> getService, Func<T, string> getAnimeName)
{
    Dictionary<StreamingService, Dictionary<string, List<T>>> index =
        new Dictionary<StreamingService, Dictionary<string, List<T>>>();

    foreach (T item in items)
    {
        StreamingService service = getService(item);
        string animeName = getAnimeName(item);

        Dictionary<string, List<T>> itemsByAnime;
        if (!index.TryGetValue(service, out itemsByAnime))
        {
            itemsByAnime = new Dictionary<string, List<T>>();
            index[service] = itemsByAnime;
        }

        List<T> itemsForAnime;
        if (!itemsByAnime.TryGetValue(animeName, out itemsForAnime))
        {
            itemsForAnime = new List<T>();
            itemsByAnime[animeName] = itemsForAnime;
        }

        itemsForAnime.Add(item);
    }

    return index;
}

/// <summary>
/// Finds the rows of the existing csv that correspond to <paramref name="streamInfo"/>.
/// </summary>
/// <param name="streamInfo">A stream that was just fetched.</param>
/// <param name="rowsByServiceAndAnime">Existing csv rows indexed by service and anime name.</param>
/// <param name="streamsByServiceAndAnime">
/// All fetched streams indexed by service and anime name. Must contain an entry for
/// <paramref name="streamInfo"/>'s service and anime name.
/// </param>
/// <returns>The matching existing csv rows, or an empty list if there are none.</returns>
static List<CsvRow> FindExistingCsvRows(
    AnimeStreamInfo streamInfo,
    Dictionary<StreamingService, Dictionary<string, List<CsvRow>>> rowsByServiceAndAnime,
    Dictionary<StreamingService, Dictionary<string, List<AnimeStreamInfo>>> streamsByServiceAndAnime)
{
    Dictionary<string, List<CsvRow>> rowsByAnime;
    List<CsvRow> rowsForThisServiceAndAnime;
    if (!rowsByServiceAndAnime.TryGetValue(streamInfo.Service, out rowsByAnime)
        || !rowsByAnime.TryGetValue(streamInfo.AnimeName, out rowsForThisServiceAndAnime))
    {
        return new List<CsvRow>();
    }

    if (streamInfo.Service != StreamingService.AmazonPrime)
    {
        return rowsForThisServiceAndAnime.Where(row => row.Url == streamInfo.Url).ToList();
    }

    // Amazon URLs look like https://www.amazon.com/Our-Eyes-Finally-Met-Anothers/dp/B06Y5WC21S
    // The "Our-Eyes-Finally-Met-Anothers" relates to an episode title.
    // The B06Y5WC21S is some sort of ID that seems to relate to the episode rather than the whole series.
    // For reasons unknown, the episode that represents the whole series can change, resulting in the URL changing.
    // This results in a fair amount of churn in the CSV, around 20 changes per week.
    // To avoid having to remap those streams to MAL IDs, use the following logic:
    // If amazon service, and only one URL present in existing CSV for this (service, title),
    // and only one url present in the streams that we just got, then consider the existing CSV rows
    // a match and use their MAL IDs.
    // Even if that is not the case, then only consider the ID at the end when matching URLs of streams
    // to existing CSV rows.
    bool onlyOneUrlInCsv = rowsForThisServiceAndAnime.GroupBy(row => row.Url).Count() == 1;
    if (onlyOneUrlInCsv
        && streamsByServiceAndAnime[streamInfo.Service][streamInfo.AnimeName].GroupBy(stream => stream.Url).Count() == 1)
    {
        return rowsForThisServiceAndAnime.ToList();
    }

    string amazonStreamID = GetAmazonIDFromUrl(streamInfo.Url);
    return rowsForThisServiceAndAnime.Where(row => GetAmazonIDFromUrl(row.Url) == amazonStreamID).ToList();
}