Exemple #1
0
        /// <summary>
        /// Writes a fresh stream-to-MAL-ID mapping CSV to <c>args.OutputFile</c>.
        /// Existing mappings are read from <c>args.InputFile</c>; when that is null the
        /// input is treated as an empty CSV. Every stream currently reported by the
        /// configured stream sources is written out. Streams that match a row of the
        /// input CSV reuse that row's MAL ID column; all other streams get a blank
        /// MAL ID column for a human operator to fill in.
        /// </summary>
        /// <param name="args">
        /// Parsed command line. This method reads <c>InputFile</c>, <c>OutputFile</c> and
        /// <c>GeckoDriverDirectory</c>, and also hands the whole object to
        /// <c>GetStreamInfoSources</c>.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown (wrapping the <see cref="OperationCanceledException"/>) when the stream
        /// sources are cancelled, which the five minute timeout triggers.
        /// </exception>
        static void CreateCsv(CommandLineArgs args)
        {
            string inputFile = args.InputFile;
            string outputFile = args.OutputFile;

            // Read in existing mapping from the input csv. If no input csv was specified, treat it as an empty csv.
            List<CsvRow> inputCsvRows = inputFile == null ? new List<CsvRow>() : LoadCsv(inputFile);

            // Index the existing rows by streaming service and by the anime name used by that service.
            // This is done before hitting the network so that a bad input csv fails fast.
            Dictionary<StreamingService, Dictionary<string, List<CsvRow>>> rowsByServiceAndAnime =
                GroupByServiceAndAnime<CsvRow>(inputCsvRows, row => row.Service, row => row.AnimeName);

            List<AnimeStreamInfo> streams = FetchAnimeStreams(args);

            Dictionary<StreamingService, Dictionary<string, List<AnimeStreamInfo>>> streamsByServiceAndAnime =
                GroupByServiceAndAnime<AnimeStreamInfo>(streams, stream => stream.Service, stream => stream.AnimeName);

            Console.WriteLine("Writing out csv.");

            WriteStreamsCsv(outputFile, streams, rowsByServiceAndAnime, streamsByServiceAndAnime);
        }

        /// <summary>
        /// Groups items into a two-level lookup: streaming service, then anime name, then the
        /// items sharing both, in their original relative order.
        /// </summary>
        private static Dictionary<StreamingService, Dictionary<string, List<T>>> GroupByServiceAndAnime<T>(
            IEnumerable<T> items, Func<T, StreamingService> getService, Func<T, string> getAnimeName)
        {
            Dictionary<StreamingService, Dictionary<string, List<T>>> index =
                new Dictionary<StreamingService, Dictionary<string, List<T>>>();

            foreach (T item in items)
            {
                StreamingService service = getService(item);

                // TryGetValue does the existence check and the fetch in a single hash lookup.
                Dictionary<string, List<T>> itemsByAnime;
                if (!index.TryGetValue(service, out itemsByAnime))
                {
                    itemsByAnime = new Dictionary<string, List<T>>();
                    index[service] = itemsByAnime;
                }

                string animeName = getAnimeName(item);

                List<T> itemsForAnime;
                if (!itemsByAnime.TryGetValue(animeName, out itemsForAnime))
                {
                    itemsForAnime = new List<T>();
                    itemsByAnime[animeName] = itemsForAnime;
                }

                itemsForAnime.Add(item);
            }

            return index;
        }

        /// <summary>
        /// Queries every stream source returned by <c>GetStreamInfoSources</c> concurrently and
        /// returns all of their streams in one list, in source order. The whole operation is
        /// bounded by a five minute cancellation timeout.
        /// </summary>
        private static List<AnimeStreamInfo> FetchAnimeStreams(CommandLineArgs args)
        {
            List<AnimeStreamInfo> streams = new List<AnimeStreamInfo>();

            // Use Firefox driver with headless Firefox for Funimation to get around Incapsula.
            // Use Firefox and not Chrome because javascript cannot be disabled with headless Chrome at this time.
            // Javascript slows the process down and is not needed currently.
            using (WebClient webClient = new WebClient())
            using (FirefoxDriverWebClient funimationWebClient = new FirefoxDriverWebClient(args.GeckoDriverDirectory))
            {
                List<IAnimeStreamInfoSource> streamInfoSources = GetStreamInfoSources(args, webClient, funimationWebClient);

                using (CancellationTokenSource cancellation = new CancellationTokenSource(TimeSpan.FromMinutes(5)))
                {
                    CancellableAsyncFunc<ICollection<AnimeStreamInfo>>[] streamFuncs = streamInfoSources
                        .Select(source => new CancellableAsyncFunc<ICollection<AnimeStreamInfo>>(
                            () => source.GetAnimeStreamInfoAsync(cancellation.Token), cancellation))
                        .ToArray();

                    CancellableTask<ICollection<AnimeStreamInfo>>[] streamTasks = AsyncUtils.StartTasksEnsureExceptionsWrapped(streamFuncs);
                    try
                    {
                        // This is a synchronous entry point, so block here until every source is done.
                        AsyncUtils.WhenAllCancelOnFirstExceptionDontWaitForCancellations(streamTasks).GetAwaiter().GetResult();
                    }
                    catch (OperationCanceledException ex)
                    {
                        throw new Exception("Getting streams timed out.", ex);
                    }

                    foreach (CancellableTask<ICollection<AnimeStreamInfo>> streamTask in streamTasks)
                    {
                        streams.AddRange(streamTask.Task.Result);
                    }
                }
            }

            return streams;
        }

        /// <summary>
        /// Returns the rows of the input CSV that are considered to describe the same stream as
        /// <paramref name="streamInfo"/>, or an empty list when there are none.
        /// </summary>
        private static List<CsvRow> FindExistingCsvRows(
            AnimeStreamInfo streamInfo,
            Dictionary<StreamingService, Dictionary<string, List<CsvRow>>> rowsByServiceAndAnime,
            Dictionary<StreamingService, Dictionary<string, List<AnimeStreamInfo>>> streamsByServiceAndAnime)
        {
            Dictionary<string, List<CsvRow>> rowsByAnime;
            List<CsvRow> rowsForThisServiceAndAnime;
            if (!rowsByServiceAndAnime.TryGetValue(streamInfo.Service, out rowsByAnime)
                || !rowsByAnime.TryGetValue(streamInfo.AnimeName, out rowsForThisServiceAndAnime))
            {
                return new List<CsvRow>();
            }

            if (streamInfo.Service != StreamingService.AmazonPrime)
            {
                return rowsForThisServiceAndAnime.Where(row => row.Url == streamInfo.Url).ToList();
            }

            // Amazon URLs look like https://www.amazon.com/Our-Eyes-Finally-Met-Anothers/dp/B06Y5WC21S
            // The "Our-Eyes-Finally-Met-Anothers" relates to an episode title.
            // The B06Y5WC21S is some sort of ID that seems to relate to the episode rather than the whole series.
            // For reasons unknown, the episode that represents the whole series can change, resulting in the URL changing.
            // This results in a fair amount of churn in the CSV, around 20 changes per week.
            // To avoid having to remap those streams to MAL IDs, use the following logic:

            // If only one URL is present in the existing CSV for this (service, title), and only one URL is
            // present in the streams that we just got, then consider the existing CSV rows a match and use
            // their MAL IDs.
            if (rowsForThisServiceAndAnime.GroupBy(row => row.Url).Count() == 1
                && streamsByServiceAndAnime[streamInfo.Service][streamInfo.AnimeName].GroupBy(stream => stream.Url).Count() == 1)
            {
                return rowsForThisServiceAndAnime.ToList();
            }

            // Otherwise only consider the ID at the end of the URL when matching streams to existing CSV rows.
            string amazonStreamID = GetAmazonIDFromUrl(streamInfo.Url);
            return rowsForThisServiceAndAnime.Where(row => GetAmazonIDFromUrl(row.Url) == amazonStreamID).ToList();
        }

        /// <summary>
        /// Writes the mapping CSV (UTF-8, CRLF line breaks, no trailing line break) to
        /// <paramref name="outputFile"/>, overwriting any existing file. Streams are ordered by
        /// service name, anime name, then URL. If MAL anime ids or n/a were present in the input
        /// file for a stream, one row is written per matching input row; otherwise a single row
        /// with a blank MAL ID column is written.
        /// </summary>
        private static void WriteStreamsCsv(
            string outputFile,
            List<AnimeStreamInfo> streams,
            Dictionary<StreamingService, Dictionary<string, List<CsvRow>>> rowsByServiceAndAnime,
            Dictionary<StreamingService, Dictionary<string, List<AnimeStreamInfo>>> streamsByServiceAndAnime)
        {
            using (FileStream outputStream = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.Read))
            using (StreamWriter output = new StreamWriter(outputStream, Encoding.UTF8))
            {
                // No newline after the header: every data row writes its own leading line break,
                // so there is no blank line after the header and no trailing line break at the end.
                output.Write("Service,Anime,URL,MAL ID (or n/a)");

                IEnumerable<AnimeStreamInfo> orderedStreams = streams
                    .OrderBy(stream => stream.Service.ToString())
                    .ThenBy(stream => stream.AnimeName)
                    .ThenBy(stream => stream.Url);

                foreach (AnimeStreamInfo streamInfo in orderedStreams)
                {
                    List<CsvRow> existingCsvRows = FindExistingCsvRows(streamInfo, rowsByServiceAndAnime, streamsByServiceAndAnime);

                    // The first three columns are the same for every row written for this stream.
                    // Notice the comma at the end - the MAL anime id column follows it.
                    string rowPrefix = string.Format("{0},{1},{2},",
                        QuoteForCsv(streamInfo.Service.ToString()),
                        QuoteForCsv(streamInfo.AnimeName),
                        QuoteForCsv(streamInfo.Url));

                    if (existingCsvRows.Count > 0)
                    {
                        foreach (CsvRow existingRow in existingCsvRows)
                        {
                            // not WriteLine() - this should be \r\n regardless of what platform this is run on per the CSV RFC
                            output.Write("\r\n");
                            output.Write(rowPrefix);
                            output.Write(existingRow.MalAnimeId.ToString());
                        }
                    }
                    else
                    {
                        // Leave MAL anime id blank for a human operator to fill in.
                        output.Write("\r\n");
                        output.Write(rowPrefix);
                    }
                }
            }
        }