Beispiel #1
0
        /// <summary>
        /// Maps each video to a <c>VideoStored2</c> record carrying the channel's id and title plus a single
        /// shared UTC "updated" timestamp, appends the records to <paramref name="vidStore"/> (skipped when
        /// there are none) and logs how many were recorded, split into new vs. updated.
        /// </summary>
        /// <param name="c">Channel whose id and title are copied onto every stored record.</param>
        /// <param name="vids">Videos to convert and persist.</param>
        /// <param name="vidStore">Store the converted records are appended to.</param>
        /// <param name="uploadedFrom">
        /// Cut-off for the "new" count: when null every video counts as new, otherwise only those whose
        /// upload date is later than this value.
        /// </param>
        /// <param name="log">Logger passed to the store and used for the summary message.</param>
        static async Task SaveVids(ChannelStored2 c, IReadOnlyCollection<VideoItem> vids, JsonlStore<VideoStored2> vidStore, DateTime? uploadedFrom,
                                   ILogger log)
        {
            // One timestamp for the whole batch so all records written together share the same Updated value.
            var timestamp = DateTime.UtcNow;

            var records = new List<VideoStored2>(vids.Count);
            foreach (var vid in vids)
            {
                var record = new VideoStored2
                {
                    VideoId      = vid.Id,
                    Title        = vid.Title,
                    Description  = vid.Description,
                    Duration     = vid.Duration,
                    Keywords     = vid.Keywords.ToList(),
                    Statistics   = vid.Statistics,
                    ChannelId    = c.ChannelId,
                    ChannelTitle = c.ChannelTitle,
                    UploadDate   = vid.UploadDate,
                    AddedDate    = vid.AddedDate,
                    Updated      = timestamp
                };
                records.Add(record);
            }

            // Only touch the store when there is something to write.
            if (records.Any())
            {
                await vidStore.Append(records, log);
            }

            // Without a cut-off everything is new; otherwise count the uploads after the cut-off.
            var newCount = uploadedFrom == null
                ? records.Count
                : records.Count(r => r.UploadDate > uploadedFrom);
            var updatedCount = vids.Count - newCount;

            log.Information("Collect - {Channel} - Recorded {VideoCount} videos. {NewCount} new, {UpdatedCount} updated",
                            c.ChannelTitle, vids.Count, newCount, updatedCount);
        }
Beispiel #2
0
        /// <summary>
        /// Processes traffic-source export archives found under "rec_exports" in <paramref name="store"/> and
        /// appends the resulting rows to the "rec_exports_processed" JSONL store.
        /// </summary>
        /// <remarks>
        /// Only exports modified after the timestamp of the latest processed file are handled; when no processed
        /// file exists yet, every export is handled. Each export's file name supplies the channel and date range,
        /// and its "Table data.csv" entry supplies the rows. Rows whose source is not of the form
        /// "YT_RELATED.&lt;videoId&gt;" are dropped; the rest are enriched with the source video's details
        /// fetched through <paramref name="ytWeb"/>.
        /// </remarks>
        /// <param name="store">File store holding the exports and receiving the processed output.</param>
        /// <param name="ytWeb">Client used to look up the video each related-traffic row came from.</param>
        /// <param name="log">Logger for progress messages; also passed to the store and video lookups.</param>
        /// <exception cref="InvalidOperationException">
        /// An export's file name does not match the expected pattern, or the archive has no "Table data.csv" entry.
        /// </exception>
        public static async Task Process(ISimpleFileStore store, YtWeb ytWeb, ILogger log)
        {
            var blobs = await store.List("rec_exports").SelectManyList();

            //blobs = blobs.Where(b => b.Path == "rec_exports/Traffic source 2019-07-01_2019-08-01 David Pakman Show.zip").ToList();

            var fileInfoRegex = new Regex("^Traffic source (?'from'\\d+-\\d+-\\d+)_(?'to'\\d+-\\d+-\\d+) (?'channel'[^.]+)", RegexOptions.Compiled);

            // Output files are named after the source export's modified time (see the key selector),
            // so the latest output file tells us how far previous runs got.
            var appendStore = new JsonlStore <TrafficSourceRow>(store, "rec_exports_processed", r => r.FileUpdated.FileSafeTimestamp(), log);

            var md = await appendStore.LatestFile();

            var latestModified = md?.Ts.ParseFileSafeTimestamp();

            var newBlobs = latestModified != null
                ? blobs.Where(b => b.Modified > latestModified).ToList()
                : blobs;

            log.Information("Processing {NewExports}/{AllExports} exports", newBlobs.Count, blobs.Count);

            foreach (var b in newBlobs)
            {
                log.Information("Processing {Path}", b.Path);

                // Success is the direct test for "the file name did not match the pattern".
                var m = fileInfoRegex.Match(b.Path.Name);
                if (!m.Success)
                {
                    throw new InvalidOperationException($"unable to parse export info from file name '{b.Path.Name}'");
                }
                var exportInfo = new {
                    Channel = m.Groups["channel"].Value,
                    From    = m.Groups["from"].Value.ParseDate(),
                    To      = m.Groups["to"].Value.ParseDate()
                };

                // Dispose the loaded stream and the archive at the end of each iteration; previously only the
                // entry reader was disposed, leaking the archive and its backing stream for every export.
                using var stream = await store.Load(b.Path);
                using var zip = new ZipArchive(stream);
                using var csvStream = new StreamReader(
                          zip.GetEntry("Table data.csv")?.Open() ?? throw new InvalidOperationException("expected export to have 'Table data.csv'"),
                          Encoding.UTF8);
                using var csvReader = new CsvReader(csvStream, CsvExtensions.DefaultConfig);

                // Materialise the CSV rows up front so the total is known for progress logging.
                var records = csvReader.GetRecords <TrafficSourceExportRow>().ToList();
                var rows    = (await records.BlockFunc(ToTrafficSourceRow, 4,
                                                       progressUpdate: p => log.Debug("Processing traffic sources for {Path}: {Rows}/{TotalRows}", b.Path, p.Completed, records.Count)))
                              .NotNull().ToList();

                await appendStore.Append(rows, log);

                log.Information("Completed processing traffic source exports for {Path}", b.Path);

                // Converts one CSV row into an output row, or null when the row is not related-video traffic.
                async Task <TrafficSourceRow> ToTrafficSourceRow(TrafficSourceExportRow row)
                {
                    var source = row.Source.Split(".");

                    if (source.Length != 2 || source[0] != "YT_RELATED")
                    {
                        return(null); // total at the top or otherwise. not interested
                    }
                    var videoId   = source[1];
                    var fromVideo = await ytWeb.GetVideo(videoId, log);

                    // fromVideo may be null; the From* fields are then left null.
                    return(new TrafficSourceRow {
                        ToChannelTitle = exportInfo.Channel,
                        From = exportInfo.From,
                        To = exportInfo.To,
                        Impressions = row.Impressions,
                        Source = row.Source,
                        AvgViewDuration = row.AvgViewDuration,
                        Views = row.Views,
                        SourceType = row.SourceType,
                        FromChannelId = fromVideo?.ChannelId,
                        FromChannelTitle = fromVideo?.ChannelTitle,
                        FromVideoId = fromVideo?.Id,
                        FromVideoTitle = fromVideo?.Title,
                        ImpressionClickThrough = row.ImpressionClickThrough,
                        WatchTimeHrsTotal = row.WatchTimeHrsTotal,
                        FileUpdated = b.Modified?.UtcDateTime ?? DateTime.MinValue
                    });
                }
            }

            log.Information("Completed processing traffic source exports");
        }