Esempio n. 1
0
        /// <summary>
        /// Downloads the video for the given media item and links a Video record to it.
        /// If a file with the same hash is already known and a Video references it, the media is
        /// linked to that existing Video and the freshly downloaded copy is deleted; otherwise a
        /// new Video record is created and the transcription and process-video tasks are published.
        /// </summary>
        /// <param name="mediaId">Id of the media item to download.</param>
        /// <param name="taskParameters">Task parameters (not read by this method).</param>
        /// <param name="cleanup">Active-task registry this media id is registered with.</param>
        /// <exception cref="Exception">
        /// Thrown when the download did not produce valid file(s), or when the media item no
        /// longer exists once the download has finished.
        /// </exception>
        protected override async Task OnConsume(string mediaId, TaskParameters taskParameters, ClientActiveTasks cleanup)
        {
            registerTask(cleanup, mediaId); // may throw AlreadyInProgress exception

            Media media;

            // Use a short-lived context so no DB context is held open during the (long) download.
            using (var _context = CTDbContext.CreateDbContext())
            {
                media = await _context.Medias.Where(m => m.Id == mediaId)
                        .Include(m => m.Playlist).FirstAsync();
            }
            GetLogger().LogInformation("Consuming" + media);

            // Stays null for a source type without a downloader; the validity check below then fails the task.
            Video video = null;

            switch (media.SourceType)
            {
                case SourceType.Echo360: video = await DownloadEchoVideo(media); break;

                case SourceType.Youtube: video = await DownloadYoutubeVideo(media); break;

                case SourceType.Local: video = await DownloadLocalPlaylist(media); break;

                case SourceType.Kaltura: video = await DownloadKalturaVideo(media); break;

                case SourceType.Box: video = await DownloadBoxVideo(media); break;
            }

            // If no valid video1, or if a video2 object exists but not a valid file - fail the task.
            if (video == null || video.Video1 == null || !video.Video1.IsValidFile() ||
                (video.Video2 != null && !video.Video2.IsValidFile()))
            {
                throw new Exception("DownloadMediaTask failed for mediaId " + media.Id);
            }

            using (var _context = CTDbContext.CreateDbContext())
            {
                // Re-read the media: it may have changed (or been deleted) while the download was running.
                var latestMedia = await _context.Medias.FindAsync(media.Id);
                if (latestMedia == null)
                {
                    throw new Exception("DownloadMediaTask failed for mediaId " + media.Id + ": media no longer exists");
                }

                // Don't add video if there are already videos for the given media.
                // NOTE(review): Video is a navigation property and FindAsync does not eagerly load it;
                // this check presumably relies on lazy loading being enabled - confirm.
                if (latestMedia.Video != null)
                {
                    return;
                }

                // Check if the downloaded file is already known (same hash); if yes try to reuse its Video.
                var downloadedHash = video.Video1.Hash;
                var existingFile = await _context.FileRecords.Where(f => f.Hash == downloadedHash).FirstOrDefaultAsync();

                if (existingFile != null)
                {
                    var existingFileId = existingFile.Id;
                    var existingVideo = await _context.Videos.Where(v => v.Video1Id == existingFileId).FirstOrDefaultAsync();

                    // If video and file both exist: link the media to the existing video.
                    if (existingVideo != null)
                    {
                        latestMedia.VideoId = existingVideo.Id;
                        await _context.SaveChangesAsync();

                        GetLogger().LogInformation("Existing Video:" + existingVideo);

                        // Deleting downloaded video as it's duplicate.
                        await video.DeleteVideoAsync(_context);
                        return;
                    }

                    // File exists but no video references it: delete the existing file record
                    // before the new video (with its own file record) is stored.
                    await existingFile.DeleteFileRecordAsync(_context);
                }

                // Create new video Record. Saved first so that video.Id is available for the link below.
                await _context.Videos.AddAsync(video);
                await _context.SaveChangesAsync();

                latestMedia.VideoId = video.Id;
                await _context.SaveChangesAsync();

                GetLogger().LogInformation("Downloaded:" + video);
                _transcriptionTask.Publish(video.Id);
                _processVideoTask.Publish(video.Id);
            }
        }
Esempio n. 2
0
        /// <summary>Finds incomplete tasks and adds them all to the TaskItem table.
        /// This appears to be defunct, unused code: a grep for FindPendingJobs found no callers of this function.
        /// </summary>
        //       private async Task FindPendingJobs()
        // {
        //     using (var context = CTDbContext.CreateDbContext())
        //     {
        //         // Medias for which no videos have downloaded
        //         var toDownloadMediaIds = await context.Medias.Where(m => m.Video == null).Select(m =>
        //             new TaskItem
        //             {
        //                 UniqueId = m.Id,
        //                 ResultData = new JObject(),
        //                 TaskParameters = new JObject(),
        //                 TaskType = TaskType.DownloadMedia,
        //                 Attempts = 0
        //             }).ToListAsync();

        //         // Videos which haven't been converted to wav
        //         var toConvertVideoIds = await context.Videos.Where(v => v.Medias.Any() && v.Audio == null).Select(v =>
        //             new TaskItem
        //             {
        //                 UniqueId = v.Id,
        //                 ResultData = new JObject(),
        //                 TaskParameters = new JObject(),
        //                 TaskType = TaskType.ConvertMedia,
        //                 Attempts = 0
        //             }).ToListAsync();

        //         // Transcribe pending videos.
        //         var toTranscribeVideoIds = await context.Videos.Where(v => v.TranscribingAttempts < 3 &&
        //                                                                    v.TranscriptionStatus != "NoError" &&
        //                                                                    v.Medias.Any() && v.Audio != null).Select(v =>
        //                                                                    new TaskItem
        //                                                                    {
        //                                                                        UniqueId = v.Id,
        //                                                                        ResultData = new JObject(),
        //                                                                        TaskParameters = new JObject(),
        //                                                                        TaskType = TaskType.Transcribe,
        //                                                                        Attempts = 0
        //                                                                    }).ToListAsync();

        //         // Completed Transcriptions which haven't generated vtt files
        //         var toGenerateVTTsTranscriptionIds = await context.Transcriptions.Where(t => t.Captions.Count > 0 && t.File == null)
        //                                                                         .Select(t =>
        //                                                                         new TaskItem
        //                                                                         {
        //                                                                             UniqueId = t.Id,
        //                                                                             ResultData = new JObject(),
        //                                                                             TaskParameters = new JObject(),
        //                                                                             TaskType = TaskType.GenerateVTTFile,
        //                                                                             Attempts = 0
        //                                                                         }).ToListAsync();

        //         var allTaskItems = new List<TaskItem>();
        //         allTaskItems.AddRange(toDownloadMediaIds);
        //         allTaskItems.AddRange(toConvertVideoIds);
        //         allTaskItems.AddRange(toTranscribeVideoIds);
        //         allTaskItems.AddRange(toGenerateVTTsTranscriptionIds);

        //         foreach(var taskItem in allTaskItems)
        //         {
        //             if(!await context.TaskItems.AnyAsync(t => t.TaskType == taskItem.TaskType && t.UniqueId == taskItem.UniqueId))
        //             {
        //                 await context.TaskItems.AddAsync(taskItem);
        //             }
        //         }
        //         await context.SaveChangesAsync();
        //     }
        // }
        /// <summary> Used by the PeriodicCheck to identify and enqueue missing tasks.
        /// This Task is started after all playlists are updated.
        /// It collects the ids of videos, transcriptions and medias that still need work,
        /// drops the ids that each task queue reports as currently active, and publishes the rest.
        /// </summary>
        private async Task PendingJobs()
        {
            // Update Box Token every few hours
            _updateBoxTokenTask.Publish("");

            // We will use these outside of the DB scope
            List<String> todoVTTs;
            List<String> todoProcessVideos;
            List<String> todoTranscriptions;
            List<String> todoDownloads;

            using (var context = CTDbContext.CreateDbContext())
            {
                // Most tasks are created directly from within a task when it normally completed.
                // This code exists to detect missing items and to publish tasks to complete them
                // A redesigned taskengine should not have the direct coupling inside each task

                // Since downloading a video could also create a Video, it is better to do these with little time delay in-between and then publish all the tasks
                // I believe there is still a race condition: Prior to this, we've just polled all active playlists and at least one of these may have already completed
                // So let's only consider items that are older than a configurable number of minutes
                // Okay this is a band-aid on the current design until we redesign the taskengine
                // Ideas for the future:
                // * Consider setting TTL on these messages to be 5 minutes short of the Periodic Refresh?
                // * If/when we drop the direct approach consider: Random ordering. Most recent first (or randomly choosing either)

                // If an object was created during the middle of a periodic cycle, give it a full cycle to queue, and another cycle to complete its tasks

                // The cutoff is clamped to at least one minute.
                int minutesCutOff = Math.Max(1, Convert.ToInt32(Globals.appSettings.PERIODIC_CHECK_OLDER_THAN_MINUTES));

                var tooRecentCutoff = DateTime.Now.AddMinutes(-minutesCutOff);
                // This is the first use of 'AsNoTracking' in this project; let's check it works in Production as expected

                // TODO/TOREVIEW: Does EF create the complete entity and then project out the ID column in dot Net, or does it request only the ID from the database?
                // TODO/TOREVIEW: Since this code just pulls the IDs from the database, I expect this will be a harmless no-op, however all DB reads should use AsNoTracking as a best practice
                // See https://code-maze.com/queries-in-entity-framework-core/
                // See https://docs.microsoft.com/en-us/ef/core/querying/tracking

                GetLogger().LogInformation($"Finding incomplete VTTs, Transcriptions and Downloads from before {tooRecentCutoff}, minutesCutOff=({minutesCutOff})");

                // Videos that have a primary file but no duration yet.
                // Todo Could also check for secondary video too
                // NOTE: unlike the queries below, this one does not apply the tooRecentCutoff filter.
                todoProcessVideos = await context.Videos.AsNoTracking().Where(
                    v => (v.Duration == null && !String.IsNullOrEmpty(v.Video1Id))
                    ).OrderByDescending(t => t.CreatedAt).Select(e => e.Id).ToListAsync();

                // Completed Transcriptions which haven't generated vtt files
                todoVTTs = await context.Transcriptions.AsNoTracking().Where(
                    t => t.Captions.Count > 0 && t.File == null && t.CreatedAt < tooRecentCutoff
                    ).OrderByDescending(t => t.CreatedAt).Select(e => e.Id).ToListAsync();

                // Videos attached to at least one media that have never been attempted and are not marked "NoError"
                todoTranscriptions = await context.Videos.AsNoTracking().Where(
                    v => v.TranscribingAttempts < 1 && v.TranscriptionStatus != "NoError" && v.Medias.Any() && v.CreatedAt < tooRecentCutoff
                    ).OrderByDescending(t => t.CreatedAt).Select(e => e.Id).ToListAsync();

                // Medias for which no videos have downloaded
                todoDownloads = await context.Medias.AsNoTracking().Where(
                    m => m.Video == null && m.CreatedAt < tooRecentCutoff
                    ).OrderByDescending(t => t.CreatedAt).Select(e => e.Id).ToListAsync();
            }
            // We have a list of outstanding tasks
            // However some of these may already be in progress
            // So don't queue these

            GetLogger().LogInformation($"Found {todoProcessVideos.Count},{todoVTTs.Count},{todoTranscriptions.Count},{todoDownloads.Count} counts before filtering");

            ClientActiveTasks currentProcessVideos = _processVideoTask.GetCurrentTasks();
            todoProcessVideos.RemoveAll(e => currentProcessVideos.Contains(e));

            ClientActiveTasks currentVTTs = _generateVTTFileTask.GetCurrentTasks();
            todoVTTs.RemoveAll(e => currentVTTs.Contains(e));

            ClientActiveTasks currentTranscription = _transcriptionTask.GetCurrentTasks();
            todoTranscriptions.RemoveAll(e => currentTranscription.Contains(e));

            // Downloads must be filtered against the download task's own active list
            // (previously this read the transcription task's list, so in-progress downloads were re-queued).
            ClientActiveTasks currentDownloads = _downloadMediaTask.GetCurrentTasks();
            todoDownloads.RemoveAll(e => currentDownloads.Contains(e));

            GetLogger().LogInformation($"Current In progress  {currentProcessVideos.Count},{currentVTTs.Count},{currentTranscription.Count},{currentDownloads.Count} counts after filtering");
            GetLogger().LogInformation($"Found {todoProcessVideos.Count},{todoVTTs.Count},{todoTranscriptions.Count},{todoDownloads.Count} counts after filtering");

            // Now we have a list of new things we want to do
            GetLogger().LogInformation($"Publishing processingVideos ({String.Join(",", todoProcessVideos)})");

            todoProcessVideos.ForEach(t => _processVideoTask.Publish(t));

            GetLogger().LogInformation($"Publishing todoVTTs ({String.Join(",", todoVTTs)})");

            todoVTTs.ForEach(t => _generateVTTFileTask.Publish(t));

            GetLogger().LogInformation($"Publishing todoTranscriptions ({String.Join(",", todoTranscriptions)})");

            todoTranscriptions.ForEach(v => _transcriptionTask.Publish(v));

            GetLogger().LogInformation($"Publishing todoDownloads ({String.Join(",", todoDownloads)})");

            todoDownloads.ForEach(m => _downloadMediaTask.Publish(m));

            //// Not used Videos which haven't been converted to wav
            /// Code Not deleted because one day we will just reuse the one wav file and use an offset into that file
            //(await context.Videos.Where(v => v.Medias.Any() && v.Audio == null).ToListAsync()).ForEach(v => _convertVideoToWavTask.Publish(v.Id));
            // Videos which have failed in transcribing
            GetLogger().LogInformation("Pending Jobs - completed");
        }