/// <summary>
/// Legacy version of OnConsume, retained only as a template; it may be removed once it is no longer useful.
/// Converts the primary file of a video to a wav file through the Python RPC server, stores the
/// result as the video's Audio record (when none is set yet) and, if the video has no
/// transcriptions, publishes a transcription task.
/// </summary>
/// <param name="videoId">Id of the video whose primary file is converted.</param>
/// <param name="taskParameters">Not read by this implementation.</param>
/// <returns>A task that completes when the conversion result has been handled.</returns>
/// <exception cref="Exception">Thrown when the RPC call does not return a valid file.</exception>
private async Task OldOnConsumeNotUsed(string videoId, TaskParameters taskParameters)
{
    using (var _context = CTDbContext.CreateDbContext())
    {
        // Load the video to convert.
        var video = await _context.Videos.FindAsync(videoId);
        _logger.LogInformation("Consuming" + video);

        // Ask the Python server (RPC) to produce the audio file.
        var sourceFile = new CTGrpc.File { FilePath = video.Video1.VMPath };
        var conversionRequest = new CTGrpc.FileForConversion { File = sourceFile };
        var file = await _rpcClient.PythonServerClient.ConvertVideoToWavRPCWithOffsetAsync(conversionRequest);

        // Without a valid output file the task has failed.
        if (!FileRecord.IsValidFile(file.FilePath))
        {
            throw new Exception("ConvertVideoToWavTask Failed + " + video.Id);
        }

        var audioRecord = await FileRecord.GetNewFileRecordAsync(file.FilePath, file.Ext);

        // Fetch the video again in case it changed while the conversion was running.
        var videoLatest = await _context.Videos.FindAsync(video.Id);

        // An audio file is already attached: nothing to update.
        if (videoLatest.Audio != null)
        {
            return;
        }

        await _context.FileRecords.AddAsync(audioRecord);
        videoLatest.Audio = audioRecord;
        await _context.SaveChangesAsync();

        // Only request transcriptions when none exist yet.
        var hasTranscriptions = videoLatest.Transcriptions.Any();
        if (!hasTranscriptions)
        {
            _transcriptionTask.Publish(videoLatest.Id);
        }
    }
}
/// <summary>
/// Downloads the video(s) for a media item and links the media to a Video record.
/// If a file with the same hash is already attached to an existing Video, the media is linked
/// to that Video and the fresh download is deleted; otherwise a new Video record is created and
/// the transcription and process-video tasks are published for it.
/// </summary>
/// <param name="mediaId">Id of the media item to download.</param>
/// <param name="taskParameters">Not read by this implementation.</param>
/// <param name="cleanup">Active-task collection passed to registerTask together with mediaId.</param>
/// <returns>A task that completes when the media has been linked to a video (or left untouched).</returns>
/// <exception cref="Exception">
/// Thrown when the download produced no valid primary file, or produced a secondary file that is not valid.
/// </exception>
protected override async Task OnConsume(string mediaId, TaskParameters taskParameters, ClientActiveTasks cleanup)
{
    registerTask(cleanup, mediaId); // may throw AlreadyInProgress exception

    Media media;
    using (var _context = CTDbContext.CreateDbContext())
    {
        media = await _context.Medias.Where(m => m.Id == mediaId)
            .Include(m => m.Playlist).FirstAsync();
    }

    GetLogger().LogInformation("Consuming" + media);

    // Stays null for an unrecognised source type, which fails the validity check below.
    Video video = null;
    switch (media.SourceType)
    {
        case SourceType.Echo360: video = await DownloadEchoVideo(media); break;
        case SourceType.Youtube: video = await DownloadYoutubeVideo(media); break;
        case SourceType.Local: video = await DownloadLocalPlaylist(media); break;
        case SourceType.Kaltura: video = await DownloadKalturaVideo(media); break;
        case SourceType.Box: video = await DownloadBoxVideo(media); break;
    }

    // If no valid video1, or if a video2 object exists but not a valid file - fail the task.
    if (video == null || video.Video1 == null || !video.Video1.IsValidFile()
        || (video.Video2 != null && !video.Video2.IsValidFile()))
    {
        throw new Exception("DownloadMediaTask failed for mediaId " + media.Id);
    }

    using (var _context = CTDbContext.CreateDbContext())
    {
        var latestMedia = await _context.Medias.FindAsync(media.Id);

        // Don't add video if there are already videos for the given media.
        // NOTE(review): in that case the freshly downloaded files are not cleaned up here - confirm this is intended.
        if (latestMedia.Video != null)
        {
            return;
        }

        // Look for an already stored file with the same content hash as the new download.
        var downloadedHash = video.Video1.Hash;
        var existingFile = await _context.FileRecords.Where(f => f.Hash == downloadedHash).FirstOrDefaultAsync();

        Video existingVideo = null;
        if (existingFile != null)
        {
            var existingFileId = existingFile.Id;
            existingVideo = await _context.Videos.Where(v => v.Video1Id == existingFileId).FirstOrDefaultAsync();

            if (existingVideo == null)
            {
                // The file record exists but no video uses it: delete the existing file record
                // before the new video (with its own file record) is stored.
                await existingFile.DeleteFileRecordAsync(_context);
            }
        }

        if (existingVideo == null)
        {
            // Create new video Record. The first save stores the video; its Id is then linked to the media.
            await _context.Videos.AddAsync(video);
            await _context.SaveChangesAsync();
            latestMedia.VideoId = video.Id;
            await _context.SaveChangesAsync();
            GetLogger().LogInformation("Downloaded:" + video);
            _transcriptionTask.Publish(video.Id);
            _processVideoTask.Publish(video.Id);
        }
        else
        {
            // Video and file both exist: link the media to the existing video.
            latestMedia.VideoId = existingVideo.Id;
            await _context.SaveChangesAsync();
            GetLogger().LogInformation("Existing Video:" + existingVideo);
            // Deleting downloaded video as it's duplicate.
            await video.DeleteVideoAsync(_context);
        }
    }
}
// Finds incomplete tasks and adds them all to a TaskItem table.
// This appears to be defunct and not yet used code - grep FindPendingJobs, found no callers of this function
// private async Task FindPendingJobs()
// {
//     using (var context = CTDbContext.CreateDbContext())
//     {
//         // Medias for which no videos have downloaded
//         var toDownloadMediaIds = await context.Medias.Where(m => m.Video == null).Select(m =>
//             new TaskItem
//             {
//                 UniqueId = m.Id,
//                 ResultData = new JObject(),
//                 TaskParameters = new JObject(),
//                 TaskType = TaskType.DownloadMedia,
//                 Attempts = 0
//             }).ToListAsync();
//         // Videos which haven't been converted to wav
//         var toConvertVideoIds = await context.Videos.Where(v => v.Medias.Any() && v.Audio == null).Select(v =>
//             new TaskItem
//             {
//                 UniqueId = v.Id,
//                 ResultData = new JObject(),
//                 TaskParameters = new JObject(),
//                 TaskType = TaskType.ConvertMedia,
//                 Attempts = 0
//             }).ToListAsync();
//         // Transcribe pending videos.
//         var toTranscribeVideoIds = await context.Videos.Where(v => v.TranscribingAttempts < 3 &&
//                                                                     v.TranscriptionStatus != "NoError" &&
//                                                                     v.Medias.Any() && v.Audio != null).Select(v =>
//             new TaskItem
//             {
//                 UniqueId = v.Id,
//                 ResultData = new JObject(),
//                 TaskParameters = new JObject(),
//                 TaskType = TaskType.Transcribe,
//                 Attempts = 0
//             }).ToListAsync();
//         // Completed Transcriptions which haven't generated vtt files
//         var toGenerateVTTsTranscriptionIds = await context.Transcriptions.Where(t => t.Captions.Count > 0 && t.File == null)
//             .Select(t =>
//             new TaskItem
//             {
//                 UniqueId = t.Id,
//                 ResultData = new JObject(),
//                 TaskParameters = new JObject(),
//                 TaskType = TaskType.GenerateVTTFile,
//                 Attempts = 0
//             }).ToListAsync();
//         var allTaskItems = new List<TaskItem>();
//         allTaskItems.AddRange(toDownloadMediaIds);
//         allTaskItems.AddRange(toConvertVideoIds);
//         allTaskItems.AddRange(toTranscribeVideoIds);
//         allTaskItems.AddRange(toGenerateVTTsTranscriptionIds);
//         foreach(var taskItem in allTaskItems)
//         {
//             if(!await context.TaskItems.AnyAsync(t => t.TaskType == taskItem.TaskType && t.UniqueId == taskItem.UniqueId))
//             {
//                 await context.TaskItems.AddAsync(taskItem);
//             }
//         }
//         await context.SaveChangesAsync();
//     }
// }

/// <summary>
/// Used by the PeriodicCheck to identify and enqueue missing tasks.
/// This Task is started after all playlists are updated.
/// Collects the ids of videos without a duration, transcriptions without a VTT file, videos that
/// still need transcribing and medias without a video, drops the ids each task type currently
/// reports as in progress, and publishes a task for every remaining id.
/// </summary>
/// <returns>A task that completes once all pending work has been published.</returns>
private async Task PendingJobs()
{
    // Update Box Token every few hours
    _updateBoxTokenTask.Publish("");

    // We will use these outside of the DB scope
    List<String> todoVTTs;
    List<String> todoProcessVideos;
    List<String> todoTranscriptions;
    List<String> todoDownloads;

    using (var context = CTDbContext.CreateDbContext())
    {
        // Most tasks are created directly from within a task when it normally completed.
        // This code exists to detect missing items and to publish tasks to complete them
        // A redesigned taskengine should not have the direct coupling inside each task

        // Since downloading a video could also create a Video, it is better to do these with little
        // time delay in-between and then publish all the tasks
        // I believe there is still a race condition: Prior to this, we've just polled all active
        // playlists and at least one of these may have already completed
        // So let's only consider items that are older than the configured cutoff
        // Okay this is a bandaid on the current design until we redesign the taskengine
        // Ideas For the future:
        // * Consider setting TTL on these messages to be 5 minutes short of the Periodic Refresh?
        // * If/when we drop the direct approach consider: Random ordering. Most recent first (or randomly choosing either)

        // If an object was created during the middle of a periodic cycle, give it a full cycle to queue,
        // and another cycle to complete its tasks
        int minutesCutOff = Math.Max(1, Convert.ToInt32(Globals.appSettings.PERIODIC_CHECK_OLDER_THAN_MINUTES));

        // NOTE(review): local time is used here - confirm CreatedAt is stored in the same time zone.
        var tooRecentCutoff = DateTime.Now.AddMinutes(-minutesCutOff);

        // This is the first use of 'AsNoTracking' in this project; let's check it works in Production as expected
        // TODO/TOREVIEW: Does EF create the complete entity and then project out the ID column in dot Net,
        //                or does it request only the ID from the database?
        // TODO/TOREVIEW: Since this code just pulls the IDs from the database, I expect this will be a harmless no-op,
        //                however all DB reads should use AsNoTracking as a best practice
        // See https://code-maze.com/queries-in-entity-framework-core/
        // See https://docs.microsoft.com/en-us/ef/core/querying/tracking

        GetLogger().LogInformation($"Finding incomplete VTTs, Transcriptions and Downloads from before {tooRecentCutoff}, minutesCutOff=({minutesCutOff})");

        // Videos that have a primary file but no duration yet.
        // Unlike the queries below, this one does not apply tooRecentCutoff.
        // Todo Could also check for secondary video too
        todoProcessVideos = await context.Videos.AsNoTracking()
            .Where(v => (v.Duration == null && !String.IsNullOrEmpty(v.Video1Id)))
            .OrderByDescending(t => t.CreatedAt)
            .Select(e => e.Id)
            .ToListAsync();

        // Completed Transcriptions which haven't generated vtt files
        todoVTTs = await context.Transcriptions.AsNoTracking()
            .Where(t => t.Captions.Count > 0 && t.File == null && t.CreatedAt < tooRecentCutoff)
            .OrderByDescending(t => t.CreatedAt)
            .Select(e => e.Id)
            .ToListAsync();

        // Videos linked to a media that have not been attempted yet and are not marked "NoError"
        todoTranscriptions = await context.Videos.AsNoTracking()
            .Where(v => v.TranscribingAttempts < 1 && v.TranscriptionStatus != "NoError" && v.Medias.Any() && v.CreatedAt < tooRecentCutoff)
            .OrderByDescending(t => t.CreatedAt)
            .Select(e => e.Id)
            .ToListAsync();

        // Medias for which no videos have downloaded
        todoDownloads = await context.Medias.AsNoTracking()
            .Where(m => m.Video == null && m.CreatedAt < tooRecentCutoff)
            .OrderByDescending(t => t.CreatedAt)
            .Select(e => e.Id)
            .ToListAsync();
    }

    // We have a list of outstanding tasks
    // However some of these may already be in progress
    // So don't queue these
    GetLogger().LogInformation($"Found {todoProcessVideos.Count},{todoVTTs.Count},{todoTranscriptions.Count},{todoDownloads.Count} counts before filtering");

    ClientActiveTasks currentProcessVideos = _processVideoTask.GetCurrentTasks();
    todoProcessVideos.RemoveAll(e => currentProcessVideos.Contains(e));

    ClientActiveTasks currentVTTs = _generateVTTFileTask.GetCurrentTasks();
    todoVTTs.RemoveAll(e => currentVTTs.Contains(e));

    ClientActiveTasks currentTranscription = _transcriptionTask.GetCurrentTasks();
    todoTranscriptions.RemoveAll(e => currentTranscription.Contains(e));

    // Downloads must be filtered against the download task's own active set; filtering against
    // the transcription task would re-publish downloads that are already running.
    ClientActiveTasks currentDownloads = _downloadMediaTask.GetCurrentTasks();
    todoDownloads.RemoveAll(e => currentDownloads.Contains(e));

    GetLogger().LogInformation($"Current In progress {currentProcessVideos.Count},{currentVTTs.Count},{currentTranscription.Count},{currentDownloads.Count} counts after filtering");
    GetLogger().LogInformation($"Found {todoProcessVideos.Count},{todoVTTs.Count},{todoTranscriptions.Count},{todoDownloads.Count} counts after filtering");

    // Now we have a list of new things we want to do
    GetLogger().LogInformation($"Publishing processingVideos ({String.Join(",", todoProcessVideos)})");
    todoProcessVideos.ForEach(t => _processVideoTask.Publish(t));

    GetLogger().LogInformation($"Publishing todoVTTs ({String.Join(",", todoVTTs)})");
    todoVTTs.ForEach(t => _generateVTTFileTask.Publish(t));

    GetLogger().LogInformation($"Publishing todoTranscriptions ({String.Join(",", todoTranscriptions)})");
    todoTranscriptions.ForEach(v => _transcriptionTask.Publish(v));

    GetLogger().LogInformation($"Publishing todoDownloads ({String.Join(",", todoDownloads)})");
    todoDownloads.ForEach(m => _downloadMediaTask.Publish(m));

    // Not used: Videos which haven't been converted to wav.
    // Code not deleted because one day we will just reuse the one wav file and use an offset into that file
    //(await context.Videos.Where(v => v.Medias.Any() && v.Audio == null).ToListAsync()).ForEach(v => _convertVideoToWavTask.Publish(v.Id));

    // Videos which have failed in transcribing
    GetLogger().LogInformation("Pending Jobs - completed");
}