/// <summary>
/// Purges every audio record older than the configured cutoff, batch by batch. The console title is updated
/// with the current status after each record, and a final status line is written once no records remain.
/// </summary>
/// <param name="stopwatch">Passed through to GetStatusString when reporting progress.</param>
/// <exception cref="Exception">
/// The audioPurgeCutoffInDays appSetting is missing, is not an integer, or is not positive.
/// </exception>
private static void Execute(Stopwatch stopwatch)
{
    // A failed parse leaves the value at 0, which the range check below rejects.
    int audioPurgeCutoffInDays;
    int.TryParse(ConfigurationManager.AppSettings["audioPurgeCutoffInDays"], out audioPurgeCutoffInDays);
    if (audioPurgeCutoffInDays <= 0)
    {
        throw new Exception("The audioPurgeCutoffInDays appSetting must be set to a positive integer.");
    }

    int count = 0;
    int? lastProcessedId = null;
    var db = new trackdocEntities();
    try
    {
        var unprocessedRecordIds = Shared.GetUnpurgedAudioIds(db, audioPurgeCutoffInDays);
        while (unprocessedRecordIds.Any())
        {
            foreach (var unprocessedRecordId in unprocessedRecordIds)
            {
                count++;
                Shared.PurgeAudioRecord(unprocessedRecordId, db);
                lastProcessedId = unprocessedRecordId;
                Console.Title = GetStatusString(stopwatch, count);
            }

            // Each batch gets a fresh context (presumably so the change tracker does not keep growing
            // with attached records -- TODO confirm). The last processed id is passed along so the next
            // query can skip everything already handled.
            db.Dispose();
            db = new trackdocEntities();
            unprocessedRecordIds = Shared.GetUnpurgedAudioIds(db, audioPurgeCutoffInDays, lastProcessedId);
        }
    }
    finally
    {
        // Runs on failure as well, so an exception thrown mid-batch no longer leaks the context.
        db.Dispose();
    }

    Console.WriteLine();
    Console.WriteLine(GetStatusString(stopwatch, count));
}
/// <summary>
/// Executes the usp_get_converting_jobs stored procedure and returns the IDs it yields,
/// sorted from highest to lowest.
/// </summary>
/// <param name="db">Context used to execute the stored procedure.</param>
/// <returns>The retrieved IDs in descending order.</returns>
private List<int> GetJobList(trackdocEntities db)
{
    WriteDebugEntry("Initiating retrieval of IDs of jobs in Converting status from database.");

    var retrievedIds = db.usp_get_converting_jobs().Select(row => row.Value);
    var descendingIds = retrievedIds.OrderByDescending(id => id).ToList();

    WriteDebugEntry("Successfully retrieved IDs of " + descendingIds.Count + " jobs in Converting status from database.");
    return descendingIds;
}
/// <summary>
/// Looks up audio records that are not flagged as purged and whose date_created is more than
/// <paramref name="cutoffDays"/> days before the current local time, and returns the ids of the first 10
/// of them when ordered by id.
/// </summary>
/// <param name="db">Context to query.</param>
/// <param name="cutoffDays">Minimum age, in days, a record must have to be returned.</param>
/// <param name="startAfterId">
/// When not null, only records with an id greater than this value are considered. Used as an additional
/// filter in the WHERE clause to improve performance.
/// </param>
/// <returns>At most 10 audio ids in ascending order; empty when nothing matches.</returns>
public static int[] GetUnpurgedAudioIds(trackdocEntities db, int cutoffDays, int? startAfterId = null)
{
    // 10 seems to be significantly faster than higher values like 100 or 1000 for some reason.
    const int batchSize = 10;

    var cutoff = DateTime.Now.Subtract(TimeSpan.FromDays(cutoffDays));
    var candidates = db.audios.Where(a => a.purged != true && a.date_created < cutoff);

    if (startAfterId.HasValue)
    {
        candidates = candidates.Where(a => a.id > startAfterId);
    }

    var orderedIds = candidates.OrderBy(a => a.id).Select(a => a.id);
    return orderedIds.Take(batchSize).ToArray();
}
/// <summary>
/// Converts the audio described by <paramref name="audioMetadata"/> to MP3 and then calls UpdateDB for it.
/// </summary>
/// <param name="audioMetadata">Metadata of the audio to convert; its OriginalAudioType selects the path.</param>
/// <param name="db">Context handed on to UpdateDB.</param>
/// <exception cref="Exception">
/// The original audio type is neither in Shared.ValidFfmpegExtensions nor "dss"/"ds2".
/// </exception>
private void ConvertAudio(AudioMetadata audioMetadata, trackdocEntities db)
{
    var originalType = audioMetadata.OriginalAudioType;

    // Stays false for types ffmpeg handles directly; true only for dss/ds2.
    var isDssOrDs2 = false;
    if (!Shared.ValidFfmpegExtensions.Contains(originalType))
    {
        isDssOrDs2 = originalType == "dss" || originalType == "ds2";
        if (!isDssOrDs2)
        {
            throw new Exception("Unsupported audio format.");
        }
    }

    // For dss/ds2 no WAV conversion happens here: the converted WAV should already have been created.
    ConvertToMp3(audioMetadata, isDssOrDs2);
    UpdateDB(audioMetadata, db);
}
/// <summary>
/// Marks the audio record with the given id as purged: audio_lob becomes an empty byte array, purged becomes
/// true, date_last_modified is set to the current local time and last_modified_by is set to 0. The change is
/// saved immediately.
/// </summary>
/// <remarks>
/// Validation on save is switched off on <paramref name="db"/> and is not switched back on afterwards.
/// </remarks>
/// <param name="audioRecordId">Id of the audio record to purge.</param>
/// <param name="db">Context the update is attached to and saved through.</param>
public static void PurgeAudioRecord(int audioRecordId, trackdocEntities db)
{
    // Only a stub carrying the key and the new values is attached; the existing row is not queried first.
    var purgeStub = new audio
    {
        id = audioRecordId,
        audio_lob = new byte[0],
        purged = true,
        date_last_modified = DateTime.Now,
        last_modified_by = 0
    };
    db.audios.Attach(purgeStub);

    // Flag exactly the four changed properties as modified.
    var entry = db.Entry(purgeStub);
    entry.Property(d => d.purged).IsModified = true;
    entry.Property(d => d.audio_lob).IsModified = true;
    entry.Property(d => d.date_last_modified).IsModified = true;
    entry.Property(d => d.last_modified_by).IsModified = true;

    // Needed because some of the properties we're ignoring are required.
    db.Configuration.ValidateOnSaveEnabled = false;
    db.SaveChanges();
}
/// <summary>
/// Purges audio records older than this.audioPurgeCutoffInDays, one retrieved batch at a time, and stops
/// starting new batches once 10,000 records have been purged in this run so audio conversion is not delayed.
/// Does nothing when this.audioPurgeCutoffInDays is 0 or negative.
/// </summary>
private void PurgeOldAudios()
{
    // 0 or negative value disables audio purging.
    if (this.audioPurgeCutoffInDays <= 0)
    {
        return;
    }

    // So a single run never delays converting for too long (10000 should take less than 2 minutes).
    const int maxNumberToPurge = 10000;

    var db = new trackdocEntities();
    try
    {
        int? resumeAfterId = null;
        var purgedSoFar = 0;

        while (true)
        {
            WriteDebugEntry("Initiating retrieval of IDs of audios to purge from database.");
            var batch = AudioPurge.Shared.GetUnpurgedAudioIds(db, this.audioPurgeCutoffInDays, resumeAfterId);
            WriteDebugEntry("Successfully retrieved IDs of " + batch.Length + " audios to purge from database.");

            if (batch.Length == 0 || purgedSoFar >= maxNumberToPurge)
            {
                break;
            }

            foreach (var audioId in batch)
            {
                WriteDebugEntry("Initiating purge of audio " + audioId + ".");
                AudioPurge.Shared.PurgeAudioRecord(audioId, db);
                WriteEventLogEntry("Purged audio " + audioId + ".", EventLogEntryType.Information);
                resumeAfterId = audioId;
                purgedSoFar++;
            }

            // The next batch is retrieved and purged through a brand-new context.
            db.Dispose();
            db = new trackdocEntities();
        }

        WriteDebugEntry("Successfully purged " + purgedSoFar + " audios.");
    }
    finally
    {
        db.Dispose();
    }
}
/// <summary>
/// Calls usp_update_audio_and_job_new for the converted MP3 described by <paramref name="audioMetadata"/>,
/// writes an information entry to the event log and then deletes the file at FinalPath.
/// </summary>
/// <param name="audioMetadata">Supplies the audio id, the converted file path and the base filename.</param>
/// <param name="db">Context used to call the stored procedure.</param>
/// <exception cref="Exception">The stored procedure call returned a non-zero status.</exception>
private void UpdateDB(AudioMetadata audioMetadata, trackdocEntities db)
{
    var convertedPath = audioMetadata.FinalPath;

    var status = db.usp_update_audio_and_job_new(
        audioMetadata.audioId,
        "mp3",
        GetMp3Bitrate(convertedPath),
        convertedPath,
        audioMetadata.Filename + ".mp3",
        GetMp3DurationInSeconds(convertedPath));

    if (status != 0)
    {
        throw new Exception("Error while updating db records...");
    }

    WriteEventLogEntry(
        "Updated audio " + audioMetadata.audioId + " in database with converted file at " + convertedPath + ".",
        EventLogEntryType.Information);

    // The converted file is removed once the update call has succeeded.
    File.Delete(convertedPath);
}
/// <summary>
/// Stores <paramref name="ex"/> as a new ELMAH_Error row using its own short-lived context. Any exception
/// raised while doing so is written to <paramref name="eventLog"/> instead of being thrown to the caller.
/// </summary>
/// <param name="ex">The exception to record.</param>
/// <param name="eventLog">Event log that receives an error entry if the ELMAH row cannot be written.</param>
public static void CreateLogEntry(Exception ex, EventLog eventLog)
{
    const string applicationName = "TrackDoc";

    try
    {
        using (var context = new trackdocEntities())
        {
            var hostName = Dns.GetHostName();
            var timestamp = DateTime.UtcNow;

            // String columns are truncated to the lengths passed to TruncateString.
            var logRow = new ELMAH_Error
            {
                ErrorId = Guid.NewGuid(),
                Host = Shared.TruncateString(hostName, 50),
                Application = applicationName,
                Type = Shared.TruncateString(ex.GetType().ToString(), 100),
                Source = Shared.TruncateString(ex.Source, 60) ?? string.Empty,
                Message = Shared.TruncateString("Audio Processor Service: " + ex.Message, 500),
                User = string.Empty,
                StatusCode = 0,
                TimeUtc = timestamp.ToUniversalTime()
            };

            // The XML carries the untruncated, XML-escaped message, source and full exception text.
            logRow.AllXml = string.Format(
                "<error application=\"{0}\" host=\"{1}\" message=\"{2}\" source=\"{3}\" detail=\"{4}\" user=\"{5}\" time=\"{6}\" statusCode=\"{7}\"> </error>",
                applicationName,
                hostName,
                XmlEscape(ex.Message),
                XmlEscape(ex.Source),
                XmlEscape(ex.ToString()),
                "",
                timestamp.ToString("yyyy-MM-ddTHH:mm:ss.fffffffZ"),
                0);

            context.ELMAH_Error.Add(logRow);
            context.SaveChanges();
        }
    }
    catch (Exception loggingFailure)
    {
        Shared.WriteEventLogEntry(
            eventLog,
            "Error writing ELMAH log entry: " + loggingFailure.ToString(),
            EventLogEntryType.Error);
    }
}
/// <summary>
/// Polls Google for every non-deleted speechrec_job in Processing status that has an operation name.
/// A failed poll response sets the job to Error; a completed one stores the output (plus the auto-punctuated
/// output when that call succeeds) and sets the job to Complete. In both cases the job is saved and the
/// object named after its audio id is deleted from Google storage. Jobs whose operation is neither failed nor
/// completed are left untouched.
/// </summary>
/// <remarks>
/// Exceptions never leave this method: a failure for a single job is passed to HandleError and the loop moves
/// on to the next job; a failure outside the per-job handling is passed to HandleError as well.
/// </remarks>
/// <param name="db">Context used to load and save the speechrec_jobs.</param>
public void HandleCompleted(trackdocEntities db)
{
    try
    {
        WriteDebugEntry("Getting speechrec_jobs in Processing status.");

        // ToList() since we're updating incrementally and it would cause an error otherwise.
        var jobsInProcessing = db.speechrec_job
            .Where(s => s.delete_flag == false
                        && s.status_id == EnumTable.speechrec_status.Processing
                        && s.operation_name != null)
            .ToList();
        WriteDebugEntry("Successfully retrieved " + jobsInProcessing.Count + " speechrec_jobs in Processing status.");

        foreach (var job in jobsInProcessing)
        {
            try
            {
                var pollResponse = GetLongRunningRecognizePollResponse(job.operation_name);
                WriteDebugEntry("Successfully retrieved long-running recognize poll response for operation " + job.operation_name + ".");

                var pollFailed = pollResponse.IsFaulted || pollResponse.Exception != null;
                if (pollFailed) // Rarely gets in here for some reason.
                {
                    job.status_id = EnumTable.speechrec_status.Error;
                    job.last_modified_by = 0;
                    job.date_last_modified = DateTime.Now;
                    db.SaveChanges();

                    DeleteObjectFromGoogleStorage(job.audio_id.ToString()); // Should we be deleting it here?
                    HandleError("Poll response completed with failure for speechRecJob " + job.id + ": ", pollResponse.Exception);
                    continue;
                }

                if (!pollResponse.IsCompleted)
                {
                    // Still running on Google's side; nothing to record yet.
                    continue;
                }

                job.status_id = EnumTable.speechrec_status.Complete;
                job.output = pollResponse.Result.Results.ToString();
                job.last_modified_by = 0;

                // Blocks until the auto-punctuation call has finished.
                var punctuationInput = ConvertGoogleJsonStr(job.output);
                AutoPunctuationFlask.InvokeRequestResponseService(punctuationInput).Wait();

                if (AutoPunctuationFlask.IsSuccessfully)
                {
                    job.postprocoutput = RemoveRedundantPunc(AutoPunctuationFlask.ScriptWithAutoPunc);
                }
                else
                {
                    // The job is still saved as Complete below, just without post-processed output.
                    WriteEventLogEntry(AutoPunctuationFlask.ApiErrorMessage, EventLogEntryType.Error);
                }

                job.date_last_modified = DateTime.Now;
                db.SaveChanges();
                WriteEventLogEntry("Updated speechrec_job " + job.id + " with output from Google.", EventLogEntryType.Information);

                DeleteObjectFromGoogleStorage(job.audio_id.ToString());
            }
            catch (Exception jobFailure)
            {
                HandleError("Error with poll response for speechRecJob " + job?.id + ": ", jobFailure);
            }
        }
    }
    catch (Exception failure)
    {
        HandleError("SpeechRecognition HandleCompleted() exception: ", failure);
    }
}
/// <summary>
/// Initiates a long-running speech recognition operation with Google for the given speechRecJob: the audio at
/// <paramref name="unconvertedPath"/> is turned into an upload path (Opus or WAV, depending on useOpus),
/// uploaded under an object name equal to the file name without extension, and the job is saved with the
/// resulting operation name and Processing status.
/// </summary>
/// <remarks>
/// On any failure the job is set to Error and saved, and the exception is passed to HandleError. If that
/// save itself throws, the new exception propagates to the caller and HandleError is not reached.
/// </remarks>
/// <param name="speechRecJob">The job to start; its operation_name and status_id are updated.</param>
/// <param name="unconvertedPath">Path of the downloaded audio file in its original format.</param>
/// <param name="db">Context used to save the changes made to <paramref name="speechRecJob"/>.</param>
public void InitiateSpeechRecProcessing(speechrec_job speechRecJob, string unconvertedPath, trackdocEntities db)
{
    try
    {
        WriteDebugEntry("Initiating speech rec processing for speechrec_job " + speechRecJob.id + ".");

        string pathToUploadFrom = null;

        // Invariant lowercasing: a culture-sensitive ToLower() would mangle extensions containing 'I'
        // under cultures such as tr-TR (where "I" lowercases to a dotless i).
        var originalFormat = Path.GetExtension(unconvertedPath).TrimStart('.').ToLowerInvariant();
        var objectName = Path.GetFileNameWithoutExtension(unconvertedPath);

        try
        {
            pathToUploadFrom = useOpus
                ? GetOpusPathToUploadFrom(unconvertedPath, originalFormat, objectName)
                : GetWavPathToUploadFrom(unconvertedPath, originalFormat, objectName);

            UploadFile(objectName, pathToUploadFrom);

            var operationName = GetOperationName(objectName);
            speechRecJob.operation_name = operationName;
            speechRecJob.status_id = EnumTable.speechrec_status.Processing;
            db.SaveChanges();
            WriteDebugEntry("Updated speechRecJob " + speechRecJob.id + " operation_name field and set status to Processing in database.");
        }
        finally
        {
            // Only files whose path contains ".speechRec." are deleted (presumably the temporary files
            // produced for the upload -- TODO confirm); any other upload path is left in place.
            if (pathToUploadFrom?.Contains(".speechRec.") == true && File.Exists(pathToUploadFrom))
            {
                File.Delete(pathToUploadFrom);
            }
        }
    }
    catch (Exception ex)
    {
        speechRecJob.status_id = EnumTable.speechrec_status.Error;
        db.SaveChanges();
        HandleError("InitiateSpeechRecProcessing exception for speechRecJob " + speechRecJob?.id + ": ", ex);
    }
}
/// <summary>
/// Builds the query for speechrec_jobs whose delete_flag is false and whose status is Pending.
/// </summary>
/// <param name="db">Context whose speechrec_job set is queried.</param>
/// <returns>A query over the matching speechrec_jobs; it is not executed by this method.</returns>
public IQueryable<speechrec_job> GetPendingSpeechRecJobs(trackdocEntities db)
{
    var pendingJobs =
        from job in db.speechrec_job
        where job.delete_flag == false && job.status_id == EnumTable.speechrec_status.Pending
        select job;

    return pendingJobs;
}
/// <summary>
/// Watcher thread method; all the processing happens here. Handles completed speech recognition jobs first,
/// then downloads every audio that needs MP3 conversion and/or has a pending speech recognition job and
/// processes it, and finally purges old audios.
/// </summary>
/// <remarks>
/// A failure for one audio sets its error status, is logged (ELMAH and event log) and mailed, and the loop
/// continues with the next audio. A failure outside the per-audio handling is logged and mailed too; no
/// exception leaves this method through the catch blocks shown here.
/// </remarks>
/// <returns>A task that completes when the processing pass has finished.</returns>
public async Task ProcessTask()
{
    try
    {
        using (var db = new trackdocEntities())
        {
            _speechRecognition.HandleCompleted(db);

            var conversionAudioIds = GetJobList(db);
            var pendingSpeechRecJobs = _speechRecognition.GetPendingSpeechRecJobs(db);
            var speechRecAudioIds = _speechRecognition.GetPendingSpeechRecJobIds(pendingSpeechRecJobs);

            // Every audio needed by either pipeline, each id once, in ascending order.
            var audioIdsToDownload = conversionAudioIds.Union(speechRecAudioIds).Distinct().OrderBy(id => id);
            var audioMetadatas = GetAudioMetadatas(audioIdsToDownload, db);

            foreach (var audioId in audioIdsToDownload)
            {
                AudioMetadata metadata = null;
                try
                {
                    metadata = audioMetadatas.First(m => m.audioId == audioId);
                    await Shared.StreamBlobToFile(connectionString, audioId, metadata.UnconvertedPath, eventLog);

                    // dss/ds2 audio is converted to WAV before either pipeline runs.
                    var isDssOrDs2 = metadata.OriginalAudioType == "dss" || metadata.OriginalAudioType == "ds2";
                    if (isDssOrDs2)
                    {
                        ConvertToWav(metadata);
                    }

                    if (speechRecAudioIds.Contains(audioId))
                    {
                        var pendingJob = pendingSpeechRecJobs.First(s => s.audio_id == audioId);
                        _speechRecognition.InitiateSpeechRecProcessing(pendingJob, metadata.UnconvertedPath, db);
                    }

                    if (conversionAudioIds.Contains(audioId))
                    {
                        ConvertAudio(metadata, db);
                    }
                }
                catch (Exception audioFailure)
                {
                    SetErrorStatus(audioId);
                    Elmah.CreateLogEntry(audioFailure, eventLog);
                    WriteEventLogEntry(
                        "Processing of audio " + audioId + " failed: " + audioFailure.ToString(),
                        EventLogEntryType.Error);
                    mail.SendMail("Error processing audio " + audioId + ": " + audioFailure.Message);
                }
                finally
                {
                    // Clean up the working files whether or not processing succeeded; metadata is still
                    // null here if the lookup above failed.
                    DeleteIfExists(metadata?.UnconvertedPath);
                    DeleteIfExists(metadata?.IntermediateWavPath);
                    DeleteIfExists(metadata?.FinalPath);
                }
            }
        }

        // Runs after the processing context above has been disposed.
        PurgeOldAudios();
    }
    catch (Exception failure)
    {
        Elmah.CreateLogEntry(failure, eventLog);
        WriteEventLogEntry(failure.ToString(), EventLogEntryType.Error);
        mail.SendMail("Audio Processor Service Error: " + failure.Message);
    }
}