/// <summary>
/// Synchronizes a Box-folder playlist: stores the folder's information on the playlist,
/// lists the folder's mp4 files, and adds a Media row for every file that is not already
/// recorded for this playlist.
/// </summary>
/// <param name="playlist">Playlist whose PlaylistIdentifier is used as the Box folder id; its JsonMetadata is overwritten with the folder information.</param>
/// <param name="_context">Database context used to look up existing Medias and to save the new ones.</param>
/// <returns>A task whose result is a list of Media. NOTE(review): presumably the newly created entries collected in newMedia - confirm.</returns>
private async Task <List <Media> > GetBoxPlaylist(Playlist playlist, CTDbContext _context)
                var client = await _box.GetBoxClientAsync();

                // Try to refresh the access token
                // NOTE(review): presumably this first folder request is what forces the refresh - confirm.
                var folderInfo = await client.FoldersManager.GetInformationAsync(playlist.PlaylistIdentifier);

                // Keep the folder information returned by Box as the playlist's JSON metadata.
                playlist.JsonMetadata = JObject.FromObject(folderInfo);

                // Only BoxFile entries are kept; other entry types in the folder are ignored.
                // NOTE(review): 500 is presumably the maximum number of items requested - confirm against the Box SDK.
                var items = (await client.FoldersManager.GetFolderItemsAsync(playlist.PlaylistIdentifier, 500)).Entries.OfType <BoxFile>();
                // Process only files with an mp4 extension.
                // The text after the last "." is compared case-sensitively, so "MP4" does not match.
                items = items.Where(i => i.Name.Substring(i.Name.LastIndexOf(".") + 1) == "mp4").ToList();
                List <Media> newMedia = new List <Media>();

                foreach (var item in items)
                    // One extra Box request per file to obtain its information (used for Id, CreatedAt and metadata).
                    var file = await client.FilesManager.GetInformationAsync(item.Id);

                    // Check if there is a valid file.Id, and for the same playlist the same media does not exist.
                    if (file.Id.Length > 0 &&
                        !await _context.Medias.Where(m => m.UniqueMediaIdentifier == file.Id &&
                                                     m.SourceType == playlist.SourceType &&
                                                     m.PlaylistId == playlist.Id).AnyAsync())
                        newMedia.Add(new Media
                            SourceType            = playlist.SourceType,
                            PlaylistId            = playlist.Id,
                            UniqueMediaIdentifier = file.Id,
                            JsonMetadata          = JObject.FromObject(file),
                            // Falls back to the local clock when Box reports no creation time.
                            CreatedAt             = file.CreatedAt ?? DateTime.Now
                // Names are derived via GetMediaName once every new Media has been built.
                newMedia.ForEach(m => m.Name = GetMediaName(m));
                await _context.Medias.AddRangeAsync(newMedia);

                await _context.SaveChangesAsync();

            // An invalidated Box session is logged and also reported to Slack.
            catch (Box.V2.Exceptions.BoxSessionInvalidatedException e)
                GetLogger().LogError(e, "Box Token Failure.");
                await _slack.PostErrorAsync(e, "Box Token Failure.");

        /// <summary>
        /// Downloads the Box file identified by the media's UniqueMediaIdentifier into a new,
        /// GUID-named ".mp4" file under the configured data directory and, when that file is
        /// valid, wraps it in a Video.
        /// </summary>
        /// <param name="media">Media whose UniqueMediaIdentifier is the Box file id to download; its Id is used in the failure log message.</param>
        /// <returns>A task whose result is a Video. NOTE(review): presumably the Video built from the downloaded file - confirm what is returned when the download is invalid.</returns>
        public async Task <Video> DownloadBoxVideo(Media media)
                // A fresh GUID is used as the file name for the downloaded file.
                var guid    = Guid.NewGuid().ToString();
                var newPath = Path.Combine(Globals.appSettings.DATA_DIRECTORY, guid + ".mp4");
                var client  = await _box.GetBoxClientAsync();

                var stream = await client.FilesManager.DownloadAsync(media.UniqueMediaIdentifier);

                // File.Create overwrites any existing file at newPath.
                using (var fileStream = File.Create(newPath))
                // Only build a Video when the file at newPath passes FileRecord's validity check.
                if (FileRecord.IsValidFile(newPath))
                    Video video = new Video
                        Video1 = await FileRecord.GetNewFileRecordAsync(newPath, Path.GetExtension(newPath))
                    // Deleting media is fine if download failed as we can get it back from the youtube playlist.
                    // NOTE(review): this is the Box download path; "youtube playlist" looks copied from the YouTube code - confirm and reword.
                    GetLogger().LogError("DownloadBoxVideo failed. mediaId {0}, removing Media record", media.Id);
                    // A new database context is created here; this method receives none from its caller.
                    using (var context = CTDbContext.CreateDbContext())
            // An invalidated Box session is logged and also reported to Slack.
            catch (Box.V2.Exceptions.BoxSessionInvalidatedException e)
                GetLogger().LogError(e, "Box Token Failure.");
                await _slack.PostErrorAsync(e, "Box Token Failure.");

        /// <summary>
        /// Runs continuous speech translation over a wav file and reports the captions,
        /// the last cancellation error code and the last successful caption time.
        /// </summary>
        /// <param name="logId">Prefix placed at the start of every log message written by this method.</param>
        /// <param name="filePath">Path of the wav file opened as the recognizer's audio input.</param>
        /// <param name="speechConfig">Configuration passed to the TranslationRecognizer.</param>
        /// <param name="restartOffset">Offset added to each result's offset to convert it back to a time in the original untrimmed video.</param>
        /// <param name="sourceLanguage">Key of the primary language in <paramref name="captions"/> and <paramref name="startAfterMap"/>.</param>
        /// <param name="captions">Caption lists keyed by language; the same dictionary instance is returned in the result.</param>
        /// <param name="startAfterMap">Per-language threshold; a result whose begin time is earlier than the threshold is skipped for that language.</param>
        /// <returns>An MSTResult carrying Captions, ErrorCode and LastSuccessTime.</returns>
        private async Task <MSTResult> performRecognitionAsync(string logId, string filePath, SpeechTranslationConfig speechConfig, TimeSpan restartOffset,
                                                               string sourceLanguage, Dictionary <string, List <Caption> > captions, Dictionary <string, TimeSpan> startAfterMap)
            using (var audioInput = WavHelper.OpenWavFile(filePath))
                // Keys already logged; HashSet.Add returns true only the first time, so each message is logged once per call.
                var      logOnce            = new HashSet <string>();
                // Completion of this task is what the wait further down blocks on.
                var      stopRecognition    = new TaskCompletionSource <int>();
                // Set to false here; the verbose branches below only log when it is true.
                bool     verboseLogging     = false;
                TimeSpan lastSuccessfulTime = TimeSpan.Zero;
                string   errorCode          = "";
                using (var recognizer = new TranslationRecognizer(speechConfig, audioInput))
                    // Handles each recognized segment: primary-language captions first, then translations.
                    recognizer.Recognized += (s, e) =>
                        if (e.Result.Reason == ResultReason.TranslatedSpeech)
                            // Word-level timings are read from the "Words" entry of the service's JSON result.
                            JObject jObject           = JObject.Parse(e.Result.Properties.GetProperty(PropertyId.SpeechServiceResponse_JsonResult));
                            var     wordLevelCaptions = jObject["Words"]
                                                        .ToObject <List <MSTWord> >()
                                                        .OrderBy(w => w.Offset)

                            // Nothing recognized in this segment (no text and no words).
                            if (e.Result.Text == "" && wordLevelCaptions.Count == 0)
                                if (verboseLogging)
                                    TimeSpan _offset = new TimeSpan(e.Result.OffsetInTicks);
                                    TimeSpan _end    = e.Result.Duration.Add(_offset);
                                    _logger.LogInformation($"{logId}: Empty String: Begin={_offset.Minutes}:{_offset.Seconds},{_offset.Milliseconds}, End={_end.Minutes}:{_end.Seconds},{_end.Milliseconds}");

                            if (wordLevelCaptions.Any())
                                // TODO/TOREVIEW: Is this a bug fix or redefinition? Could this change in later versions of the SDK?
                                // Shift every word so that the first word starts at the result's reported offset.
                                long offsetDifference = e.Result.OffsetInTicks - wordLevelCaptions.FirstOrDefault().Offset;

                                wordLevelCaptions.ForEach(w => w.Offset += offsetDifference);

                            var sentenceLevelCaptions = MSTWord.WordLevelTimingsToSentenceLevelTimings(e.Result.Text, wordLevelCaptions);

                            // Convert back to time in original untrimmed video.
                            // These timings are used to check if we should be adding any captions
                            // However they are then used directly for sentence level translations
                            // but not for the word-level timings of the primary language
                            TimeSpan begin = (new TimeSpan(e.Result.OffsetInTicks)).Add(restartOffset);
                            TimeSpan end   = e.Result.Duration.Add(begin);

                            if (verboseLogging)
                                // NOTE(review): the trailing `begin` argument has no matching placeholder in the already-interpolated message - presumably unused.
                                _logger.LogInformation($"{logId}: Begin={begin.Minutes}:{begin.Seconds},{begin.Milliseconds}", begin);
                                _logger.LogInformation($"{logId}: End={end.Minutes}:{end.Seconds},{end.Milliseconds}");
                            // TODO/TOREVIEW:
                            // ToCaptionEntitiesWithWordTiming vs ToCaptionEntitiesInterpolate
                            // Can this code be simplified to use a single function?
                            // Also: Caution - it is possible that word timing data from MS may depend on SDK version

                            // The current number of primary-language captions is passed as the first argument.
                            var newCaptions = MSTWord.ToCaptionEntitiesWithWordTiming(captions[sourceLanguage].Count, restartOffset, sentenceLevelCaptions);

                            // Primary-language captions are only taken at or after the configured start time.
                            if (begin >= startAfterMap[sourceLanguage])
                                if (logOnce.Add("AddingMain"))
                                    _logger.LogInformation($"{logId}: Adding Primary Language captions");
                                if (logOnce.Add("SkippingMain"))
                                    _logger.LogInformation($"{logId}: Skipping Main captions because {begin} < {startAfterMap[sourceLanguage]}");
                            // Each translation is checked against its own language's start time.
                            foreach (var element in e.Result.Translations)
                                var language   = element.Key;
                                var startAfter = startAfterMap[language];
                                if (begin >= startAfter)
                                    // Translations don't have word level timing so
                                    // interpolate between start and end
                                    newCaptions = Caption.ToCaptionEntitiesInterpolate(captions[language].Count, begin, end, element.Value);

                                    if (logOnce.Add($"AddingTranslated {language}"))
                                        _logger.LogInformation($"{logId}: Adding translation ({language}) captions");
                                    if (logOnce.Add($"SkippingTranslated {language}"))
                                        _logger.LogInformation($"{logId}: Skipping ({language}) captions because {begin} < {startAfter}");
                        else if (e.Result.Reason == ResultReason.NoMatch)
                            _logger.LogInformation($"{logId}: NOMATCH: Speech could not be recognized.");

                    // Records the error code of every cancellation; error cancellations are additionally classified below.
                    recognizer.Canceled += (s, e) =>
                        errorCode = e.ErrorCode.ToString();
                        _logger.LogInformation($"{logId}: CANCELED: ErrorCode={e.ErrorCode} Reason={e.Reason}");

                        if (e.Reason == CancellationReason.Error)
                            _logger.LogError($"{logId}: CANCELED: ErrorCode={e.ErrorCode} Reason={e.Reason}");

                            // Timeout, unavailable and connection failures are logged as "Retrying" and record how far recognition got.
                            if (e.ErrorCode == CancellationErrorCode.ServiceTimeout ||
                                e.ErrorCode == CancellationErrorCode.ServiceUnavailable ||
                                e.ErrorCode == CancellationErrorCode.ConnectionFailure)
                                TimeSpan lastTime = TimeSpan.Zero;
                                // NOTE(review): this checks the number of languages in the dictionary, not the
                                // source-language list; First() below throws if that list is empty - confirm it cannot be.
                                if (captions.Count != 0)
                                    // Picks the primary-language caption with the greatest End.
                                    var lastCaption = captions[sourceLanguage].OrderBy(c => c.End).TakeLast(1).ToList().First();
                                    lastTime = lastCaption.End;

                                _logger.LogInformation($"{logId}: Retrying, LastSuccessTime={lastTime}");
                                lastSuccessfulTime = lastTime;
                            else if (e.ErrorCode != CancellationErrorCode.NoError)
                                _logger.LogInformation($"{logId}: CANCELED: ErrorCode={e.ErrorCode} Reason={e.Reason}");
                                // Blocks the event-handler thread until the Slack post has completed.
                                _slackLogger.PostErrorAsync(new Exception($"{logId}: Transcription Failure"),
                                                            "Transcription Failure").GetAwaiter().GetResult();


                    recognizer.SessionStarted += (s, e) =>
                        _logger.LogInformation($"{logId}: Session started event.");

                    recognizer.SessionStopped += (s, e) =>
                        _logger.LogInformation($"{logId}: Session stopped event. Stopping recognition.");

                    // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    // Waits for completion.
                    // Use Task.WaitAny to keep the task rooted.
                    // Task.WaitAny is a blocking call: the current thread is held until stopRecognition.Task completes.
                    Task.WaitAny(new[] { stopRecognition.Task });

                    // Stops recognition.
                    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

                    _logger.LogInformation($"{logId}: Returning {captions.Count} languages, ErrorCode = {errorCode}, LastSuccessTime = {lastSuccessfulTime}");

                    // The caller's captions dictionary is returned as-is, together with the values captured by the Canceled handler.
                    return(new MSTResult
                        Captions = captions,
                        ErrorCode = errorCode,
                        LastSuccessTime = lastSuccessfulTime