/// <summary>
/// Loads fresh training data and prerequisites, trains a new instance of every registered rec source
/// against the new data, and then swaps the new state in under write locks. The existing rec sources
/// are only replaced at the very end, so they are not torn down while loading and training run.
/// </summary>
/// <param name="finalize">
/// When true, the new training data and usernames are not retained after the swap
/// (m_trainingData and m_usernames are set to null) and m_finalized is set to true.
/// </param>
/// <param name="cancellationToken">Token passed to lock acquisition, data loading, and training.</param>
private async Task ReloadTrainingDataHighAvailabilityAsync(bool finalize, CancellationToken cancellationToken)
{
    using (var trainingDataUpgradeableLock = await m_trainingDataLockAsync.EnterUpgradeableReadLockAsync(cancellationToken).ConfigureAwait(false))
    {
        Logging.Log.Info("Reloading training data and retraining rec sources. Rec sources will remain available.");
        Logging.Log.InfoFormat("Memory use: {0} bytes", GC.GetTotalMemory(forceFullCollection: false));

        Stopwatch totalTimer = Stopwatch.StartNew();

        // Load new training data
        MalTrainingData newData;
        IDictionary<int, string> newUsernames;
        IDictionary<int, IList<int>> newPrereqs;

        using (IMalTrainingDataLoader malTrainingDataLoader = m_trainingDataLoaderFactory.GetTrainingDataLoader())
        using (CancellationTokenSource faultCanceler = new CancellationTokenSource())
        using (CancellationTokenSource faultOrUserCancel = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, faultCanceler.Token))
        {
            // Both loads observe a token that fires on user cancellation or when faultCanceler is triggered.
            Stopwatch trainingDataTimer = Stopwatch.StartNew();
            CancellableTask<MalTrainingData> trainingDataTask = new CancellableTask<MalTrainingData>(
                malTrainingDataLoader.LoadMalTrainingDataAsync(faultOrUserCancel.Token), faultCanceler);

            // Logging continuation; only runs when the load completed successfully.
            Task trainingDataTimerTask = trainingDataTask.Task.ContinueWith(task =>
            {
                trainingDataTimer.Stop();
                Logging.Log.InfoFormat("Training data loaded. {0} users, {1} animes, {2} entries. Took {3}.",
                    task.Result.Users.Count, task.Result.Animes.Count,
                    task.Result.Users.Keys.Sum(userId => task.Result.Users[userId].Entries.Count),
                    trainingDataTimer.Elapsed);
            },
            cancellationToken,
            TaskContinuationOptions.ExecuteSynchronously | TaskContinuationOptions.NotOnCanceled | TaskContinuationOptions.NotOnFaulted,
            TaskScheduler.Current);

            Stopwatch prereqsTimer = Stopwatch.StartNew();
            CancellableTask<IDictionary<int, IList<int>>> prereqsTask = new CancellableTask<IDictionary<int, IList<int>>>(
                malTrainingDataLoader.LoadPrerequisitesAsync(faultOrUserCancel.Token), faultCanceler);

            // Logging continuation; only runs when the load completed successfully.
            Task prereqsTimerTask = prereqsTask.Task.ContinueWith(task =>
            {
                prereqsTimer.Stop();
                int numPrereqs = task.Result.Values.Sum(prereqList => prereqList.Count);
                Logging.Log.InfoFormat("Prerequisites loaded. {0} prerequisites for {1} animes. Took {2}.",
                    numPrereqs, task.Result.Count, prereqsTimer.Elapsed);
            },
            cancellationToken,
            TaskContinuationOptions.ExecuteSynchronously | TaskContinuationOptions.NotOnCanceled | TaskContinuationOptions.NotOnFaulted,
            TaskScheduler.Current);

            await AsyncUtils.WhenAllCancelOnFirstExceptionDontWaitForCancellations(trainingDataTask, prereqsTask);

            // Both tasks have completed at this point, so reading Result does not block.
            newData = trainingDataTask.Task.Result;
            newUsernames = GetUsernamesFromTrainingData(newData);
            newPrereqs = prereqsTask.Task.Result;

            await trainingDataTimerTask.ConfigureAwait(false);
            await prereqsTimerTask.ConfigureAwait(false);
        }

        GC.Collect();
        Logging.Log.InfoFormat("Memory use: {0} bytes", GC.GetTotalMemory(forceFullCollection: false));

        using (var recSourcesUpgradeableLock = await m_recSourcesLockAsync.EnterUpgradeableReadLockAsync(cancellationToken).ConfigureAwait(false))
        {
            // clone the json rec sources without the training state and train each one with the new data.
            Dictionary<string, ITrainableJsonRecSource> newRecSources =
                new Dictionary<string, ITrainableJsonRecSource>(StringComparer.OrdinalIgnoreCase);
            Dictionary<string, Func<ITrainableJsonRecSource>> newRecSourceFactories =
                new Dictionary<string, Func<ITrainableJsonRecSource>>(m_recSourceFactories, StringComparer.OrdinalIgnoreCase);

            if (m_recSourceFactories.Count == 0)
            {
                Logging.Log.Info("No rec sources to retrain.");
            }
            else
            {
                Logging.Log.Info("Retraining rec sources.");

                // Guards newRecSources and newRecSourceFactories, which the training tasks mutate concurrently.
                object newRecSourcesLockAndMemFence = new object();

                List<Task> recSourceTrainTasksList = new List<Task>();

                // ToList() so we can unload a rec source as we iterate if it errors while training.
                foreach (string recSourceNameLoopVar in m_recSourceFactories.Keys.ToList())
                {
                    string recSourceName = recSourceNameLoopVar; // avoid capturing the loop var

                    // Training tasks started by earlier iterations may already be removing failed entries
                    // from newRecSourceFactories under this lock. Dictionary does not support a reader
                    // running concurrently with a writer, so the lookup must take the same lock.
                    // The factory itself is invoked outside the lock so construction does not block the tasks.
                    Func<ITrainableJsonRecSource> recSourceFactory;
                    lock (newRecSourcesLockAndMemFence)
                    {
                        recSourceFactory = newRecSourceFactories[recSourceName];
                    }
                    ITrainableJsonRecSource recSource = recSourceFactory();

                    Task recSourceTrainTask = Task.Run(() =>
                    {
                        Logging.Log.InfoFormat("Retraining rec source {0} ({1}).", recSourceName, recSource);
                        Stopwatch trainTimer = Stopwatch.StartNew();

                        try
                        {
                            recSource.Train(newData, newUsernames, cancellationToken);
                            trainTimer.Stop();
                            Logging.Log.InfoFormat("Trained rec source {0} ({1}). Took {2}.", recSourceName, recSource, trainTimer.Elapsed);

                            lock (newRecSourcesLockAndMemFence)
                            {
                                newRecSources[recSourceName] = recSource;
                            }
                        }
                        catch (OperationCanceledException)
                        {
                            Logging.Log.InfoFormat("Canceled while retraining rec source {0} ({1}).", recSourceName, recSource);
                            throw;
                        }
                        catch (Exception ex)
                        {
                            // A rec source that fails to train is dropped from the new factory set rather than failing the whole reload.
                            Logging.Log.ErrorFormat("Error retraining rec source {0} ({1}): {2} Unloading it.", ex, recSourceName, recSource, ex.Message);

                            lock (newRecSourcesLockAndMemFence)
                            {
                                newRecSourceFactories.Remove(recSourceName);
                            }
                        }
                    }, cancellationToken);

                    recSourceTrainTasksList.Add(recSourceTrainTask);
                }

                // Wait for all to complete or cancellation. There should not be any exceptions other than OperationCanceledException.
                await Task.WhenAll(recSourceTrainTasksList).ConfigureAwait(false);

                lock (newRecSourcesLockAndMemFence)
                {
                    ; // just for the fence
                }
            }

            // Swap in the newly trained rec sources.
            using (var trainingDataWriteLock = await m_trainingDataLockAsync.UpgradeToWriteLock(cancellationToken).ConfigureAwait(false))
            using (var recSourcesWriteLock = await m_recSourcesLockAsync.UpgradeToWriteLock(cancellationToken).ConfigureAwait(false))
            {
                m_recSources = newRecSources;
                m_recSourceFactories = newRecSourceFactories;
                m_animes = newData.Animes;
                m_prereqs = newPrereqs;

                if (finalize)
                {
                    // Finalizing drops the references to the training data and usernames.
                    m_trainingData = null;
                    m_usernames = null;
                    m_finalized = true;
                    Logging.Log.Info("Finalized rec sources.");
                }
                else
                {
                    m_trainingData = newData;
                    m_usernames = newUsernames;
                    m_finalized = false;
                }
            }
        }

        totalTimer.Stop();
        Logging.Log.InfoFormat("All rec sources retrained with the latest data. Total time: {0}", totalTimer.Elapsed);
    }

    GC.Collect();
    Logging.Log.InfoFormat("Memory use: {0} bytes", GC.GetTotalMemory(forceFullCollection: false));
}
/// <summary>
/// Creates a rec source from the given factory, trains it with the current training data,
/// and registers it (and its factory) under <paramref name="name"/>.
/// </summary>
/// <param name="recSourceFactory">Creates the rec source to train. Stored in m_recSourceFactories under <paramref name="name"/> on success.</param>
/// <param name="name">Name to register the rec source under.</param>
/// <param name="replaceExisting">When false, an already-loaded rec source with the same name causes an error.</param>
/// <param name="cancellationToken">Token passed to lock acquisition and training.</param>
/// <exception cref="RecServiceErrorException">
/// Thrown when the name is already loaded and <paramref name="replaceExisting"/> is false, when a rec source
/// with the same name is currently being trained, or when no training data is available
/// (either never loaded/failed reload, or the rec sources were finalized).
/// </exception>
public async Task LoadRecSourceAsync(Func<ITrainableJsonRecSource> recSourceFactory, string name, bool replaceExisting, CancellationToken cancellationToken)
{
    // Acquire read lock on current list, write lock on pending list
    // If name already exists on current list and replaceExisting = false, throw.
    // If name already exists on pending list, throw.
    // Otherwise, add name to pending list, release locks, and proceed.
    using (var recSourcesReadLock = await m_recSourcesLockAsync.EnterReadLockAsync(cancellationToken).ConfigureAwait(false))
    using (var pendingRecSourcesWriteLock = await m_pendingRecSourcesLockAsync.EnterWriteLockAsync(cancellationToken).ConfigureAwait(false))
    {
        if (m_recSources.ContainsKey(name) && !replaceExisting)
        {
            throw new RecServiceErrorException(new Error(errorCode: ErrorCodes.Unknown,
                message: string.Format("A recommendation source with the name \"{0}\" already exists.", name)));
        }
        if (m_pendingRecSources.Contains(name))
        {
            throw new RecServiceErrorException(new Error(errorCode: ErrorCodes.Unknown,
                message: string.Format("A recommendation source with the name \"{0}\" is currently being trained.", name)));
        }

        m_pendingRecSources.Add(name);
    }

    // The failure is captured rather than handled inside the catch block so that the cleanup below
    // can await the pending-list write lock; Throw() then rethrows with the original stack trace.
    System.Runtime.ExceptionServices.ExceptionDispatchInfo trainingFailure = null;

    try
    {
        // Need to hold read lock on training data while training so that a retrain can't happen while we're training here.
        // Rec sources must be trained with the current m_trainingData, not an old version.
        using (var trainingDataReadLock = await m_trainingDataLockAsync.EnterReadLockAsync(cancellationToken).ConfigureAwait(false))
        {
            if (m_trainingData == null && !m_finalized)
            {
                throw new RecServiceErrorException(new Error(errorCode: ErrorCodes.NoTrainingData,
                    message: "A reload/retrain in low memory mode failed, leaving the rec service without training data or rec sources. Issue a ReloadTrainingData command to load training data, then load rec sources."));
            }
            else if (m_trainingData == null && m_finalized)
            {
                throw new RecServiceErrorException(new Error(errorCode: ErrorCodes.Finalized,
                    message: "Rec sources have been finalized. A non-finalized retrain must be invoked to be able to add rec sources."));
            }

            ITrainableJsonRecSource recSource = recSourceFactory();
            Logging.Log.InfoFormat("Training rec source with name \"{0}\", replaceExisting={1}: {2}", name, replaceExisting, recSource);
            Stopwatch timer = Stopwatch.StartNew();

            recSource.Train(m_trainingData, m_usernames, cancellationToken);
            timer.Stop();
            Logging.Log.InfoFormat("Trained rec source {0}. Took {1}", name, timer.Elapsed);

            // Publish the trained rec source and clear the pending marker under both write locks.
            using (var recSourcesWriteLock = await m_recSourcesLockAsync.EnterWriteLockAsync(cancellationToken).ConfigureAwait(false))
            using (var pendingRecSourcesWriteLock = await m_pendingRecSourcesLockAsync.EnterWriteLockAsync(cancellationToken).ConfigureAwait(false))
            {
                m_recSources[name] = recSource;
                m_recSourceFactories[name] = recSourceFactory;
                m_pendingRecSources.Remove(name);
            }
        }
    }
    catch (Exception ex)
    {
        trainingFailure = System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ex);
    }

    if (trainingFailure != null)
    {
        // Every other mutation of m_pendingRecSources happens under its write lock, so the cleanup
        // must too. CancellationToken.None is used because the failure may itself be a cancellation
        // of cancellationToken, and the pending marker has to be cleared regardless.
        using (var pendingRecSourcesCleanupLock = await m_pendingRecSourcesLockAsync.EnterWriteLockAsync(CancellationToken.None).ConfigureAwait(false))
        {
            m_pendingRecSources.Remove(name);
        }

        trainingFailure.Throw();
    }

    Logging.Log.InfoFormat("Loaded rec source {0}.", name);
    GC.Collect();
    Logging.Log.InfoFormat("Memory use: {0} bytes", GC.GetTotalMemory(forceFullCollection: false));
}