/// <summary>
/// Checks a training result package for completeness and duplicates and, if both
/// checks pass, hands it to the filesystem manager to be written.
/// </summary>
/// <param name="pResults">Result package to verify and persist. May be null.</param>
/// <returns>
/// The value returned by <c>_filesystemManager.WriteResultsToFilesystem</c>, or <c>null</c>
/// when the package is null/incomplete, results for the same GUID already exist,
/// or writing threw an exception (the message is logged).
/// </returns>
public MetaStruct VerifyAndSave(TrainingResultPackage pResults)
{
    //check for completeness
    if (pResults == null || !pResults.IsValidResult())
    {
        // The package may be missing exactly the work package the message refers to,
        // so the GUID is read null-safely; otherwise this error path would itself throw.
        var incompleteGuid = pResults?.InWorkPackage?.GUID.ToString() ?? "<unknown>";
        NotifyNewLogMessageEvent($"[ERROR] Attempting to write incomplete results for {incompleteGuid}");
        return null;
    }

    //check if already exist
    if (_filesystemManager.CheckIfFileExists(pResults.InWorkPackage.Version, pResults.InWorkPackage.GUID.ToString()))
    {
        NotifyNewLogMessageEvent($"[ERROR] Attempting to write duplicate results for {pResults.InWorkPackage.GUID.ToString()}");
        return null;
    }

    try
    {
        return _filesystemManager.WriteResultsToFilesystem(pResults);
    }
    catch (Exception ex)
    {
        // Write failures are reported through the log event instead of propagating.
        NotifyNewLogMessageEvent($"[ERROR] {ex.Message}");
        return null;
    }
}
/// <summary>
/// Invokes the remote "SendTrainingResults" method on <c>_connection</c> with the given
/// result package and logs the outcome. Does nothing when <paramref name="pResults"/> is null.
/// </summary>
/// <param name="pResults">Result package to transmit.</param>
private void SendTrainingResults(TrainingResultPackage pResults)
{
    if (pResults == null)
    {
        return;
    }

    try
    {
        NotifyLogMessageEvent($"Send training results");

        // The invocation is not awaited, so the surrounding catch only sees exceptions
        // thrown synchronously. A single continuation handles every final state so that
        // a faulted or canceled send is logged instead of being silently dropped.
        _connection.InvokeAsync("SendTrainingResults", pResults).ContinueWith(t =>
        {
            if (t.IsFaulted)
            {
                // Reading t.Exception also marks the task's exception as observed.
                NotifyLogMessageEvent($"SendTrainingResults failed: {t.Exception?.GetBaseException().Message}");
            }
            else if (t.IsCanceled)
            {
                NotifyLogMessageEvent($"SendTrainingResults failed: the operation was canceled");
            }
            else
            {
                NotifyLogMessageEvent($"Results sent.");
            }
        });
    }
    catch (Exception e)
    {
        NotifyLogMessageEvent($"SendTrainingResults failed: {e.Message}");
    }
}
/// <summary>
/// Writes a training result package to the models directory of its version:
/// every result file, a <c>meta.xml</c> with the serialized <c>MetaStruct</c>,
/// and a <c>console.log</c> with the output log lines.
/// </summary>
/// <param name="pResults">Result package whose files, metadata and log are written.</param>
/// <returns>The <c>MetaStruct</c> that was serialized to <c>meta.xml</c>.</returns>
/// <exception cref="System.InvalidOperationException">
/// A result file's directory structure or filename resolves to a location outside
/// the results directory of this package.
/// </exception>
/// <exception cref="System.IO.IOException">
/// <c>meta.xml</c> already exists (it is opened with <c>FileMode.CreateNew</c>) or another I/O error occurred.
/// </exception>
public MetaStruct WriteResultsToFilesystem(TrainingResultPackage pResults)
{
    //write transmitted files
    var modelsdir = System.IO.Path.Combine(GetVersionModelsDirectory(pResults.InWorkPackage.Version).FullName, pResults.InWorkPackage.GUID.ToString());
    CreateIfNotExists(modelsdir);
    foreach (var file in pResults.ResultFiles)
    {
        var filepath = new FileInfo(System.IO.Path.Combine(modelsdir, file.DirectoryStructure, file.Filename));

        // The relative location comes from the transmitted package. A rooted value or
        // ".." segments would make Path.Combine point outside modelsdir, so reject it
        // before anything is created or written.
        if (!IsPathInsideDirectory(modelsdir, filepath.FullName))
        {
            throw new System.InvalidOperationException($"Result file '{file.Filename}' resolves to a path outside the results directory for {pResults.InWorkPackage.GUID}");
        }

        CreateIfNotExists(filepath.Directory.FullName);
        System.IO.File.WriteAllBytes(filepath.FullName, file.FileData);
    }

    //write meta file
    MetaStruct metaData = new MetaStruct
    {
        Duration = pResults.DurationTime,
        Epochs = pResults.ClientStatusAtEnd.CurrentEpoch,
        LastEpochDuration = pResults.ClientStatusAtEnd.LastEpochDuration,
        NodeName = pResults.MachineData.Name,
        OS = pResults.MachineData.OperatingSystem.ToString(),
        Version = pResults.MachineData.ContainerVersion,
        ProcessingUnit = pResults.MachineData.ProcessingUnit.ToString(),
        Timestamp = pResults.FinishTimestamp,
    };

    // CreateNew fails if meta.xml is already present; the using block closes the stream.
    using (var filestream = new FileStream(System.IO.Path.Combine(modelsdir, "meta.xml"), FileMode.CreateNew))
    {
        var xmlserializer = new XmlSerializer(typeof(MetaStruct));
        xmlserializer.Serialize(filestream, metaData);
    }

    //write logfile
    System.IO.File.WriteAllLines(System.IO.Path.Combine(modelsdir, "console.log"), pResults.OutLog);

    NotifyNewLogMessageEvent($"Saved results to file for {pResults.InWorkPackage.GUID}");
    return metaData;
}

// Returns true when pCandidatePath, after normalization, lies strictly below pRootDirectory.
private static bool IsPathInsideDirectory(string pRootDirectory, string pCandidatePath)
{
    var separator = System.IO.Path.DirectorySeparatorChar.ToString();
    var normalizedRoot = System.IO.Path.GetFullPath(pRootDirectory);
    if (!normalizedRoot.EndsWith(separator, System.StringComparison.Ordinal))
    {
        // The trailing separator stops "/data/abc" from matching "/data/abcdef/...".
        normalizedRoot += separator;
    }

    var normalizedCandidate = System.IO.Path.GetFullPath(pCandidatePath);
    return normalizedCandidate.StartsWith(normalizedRoot, System.StringComparison.Ordinal);
}
/// <summary>
/// Reports the size of a received training result package to log subscribers and
/// then raises <c>TrainingResultsReceivedEvent</c> with the package and the
/// connection id taken from <c>Context</c> (null when <c>Context</c> is null).
/// </summary>
/// <param name="pResults">The received training result package.</param>
public void SendTrainingResults(TrainingResultPackage pResults)
{
    //debug
    var logSubscribers = NewLogMessageEvent;
    if (logSubscribers != null)
    {
        // The message is only assembled when somebody is listening.
        var senderId = this.Context?.ConnectionId;
        var receivedFiles = pResults.ResultFiles;
        var totalBytes = receivedFiles.Sum(t => t.FileData.Length);
        logSubscribers.Invoke($"Training files received from {senderId} with {totalBytes} bytes in {receivedFiles.Count} files");
    }

    var resultSubscribers = TrainingResultsReceivedEvent;
    if (resultSubscribers != null)
    {
        resultSubscribers.Invoke(pResults, this.Context?.ConnectionId);
    }
}
/// <summary>
/// Handler for received training results: forwards the package to
/// <c>_commandManager.ReceiveTrainingResults</c>. The client id is not used.
/// </summary>
/// <param name="pResults">The received training result package.</param>
/// <param name="pClientID">Id of the sending client (ignored).</param>
private void SignalrmanagerOnTrainingResultsReceivedEvent(TrainingResultPackage pResults, string pClientID)
    => _commandManager.ReceiveTrainingResults(pResults);
/// <summary>
/// Runs the training commands of a work package one after another as child processes
/// in the given working directory, forwarding their stdout/stderr lines to the log
/// callback and watching stdout for known failure markers.
/// </summary>
/// <param name="pWorkingDirectory">Working directory for every started process.</param>
/// <param name="pWorkPackage">Work package whose <c>Version.TrainingCommands</c> are executed in order.</param>
/// <param name="pClientStatus">Client status; reset at the start and stored in the result package at the end.</param>
/// <param name="pSendStatusUpdate">Callback invoked once with the reset client status.</param>
/// <param name="pNotifyLogMessageEvent">Callback receiving log messages and process output lines.</param>
/// <param name="pMachine">Machine data; its <c>ProcessingUnit</c> selects the CONDUCTOR_TYPE environment variable.</param>
/// <returns>The result package with work package, machine data, final client status, duration and finish timestamp.</returns>
/// <exception cref="Exception">
/// A failure marker was found in the training output. The offending process is killed
/// and no further commands are started.
/// </exception>
public static TrainingResultPackage RunTraining(DirectoryInfo pWorkingDirectory, WorkPackage pWorkPackage, ref ClientStatus pClientStatus, Action<ClientStatus> pSendStatusUpdate, Action<String> pNotifyLogMessageEvent, MachineData pMachine)
{
    //init client status
    pClientStatus.IsWorking = true;
    pClientStatus.CurrentEpoch = 0;
    pClientStatus.LastEpochDuration = "none";
    pClientStatus.CurrentWorkParameters = pWorkPackage.Version.TrainingCommands.First().Parameters;
    pSendStatusUpdate(pClientStatus);

    //create result package
    // NOTE(review): machine data is queried again here rather than taken from pMachine - confirm this is intended.
    var trainingResultPackage = new TrainingResultPackage()
    {
        InWorkPackage = pWorkPackage,
        MachineData = Machine.Machine.GetMachineData()
    };

    //run process
    pNotifyLogMessageEvent("[Log] Create worker process.");
    DateTime startTime = DateTime.UtcNow;
    String errorMessage = String.Empty;
    foreach (var command in pWorkPackage.Version.TrainingCommands)
    {
        pNotifyLogMessageEvent($"[Log] Create process for: {command.FileName} {command.Arguments} in {pWorkingDirectory.FullName}");
        var startInfo = new ProcessStartInfo()
        {
            FileName = command.FileName,
            WorkingDirectory = pWorkingDirectory.FullName,
            Arguments = command.Arguments,
            RedirectStandardOutput = true,
            RedirectStandardInput = false,
            RedirectStandardError = true,
            UseShellExecute = false
        };

        //set env
        // The indexer is used instead of Add: the dictionary starts out with the current
        // process environment, and Add throws if CONDUCTOR_TYPE is already defined there.
        if (pMachine.ProcessingUnit == Conductor_Shared.Enums.ProcessingUnitEnum.CPU)
        {
            startInfo.Environment["CONDUCTOR_TYPE"] = "cpu";
            Console.WriteLine($"process env CONDUCTOR_TYPE set to 'cpu'");
        }
        else if (pMachine.ProcessingUnit == Conductor_Shared.Enums.ProcessingUnitEnum.GPU)
        {
            startInfo.Environment["CONDUCTOR_TYPE"] = "gpu";
            Console.WriteLine($"process env CONDUCTOR_TYPE set to 'gpu'");
        }

        using (var process = Process.Start(startInfo))
        {
            process.OutputDataReceived += (sender, args) =>
            {
                // A null line only signals end of stream; it is not output.
                if (args.Data == null)
                {
                    return;
                }

                //intercept out stream for prediction in log
                // Only the first failure is handled; later lines must not re-trigger Kill.
                if (String.IsNullOrWhiteSpace(errorMessage))
                {
                    var detectedError = DetectTrainingError(args.Data);
                    if (detectedError != null)
                    {
                        errorMessage = detectedError;
                        pNotifyLogMessageEvent($"{errorMessage}. Aborting!: {args.Data}");
                        TryKillProcess(process, pNotifyLogMessageEvent);
                    }
                }

                pNotifyLogMessageEvent(args.Data);
            };
            process.ErrorDataReceived += (sender, args) =>
            {
                if (args.Data != null)
                {
                    pNotifyLogMessageEvent(args.Data);
                }
            };

            pNotifyLogMessageEvent($"[Log] Starting process for: {command.FileName} {command.Arguments}");
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
            process.WaitForExit();
            pNotifyLogMessageEvent($"[Log] Finished process for: {command.FileName} {command.Arguments}");
        }

        // A failed command ends the run; the remaining commands are not started.
        if (!String.IsNullOrWhiteSpace(errorMessage))
        {
            break;
        }
    }
    pNotifyLogMessageEvent("[Log] Process finished.");

    if (!String.IsNullOrWhiteSpace(errorMessage))
    {
        throw new Exception(errorMessage);
    }

    // One timestamp is used for both values so duration and finish time agree.
    DateTime finishTime = DateTime.UtcNow;
    trainingResultPackage.ClientStatusAtEnd = pClientStatus;
    trainingResultPackage.DurationTime = finishTime - startTime;
    trainingResultPackage.FinishTimestamp = finishTime;

    //get results
    return trainingResultPackage;
}

// Returns the error text for a stdout line that contains a known failure marker, or null if it contains none.
private static String DetectTrainingError(String pOutputLine)
{
    if (Regex.IsMatch(pOutputLine, @"loss: nan - val_loss: nan"))
    {
        return "NaN found in training output";
    }

    //e.g. utility.exceptions.ConductorError: __EncodePrediction() takes 6 positional arguments but 7 were given
    if (Regex.IsMatch(pOutputLine, @"utility.exceptions.ConductorError"))
    {
        return "Error found in training output";
    }

    return null;
}

// Kills the process and logs, instead of throwing, when it has already exited or cannot be terminated.
private static void TryKillProcess(Process pProcess, Action<String> pNotifyLogMessageEvent)
{
    try
    {
        pProcess.Kill();
    }
    catch (InvalidOperationException ex)
    {
        pNotifyLogMessageEvent($"[Log] Process could not be killed: {ex.Message}");
    }
    catch (System.ComponentModel.Win32Exception ex)
    {
        pNotifyLogMessageEvent($"[Log] Process could not be killed: {ex.Message}");
    }
}