/// <summary>
/// Stores the given status on an inference history and, when its container still exists, deletes that container.
/// </summary>
/// <param name="inferenceHistory">Target inference history.</param>
/// <param name="status">Status to store.</param>
/// <param name="force">Whether changes against other tenants are allowed.</param>
public async Task ExitAsync(InferenceHistory inferenceHistory, ContainerStatus status, bool force)
{
    // The recorded status says no container exists: only the DB record has to change.
    if (!inferenceHistory.GetStatus().Exist())
    {
        await inferenceHistoryRepository.UpdateStatusAsync(inferenceHistory.Id, status, force);

        // Fix the DB update.
        unitOfWork.Commit();
        return;
    }

    var tenantName = CurrentUserInfo.SelectedTenant.Name;
    var details = await clusterManagementLogic.GetContainerDetailsInfoAsync(inferenceHistory.Key, tenantName, force);

    // The DB is updated and committed BEFORE the container is deleted; the result of the
    // actual deletion is not checked (if the container is already gone, nothing is lost).
    await inferenceHistoryRepository.UpdateStatusAsync(inferenceHistory.Id, status, details.CreatedAt, DateTime.Now, force);
    unitOfWork.Commit();

    // Delete the container only when the freshly fetched details still report it as existing.
    if (details.Status.Exist())
    {
        // NOTE(review): ContainerType.Training is passed for an inference container - confirm this is intentional.
        await clusterManagementLogic.DeleteContainerAsync(ContainerType.Training, inferenceHistory.Key, tenantName, force);
    }
}
/// <summary>
/// Constructor. Copies the detail fields of an inference history into this output model.
/// </summary>
/// <param name="history">Inference history to convert.</param>
public InferenceDetailsOutputModel(InferenceHistory history) : base(history)
{
    Key = history.Key;
    Options = new List<KeyValuePair<string, string>>();

    // Git information of the model source.
    var gitModel = new GitCommitOutputModel();
    gitModel.GitId = history.ModelGitId;
    gitModel.Repository = history.ModelRepository;
    gitModel.Owner = history.ModelRepositoryOwner;
    gitModel.Branch = history.ModelBranch;
    gitModel.CommitId = history.ModelCommitId;
    GitModel = gitModel;

    // Container image information. ContainerRegistryId.Value throws when the ID is null.
    var containerImage = new ContainerImageOutputModel();
    containerImage.RegistryId = history.ContainerRegistryId.Value;
    containerImage.RegistryName = history.ContainerRegistry.Name;
    containerImage.Image = history.ContainerImage;
    containerImage.Tag = history.ContainerTag;
    ContainerImage = containerImage;

    CompletedAt = history.CompletedAt?.ToFormatedString();
    StartedAt = history.StartedAt?.ToFormatedString();
    LogSummary = history.LogSummary;

    // Parent training histories (left unassigned when there are none).
    if (history.ParentMaps?.Count > 0)
    {
        var parentModels = new List<IndexOutputModel>();
        foreach (InferenceHistoryParentMap map in history.ParentMaps)
        {
            parentModels.Add(new IndexOutputModel(map.Parent));
        }
        Parents = parentModels;
    }

    // Parent inference histories (left unassigned when there are none).
    if (history.ParentInferenceMaps?.Count > 0)
    {
        var parentInferenceModels = new List<InferenceIndexOutputModel>();
        foreach (InferenceHistoryParentInferenceMap map in history.ParentInferenceMaps)
        {
            parentInferenceModels.Add(new InferenceIndexOutputModel(map.Parent));
        }
        ParentInferences = parentInferenceModels;
    }

    Node = history.Node;
    EntryPoint = history.EntryPoint;
    Cpu = history.Cpu;
    Memory = history.Memory;
    Gpu = history.Gpu;
    Partition = history.Partition;
    Zip = history.Zip;
    LocalDataSet = history.LocalDataSet;

    // Copy every option as a key/value pair.
    foreach (var entry in history.GetOptionDic())
    {
        Options.Add(new KeyValuePair<string, string>(entry.Key, entry.Value));
    }
}
/// <summary>
/// Constructor. Fills the data set, the entry point and, when a parent is set, the parent's name.
/// </summary>
/// <param name="history">Inference history to convert.</param>
public InferenceIndexOutputModel(InferenceHistory history) : base(history)
{
    DataSet = new DataSetApiModels.IndexOutputModel(history.DataSet);
    EntryPoint = history.EntryPoint;

    var parent = history.Parent;
    if (parent == null)
    {
        // No parent: ParentName is left untouched.
        return;
    }

    ParentName = parent.Name;
}
/// <summary>
/// Constructor. Copies the basic identification fields of an inference history.
/// </summary>
/// <param name="history">Inference history to convert.</param>
public InferenceSimpleOutputModel(InferenceHistory history) : base(history)
{
    // Identification.
    Id = history.Id;
    Name = history.Name;
    DisplayId = history.DisplayId;

    // Descriptive fields.
    Memo = history.Memo;
    Favorite = history.Favorite;
    Status = history.GetStatus().ToString();

    // Built from the Id and Name properties assigned above.
    FullName = $"{Id}:{Name}";
}
/// <summary>
/// Constructor. Fills the data set, the entry point and the "ID:Name" labels of all parents.
/// </summary>
/// <param name="history">Inference history to convert.</param>
public InferenceIndexOutputModel(InferenceHistory history) : base(history)
{
    DataSet = new DataSetApiModels.IndexOutputModel(history.DataSet);
    EntryPoint = history.EntryPoint;

    // Without parents, ParentFullNameList is left untouched.
    if (history.ParentMaps == null || history.ParentMaps.Count <= 0)
    {
        return;
    }

    var fullNames = new List<string>();
    foreach (InferenceHistoryParentMap map in history.ParentMaps)
    {
        fullNames.Add($"{map.Parent.Id}:{map.Parent.Name}");
    }
    ParentFullNameList = fullNames;
}
/// <summary>
/// Constructor. Copies the detail fields of an inference history into this output model.
/// </summary>
/// <param name="history">Inference history to convert.</param>
public InferenceDetailsOutputModel(InferenceHistory history) : base(history)
{
    Key = history.Key;
    Options = new List<KeyValuePair<string, string>>();

    // Git information of the model source.
    var gitModel = new GitCommitOutputModel();
    gitModel.GitId = history.ModelGitId;
    gitModel.Repository = history.ModelRepository;
    gitModel.Owner = history.ModelRepositoryOwner;
    gitModel.Branch = history.ModelBranch;
    gitModel.CommitId = history.ModelCommitId;
    GitModel = gitModel;

    // Container image information. ContainerRegistryId.Value throws when the ID is null.
    var containerImage = new ContainerImageOutputModel();
    containerImage.RegistryId = history.ContainerRegistryId.Value;
    containerImage.RegistryName = history.ContainerRegistry.Name;
    containerImage.Image = history.ContainerImage;
    containerImage.Tag = history.ContainerTag;
    ContainerImage = containerImage;

    CompletedAt = history.CompletedAt?.ToFormatedString();
    StartedAt = history.StartedAt?.ToFormatedString();
    LogSummary = history.LogSummary;

    // The parent is converted only when one is set; otherwise Parent is explicitly null.
    if (history.Parent == null)
    {
        Parent = null;
    }
    else
    {
        Parent = new SimpleOutputModel(history.Parent);
    }

    Node = history.Node;
    EntryPoint = history.EntryPoint;
    Cpu = history.Cpu;
    Memory = history.Memory;
    Gpu = history.Gpu;
    Partition = history.Partition;
    Zip = history.Zip;

    // Copy every option as a key/value pair.
    foreach (var entry in history.GetOptionDic())
    {
        Options.Add(new KeyValuePair<string, string>(entry.Key, entry.Value));
    }
}
/// <summary>
/// Links a parent inference history to a notebook history.
/// </summary>
/// <param name="notebookHistory">Notebook history.</param>
/// <param name="parentInference">Parent inference history; may be null.</param>
/// <returns>The map that was added, or null when no parent inference was given.</returns>
public NotebookHistoryParentInferenceMap AttachParentInferenceToNotebookAsync(NotebookHistory notebookHistory, InferenceHistory parentInference)
{
    if (parentInference != null)
    {
        var link = new NotebookHistoryParentInferenceMap();
        link.NotebookHistoryId = notebookHistory.Id;
        link.ParentId = parentInference.Id;

        AddModel(link);
        return link;
    }

    // Nothing to do when no parent is specified.
    return null;
}
/// <summary>
/// Starts a new container for an inference job.
/// </summary>
/// <param name="inferenceHistory">Inference history the container is run for.</param>
/// <returns>
/// On success, the started container's name, status, host, "ssh" node port and configuration;
/// otherwise an error message.
/// </returns>
public async Task<Result<ContainerInfo, string>> RunInferenceContainerAsync(InferenceHistory inferenceHistory)
{
    string token = await GetUserAccessTokenAsync();
    if (token == null)
    {
        // No token: report an error result.
        return Result<ContainerInfo, string>.CreateErrorResult("Access denied. Failed to get token to access the cluster management system.");
    }

    // A Git ID of -1 selects the tenant's default Git ID.
    long gitId;
    if (inferenceHistory.ModelGitId == -1)
    {
        gitId = CurrentUserInfo.SelectedTenant.DefaultGitId.Value;
    }
    else
    {
        gitId = inferenceHistory.ModelGitId.Value;
    }

    var registryMap = registryLogic.GetCurrentRegistryMap(inferenceHistory.ContainerRegistryId.Value);
    var gitEndpoint = gitLogic.GetPullUrl(gitId, inferenceHistory.ModelRepository, inferenceHistory.ModelRepositoryOwner);
    if (gitEndpoint == null)
    {
        // Git information is mandatory, so its absence is an error.
        return Result<ContainerInfo, string>.CreateErrorResult("Git credential is not valid.");
    }

    var nodes = GetAccessibleNode();
    if (nodes == null || nodes.Count == 0)
    {
        // Zero deployable nodes is treated as an error.
        return Result<ContainerInfo, string>.CreateErrorResult("Access denied. There is no node this tenant can use.");
    }

    var tenant = CurrentUserInfo.SelectedTenant;
    var historyId = inferenceHistory.Id.ToString();

    // Settings needed to start the container. Properties are assigned in a fixed order;
    // GenerateToken() is deliberately still called twice (KqiToken and KQI_TOKEN).
    var inputModel = new RunContainerInputModel();
    inputModel.ID = inferenceHistory.Id;
    inputModel.TenantName = TenantName;
    inputModel.LoginUser = CurrentUserInfo.Alias; // the account is specified by alias
    inputModel.Name = inferenceHistory.Key;
    inputModel.ContainerImage = registryMap.Registry.GetImagePath(inferenceHistory.ContainerImage, inferenceHistory.ContainerTag);
    inputModel.ScriptType = "inference";
    inputModel.Cpu = inferenceHistory.Cpu;
    inputModel.Memory = inferenceHistory.Memory;
    inputModel.Gpu = inferenceHistory.Gpu;
    inputModel.KqiImage = "kamonohashi/cli:" + versionLogic.GetVersion();
    inputModel.KqiToken = loginLogic.GenerateToken().AccessToken;
    inputModel.LogPath = "/kqi/attach/inference_stdout_stderr_${INFERENCE_ID}.log";

    // Directory where results are stored.
    var outputMount = new NfsVolumeMountModel()
    {
        Name = "nfs-output",
        MountPath = "/kqi/output",
        SubPath = historyId,
        Server = tenant.Storage.NfsServer,
        ServerPath = tenant.InferenceContainerOutputNfsPath,
        ReadOnly = false
    };

    // Directory where attached files are stored
    // (used to attach the zipped result directory when the job completes).
    var attachMount = new NfsVolumeMountModel()
    {
        Name = "nfs-attach",
        MountPath = "/kqi/attach",
        SubPath = historyId,
        Server = tenant.Storage.NfsServer,
        ServerPath = tenant.InferenceContainerAttachedNfsPath,
        ReadOnly = false
    };
    inputModel.NfsVolumeMounts = new List<NfsVolumeMountModel>() { outputMount, attachMount };

    var sharedPaths = new Dictionary<string, string>();
    sharedPaths.Add("tmp", "/kqi/tmp/");
    sharedPaths.Add("input", "/kqi/input/");
    sharedPaths.Add("git", "/kqi/git/");
    inputModel.ContainerSharedPath = sharedPaths;

    var envList = new Dictionary<string, string>();
    envList.Add("DATASET_ID", inferenceHistory.DataSetId.ToString());
    envList.Add("INFERENCE_ID", historyId);
    envList.Add("PARENT_ID", inferenceHistory.ParentId?.ToString());
    envList.Add("MODEL_REPOSITORY", gitEndpoint.FullUrl);
    envList.Add("MODEL_REPOSITORY_URL", gitEndpoint.Url);
    envList.Add("MODEL_REPOSITORY_TOKEN", gitEndpoint.Token);
    envList.Add("COMMIT_ID", inferenceHistory.ModelCommitId);
    envList.Add("KQI_SERVER", containerOptions.WebServerUrl);
    envList.Add("KQI_TOKEN", loginLogic.GenerateToken().AccessToken);
    envList.Add("http_proxy", containerOptions.Proxy);
    envList.Add("https_proxy", containerOptions.Proxy);
    envList.Add("no_proxy", containerOptions.NoProxy);
    envList.Add("HTTP_PROXY", containerOptions.Proxy);
    envList.Add("HTTPS_PROXY", containerOptions.Proxy);
    envList.Add("NO_PROXY", containerOptions.NoProxy);
    envList.Add("COLUMNS", containerOptions.ShellColumns);
    envList.Add("PYTHONUNBUFFERED", "true"); // disable stdout/stderr buffering when running python
    envList.Add("LC_ALL", "C.UTF-8");        // avoids errors when running python
    envList.Add("LANG", "C.UTF-8");          // avoids errors when running python
    inputModel.EnvList = envList;

    inputModel.EntryPoint = inferenceHistory.EntryPoint;
    inputModel.PortMappings = new PortMappingModel[]
    {
        new PortMappingModel() { Protocol = "TCP", Port = 22, TargetPort = 22, Name = "ssh" },
    };
    inputModel.ClusterManagerToken = token;
    inputModel.RegistryTokenName = registryMap.RegistryTokenKey;
    inputModel.IsNodePort = true;

    // When a parent is specified, mount the parent's output read-only at /kqi/parent.
    // The parent of an inference job is a training job, hence the training output path.
    if (inferenceHistory.ParentId != null)
    {
        var parentMount = new NfsVolumeMountModel()
        {
            Name = "nfs-parent",
            MountPath = "/kqi/parent",
            SubPath = inferenceHistory.ParentId.ToString(),
            Server = CurrentUserInfo.SelectedTenant.Storage.NfsServer,
            ServerPath = CurrentUserInfo.SelectedTenant.TrainingContainerOutputNfsPath,
            ReadOnly = true
        };
        inputModel.NfsVolumeMounts.Add(parentMount);
    }

    // Merge the user's additional environment variables.
    AddUserEnvToInputModel(inferenceHistory.OptionDic, inputModel);

    // Restrict deployment to the usable nodes.
    var constraints = new Dictionary<string, List<string>>();
    constraints.Add(containerOptions.ContainerLabelHostName, nodes);
    inputModel.ConstraintList = constraints;

    if (!string.IsNullOrEmpty(inferenceHistory.Partition))
    {
        // Add the partition constraint when one is specified.
        inputModel.ConstraintList.Add(containerOptions.ContainerLabelPartition, new List<string> { inferenceHistory.Partition });
    }

    var outModel = await clusterManagementService.RunContainerAsync(inputModel);
    if (outModel.IsSuccess == false)
    {
        return Result<ContainerInfo, string>.CreateErrorResult(outModel.Error);
    }

    // NOTE(review): Find returns null if no mapping is named "ssh", which would make the
    // NodePort access below throw - confirm the cluster service always returns it.
    var sshPort = outModel.Value.PortMappings.Find(mapping => mapping.Name == "ssh");
    var containerInfo = new ContainerInfo()
    {
        Name = outModel.Value.Name,
        Status = outModel.Value.Status,
        Host = outModel.Value.Host,
        Port = sshPort.NodePort,
        Configuration = outModel.Value.Configuration
    };
    return Result<ContainerInfo, string>.CreateResult(containerInfo);
}
/// <summary>
/// Registers a new inference history and starts its container.
/// </summary>
/// <param name="model">Creation request.</param>
/// <param name="nodeRepository">Node repository used to check the requested partition.</param>
/// <returns>
/// A "created" response carrying the new inference history when the container status reports success;
/// otherwise a bad-request, not-found, conflict or service-unavailable response.
/// </returns>
public async Task<IActionResult> Create([FromBody] CreateInputModel model, [FromServices] INodeRepository nodeRepository)
{
    // Input validation.
    if (!ModelState.IsValid)
    {
        return JsonBadRequest("Invalid inputs.");
    }

    // Existence checks.
    var dataSet = await dataSetRepository.GetByIdAsync(model.DataSetId.Value);
    if (dataSet == null)
    {
        return JsonNotFound($"DataSet ID {model.DataSetId} is not found.");
    }

    if (model.ParentId.HasValue && await trainingHistoryRepository.GetByIdAsync(model.ParentId.Value) == null)
    {
        return JsonNotFound($"Training ID {model.ParentId.Value} is not found.");
    }

    if (!string.IsNullOrEmpty(model.Partition))
    {
        bool partitionUsable = await nodeRepository.IsEnablePartitionAsync(model.Partition, true);
        if (!partitionUsable)
        {
            return JsonNotFound($"There are no enable nodes with Partition {model.Partition}.");
        }
    }

    // A container with the same name cannot be run, so check for one first.
    var containerStatus = await clusterManagementLogic.GetContainerStatusAsync(model.Name, CurrentUserInfo.SelectedTenant.Name, false);
    if (containerStatus.Exist())
    {
        return containerStatus.IsError()
            ? JsonConflict($"Failed to check cluster status. Please contact your server administrator.")
            : JsonConflict($"Container {model.Name} already exists: status {containerStatus}");
    }

    long? gitId = model.GitModel.GitId ?? CurrentUserInfo.SelectedTenant.DefaultGit?.Id;
    string branch = model.GitModel.Branch ?? "master";
    string commitId = model.GitModel.CommitId;

    // When no commit ID is given, resolve it from the HEAD of the branch.
    // NOTE(review): gitId.Value throws when neither the request nor the tenant supplies a Git ID.
    if (string.IsNullOrEmpty(commitId))
    {
        commitId = await gitLogic.GetCommitIdAsync(gitId.Value, model.GitModel.Repository, model.GitModel.Owner, branch);
        if (string.IsNullOrEmpty(commitId))
        {
            // The commit ID could not be determined.
            return JsonNotFound($"The branch {branch} for {gitId.Value}/{model.GitModel.Owner}/{model.GitModel.Repository} is not found.");
        }
    }

    // The history is created BEFORE the container is run, so that a failed start can still be shown to the user.
    var inferenceHistory = new InferenceHistory()
    {
        Name = model.Name,
        DisplayId = -1,
        ContainerRegistryId = model.ContainerImage.RegistryId ?? CurrentUserInfo.SelectedTenant.DefaultRegistry?.Id,
        ContainerImage = model.ContainerImage.Image,
        ContainerTag = model.ContainerImage.Tag, // "latest" is assumed not to be used in operation, so assign as-is
        DataSetId = model.DataSetId.Value,
        EntryPoint = model.EntryPoint,
        ModelGitId = gitId,
        ModelRepository = model.GitModel.Repository,
        ModelRepositoryOwner = model.GitModel.Owner,
        ModelBranch = branch,
        ModelCommitId = commitId,
        OptionDic = model.Options ?? new Dictionary<string, string>(), // options may be null; initialize in that case
        ParentId = model.ParentId,
        Memo = model.Memo,
        Cpu = model.Cpu.Value,
        Memory = model.Memory.Value,
        Gpu = model.Gpu.Value,
        Partition = model.Partition,
        Status = ContainerStatus.Running.Key
    };

    // An empty-string key is excluded.
    if (inferenceHistory.OptionDic.ContainsKey(""))
    {
        inferenceHistory.OptionDic.Remove("");
    }

    inferenceHistoryRepository.Add(inferenceHistory);
    if (dataSet.IsLocked == false)
    {
        dataSet.IsLocked = true;
    }
    unitOfWork.Commit();

    var runResult = await clusterManagementLogic.RunInferenceContainerAsync(inferenceHistory);
    if (runResult.IsSuccess == false)
    {
        // The container failed to start: report the error and delete the saved history as well.
        // NOTE(review): the message says "training" although this endpoint runs inference - confirm wording.
        inferenceHistoryRepository.Delete(inferenceHistory);
        unitOfWork.Commit();
        return JsonError(HttpStatusCode.ServiceUnavailable, "Failed to run training. The message bellow may be help to resolve: " + runResult.Error);
    }

    // Update the history according to the result.
    // Running may take time, so the latest record is re-read from the DB.
    inferenceHistory = await inferenceHistoryRepository.GetByIdAsync(inferenceHistory.Id);
    inferenceHistory.Configuration = runResult.Value.Configuration;
    inferenceHistory.Status = runResult.Value.Status.Key;
    unitOfWork.Commit();

    if (!runResult.Value.Status.Succeed())
    {
        return JsonError(HttpStatusCode.ServiceUnavailable, $"Failed to run training. Status={runResult.Value.Status.Name}. Please contact your server administrator.");
    }

    return JsonCreated(new InferenceSimpleOutputModel(inferenceHistory));
}
/// <summary>
/// Refreshes the status of an inference history and converts it to an index output model.
/// </summary>
/// <param name="history">Inference history to convert.</param>
/// <returns>The output model, including the stored output value when one could be read.</returns>
private async Task<InferenceIndexOutputModel> GetUpdatedIndexOutputModelAsync(InferenceHistory history)
{
    var outputModel = new InferenceIndexOutputModel(history);

    var recordedStatus = history.GetStatus();
    if (recordedStatus.Exist())
    {
        // The recorded status says the container exists, so ask the cluster for the current status.
        var currentStatus = await clusterManagementLogic.GetContainerStatusAsync(history.Key, CurrentUserInfo.SelectedTenant.Name, false);
        if (recordedStatus.Key != currentStatus.Key)
        {
            // The status changed: persist it and reflect it in the output model.
            await inferenceHistoryRepository.UpdateStatusAsync(history.Id, currentStatus, false);
            unitOfWork.Commit();
            outputModel.Status = currentStatus.Name;
        }
    }

    // If an output value was written to storage, read it and put it into the model.
    var outputFileName = "value.txt"; // name of the file the value is read from
    var outputPath = $"{history.Id}/{outputFileName}";
    var outputValue = await storageLogic.GetFileContentAsync(ResourceType.InferenceContainerOutputFiles, outputPath, outputFileName, true);
    if (outputValue != null)
    {
        outputModel.OutputValue = outputValue;
    }

    return outputModel;
}
/// <summary>
/// Registers a new inference history, links its parents and starts its container.
/// </summary>
/// <param name="model">Creation request.</param>
/// <returns>
/// A "created" response carrying the new inference history when the container status reports success;
/// otherwise a bad-request, not-found or service-unavailable response.
/// </returns>
public async Task<IActionResult> Create([FromBody] CreateInputModel model)
{
    // Input validation.
    if (!ModelState.IsValid)
    {
        return JsonBadRequest("Invalid inputs.");
    }

    // Existence checks.
    var dataSet = await dataSetRepository.GetByIdAsync(model.DataSetId.Value);
    if (dataSet == null)
    {
        return JsonNotFound($"DataSet ID {model.DataSetId} is not found.");
    }

    if (!string.IsNullOrEmpty(model.Partition))
    {
        bool partitionUsable = await nodeRepository.IsEnablePartitionAsync(model.Partition, true);
        if (!partitionUsable)
        {
            return JsonNotFound($"There are no enable nodes with Partition {model.Partition}.");
        }
    }

    // Environment variable name check (empty names are skipped here and removed later).
    if (model.Options != null)
    {
        foreach (var option in model.Options)
        {
            if (string.IsNullOrEmpty(option.Key))
            {
                continue;
            }

            // Format check.
            // NOTE(review): an invalid name is answered with "not found" rather than "bad request" - confirm intended.
            if (!Regex.IsMatch(option.Key, "^[-._a-zA-Z][-._a-zA-Z0-9]*$"))
            {
                return JsonNotFound($"Invalid envName. Please match the format of '^[-._a-zA-Z][-._a-zA-Z0-9]*$'.");
            }
        }
    }

    long? gitId = model.GitModel.GitId ?? CurrentUserInfo.SelectedTenant.DefaultGit?.Id;
    string branch = model.GitModel.Branch ?? "master";
    string commitId = model.GitModel.CommitId;

    // When no commit ID is given, resolve it from the HEAD of the branch.
    // NOTE(review): gitId.Value throws when neither the request nor the tenant supplies a Git ID.
    if (string.IsNullOrEmpty(commitId))
    {
        commitId = await gitLogic.GetCommitIdAsync(gitId.Value, model.GitModel.Repository, model.GitModel.Owner, branch);
        if (string.IsNullOrEmpty(commitId))
        {
            // The commit ID could not be determined.
            return JsonNotFound($"The branch {branch} for {gitId.Value}/{model.GitModel.Owner}/{model.GitModel.Repository} is not found.");
        }
    }

    // Resource quota check.
    Tenant tenant = tenantRepository.Get(CurrentUserInfo.SelectedTenant.Id);
    string quotaError = clusterManagementLogic.CheckQuota(tenant, model.Cpu.Value, model.Memory.Value, model.Gpu.Value);
    if (quotaError != null)
    {
        return JsonBadRequest(quotaError);
    }

    // The history is created BEFORE the container is run, so that a failed start can still be shown to the user.
    var inferenceHistory = new InferenceHistory()
    {
        Name = model.Name,
        DisplayId = -1,
        ContainerRegistryId = model.ContainerImage.RegistryId ?? CurrentUserInfo.SelectedTenant.DefaultRegistry?.Id,
        ContainerImage = model.ContainerImage.Image,
        ContainerTag = model.ContainerImage.Tag, // "latest" is assumed not to be used in operation, so assign as-is
        DataSetId = model.DataSetId.Value,
        EntryPoint = model.EntryPoint,
        ModelGitId = gitId,
        ModelRepository = model.GitModel.Repository,
        ModelRepositoryOwner = model.GitModel.Owner,
        ModelBranch = branch,
        ModelCommitId = commitId,
        OptionDic = model.Options ?? new Dictionary<string, string>(), // options may be null; initialize in that case
        Memo = model.Memo,
        Cpu = model.Cpu.Value,
        Memory = model.Memory.Value,
        Gpu = model.Gpu.Value,
        Partition = model.Partition,
        Status = ContainerStatus.Running.Key,
        Zip = model.Zip,
        LocalDataSet = model.LocalDataSet,
    };

    // An empty-string key is excluded.
    if (inferenceHistory.OptionDic.ContainsKey(""))
    {
        inferenceHistory.OptionDic.Remove("");
    }

    // When parent trainings are specified, check that each exists and link it to the history.
    if (model.ParentIds != null)
    {
        var parentMaps = new List<InferenceHistoryParentMap>();
        foreach (var parentId in model.ParentIds)
        {
            var parentTraining = await trainingHistoryRepository.GetByIdAsync(parentId);
            if (parentTraining == null)
            {
                return JsonNotFound($"Training ID {parentId} is not found.");
            }

            var parentMap = inferenceHistoryRepository.AttachParentAsync(inferenceHistory, parentTraining);
            if (parentMap != null)
            {
                parentMaps.Add(parentMap);
            }
        }
        inferenceHistory.ParentMaps = parentMaps;
    }

    // When parent inferences are specified, check that each exists and link it to the history.
    if (model.InferenceIds != null)
    {
        var parentInferenceMaps = new List<InferenceHistoryParentInferenceMap>();
        foreach (var parentId in model.InferenceIds)
        {
            var parentInference = await inferenceHistoryRepository.GetByIdAsync(parentId);
            if (parentInference == null)
            {
                return JsonNotFound($"Inference ID {parentId} is not found.");
            }

            var parentInferenceMap = inferenceHistoryRepository.AttachParentInferenceToInferenceAsync(inferenceHistory, parentInference);
            if (parentInferenceMap != null)
            {
                parentInferenceMaps.Add(parentInferenceMap);
            }
        }
        inferenceHistory.ParentInferenceMaps = parentInferenceMaps;
    }

    inferenceHistoryRepository.Add(inferenceHistory);
    if (dataSet.IsLocked == false)
    {
        dataSet.IsLocked = true;
    }
    unitOfWork.Commit();

    var runResult = await clusterManagementLogic.RunInferenceContainerAsync(inferenceHistory);
    if (runResult.IsSuccess == false)
    {
        // The container failed to start: release the data set lock, delete the saved history and report the error.
        // NOTE(review): the message says "training" although this endpoint runs inference - confirm wording.
        await dataSetLogic.ReleaseLockAsync(inferenceHistory.DataSetId);
        inferenceHistoryRepository.Delete(inferenceHistory);
        unitOfWork.Commit();
        return JsonError(HttpStatusCode.ServiceUnavailable, "Failed to run training. The message bellow may be help to resolve: " + runResult.Error);
    }

    // Update the history according to the result.
    // Running may take time, so the latest record is re-read from the DB.
    inferenceHistory = await inferenceHistoryRepository.GetByIdAsync(inferenceHistory.Id);
    inferenceHistory.Configuration = runResult.Value.Configuration;
    inferenceHistory.Status = runResult.Value.Status.Key;
    unitOfWork.Commit();

    if (!runResult.Value.Status.Succeed())
    {
        return JsonError(HttpStatusCode.ServiceUnavailable, $"Failed to run training. Status={runResult.Value.Status.Name}. Please contact your server administrator.");
    }

    return JsonCreated(new InferenceSimpleOutputModel(inferenceHistory));
}