Пример #1
0
        /// <summary>
        /// Validates a training request, stores a new training history for it and starts the training container.
        /// </summary>
        /// <remarks>
        /// The history is committed before the container is started so that a failed run can still be shown to the user.
        /// When the start request itself fails, the stored history is deleted again.
        /// </remarks>
        /// <param name="model">Request body describing the training to create.</param>
        /// <param name="nodeRepository">Repository used to check that the requested partition has enabled nodes.</param>
        /// <returns>
        /// A "created" response wrapping the new history when the container reports a succeeding status;
        /// otherwise a bad-request, not-found or service-unavailable error response describing the failure.
        /// </returns>
        public async Task <IActionResult> Create([FromBody] CreateInputModel model, [FromServices] INodeRepository nodeRepository)
        {
            // Single source for the environment variable name format (used for the check and the message).
            const string envNamePattern = "^[-._a-zA-Z][-._a-zA-Z0-9]*$";

            // Validate the request body.
            if (!ModelState.IsValid)
            {
                return JsonBadRequest("Invalid inputs.");
            }

            // The referenced data set must exist.
            var dataSet = await dataSetRepository.GetByIdAsync(model.DataSetId.Value);
            if (dataSet == null)
            {
                return JsonNotFound($"DataSet ID {model.DataSetId} is not found.");
            }

            // A parent training is optional, but when one is given it must exist.
            if (model.ParentId.HasValue)
            {
                var parent = await trainingHistoryRepository.GetByIdAsync(model.ParentId.Value);
                if (parent == null)
                {
                    return JsonNotFound($"Training ID {model.ParentId.Value} is not found.");
                }
            }

            // A partition is optional, but when one is given it must have enabled nodes.
            if (!string.IsNullOrEmpty(model.Partition))
            {
                bool existPartition = await nodeRepository.IsEnablePartitionAsync(model.Partition, true);
                if (!existPartition)
                {
                    return JsonNotFound($"There are no enable nodes with Partition {model.Partition}.");
                }
            }

            // Check the environment variable names. Empty names are skipped here and dropped further below.
            if (model.Options != null)
            {
                foreach (var envName in model.Options.Keys)
                {
                    if (!string.IsNullOrEmpty(envName) && !Regex.IsMatch(envName, envNamePattern))
                    {
                        // NOTE(review): a validation failure is answered with "not found" here; kept as is for compatibility.
                        return JsonNotFound($"Invalid envName. Please match the format of '{envNamePattern}'.");
                    }
                }
            }

            long?  gitId    = model.GitModel.GitId ?? CurrentUserInfo.SelectedTenant.DefaultGit?.Id;
            string branch   = model.GitModel.Branch ?? "master";
            string commitId = model.GitModel.CommitId;

            // Neither the request nor the tenant default supplied a Git ID. Reject the request here,
            // because reading gitId.Value below would otherwise throw InvalidOperationException.
            if (gitId == null)
            {
                return JsonBadRequest("Git ID is not specified and the selected tenant has no default Git.");
            }

            // When no commit ID is given, resolve it from the HEAD of the branch.
            if (string.IsNullOrEmpty(commitId))
            {
                commitId = await gitLogic.GetCommitIdAsync(gitId.Value, model.GitModel.Repository, model.GitModel.Owner, branch);

                if (string.IsNullOrEmpty(commitId))
                {
                    // The commit ID could not be determined.
                    return JsonNotFound($"The branch {branch} for {gitId.Value}/{model.GitModel.Owner}/{model.GitModel.Repository} is not found.");
                }
            }

            // Options may be null, so fall back to an empty dictionary. The empty key is dropped up front
            // so the result does not depend on whether OptionDic hands back the stored instance or a copy.
            var options = model.Options ?? new Dictionary <string, string>();
            options.Remove("");

            // Create the training history before running the container
            // (so that its status can be shown to the user if the container fails to run).
            var trainingHistory = new TrainingHistory()
            {
                Name                 = model.Name,
                DisplayId            = -1,
                ContainerRegistryId  = model.ContainerImage.RegistryId ?? CurrentUserInfo.SelectedTenant.DefaultRegistry?.Id,
                ContainerImage       = model.ContainerImage.Image,
                ContainerTag         = model.ContainerImage.Tag, // "latest" is not expected to be used in operation, so assign the tag as given
                DataSetId            = model.DataSetId.Value,
                EntryPoint           = model.EntryPoint,
                ModelGitId           = gitId.Value,
                ModelRepository      = model.GitModel.Repository,
                ModelRepositoryOwner = model.GitModel.Owner,
                ModelBranch          = branch,
                ModelCommitId        = commitId,
                OptionDic            = options,
                ParentId             = model.ParentId,
                Memo                 = model.Memo,
                Cpu       = model.Cpu.Value,
                Memory    = model.Memory.Value,
                Gpu       = model.Gpu.Value,
                Partition = model.Partition,
                Status    = ContainerStatus.Running.Key
            };

            trainingHistoryRepository.Add(trainingHistory);

            // Mark the data set as locked together with the new history.
            if (dataSet.IsLocked == false)
            {
                dataSet.IsLocked = true;
            }
            unitOfWork.Commit();

            var result = await clusterManagementLogic.RunTrainContainerAsync(trainingHistory);

            if (result.IsSuccess == false)
            {
                // The container could not be started: delete the stored history and report the error.
                // NOTE(review): the data set lock set above is not reverted on this path - confirm that this is intended.
                trainingHistoryRepository.Delete(trainingHistory);
                unitOfWork.Commit();

                return JsonError(HttpStatusCode.ServiceUnavailable, "Failed to run training. The message bellow may be help to resolve: " + result.Error);
            }

            // Update the history according to the result.
            // Starting the container may take a while, so reload the latest record from the DB first.
            trainingHistory = await trainingHistoryRepository.GetByIdAsync(trainingHistory.Id);

            trainingHistory.Configuration = result.Value.Configuration;
            trainingHistory.Status        = result.Value.Status.Key;
            unitOfWork.Commit();

            if (result.Value.Status.Succeed())
            {
                return JsonCreated(new SimpleOutputModel(trainingHistory));
            }

            return JsonError(HttpStatusCode.ServiceUnavailable, $"Failed to run training. Status={result.Value.Status.Name}. Please contact your server administrator.");
        }
Пример #2
0
        /// <summary>
        /// Validates a training request, stores a new training history for it and starts the training container.
        /// </summary>
        /// <remarks>
        /// The history is committed before the container is started so that a failed run can still be shown to the user.
        /// When the start request itself fails, the data set lock is released and the stored history is deleted again.
        /// </remarks>
        /// <param name="model">Input describing the training to create.</param>
        /// <param name="dataSetRepository">Repository used to look up the referenced data set.</param>
        /// <param name="nodeRepository">Repository used to check that the requested partition has enabled nodes.</param>
        /// <param name="tenantRepository">Repository used to load the selected tenant for the quota check.</param>
        /// <param name="trainingHistoryRepository">Repository used to look up parents and to store the new history.</param>
        /// <param name="clusterManagementLogic">Logic used for the quota check and for starting the container.</param>
        /// <param name="dataSetLogic">Logic used to release the data set lock when the container fails to start.</param>
        /// <param name="gitLogic">Logic used to resolve a commit ID from a branch.</param>
        /// <param name="tagLogic">Logic used to register tags on the new history.</param>
        /// <param name="unitOfWork">Unit of work used to commit the changes.</param>
        /// <param name="currentUserInfo">Current user; its selected tenant supplies the default Git and registry.</param>
        /// <param name="modelState">Model state of the request; also forwarded to the error response helpers.</param>
        /// <param name="requestUrl">Request URL forwarded to the error response helpers.</param>
        /// <param name="scriptType">Forwarded unchanged to the container start call.</param>
        /// <param name="regisryTokenName">Forwarded unchanged to the container start call.</param>
        /// <param name="gitToken">Forwarded unchanged to the container start call.</param>
        /// <returns>
        /// A tuple of the training history and the response to send. The history is <c>null</c> whenever
        /// validation fails or the container could not be started; otherwise it is the stored history,
        /// paired with either a "created" response or a service-unavailable error depending on the reported status.
        /// </returns>
        public static async Task <(TrainingHistory, IActionResult)> DoCreate(CreateInputModel model,
                                                                             IDataSetRepository dataSetRepository,
                                                                             INodeRepository nodeRepository,
                                                                             ITenantRepository tenantRepository,
                                                                             ITrainingHistoryRepository trainingHistoryRepository,
                                                                             IClusterManagementLogic clusterManagementLogic,
                                                                             IDataSetLogic dataSetLogic,
                                                                             IGitLogic gitLogic,
                                                                             ITagLogic tagLogic,
                                                                             IUnitOfWork unitOfWork,
                                                                             UserInfo currentUserInfo,
                                                                             ModelStateDictionary modelState,
                                                                             string requestUrl,
                                                                             string scriptType,
                                                                             string regisryTokenName,
                                                                             string gitToken
                                                                             )
        {
            // Single source for the environment variable name format (used for the check and the message).
            const string envNamePattern = "^[-._a-zA-Z][-._a-zA-Z0-9]*$";

            // Validate the input.
            if (!modelState.IsValid)
            {
                return (null,
                        DoJsonBadRequest(typeof(TrainingController), requestUrl, modelState, "Invalid inputs."));
            }

            // The referenced data set must exist.
            var dataSet = await dataSetRepository.GetByIdAsync(model.DataSetId.Value);
            if (dataSet == null)
            {
                return (null,
                        DoJsonNotFound(typeof(TrainingController), requestUrl, modelState,
                                       $"DataSet ID {model.DataSetId} is not found."));
            }

            // A partition is optional, but when one is given it must have enabled nodes.
            if (!string.IsNullOrEmpty(model.Partition))
            {
                bool existPartition = await nodeRepository.IsEnablePartitionAsync(model.Partition, true);
                if (!existPartition)
                {
                    return (null,
                            DoJsonNotFound(typeof(TrainingController), requestUrl, modelState,
                                           $"There are no enable nodes with Partition {model.Partition}."));
                }
            }

            // Check the environment variable names. Empty names are skipped here and dropped further below.
            if (model.Options != null)
            {
                foreach (var envName in model.Options.Keys)
                {
                    if (!string.IsNullOrEmpty(envName) && !Regex.IsMatch(envName, envNamePattern))
                    {
                        // NOTE(review): a validation failure is answered with "not found" here; kept as is for compatibility.
                        return (null,
                                DoJsonNotFound(typeof(TrainingController), requestUrl, modelState, $"Invalid envName. Please match the format of '{envNamePattern}'."));
                    }
                }
            }

            long?  gitId    = model.GitModel.GitId ?? currentUserInfo.SelectedTenant.DefaultGit?.Id;
            string branch   = model.GitModel.Branch ?? "master";
            string commitId = model.GitModel.CommitId;

            // Neither the request nor the tenant default supplied a Git ID. Reject the request here,
            // because reading gitId.Value below would otherwise throw InvalidOperationException.
            if (gitId == null)
            {
                return (null,
                        DoJsonBadRequest(typeof(TrainingController), requestUrl, modelState,
                                         "Git ID is not specified and the selected tenant has no default Git."));
            }

            // When no commit ID is given, resolve it from the HEAD of the branch.
            if (string.IsNullOrEmpty(commitId))
            {
                commitId = await gitLogic.GetCommitIdAsync(gitId.Value, model.GitModel.Repository, model.GitModel.Owner, branch);

                if (string.IsNullOrEmpty(commitId))
                {
                    // The commit ID could not be determined.
                    return (null,
                            DoJsonNotFound(typeof(TrainingController), requestUrl, modelState,
                                           $"The branch {branch} for {gitId.Value}/{model.GitModel.Owner}/{model.GitModel.Repository} is not found."));
                }
            }

            // Check the requested resources against the tenant quota.
            Tenant tenant       = tenantRepository.Get(currentUserInfo.SelectedTenant.Id);
            string errorMessage = clusterManagementLogic.CheckQuota(tenant, model.Cpu.Value, model.Memory.Value, model.Gpu.Value);

            if (errorMessage != null)
            {
                return (null, DoJsonBadRequest(typeof(TrainingController), requestUrl, modelState, errorMessage));
            }

            // Options may be null, so fall back to an empty dictionary. The empty key is dropped up front
            // so the result does not depend on whether OptionDic hands back the stored instance or a copy.
            var options = model.Options ?? new Dictionary <string, string>();
            options.Remove("");

            // Create the training history before running the container
            // (so that its status can be shown to the user if the container fails to run).
            var trainingHistory = new TrainingHistory()
            {
                Name                 = model.Name,
                DisplayId            = -1,
                ContainerRegistryId  = model.ContainerImage.RegistryId ?? currentUserInfo.SelectedTenant.DefaultRegistry?.Id,
                ContainerImage       = model.ContainerImage.Image,
                ContainerTag         = model.ContainerImage.Tag, // "latest" is not expected to be used in operation, so assign the tag as given
                DataSetId            = model.DataSetId.Value,
                EntryPoint           = model.EntryPoint,
                ModelGitId           = gitId.Value,
                ModelRepository      = model.GitModel.Repository,
                ModelRepositoryOwner = model.GitModel.Owner,
                ModelBranch          = branch,
                ModelCommitId        = commitId,
                OptionDic            = options,
                Memo                 = model.Memo,
                Cpu          = model.Cpu.Value,
                Memory       = model.Memory.Value,
                Gpu          = model.Gpu.Value,
                Partition    = model.Partition,
                PortList     = model.Ports,
                Status       = ContainerStatus.Running.Key,
                Zip          = model.Zip,
                LocalDataSet = model.LocalDataSet,
            };

            // When parent trainings are given, each of them must exist.
            if (model.ParentIds != null)
            {
                var maps = new List <TrainingHistoryParentMap>();

                foreach (var parentId in model.ParentIds)
                {
                    var parent = await trainingHistoryRepository.GetByIdAsync(parentId);
                    if (parent == null)
                    {
                        return (null, DoJsonNotFound(typeof(TrainingController), requestUrl, modelState, $"Training ID {parentId} is not found."));
                    }

                    // Link the parent training to the new history.
                    var map = trainingHistoryRepository.AttachParentAsync(trainingHistory, parent);
                    if (map != null)
                    {
                        maps.Add(map);
                    }
                }

                trainingHistory.ParentMaps = maps;
            }

            // Register the tags. Any() only needs to look at the first element.
            if (model.Tags != null && model.Tags.Any())
            {
                tagLogic.CreateTrainingHistoryTags(trainingHistory, model.Tags);
            }

            trainingHistoryRepository.Add(trainingHistory);

            // Mark the data set as locked together with the new history.
            if (dataSet.IsLocked == false)
            {
                dataSet.IsLocked = true;
            }
            unitOfWork.Commit();

            var result = await clusterManagementLogic.RunTrainContainerAsync(trainingHistory, scriptType, regisryTokenName, gitToken);

            if (result.IsSuccess == false)
            {
                // The container could not be started: release the data set lock,
                // delete the stored history and report the error.
                await dataSetLogic.ReleaseLockAsync(trainingHistory.DataSetId);

                trainingHistoryRepository.Delete(trainingHistory);
                unitOfWork.Commit();

                return (null, DoJsonError(HttpStatusCode.ServiceUnavailable, "Failed to run training. The message bellow may be help to resolve: " + result.Error,
                                          typeof(TrainingController), requestUrl, modelState));
            }

            // Update the history according to the result.
            // Starting the container may take a while, so reload the latest record from the DB first.
            trainingHistory = await trainingHistoryRepository.GetByIdAsync(trainingHistory.Id);

            trainingHistory.Configuration = result.Value.Configuration;
            trainingHistory.Status        = result.Value.Status.Key;
            unitOfWork.Commit();

            if (result.Value.Status.Succeed())
            {
                return (trainingHistory,
                        JsonCreated(new SimpleOutputModel(trainingHistory)));
            }

            return (trainingHistory,
                    DoJsonError(HttpStatusCode.ServiceUnavailable, $"Failed to run training. Status={result.Value.Status.Name}. Please contact your server administrator.",
                                typeof(TrainingController), requestUrl, modelState));
        }