/// <summary>
/// Fills display values of the features that correspond to boolean or lookup model inputs.
/// </summary>
/// <param name="userConnection">The user connection.</param>
/// <param name="features">Feature weights to fill. A feature is matched to an input by name.</param>
/// <param name="columnExpressionMapping">Column expressions passed through to the lookup display value
/// filling.</param>
/// <param name="entitySchemaId">Identifier of the entity schema that is resolved for lookup inputs.</param>
/// <param name="metadata">Model metadata whose inputs are iterated. Nothing is done when it has no
/// inputs.</param>
private static void FillDisplayValues(UserConnection userConnection, List<LocalizedFeatureWeight> features,
        Dictionary<string, ColumnExpression> columnExpressionMapping, Guid entitySchemaId,
        ModelSchemaMetadata metadata) {
    // Metadata without inputs gives nothing to fill; guard instead of failing on a null collection.
    if (metadata?.Inputs == null) {
        return;
    }
    EntitySchema schema = userConnection.EntitySchemaManager.GetInstanceByUId(entitySchemaId);
    foreach (var input in metadata.Inputs) {
        var feature = features.Find(featureWeight => featureWeight.Name == input.Name);
        if (feature == null) {
            continue;
        }
        // An input is treated as boolean either by its declared type or because it has
        // an "isNotNull" transformation.
        bool isBooleanValue = input.Type == "Boolean"
            || (input.Transformations != null
                && input.Transformations.Exists(transformation => transformation.Operation == "isNotNull"));
        if (isBooleanValue) {
            FillBooleanDisplayValue(userConnection, feature);
            continue;
        }
        if (input.Type != "Lookup") {
            continue;
        }
        FillLookupDisplayValue(userConnection, columnExpressionMapping, feature, input, schema);
    }
}
/// <summary>
/// Sets <c>PredictedResultColumnName</c> of the model config when it is not set yet.
/// The sources are tried in order: the "PredictedResultColumnUId" value of the reader,
/// the training target column name, and finally the output name from the model metadata.
/// </summary>
/// <param name="modelConfig">Model config to update.</param>
/// <param name="userConnection">The user connection.</param>
/// <param name="reader">Data reader positioned on the model record.</param>
private static void SetPredictedResultColumnName(MLModelConfig modelConfig, UserConnection userConnection,
        IDataReader reader) {
    if (modelConfig.PredictedResultColumnName.IsNotNullOrEmpty()) {
        return;
    }
    Guid resultColumnUId = FindValue<Guid>(reader, "PredictedResultColumnUId", Guid.Empty);
    if (resultColumnUId.IsNotEmpty()) {
        modelConfig.PredictedResultColumnName = FindEntityColumnValueName(userConnection,
            modelConfig.EntitySchemaId, resultColumnUId);
    } else if (modelConfig.TrainingTargetColumnName.IsNotNullOrEmpty()) {
        modelConfig.PredictedResultColumnName = modelConfig.TrainingTargetColumnName;
    } else {
        //TODO #CRM-40303 Remove when PredictedResultColumnUId will be set for all known customer models
        string metadataOutputName = modelConfig.GetModelSchemaMetadata().Output?.Name;
        bool isKnownColumn = IsEntityColumnExists(userConnection.EntitySchemaManager,
            modelConfig.EntitySchemaId, metadataOutputName);
        if (!isKnownColumn) {
            return;
        }
        _log.Warn("Neither PredictedResultColumnUId nor TargetColumnUId is set. "
            + $"Using metadata's output as PredictedResultColumn in model {modelConfig.Id}");
        modelConfig.PredictedResultColumnName = metadataOutputName;
    }
}
/// <summary>
/// Enriches metadata columns (inputs and output) with information taken from the schema column
/// that stands behind the corresponding query column: caption and reference schema name.
/// </summary>
/// <param name="metadata">Model metadata to enrich.</param>
/// <param name="select">The select query whose columns are matched to metadata columns by alias.</param>
/// <param name="tableAliases">Entity schemas by table alias, used to resolve schema columns.</param>
private void FillColumnsInfo(ModelSchemaMetadata metadata, Select select,
        Dictionary<string, EntitySchema> tableAliases) {
    foreach (var queryColumn in select.Columns) {
        // A query column is identified by its alias, or by the source column alias when no alias is set.
        var columnName = string.IsNullOrEmpty(queryColumn.Alias)
            ? queryColumn.SourceColumnAlias
            : queryColumn.Alias;
        ModelSchemaColumn target = metadata.Inputs.FirstOrDefault(input => input.Name == columnName);
        if (target == null && metadata.Output?.Name == columnName) {
            target = metadata.Output;
        }
        if (target == null) {
            continue;
        }
        var schemaColumn = ResolveSchemaColumn(queryColumn, tableAliases);
        // An already defined caption has priority over the schema one.
        if (target.Caption.IsNullOrEmpty()) {
            target.Caption = schemaColumn?.Caption;
        }
        if (schemaColumn?.IsLookupType == true) {
            target.ReferenceSchemaName = schemaColumn.ReferenceSchema?.Name;
        }
    }
}
/// <summary>
/// Generates the metadata by query.
/// </summary>
/// <param name="select">The select query.</param>
/// <param name="predefinedMetadata">The predefined metadata in JSON format.</param>
/// <param name="outputColumnName">Name of the output column.</param>
/// <param name="fillColumnsInfo">Indicates whether information about each column should be filled from
/// the corresponding schema, i.e. <see cref="ModelSchemaColumn.Caption"/>,
/// <see cref="ModelSchemaColumn.ReferenceSchemaName"/> etc.</param>
/// <returns>Generated and merged model metadata.</returns>
public ModelSchemaMetadata GenerateMetadata(Select select, string predefinedMetadata = "",
        string outputColumnName = "", bool fillColumnsInfo = false) {
    Dictionary<string, EntitySchema> schemasByAlias = GetTableSchemas(select);

    // Output type stays empty unless the query actually contains the output column.
    var modelOutput = new ModelSchemaOutput {
        Name = outputColumnName,
        Type = string.Empty
    };
    var outputQueryColumn = select.Columns.FindByAlias(outputColumnName);
    if (outputQueryColumn != null) {
        modelOutput.Type = ResolveExpressionType(outputQueryColumn, schemasByAlias);
    }

    var result = new ModelSchemaMetadata {
        Inputs = new List<ModelSchemaInput>(),
        Output = modelOutput
    };
    ApplyPredefinedMetadata(result, predefinedMetadata);

    // The "Id" column is not offered as an auto-generated input.
    var generatedInputs = new List<ModelSchemaInput>(
        GetModelInputs(select, schemasByAlias).Where(input => input.Name != "Id"));
    MergeMetadata(result, generatedInputs);

    if (fillColumnsInfo) {
        FillColumnsInfo(result, select, schemasByAlias);
    }
    CheckUnknownTypes(select, result);
    ClassFactory.Get<IMLModelValidator>().CheckInputColumns(select, result);
    return result;
}
/// <summary>
/// Applies the predefined (custom) metadata given as JSON to the metadata object.
/// The "output", "inputs" and "params" properties, when present, replace the corresponding
/// parts of <paramref name="metadata"/>. Inputs whose "ignore" value is <c>true</c> are skipped.
/// </summary>
/// <param name="metadata">Metadata to modify.</param>
/// <param name="predefinedMetadata">Predefined metadata in JSON format. Null, empty or whitespace
/// value leaves <paramref name="metadata"/> untouched.</param>
/// <exception cref="FormatException">The predefined metadata can't be parsed as a JSON object.
/// The parser error is available as the inner exception.</exception>
private static void ApplyPredefinedMetadata(ModelSchemaMetadata metadata, string predefinedMetadata) {
    if (string.IsNullOrWhiteSpace(predefinedMetadata)) {
        return;
    }
    JObject customMetaData;
    try {
        customMetaData = JObject.Parse(predefinedMetadata);
    } catch (Exception ex) {
        var message = $"Custom metadata has wrong format: {Environment.NewLine}{predefinedMetadata}";
        // Keep the parser error as the inner exception so the exact position of the problem is not lost.
        throw new FormatException(message, ex);
    }
    if (customMetaData.TryGetValue("output", out var output)) {
        metadata.Output = Json.Deserialize<ModelSchemaOutput>(output.ToString());
    }
    if (customMetaData.TryGetValue("inputs", out var customMetaInputs)) {
        List<ModelSchemaInput> customInputs = customMetaInputs
            .Where(token => !token.Value<bool>("ignore"))
            .Select(item => Json.Deserialize<ModelSchemaInput>(item.ToString()))
            .ToList();
        metadata.Inputs = customInputs;
    }
    if (customMetaData.TryGetValue("params", out var customMetaParams)) {
        metadata.Params = Json.Deserialize<ModelSchemaParams>(customMetaParams.ToString());
    }
}
/// <summary>
/// Starts the train session.
/// </summary>
/// <param name="ignoreMetricThreshold">if set to <c>true</c> ignore metric threshold on applying model
/// instance.</param>
/// <returns>
/// New train session identifier.
/// </returns>
public Guid StartTrainSession(bool ignoreMetricThreshold = false) {
    try {
        Select trainingQuery = BuildTrainingSelectQuery();

        // Validate the query before anything is sent to the service.
        var validator = ClassFactory.Get<IMLModelValidator>();
        validator.CheckColumns(trainingQuery);
        validator.CheckSqlQuery(trainingQuery);

        string outputName = GetMetadataOutputName() ?? DefaultOutputColumnAlias;
        ModelSchemaMetadata schemaMetadata = _metadataGenerator.GenerateMetadata(trainingQuery,
            _modelConfig.MetaData, outputName);
        MergeFitParams(schemaMetadata, _modelConfig);

        var serializedMetadata = ServiceStackTextHelper.Serialize(schemaMetadata);
        Guid newSessionId = _proxy.StartTrainSession(serializedMetadata, _modelConfig.Id);
        SaveTrainSessionId(newSessionId);
        InsertTrainSession(_modelConfig.Id, newSessionId, TrainSessionState.DataTransferring,
            ignoreMetricThreshold);
        return newSessionId;
    } catch (Exception ex) {
        // A failed start is still recorded: a fresh session id is generated, the model gets the error,
        // an error session row is inserted, and the original exception is rethrown.
        _modelConfig.TrainSessionId = Guid.NewGuid();
        UpdateModelOnError(_modelConfig.Id, ex.Message);
        InsertTrainSession(_modelConfig.Id, _modelConfig.TrainSessionId, TrainSessionState.Error,
            ignoreMetricThreshold, ex.Message);
        throw;
    }
}
/// <summary>
/// Sets captions of the features from the metadata inputs with the same name.
/// The caption-less check and the assignment are passed to <c>ForEach</c> as predicate and action.
/// </summary>
/// <param name="features">Features to fill captions for.</param>
/// <param name="metadata">Model metadata. Nothing is done when it is null or has no inputs.</param>
private static void FillCaptionsFromMetadata(IEnumerable<LocalizedFeatureWeight> features,
        ModelSchemaMetadata metadata) {
    var metadataInputs = metadata?.Inputs;
    if (metadataInputs == null) {
        return;
    }
    features.ForEach(
        candidate => candidate.Caption.IsNullOrEmpty(),
        candidate => {
            var sameNameInput = metadataInputs.Find(input => input.Name == candidate.Name);
            // When there is no input with this name the caption becomes null.
            candidate.Caption = sameNameInput?.Caption;
        });
}
/// <summary>
/// Loads the model metadata captions. Sets enriched metadata to <see cref="MLModelConfig.MetaData"/> property.
/// </summary>
/// <remarks>
/// Captions are read from the "MetaDataLcz" column of the "MLModel" record. The model metadata is left
/// untouched when that record can't be fetched, or the column is empty, malformed or deserializes
/// to nothing.
/// </remarks>
/// <param name="userConnection">The user connection.</param>
/// <param name="model">Loaded model.</param>
public void LoadModelMetadataCaptions(UserConnection userConnection, MLModelConfig model) {
    if (model.MetaData.IsNullOrEmpty()) {
        return;
    }
    ModelSchemaMetadata metadata = model.GetModelSchemaMetadata();
    if (metadata.Inputs.IsNullOrEmpty() && metadata.Output == null) {
        return;
    }
    var modelSchema = userConnection.EntitySchemaManager.GetInstanceByName("MLModel");
    var modelEntity = modelSchema.CreateEntity(userConnection);
    modelEntity.PrimaryColumnValue = model.Id;
    if (!modelEntity.FetchFromDB(new[] { "MetaDataLcz" }, false)) {
        return;
    }
    string serializedMetadataLcz = modelEntity.GetTypedColumnValue<string>("MetaDataLcz");
    if (serializedMetadataLcz.IsNullOrEmpty()) {
        return;
    }
    ModelSchemaMetadata metaDataLcz;
    try {
        metaDataLcz = JsonConvert.DeserializeObject<ModelSchemaMetadata>(serializedMetadataLcz);
    } catch (Exception e) {
        _log.Warn($"Can't localize metadata for {model.Id}, because MetaDataLcz has incorrect format: "
            + serializedMetadataLcz, e);
        return;
    }
    // The deserializer returns null for content like whitespace or a JSON "null" literal;
    // in that case there is nothing to localize with.
    if (metaDataLcz == null) {
        return;
    }
    // Localized metadata may lack the inputs section, so both sides are checked before matching by name.
    if (metadata.Inputs != null && metaDataLcz.Inputs != null) {
        foreach (var input in metadata.Inputs) {
            var inputLcz = metaDataLcz.Inputs.Find(schemaInput => schemaInput.Name == input.Name);
            if (inputLcz != null) {
                input.Caption = inputLcz.Caption;
            }
        }
    }
    if (metadata.Output != null) {
        metadata.Output.Caption = metaDataLcz.Output?.Caption;
    }
    model.MetaData = JsonConvert.SerializeObject(metadata, Formatting.Indented);
}
/// <summary>
/// Verifies that every metadata input has a determined type. The check is skipped when
/// unknown types are configured to be ignored.
/// </summary>
/// <param name="select">The select query; its SQL text is included into the error message.</param>
/// <param name="metadata">Metadata whose inputs are checked.</param>
/// <exception cref="ValidateException">At least one input has the unknown type.</exception>
private void CheckUnknownTypes(Select select, ModelSchemaMetadata metadata) {
    if (_ignoreUnknownTypes) {
        return;
    }
    var untypedInputNames = (from input in metadata.Inputs
        where input.Type == UnknownType
        select input.Name).ToList();
    if (untypedInputNames.Count == 0) {
        return;
    }
    var template = "Can't automatically determine type for expression(s): {0}. " + Environment.NewLine
        + " Generated query: {1}";
    var joinedNames = string.Join(", ", untypedInputNames);
    throw new ValidateException(string.Format(template, joinedNames, select.GetSqlText()));
}
/// <summary>
/// Gets the deserialized from MetaData property model schema.
/// </summary>
/// <remarks>
/// The deserialized object is cached in a field after the first successful call with non-empty MetaData.
/// When MetaData is empty a new (non-cached) empty metadata object is returned. When MetaData can't be
/// deserialized, or deserializes to nothing, an empty metadata object is cached and returned.
/// </remarks>
/// <returns>Metadata object; never null.</returns>
public ModelSchemaMetadata GetModelSchemaMetadata() {
    if (MetaData.IsNullOrEmpty()) {
        return new ModelSchemaMetadata();
    }
    if (_modelSchemaMetaData != null) {
        return _modelSchemaMetaData;
    }
    ModelSchemaMetadata deserialized = null;
    try {
        deserialized = JsonConvert.DeserializeObject<ModelSchemaMetadata>(MetaData);
    } catch (Exception) {
        // Deliberate best effort: malformed metadata is treated as empty metadata.
    }
    // The deserializer yields null for content like whitespace or a JSON "null" literal;
    // fall back to an empty object so callers never get null.
    _modelSchemaMetaData = deserialized ?? new ModelSchemaMetadata();
    return _modelSchemaMetaData;
}
/// <summary>
/// Appends auto-generated inputs to the metadata inputs, skipping those whose name is already
/// used as a name or display name of an existing input or of the output.
/// </summary>
/// <param name="metadata">Metadata whose inputs list is extended in place.</param>
/// <param name="autoGeneratedInputs">Inputs generated from the query.</param>
private static void MergeMetadata(ModelSchemaMetadata metadata, List<ModelSchemaInput> autoGeneratedInputs) {
    var inputs = metadata.Inputs;
    var output = metadata.Output;
    // A hash set gives constant-time membership checks instead of scanning a list for every candidate.
    var existingItems = new HashSet<string>(inputs.Select(item => item.Name));
    existingItems.UnionWith(inputs.Select(item => item.DisplayName));
    if (output != null) {
        existingItems.Add(output.Name);
        existingItems.Add(output.DisplayName);
    }
    // Materialize before AddRange so the filter is fully evaluated before the target list is modified.
    List<ModelSchemaInput> additionalItems = autoGeneratedInputs
        .Where(autogeneratedInput => !existingItems.Contains(autogeneratedInput.Name))
        .ToList();
    inputs.AddRange(additionalItems);
}
/// <summary>
/// Checks that all model input columns are present in the result query.
/// </summary>
/// <param name="select">The select.</param>
/// <param name="metadata">The metadata.</param>
/// <exception cref="ValidateException">At least one input column is not present in the result query.
/// </exception>
public void CheckInputColumns(Select select, ModelSchemaMetadata metadata) {
    if (metadata.Inputs == null) {
        return;
    }
    // A query column is known by its alias, or by the source column alias when the alias is empty.
    var queryColumnNames = select.Columns
        .Select(column => column.Alias.ToNullIfEmpty() ?? column.SourceColumnAlias);
    var inputNames = metadata.Inputs.Select(input => input.Name);
    List<string> missingInputNames = inputNames.Except(queryColumnNames).ToList();
    if (missingInputNames.Count == 0) {
        return;
    }
    // Prefer the localized template; fall back to the built-in English text.
    var localizedTemplate = select.UserConnection
        .GetLocalizableString(GetType().Name, "InputsFromMetadataAbsentInDatasetMessage")
        .ToNullIfEmpty();
    var template = localizedTemplate ?? "Some inputs from metadata are absent in the result dataset: {0}";
    throw new ValidateException(string.Format(template, string.Join(", ", missingInputNames)));
}
/// <summary>
/// Merges fit parameters (factors and regularizations) of the model config into the metadata params.
/// Values already present in the metadata have priority over the model config values.
/// </summary>
/// <param name="metadata">Metadata whose <c>Params.Fit</c> is filled.</param>
/// <param name="modelConfig">Model config that provides factors counts and regularization values.
/// Nothing is done when <c>GetIsCFModel</c> returns false for it or when it has neither of the
/// values.</param>
private void MergeFitParams(ModelSchemaMetadata metadata, MLModelConfig modelConfig) {
    if (!GetIsCFModel(modelConfig)) {
        return;
    }
    if (modelConfig.FactorsCounts.IsNullOrEmpty() && modelConfig.RegularizationValues.IsNullOrEmpty()) {
        return;
    }
    if (metadata.Params == null) {
        metadata.Params = new ModelSchemaParams();
    }
    // Params may come from predefined metadata without the fit section, so Fit is ensured separately.
    if (metadata.Params.Fit == null) {
        metadata.Params.Fit = new ModelSchemaFitParams();
    }
    ModelSchemaFitParams fitParams = metadata.Params.Fit;
    if (fitParams.Factors.IsNullOrEmpty()) {
        fitParams.Factors = modelConfig.FactorsCounts;
    }
    if (fitParams.Regularizations.IsNullOrEmpty()) {
        fitParams.Regularizations = modelConfig.RegularizationValues;
    }
}
/// <summary>
/// Fills feature captions. For a feature mapped to an <c>MLColumnExpression</c> the caption of the
/// expression is used when set, otherwise the full caption of the schema column by the expression's
/// column path. Afterwards the features are passed to <c>FillCaptionsFromMetadata</c>.
/// </summary>
/// <param name="userConnection">The user connection.</param>
/// <param name="features">Features to fill captions for.</param>
/// <param name="columnExpressionMapping">Column expressions by feature name.</param>
/// <param name="entitySchemaId">Identifier of the entity schema used to resolve column captions.</param>
/// <param name="metadata">Model metadata.</param>
private static void FillCaptions(UserConnection userConnection, List<LocalizedFeatureWeight> features,
        Dictionary<string, ColumnExpression> columnExpressionMapping, Guid entitySchemaId,
        ModelSchemaMetadata metadata) {
    foreach (LocalizedFeatureWeight item in features) {
        if (!columnExpressionMapping.TryGetValue(item.Name, out ColumnExpression mappedExpression)) {
            continue;
        }
        if (!(mappedExpression is MLColumnExpression mlExpression)) {
            continue;
        }
        if (mlExpression.Caption.IsNotNullOrEmpty()) {
            item.Caption = mlExpression.Caption;
        } else {
            item.Caption = GetSchemaColumnFullCaption(userConnection, entitySchemaId,
                mlExpression.ColumnPath);
        }
    }
    FillCaptionsFromMetadata(features, metadata);
}