Exemplo n.º 1
0
        /// <summary>
        /// Fills display values for the features that match boolean-like or lookup model inputs.
        /// </summary>
        /// <param name="userConnection">The user connection.</param>
        /// <param name="features">Feature weights; each one is matched to a model input by name.</param>
        /// <param name="columnExpressionMapping">Column expressions handed over to
        /// <c>FillLookupDisplayValue</c>.</param>
        /// <param name="entitySchemaId">Unique identifier of the entity schema that is resolved and handed over to
        /// <c>FillLookupDisplayValue</c>.</param>
        /// <param name="metadata">Model metadata whose inputs are inspected. Nothing is done when it is
        /// <c>null</c> or has no inputs collection.</param>
        private static void FillDisplayValues(UserConnection userConnection, List<LocalizedFeatureWeight> features,
                                              Dictionary<string, ColumnExpression> columnExpressionMapping, Guid entitySchemaId,
                                              ModelSchemaMetadata metadata)
        {
            // Same guard as FillCaptionsFromMetadata: without inputs there is nothing to match features against.
            if (metadata?.Inputs == null)
            {
                return;
            }
            EntitySchema schema = userConnection.EntitySchemaManager.GetInstanceByUId(entitySchemaId);

            foreach (var input in metadata.Inputs)
            {
                var feature = features.Find(featureWeight => featureWeight.Name == input.Name);
                if (feature == null)
                {
                    continue;
                }
                // An input is treated as boolean either by its declared type or because an "isNotNull"
                // transformation is applied to it.
                bool isBooleanValue = input.Type == "Boolean" ||
                                      (input.Transformations != null &&
                                       input.Transformations.Exists(transformation => transformation.Operation == "isNotNull"));
                if (isBooleanValue)
                {
                    FillBooleanDisplayValue(userConnection, feature);
                }
                else if (input.Type == "Lookup")
                {
                    FillLookupDisplayValue(userConnection, columnExpressionMapping, feature, input, schema);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Determines <c>PredictedResultColumnName</c> of the model config when it is not set yet.
        /// </summary>
        /// <remarks>
        /// Sources are tried in this order: the "PredictedResultColumnUId" value of the reader, the training
        /// target column name of the config, and finally the output name from the model metadata (only when the
        /// entity column existence check passes for it).
        /// </remarks>
        /// <param name="modelConfig">The model config to update.</param>
        /// <param name="userConnection">The user connection.</param>
        /// <param name="reader">Data reader the "PredictedResultColumnUId" value is looked up in.</param>
        private static void SetPredictedResultColumnName(MLModelConfig modelConfig, UserConnection userConnection,
                                                         IDataReader reader)
        {
            if (modelConfig.PredictedResultColumnName.IsNotNullOrEmpty())
            {
                // Already known, nothing to resolve.
                return;
            }
            Guid columnUId = FindValue<Guid>(reader, "PredictedResultColumnUId", Guid.Empty);

            if (columnUId.IsNotEmpty())
            {
                modelConfig.PredictedResultColumnName =
                    FindEntityColumnValueName(userConnection, modelConfig.EntitySchemaId, columnUId);
            }
            else if (modelConfig.TrainingTargetColumnName.IsNotNullOrEmpty())
            {
                modelConfig.PredictedResultColumnName = modelConfig.TrainingTargetColumnName;
            }
            else
            {
                //TODO #CRM-40303 Remove when PredictedResultColumnUId will be set for all known customer models
                ModelSchemaMetadata schemaMetadata = modelConfig.GetModelSchemaMetadata();
                string metadataOutputName = schemaMetadata.Output?.Name;
                bool columnExists = IsEntityColumnExists(userConnection.EntitySchemaManager,
                                                         modelConfig.EntitySchemaId, metadataOutputName);

                if (columnExists)
                {
                    _log.Warn("Neither PredictedResultColumnUId nor TargetColumnUId is set. " +
                              $"Using metadata's output as PredictedResultColumn in model {modelConfig.Id}");
                    modelConfig.PredictedResultColumnName = metadataOutputName;
                }
            }
        }
Exemplo n.º 3
0
 /// <summary>
 /// Copies schema information (caption, reference schema name) to the metadata columns that correspond to
 /// the columns of the select query.
 /// </summary>
 /// <param name="metadata">Model metadata whose inputs and output are updated.</param>
 /// <param name="select">The select query whose columns are inspected.</param>
 /// <param name="tableAliases">Entity schemas by table alias, used to resolve the schema column.</param>
 private void FillColumnsInfo(ModelSchemaMetadata metadata, Select select,
                              Dictionary<string, EntitySchema> tableAliases)
 {
     foreach (var queryColumn in select.Columns)
     {
         // The explicit alias wins; the source column alias is the fallback.
         var columnName = string.IsNullOrEmpty(queryColumn.Alias)
             ? queryColumn.SourceColumnAlias
             : queryColumn.Alias;
         ModelSchemaColumn target = metadata.Inputs.FirstOrDefault(input => input.Name == columnName);

         if (target == null && metadata.Output?.Name == columnName)
         {
             target = metadata.Output;
         }
         if (target == null)
         {
             // The query column is neither a model input nor the model output.
             continue;
         }
         var resolvedColumn = ResolveSchemaColumn(queryColumn, tableAliases);

         // A caption that is already present in the metadata is never overwritten.
         if (target.Caption.IsNullOrEmpty())
         {
             target.Caption = resolvedColumn?.Caption;
         }
         if (resolvedColumn != null && resolvedColumn.IsLookupType)
         {
             target.ReferenceSchemaName = resolvedColumn.ReferenceSchema?.Name;
         }
     }
 }
Exemplo n.º 4
0
        /// <summary>
        /// Builds the model metadata for a select query and merges it with the predefined metadata.
        /// </summary>
        /// <param name="select">The select query.</param>
        /// <param name="predefinedMetadata">The predefined metadata in JSON format.</param>
        /// <param name="outputColumnName">Name of the output column.</param>
        /// <param name="fillColumnsInfo">When <c>true</c>, each metadata column is additionally filled with
        /// information taken from the corresponding schema column, i.e.
        /// <see cref="ModelSchemaColumn.Caption"/>, <see cref="ModelSchemaColumn.ReferenceSchemaName"/> etc.</param>
        /// <returns>Generated and merged model metadata.</returns>
        public ModelSchemaMetadata GenerateMetadata(Select select, string predefinedMetadata = "",
                                                    string outputColumnName = "", bool fillColumnsInfo = false)
        {
            Dictionary<string, EntitySchema> schemasByAlias = GetTableSchemas(select);
            var modelOutput = new ModelSchemaOutput
            {
                Name = outputColumnName,
                Type = string.Empty
            };
            var queryOutputColumn = select.Columns.FindByAlias(outputColumnName);

            // The output type stays empty when the query has no column with the output alias.
            if (queryOutputColumn != null)
            {
                modelOutput.Type = ResolveExpressionType(queryOutputColumn, schemasByAlias);
            }
            var result = new ModelSchemaMetadata
            {
                Inputs = new List<ModelSchemaInput>(),
                Output = modelOutput
            };

            // Predefined metadata is applied first; auto-generated inputs are merged into it afterwards.
            ApplyPredefinedMetadata(result, predefinedMetadata);
            List<ModelSchemaInput> generatedInputs = GetModelInputs(select, schemasByAlias)
                .Where(input => input.Name != "Id")
                .ToList();

            MergeMetadata(result, generatedInputs);
            if (fillColumnsInfo)
            {
                FillColumnsInfo(result, select, schemasByAlias);
            }
            CheckUnknownTypes(select, result);
            var validator = ClassFactory.Get<IMLModelValidator>();

            validator.CheckInputColumns(select, result);
            return result;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Overrides parts of the metadata with the values from the predefined (custom) JSON metadata.
        /// </summary>
        /// <remarks>
        /// The "output", "inputs" and "params" properties of the JSON object are applied when present. Inputs
        /// whose "ignore" value is <c>true</c> are skipped.
        /// </remarks>
        /// <param name="metadata">The metadata to update.</param>
        /// <param name="predefinedMetadata">Predefined metadata in JSON format. Nothing is done when the
        /// <c>IsNotNullOrWhiteSpace</c> check fails for it.</param>
        /// <exception cref="FormatException">The predefined metadata can't be parsed as a JSON object. The parser
        /// error is available as the inner exception.</exception>
        private static void ApplyPredefinedMetadata(ModelSchemaMetadata metadata, string predefinedMetadata)
        {
            if (!predefinedMetadata.IsNotNullOrWhiteSpace())
            {
                return;
            }
            JObject customMetaData;

            try
            {
                customMetaData = JObject.Parse(predefinedMetadata);
            }
            catch (Exception parseError)
            {
                var message = $"Custom metadata has wrong format: {Environment.NewLine}{predefinedMetadata}";
                // Keep the parser error as the inner exception so the position of the syntax error is not lost.
                throw new FormatException(message, parseError);
            }
            if (customMetaData.TryGetValue("output", out var output))
            {
                metadata.Output = Json.Deserialize<ModelSchemaOutput>(output.ToString());
            }
            if (customMetaData.TryGetValue("inputs", out var customMetaInputs))
            {
                List<ModelSchemaInput> customInputs = customMetaInputs
                    .Where(token => !token.Value<bool>("ignore"))
                    .Select(item => Json.Deserialize<ModelSchemaInput>(item.ToString()))
                    .ToList();
                metadata.Inputs = customInputs;
            }
            if (customMetaData.TryGetValue("params", out var customMetaParams))
            {
                metadata.Params = Json.Deserialize<ModelSchemaParams>(customMetaParams.ToString());
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Starts a new train session for the current model.
 /// </summary>
 /// <remarks>
 /// The training query is built and validated, metadata is generated for it and sent to the proxy. When any
 /// step fails, a freshly generated identifier is assigned to the model config, an error train session is
 /// inserted with the exception message, and the exception is rethrown.
 /// </remarks>
 /// <param name="ignoreMetricThreshold">if set to <c>true</c> ignore metric threshold on applying model
 /// instance.</param>
 /// <returns>
 /// New train session identifier.
 /// </returns>
 public Guid StartTrainSession(bool ignoreMetricThreshold = false)
 {
     try
     {
         Select query = BuildTrainingSelectQuery();
         var validator = ClassFactory.Get<IMLModelValidator>();

         validator.CheckColumns(query);
         validator.CheckSqlQuery(query);

         string outputAlias = GetMetadataOutputName() ?? DefaultOutputColumnAlias;
         ModelSchemaMetadata trainingMetadata =
             _metadataGenerator.GenerateMetadata(query, _modelConfig.MetaData, outputAlias);

         MergeFitParams(trainingMetadata, _modelConfig);

         string serializedMetadata = ServiceStackTextHelper.Serialize(trainingMetadata);
         Guid startedSessionId = _proxy.StartTrainSession(serializedMetadata, _modelConfig.Id);

         SaveTrainSessionId(startedSessionId);
         InsertTrainSession(_modelConfig.Id, startedSessionId, TrainSessionState.DataTransferring,
                            ignoreMetricThreshold);
         return startedSessionId;
     }
     catch (Exception error)
     {
         // The failed attempt is recorded under a newly generated session identifier.
         _modelConfig.TrainSessionId = Guid.NewGuid();
         UpdateModelOnError(_modelConfig.Id, error.Message);
         InsertTrainSession(_modelConfig.Id, _modelConfig.TrainSessionId, TrainSessionState.Error,
                            ignoreMetricThreshold, error.Message);
         throw;
     }
 }
Exemplo n.º 7
0
 /// <summary>
 /// Takes captions for the features from the model metadata inputs that have the same name.
 /// </summary>
 /// <param name="features">Features to process; the filter passed to <c>ForEach</c> selects those whose
 /// caption is null or empty.</param>
 /// <param name="metadata">Model metadata. Nothing is done when it is <c>null</c> or has no inputs.</param>
 private static void FillCaptionsFromMetadata(IEnumerable<LocalizedFeatureWeight> features,
                                              ModelSchemaMetadata metadata)
 {
     if (metadata == null || metadata.Inputs == null)
     {
         return;
     }
     features.ForEach(
         item => item.Caption.IsNullOrEmpty(),
         item => {
             // The caption becomes null when no input with the feature name exists.
             item.Caption = metadata.Inputs.Find(input => input.Name == item.Name)?.Caption;
         });
 }
Exemplo n.º 8
0
        /// <summary>
        /// Loads the model metadata captions. Sets enriched metadata to <see cref="MLModelConfig.MetaData"/> property.
        /// </summary>
        /// <remarks>
        /// Captions are read from the "MetaDataLcz" column of the "MLModel" entity. The model is left untouched
        /// when it has no metadata, when the entity can't be fetched, or when the localized metadata is empty,
        /// malformed or deserializes to <c>null</c>.
        /// </remarks>
        /// <param name="userConnection">The user connection.</param>
        /// <param name="model">Loaded model.</param>
        public void LoadModelMetadataCaptions(UserConnection userConnection, MLModelConfig model)
        {
            if (model.MetaData.IsNullOrEmpty())
            {
                return;
            }
            ModelSchemaMetadata metadata = model.GetModelSchemaMetadata();

            if (metadata.Inputs.IsNullOrEmpty() && metadata.Output == null)
            {
                return;
            }
            var modelSchema = userConnection.EntitySchemaManager.GetInstanceByName("MLModel");
            var modelEntity = modelSchema.CreateEntity(userConnection);

            modelEntity.PrimaryColumnValue = model.Id;
            if (!modelEntity.FetchFromDB(new[] { "MetaDataLcz" }, false))
            {
                return;
            }
            string serializedMetadataLcz = modelEntity.GetTypedColumnValue<string>("MetaDataLcz");

            if (serializedMetadataLcz.IsNullOrEmpty())
            {
                return;
            }
            ModelSchemaMetadata metaDataLcz;

            try
            {
                metaDataLcz = JsonConvert.DeserializeObject<ModelSchemaMetadata>(serializedMetadataLcz);
            }
            catch (Exception e)
            {
                _log.Warn($"Can't localize metadata for {model.Id}, because MetaDataLcz has incorrect format: " +
                          serializedMetadataLcz, e);
                return;
            }
            // Deserialization yields null for content such as the JSON literal "null"; there is nothing to apply.
            if (metaDataLcz == null)
            {
                return;
            }
            // Localized metadata may carry only the output, so its inputs collection can be absent.
            if (metaDataLcz.Inputs != null)
            {
                metadata.Inputs?.ForEach(input => {
                    var inputLcz = metaDataLcz.Inputs.Find(schemaInput => schemaInput.Name == input.Name);
                    if (inputLcz == null)
                    {
                        return;
                    }
                    input.Caption = inputLcz.Caption;
                });
            }
            if (metadata.Output != null)
            {
                metadata.Output.Caption = metaDataLcz.Output?.Caption;
            }
            model.MetaData = JsonConvert.SerializeObject(metadata, Formatting.Indented);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Verifies that the type of every metadata input was determined, unless unknown types are ignored.
        /// </summary>
        /// <param name="select">The select query; its SQL text is included into the error message.</param>
        /// <param name="metadata">The metadata whose inputs are checked.</param>
        /// <exception cref="ValidateException">At least one input has the unknown type.</exception>
        private void CheckUnknownTypes(Select select, ModelSchemaMetadata metadata)
        {
            if (_ignoreUnknownTypes)
            {
                return;
            }
            List<string> untypedInputNames = metadata.Inputs
                .Where(input => input.Type == UnknownType)
                .Select(input => input.Name)
                .ToList();

            if (untypedInputNames.Count == 0)
            {
                return;
            }
            string template = "Can't automatically determine type for expression(s): {0}. " + Environment.NewLine +
                              " Generated query: {1}";
            string joinedNames = string.Join(", ", untypedInputNames);

            throw new ValidateException(string.Format(template, joinedNames, select.GetSqlText()));
        }
Exemplo n.º 10
0
 /// <summary>
 /// Gets the model schema deserialized from the MetaData property.
 /// </summary>
 /// <remarks>
 /// The deserialized object is cached in a field after the first successful or failed attempt. An empty
 /// metadata object is used when MetaData can't be deserialized or deserializes to <c>null</c>, so the
 /// result is never <c>null</c>.
 /// </remarks>
 /// <returns>Metadata object.</returns>
 public ModelSchemaMetadata GetModelSchemaMetadata()
 {
     if (MetaData.IsNullOrEmpty())
     {
         return new ModelSchemaMetadata();
     }
     if (_modelSchemaMetaData == null)
     {
         try
         {
             // DeserializeObject returns null for content such as the JSON literal "null"; callers
             // dereference the result, so fall back to an empty object in that case as well.
             _modelSchemaMetaData = JsonConvert.DeserializeObject<ModelSchemaMetadata>(MetaData)
                                    ?? new ModelSchemaMetadata();
         }
         catch (Exception)
         {
             // Best effort: malformed metadata is treated as empty metadata.
             _modelSchemaMetaData = new ModelSchemaMetadata();
         }
     }
     return _modelSchemaMetaData;
 }
Exemplo n.º 11
0
        /// <summary>
        /// Appends to the metadata inputs those auto-generated inputs that are not described in it yet.
        /// </summary>
        /// <remarks>
        /// An auto-generated input is skipped when its name equals the name or the display name of any existing
        /// input or of the output.
        /// </remarks>
        /// <param name="metadata">The metadata whose inputs list is extended in place.</param>
        /// <param name="autoGeneratedInputs">Inputs generated from the query.</param>
        private static void MergeMetadata(ModelSchemaMetadata metadata, List<ModelSchemaInput> autoGeneratedInputs)
        {
            var inputs = metadata.Inputs;
            var output = metadata.Output;

            // A hash set gives constant-time membership checks instead of scanning a list per generated input.
            var existingItems = new HashSet<string>(inputs.Select(item => item.Name));
            existingItems.UnionWith(inputs.Select(item => item.DisplayName));

            if (output != null)
            {
                existingItems.Add(output.Name);
                existingItems.Add(output.DisplayName);
            }
            // Materialized before AddRange so the target list is never enumerated while it is being extended.
            List<ModelSchemaInput> additionalItems = autoGeneratedInputs
                .Where(autogeneratedInput => !existingItems.Contains(autogeneratedInput.Name))
                .ToList();

            inputs.AddRange(additionalItems);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Ensures that every input described in the metadata has a matching column in the select query.
        /// </summary>
        /// <remarks>
        /// A query column is identified by its alias or, when the alias is empty, by its source column alias.
        /// Nothing is checked when the metadata has no inputs collection.
        /// </remarks>
        /// <param name="select">The select.</param>
        /// <param name="metadata">The metadata.</param>
        /// <exception cref="ValidateException">At least one input column is not present in the result query.
        /// </exception>
        public void CheckInputColumns(Select select, ModelSchemaMetadata metadata)
        {
            if (metadata.Inputs == null)
            {
                return;
            }
            var queryColumnNames =
                select.Columns.Select(column => column.Alias.ToNullIfEmpty() ?? column.SourceColumnAlias);
            List<string> missingNames = metadata.Inputs
                .Select(input => input.Name)
                .Except(queryColumnNames)
                .ToList();

            if (missingNames.Count == 0)
            {
                return;
            }
            // The hard-coded text is used only when the localizable string is empty.
            var template =
                select.UserConnection.GetLocalizableString(GetType().Name, "InputsFromMetadataAbsentInDatasetMessage")
                .ToNullIfEmpty() ?? "Some inputs from metadata are absent in the result dataset: {0}";
            string joinedNames = string.Join(", ", missingNames);

            throw new ValidateException(string.Format(template, joinedNames));
        }
Exemplo n.º 13
0
        /// <summary>
        /// Copies the fit parameters of the model config into the metadata, for models that pass the
        /// <c>GetIsCFModel</c> check.
        /// </summary>
        /// <remarks>
        /// Values that are already present in the metadata take precedence; the config values are used only for
        /// the fit parameters that are null or empty there.
        /// </remarks>
        /// <param name="metadata">The metadata whose params are created or updated.</param>
        /// <param name="modelConfig">The model config that provides factors counts and regularization
        /// values.</param>
        private void MergeFitParams(ModelSchemaMetadata metadata, MLModelConfig modelConfig)
        {
            if (!GetIsCFModel(modelConfig))
            {
                return;
            }
            if (modelConfig.FactorsCounts.IsNullOrEmpty() &&
                modelConfig.RegularizationValues.IsNullOrEmpty())
            {
                return;
            }
            if (metadata.Params == null)
            {
                metadata.Params = new ModelSchemaParams();
            }
            // Params may exist without the fit section (e.g. predefined "params" with no "fit" inside),
            // so the fit section is created independently of the params object.
            if (metadata.Params.Fit == null)
            {
                metadata.Params.Fit = new ModelSchemaFitParams();
            }
            ModelSchemaFitParams fitParams = metadata.Params.Fit;

            fitParams.Factors = fitParams.Factors.IsNullOrEmpty() ? modelConfig.FactorsCounts : fitParams.Factors;
            fitParams.Regularizations = fitParams.Regularizations.IsNullOrEmpty()
                ? modelConfig.RegularizationValues
                : fitParams.Regularizations;
        }
Exemplo n.º 14
0
 /// <summary>
 /// Fills captions of the features from the mapped column expressions and then from the model metadata.
 /// </summary>
 /// <param name="userConnection">The user connection.</param>
 /// <param name="features">Features to fill captions for.</param>
 /// <param name="columnExpressionMapping">Column expressions by feature name.</param>
 /// <param name="entitySchemaId">Unique identifier of the entity schema used to build the full column
 /// caption.</param>
 /// <param name="metadata">Model metadata passed on to <c>FillCaptionsFromMetadata</c>.</param>
 private static void FillCaptions(UserConnection userConnection, List<LocalizedFeatureWeight> features,
                                  Dictionary<string, ColumnExpression> columnExpressionMapping, Guid entitySchemaId,
                                  ModelSchemaMetadata metadata)
 {
     foreach (LocalizedFeatureWeight feature in features)
     {
         ColumnExpression mappedExpression;
         if (!columnExpressionMapping.TryGetValue(feature.Name, out mappedExpression))
         {
             continue;
         }
         var mlExpression = mappedExpression as MLColumnExpression;
         if (mlExpression == null)
         {
             // Only ML column expressions are handled in this loop.
             continue;
         }
         if (mlExpression.Caption.IsNotNullOrEmpty())
         {
             // The caption of the expression itself has priority over the schema column caption.
             feature.Caption = mlExpression.Caption;
         }
         else
         {
             feature.Caption = GetSchemaColumnFullCaption(userConnection, entitySchemaId,
                                                          mlExpression.ColumnPath);
         }
     }
     FillCaptionsFromMetadata(features, metadata);
 }