Ejemplo n.º 1
0
        /// <summary>
        /// Validates the custom text schema and throws when the validation fails.
        /// </summary>
        /// <param name="schema">Schema to validate.</param>
        /// <param name="projects">Custom text projects; forwarded to the internal validation.</param>
        /// <param name="selectedProjects">Flags describing which project kinds are selected.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when the internal validation reports that the schema is not valid.
        /// </exception>
        public void ValidateSchema(CustomTextSchema schema, CustomTextProjects projects, SelectedProjects selectedProjects)
        {
            if (!ValidateSchemaInternal(schema, projects, selectedProjects))
            {
                // A specific exception type instead of the reserved base Exception;
                // it still derives from Exception, so existing catch blocks keep working,
                // and the single-argument constructor leaves Message unchanged.
                throw new ArgumentException("Cognitive Search doesn't support spaces or special characters in entity names! please rename your entities and re-train your model");
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the schema checks that apply to the selected projects.
        /// </summary>
        /// <param name="schema">Schema whose entity names are checked.</param>
        /// <param name="projects">Not read by this method.</param>
        /// <param name="selectedProjects">Flags describing which project kinds are selected.</param>
        /// <returns>
        /// <c>true</c> when the entity recognition project is not selected, otherwise the
        /// result of <c>ValidateEntityNames</c>.
        /// </returns>
        private bool ValidateSchemaInternal(CustomTextSchema schema, CustomTextProjects projects, SelectedProjects selectedProjects)
        {
            // Entity names are only checked when the entity recognition project is selected.
            if (!selectedProjects.IsSelected_EntityRecognitionProject)
            {
                return true;
            }

            return ValidateEntityNames(schema);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks that every entity name in the schema consists solely of letters and digits.
        /// </summary>
        /// <param name="schema">Schema whose <c>EntityNames</c> are inspected.</param>
        /// <returns>
        /// <c>true</c> when every character of every entity name is a letter or digit
        /// (an empty name or an empty list therefore passes); otherwise <c>false</c>.
        /// </returns>
        private bool ValidateEntityNames(CustomTextSchema schema)
        {
            // Whitespace is never a letter or digit, so a separate whitespace test is unnecessary.
            // All(...) stops at the first offending name instead of scanning the whole list.
            return schema.EntityNames.All(name => name.All(c => char.IsLetterOrDigit(c)));
        }
        /// <summary>
        /// Builds the search index definition for the selected custom text projects.
        /// </summary>
        /// <param name="schema">Schema supplying the entity names used as field names.</param>
        /// <param name="indexName">Name given to the created <c>SearchIndex</c>.</param>
        /// <param name="selectedProjects">Flags describing which project kinds are selected.</param>
        /// <returns>
        /// A <c>SearchIndex</c> whose fields are the three core fields followed by the
        /// single-class field, the multi-class field and one field per entity name,
        /// each included only when the corresponding project is selected.
        /// </returns>
        public SearchIndex CreateSearchIndexSchema(CustomTextSchema schema, string indexName, SelectedProjects selectedProjects)
        {
            var fields = new List<SearchField>();

            // Core fields: the key plus the document name and URI.
            var keyField = new SearchField("id", SearchFieldDataType.String);
            keyField.IsKey = true;
            fields.Add(keyField);
            fields.Add(new SearchField("document_name", SearchFieldDataType.String));
            fields.Add(new SearchField("document_uri", SearchFieldDataType.String));

            // Classifier fields: a single string, or a string collection for multi-class.
            if (selectedProjects.IsSelected_SingleClassificationProject)
            {
                fields.Add(new SearchField(
                    Constants.SearchIndexSingleClassColumnName,
                    SearchFieldDataType.String));
            }

            if (selectedProjects.IsSelected_MultiClassificationProject)
            {
                fields.Add(new SearchField(
                    Constants.SearchIndexMultiClassColumnName,
                    SearchFieldDataType.Collection(SearchFieldDataType.String)));
            }

            // Extractor fields: one string collection per entity name.
            if (selectedProjects.IsSelected_EntityRecognitionProject)
            {
                foreach (var name in schema.EntityNames)
                {
                    fields.Add(new SearchField(
                        name,
                        SearchFieldDataType.Collection(SearchFieldDataType.String)));
                }
            }

            var index = new SearchIndex(indexName);
            index.Fields = fields;
            return index;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates the data source, search index, skillset and indexer for a custom text
        /// application, logging each step.
        /// </summary>
        /// <param name="indexName">
        /// Index name; its lower-cased form is the prefix of the data source, indexer,
        /// skillset and custom text skill names.
        /// </param>
        /// <param name="customtexSchema">Custom text schema used to build the index and indexer definitions.</param>
        /// <param name="selectedProjects">Flags describing which project kinds are selected.</param>
        /// <returns>A task that completes once all four resources have been created.</returns>
        public async Task IndexCustomText(
            string indexName,
            CustomTextSchema customtexSchema,
            SelectedProjects selectedProjects)
        {
            // initialize resource names
            // Lower-case with the invariant culture so the derived names do not depend on
            // the current culture (e.g. Turkish maps "I" to a dotless, non-ASCII "ı").
            var resourcePrefix      = indexName.ToLowerInvariant();
            var dataSourceName      = resourcePrefix + "-data";
            var indexerName         = resourcePrefix + "-indexer";
            var skillSetName        = resourcePrefix + "-skillset";
            var customTextSkillName = resourcePrefix + "-customtext-skill";

            // create models (index, skillset & indexer)
            var indexSchema = _cognitiveSearchSchemaCreatorService.CreateSearchIndexSchema(
                customtexSchema,
                indexName,
                selectedProjects);
            var skillsetSchema = _cognitiveSearchSchemaCreatorService.CreateSkillSetSchema(
                skillSetName,
                customTextSkillName,
                _appConfigs.AzureFunction.FunctionUrl,
                _appConfigs.CustomText,
                selectedProjects);
            var indexerSchema = _cognitiveSearchSchemaCreatorService.CreateIndexerSchema(
                customtexSchema,
                indexerName,
                dataSourceName,
                skillSetName,
                indexName,
                selectedProjects);

            // indexing pipeline: data source, then index, then skillset, then indexer
            _loggerService.LogOperation(OperationType.CreateDataSource, $"{dataSourceName}");
            await _cognitiveSearchService.CreateDataSourceConnectionAsync(dataSourceName, _appConfigs.BlobStorage.ContainerName, _appConfigs.BlobStorage.ConnectionString);

            _loggerService.LogOperation(OperationType.CreatingSearchIndex, $"{indexName}");
            await _cognitiveSearchService.CreateIndexAsync(indexSchema);

            _loggerService.LogOperation(OperationType.CreatingSkillSet, $"{skillSetName}");
            await _cognitiveSearchService.CreateSkillSetAsync(skillsetSchema);

            _loggerService.LogOperation(OperationType.CreatingIndexer, $"{indexerName}");
            await _cognitiveSearchService.CreateIndexerAsync(indexerSchema);

            // log success message
            _loggerService.LogSuccessMessage("Indexing Application Was Successfull!");
        }
        /// <summary>
        /// Loads the schema of a custom text application from its authoring resource.
        /// </summary>
        /// <param name="customTextResource">Endpoint and key used to create the authoring client.</param>
        /// <param name="customTextProjects">Project settings; the entity recognition project name is read from here.</param>
        /// <param name="selectedProjects">Flags describing which project kinds are selected.</param>
        /// <returns>
        /// A new <c>CustomTextSchema</c>; its <c>EntityNames</c> are assigned from the loaded
        /// extractors only when the entity recognition project is selected.
        /// </returns>
        public async Task<CustomTextSchema> LoadCustomTextAppSchema(
            CustomTextResource customTextResource,
            CustomTextProjects customTextProjects,
            SelectedProjects selectedProjects)
        {
            var authoringClient = new CustomTextAuthoringClient(customTextResource.Endpoint, customTextResource.Key);
            var schema = new CustomTextSchema();

            // Nothing to load unless the entity recognition project is selected.
            if (!selectedProjects.IsSelected_EntityRecognitionProject)
            {
                return schema;
            }

            var projectName = customTextProjects.EntityRecognition.ProjectName;
            var extractors = await LoadExtractors(authoringClient, projectName);

            // Keep only the extractor names.
            schema.EntityNames = (from extractor in extractors
                                  select extractor.Name).ToList();

            return schema;
        }
        /// <summary>
        /// Builds the indexer definition that connects the data source, skillset and index.
        /// </summary>
        /// <param name="schema">Schema supplying the entity names for the output field mappings.</param>
        /// <param name="indexerName">Name assigned to the indexer.</param>
        /// <param name="dataSourceName">Name of the data source the indexer reads from.</param>
        /// <param name="skillSetName">Name of the skillset the indexer uses.</param>
        /// <param name="indexName">Name of the target index.</param>
        /// <param name="selectedProjects">Flags describing which project kinds are selected.</param>
        /// <returns>
        /// An <c>Indexer</c> with three fixed field mappings, output field mappings for the
        /// selected projects (entities, then single-class, then multi-class) and a
        /// configuration whose <c>IndexedFileNameExtensions</c> is ".txt".
        /// </returns>
        public Indexer CreateIndexerSchema(
            CustomTextSchema schema,
            string indexerName,
            string dataSourceName,
            string skillSetName,
            string indexName,
            SelectedProjects selectedProjects)
        {
            // Source metadata fields -> core index fields.
            var sourceMappings = new List<IndexerFieldMapping>();

            // The id mapping additionally applies the "base64Encode" mapping function.
            var idMapping = new IndexerFieldMapping
            {
                SourceFieldName = "metadata_storage_name",
                TargetFieldName = "id",
                MappingFunction = new MappingFunction { Name = "base64Encode" }
            };
            sourceMappings.Add(idMapping);

            var nameMapping = new IndexerFieldMapping
            {
                SourceFieldName = "metadata_storage_name",
                TargetFieldName = "document_name"
            };
            sourceMappings.Add(nameMapping);

            var uriMapping = new IndexerFieldMapping
            {
                SourceFieldName = "metadata_storage_path",
                TargetFieldName = "document_uri"
            };
            sourceMappings.Add(uriMapping);

            // Skillset output paths -> index fields, per selected project.
            var skillOutputMappings = new List<IndexerFieldMapping>();

            if (selectedProjects.IsSelected_EntityRecognitionProject)
            {
                foreach (string name in schema.EntityNames)
                {
                    var entityMapping = new IndexerFieldMapping
                    {
                        SourceFieldName = $"/document/content/{Constants.SkillsetResponseEntitiesKey}/{name}",
                        TargetFieldName = name
                    };
                    skillOutputMappings.Add(entityMapping);
                }
            }

            if (selectedProjects.IsSelected_SingleClassificationProject)
            {
                var singleClassMapping = new IndexerFieldMapping
                {
                    SourceFieldName = $"/document/content/{Constants.SkillsetResponseSingleClassKey}",
                    TargetFieldName = Constants.SearchIndexSingleClassColumnName
                };
                skillOutputMappings.Add(singleClassMapping);
            }

            if (selectedProjects.IsSelected_MultiClassificationProject)
            {
                var multiClassMapping = new IndexerFieldMapping
                {
                    SourceFieldName = $"/document/content/{Constants.SkillsetResponseMultiClassKey}",
                    TargetFieldName = Constants.SearchIndexMultiClassColumnName
                };
                skillOutputMappings.Add(multiClassMapping);
            }

            // Indexer configuration.
            var configuration = new IndexerConfiguration();
            configuration.IndexedFileNameExtensions = ".txt";

            var parameters = new IndexerParameters();
            parameters.Configuration = configuration;

            // Assemble the indexer definition.
            var indexer = new Indexer();
            indexer.Name = indexerName;
            indexer.DataSourceName = dataSourceName;
            indexer.TargetIndexName = indexName;
            indexer.SkillsetName = skillSetName;
            indexer.FieldMappings = sourceMappings;
            indexer.OutputFieldMappings = skillOutputMappings;
            indexer.Parameters = parameters;
            return indexer;
        }