/// <summary>
/// Validates the custom text schema for the selected projects and throws
/// when the validation does not pass.
/// </summary>
/// <param name="schema">Schema whose contents are validated.</param>
/// <param name="projects">Custom text project settings, forwarded to the internal validation.</param>
/// <param name="selectedProjects">Flags describing which project types are selected.</param>
/// <exception cref="Exception">Thrown when the internal validation reports a failure.</exception>
public void ValidateSchema(CustomTextSchema schema, CustomTextProjects projects, SelectedProjects selectedProjects)
{
    // Nothing to do when the schema passes every check.
    if (ValidateSchemaInternal(schema, projects, selectedProjects))
    {
        return;
    }

    throw new Exception("Cognitive Search doesn't support spaces or special characters in entity names! please rename your entities and re-train your model");
}
/// <summary>
/// Runs the validation checks that apply to the selected project types.
/// </summary>
/// <param name="schema">Schema to validate.</param>
/// <param name="projects">Custom text project settings (not read by this method).</param>
/// <param name="selectedProjects">Flags describing which project types are selected.</param>
/// <returns>
/// <c>true</c> when the entity recognition project is not selected, otherwise
/// the result of the entity name validation.
/// </returns>
private bool ValidateSchemaInternal(CustomTextSchema schema, CustomTextProjects projects, SelectedProjects selectedProjects)
{
    // Entity names are the only thing checked; without an entity
    // recognition project there is nothing that can fail.
    if (!selectedProjects.IsSelected_EntityRecognitionProject)
    {
        return true;
    }

    return ValidateEntityNames(schema);
}
/// <summary>
/// Checks that every entity name in the schema is usable as a search index field name.
/// </summary>
/// <param name="schema">Schema whose <c>EntityNames</c> are inspected.</param>
/// <returns>
/// <c>true</c> when every entity name is non-empty and consists solely of
/// letters and digits; <c>false</c> otherwise.
/// </returns>
private bool ValidateEntityNames(CustomTextSchema schema)
{
    // A null or empty name can never be a valid field name, so it is
    // rejected explicitly (a per-character check alone would accept "").
    // Whitespace needs no separate test: it is neither a letter nor a digit.
    return schema.EntityNames.All(
        name => !string.IsNullOrEmpty(name) && name.All(c => char.IsLetterOrDigit(c)));
}
/// <summary>
/// Builds the search index definition for the given schema and project selection.
/// </summary>
/// <param name="schema">Schema providing the entity names that become index fields.</param>
/// <param name="indexName">Name given to the created index definition.</param>
/// <param name="selectedProjects">Flags describing which project types contribute fields.</param>
/// <returns>
/// A <see cref="SearchIndex"/> containing the core document fields followed by
/// the classification and entity fields of the selected projects.
/// </returns>
public SearchIndex CreateSearchIndexSchema(CustomTextSchema schema, string indexName, SelectedProjects selectedProjects)
{
    var fields = new List<SearchField>();

    // core fields: "id" is the index key
    fields.Add(new SearchField("id", SearchFieldDataType.String) { IsKey = true });
    fields.Add(new SearchField("document_name", SearchFieldDataType.String));
    fields.Add(new SearchField("document_uri", SearchFieldDataType.String));

    // classifiers: single class is one string, multi class is a string collection
    if (selectedProjects.IsSelected_SingleClassificationProject)
    {
        fields.Add(new SearchField(
            Constants.SearchIndexSingleClassColumnName,
            SearchFieldDataType.String));
    }

    if (selectedProjects.IsSelected_MultiClassificationProject)
    {
        fields.Add(new SearchField(
            Constants.SearchIndexMultiClassColumnName,
            SearchFieldDataType.Collection(SearchFieldDataType.String)));
    }

    // extractors: one string-collection field per entity name
    if (selectedProjects.IsSelected_EntityRecognitionProject)
    {
        schema.EntityNames.ForEach(name => fields.Add(new SearchField(
            name,
            SearchFieldDataType.Collection(SearchFieldDataType.String))));
    }

    var index = new SearchIndex(indexName)
    {
        Fields = fields
    };
    return index;
}
/// <summary>
/// Creates the data source, index, skillset and indexer for a custom text
/// application, logging each step.
/// </summary>
/// <param name="indexName">
/// Name of the search index; its lower-cased form is the prefix of the
/// data source, indexer, skillset and skill names.
/// </param>
/// <param name="customtexSchema">Custom text schema used to build the index and indexer definitions.</param>
/// <param name="selectedProjects">Flags describing which project types are selected.</param>
/// <returns>A task that completes once all four resources have been created.</returns>
public async Task IndexCustomText(
    string indexName,
    CustomTextSchema customtexSchema,
    SelectedProjects selectedProjects)
{
    // initialize resource names
    // Lower-case with the invariant culture: these are service resource
    // identifiers, so the result must not depend on the machine's current
    // culture (e.g. Turkish casing turns "I" into a dotless "ı").
    var resourcePrefix = indexName.ToLowerInvariant();
    var dataSourceName = resourcePrefix + "-data";
    var indexerName = resourcePrefix + "-indexer";
    var skillSetName = resourcePrefix + "-skillset";
    var customTextSkillName = resourcePrefix + "-customtext-skill";

    // create models (index, skillset & indexer)
    var indexSchema = _cognitiveSearchSchemaCreatorService.CreateSearchIndexSchema(
        customtexSchema,
        indexName,
        selectedProjects);
    var skillsetSchema = _cognitiveSearchSchemaCreatorService.CreateSkillSetSchema(
        skillSetName,
        customTextSkillName,
        _appConfigs.AzureFunction.FunctionUrl,
        _appConfigs.CustomText,
        selectedProjects);
    var indexerSchema = _cognitiveSearchSchemaCreatorService.CreateIndexerSchema(
        customtexSchema,
        indexerName,
        dataSourceName,
        skillSetName,
        indexName,
        selectedProjects);

    // indexing pipeline: data source -> index -> skillset -> indexer
    _loggerService.LogOperation(OperationType.CreateDataSource, $"{dataSourceName}");
    await _cognitiveSearchService.CreateDataSourceConnectionAsync(
        dataSourceName,
        _appConfigs.BlobStorage.ContainerName,
        _appConfigs.BlobStorage.ConnectionString);

    _loggerService.LogOperation(OperationType.CreatingSearchIndex, $"{indexName}");
    await _cognitiveSearchService.CreateIndexAsync(indexSchema);

    _loggerService.LogOperation(OperationType.CreatingSkillSet, $"{skillSetName}");
    await _cognitiveSearchService.CreateSkillSetAsync(skillsetSchema);

    _loggerService.LogOperation(OperationType.CreatingIndexer, $"{indexerName}");
    await _cognitiveSearchService.CreateIndexerAsync(indexerSchema);

    // log success message
    _loggerService.LogSuccessMessage("Indexing Application Was Successfull!");
}
/// <summary>
/// Loads the schema of a custom text application from the authoring service.
/// </summary>
/// <param name="customTextResource">Endpoint and key used to create the authoring client.</param>
/// <param name="customTextProjects">Project settings; the entity recognition project name is read from here.</param>
/// <param name="selectedProjects">Flags describing which project types are selected.</param>
/// <returns>
/// A <see cref="CustomTextSchema"/> whose <c>EntityNames</c> is populated only
/// when the entity recognition project is selected.
/// </returns>
public async Task<CustomTextSchema> LoadCustomTextAppSchema(
    CustomTextResource customTextResource,
    CustomTextProjects customTextProjects,
    SelectedProjects selectedProjects)
{
    var authoringClient = new CustomTextAuthoringClient(customTextResource.Endpoint, customTextResource.Key);
    var schema = new CustomTextSchema();

    // Without an entity recognition project there are no extractors to load.
    if (!selectedProjects.IsSelected_EntityRecognitionProject)
    {
        return schema;
    }

    // load extractors and keep only their names
    var projectName = customTextProjects.EntityRecognition.ProjectName;
    var loadedExtractors = await LoadExtractors(authoringClient, projectName);
    schema.EntityNames = loadedExtractors.Select(extractor => extractor.Name).ToList();

    return schema;
}
/// <summary>
/// Builds the indexer definition that connects the data source, skillset and index.
/// </summary>
/// <param name="schema">Schema providing the entity names mapped to index fields.</param>
/// <param name="indexerName">Name of the indexer.</param>
/// <param name="dataSourceName">Name of the data source the indexer reads from.</param>
/// <param name="skillSetName">Name of the skillset the indexer runs.</param>
/// <param name="indexName">Name of the target index.</param>
/// <param name="selectedProjects">Flags describing which project types contribute output mappings.</param>
/// <returns>
/// An <see cref="Indexer"/> with the core field mappings, the output field
/// mappings of the selected projects, and parameters limiting indexing to ".txt" files.
/// </returns>
public Indexer CreateIndexerSchema(
    CustomTextSchema schema,
    string indexerName,
    string dataSourceName,
    string skillSetName,
    string indexName,
    SelectedProjects selectedProjects)
{
    // field mappings: storage metadata -> core index fields
    var idMapping = new IndexerFieldMapping
    {
        SourceFieldName = "metadata_storage_name",
        TargetFieldName = "id",
        // the key is derived from the storage name via the base64Encode mapping function
        MappingFunction = new MappingFunction { Name = "base64Encode" }
    };
    var documentNameMapping = new IndexerFieldMapping
    {
        SourceFieldName = "metadata_storage_name",
        TargetFieldName = "document_name"
    };
    var documentUriMapping = new IndexerFieldMapping
    {
        SourceFieldName = "metadata_storage_path",
        TargetFieldName = "document_uri"
    };
    var coreMappings = new List<IndexerFieldMapping>
    {
        idMapping,
        documentNameMapping,
        documentUriMapping
    };

    // output field mappings: skillset response -> index fields
    var skillOutputMappings = new List<IndexerFieldMapping>();

    if (selectedProjects.IsSelected_EntityRecognitionProject)
    {
        // one mapping per entity, targeting the field named after the entity
        schema.EntityNames.ForEach(entityName => skillOutputMappings.Add(new IndexerFieldMapping
        {
            SourceFieldName = $"/document/content/{Constants.SkillsetResponseEntitiesKey}/{entityName}",
            TargetFieldName = entityName
        }));
    }

    if (selectedProjects.IsSelected_SingleClassificationProject)
    {
        var singleClassMapping = new IndexerFieldMapping
        {
            SourceFieldName = $"/document/content/{Constants.SkillsetResponseSingleClassKey}",
            TargetFieldName = Constants.SearchIndexSingleClassColumnName
        };
        skillOutputMappings.Add(singleClassMapping);
    }

    if (selectedProjects.IsSelected_MultiClassificationProject)
    {
        var multiClassMapping = new IndexerFieldMapping
        {
            SourceFieldName = $"/document/content/{Constants.SkillsetResponseMultiClassKey}",
            TargetFieldName = Constants.SearchIndexMultiClassColumnName
        };
        skillOutputMappings.Add(multiClassMapping);
    }

    return new Indexer
    {
        Name = indexerName,
        DataSourceName = dataSourceName,
        TargetIndexName = indexName,
        SkillsetName = skillSetName,
        FieldMappings = coreMappings,
        OutputFieldMappings = skillOutputMappings,
        // configs: only ".txt" files are indexed
        Parameters = new IndexerParameters
        {
            Configuration = new IndexerConfiguration
            {
                IndexedFileNameExtensions = ".txt"
            }
        }
    };
}