/// <summary>
/// Creates an indexer that uses every field mapping function, reads it back from the
/// service and checks that the returned definition equals the one that was sent.
/// </summary>
public void CanRoundtripIndexerWithFieldMappingFunctions()
{
    Run(() =>
    {
        string indexerName = SearchTestUtilities.GenerateName();

        // Exercise each mapping function, with and without explicit parameters
        // (whether or not the mapping is meaningful for the test DB).
        FieldMapping[] mappings =
        {
            new FieldMapping("feature_id", "a", FieldMappingFunction.Base64Encode()),
            new FieldMapping("feature_id", "b", FieldMappingFunction.Base64Encode(useHttpServerUtilityUrlTokenEncode: true)),
            new FieldMapping("feature_id", "c", FieldMappingFunction.ExtractTokenAtPosition(delimiter: " ", position: 0)),
            new FieldMapping("feature_id", "d", FieldMappingFunction.Base64Decode()),
            new FieldMapping("feature_id", "e", FieldMappingFunction.Base64Decode(useHttpServerUtilityUrlTokenDecode: false)),
            new FieldMapping("feature_id", "f", FieldMappingFunction.JsonArrayToStringCollection())
        };

        Indexer expectedIndexer = new Indexer(indexerName, Data.DataSourceName, Data.TargetIndexName);
        expectedIndexer.FieldMappings = mappings;

        SearchServiceClient searchClient = Data.GetSearchServiceClient();

        // The target fields have to exist in the index before a field mapping may reference them.
        Index index = searchClient.Indexes.Get(Data.TargetIndexName);
        string[] fieldNames = new[] { "a", "b", "c", "d", "e", "f" };
        var extraFields = fieldNames.Select(name => new Field(name, DataType.String));
        index.Fields = index.Fields.Concat(extraFields).ToList();
        searchClient.Indexes.CreateOrUpdate(index);

        searchClient.Indexers.Create(expectedIndexer);

        Indexer actualIndexer = searchClient.Indexers.Get(expectedIndexer.Name);
        AssertIndexersEqual(expectedIndexer, actualIndexer);
    });
}
/// <summary>
/// Makes sure the "books" index, the "booksdatasource" data source and the "booksindexer"
/// indexer exist on the search service, creating whichever is missing, and then stores an
/// index client for the "books" index in <c>_indexClient</c>.
/// </summary>
private static void IndexBooks()
{
    const string dataSourceName = "booksdatasource";
    const string indexerName = "booksindexer";

    var booksIndex = new Index();
    booksIndex.Name = "books";
    booksIndex.Fields = FieldBuilder.BuildForType<Book>();

    bool indexMissing = !_searchClient.Indexes.Exists(booksIndex.Name);
    if (indexMissing)
    {
        _searchClient.Indexes.Create(booksIndex);
    }

    bool dataSourceMissing = !_searchClient.DataSources.Exists(dataSourceName);
    if (dataSourceMissing)
    {
        // Azure SQL data source reading the "Books" container with the "BookContext" connection string.
        DataSource sqlSource = new DataSource();
        sqlSource.Name = dataSourceName;
        sqlSource.Type = DataSourceType.AzureSql;
        sqlSource.Credentials = new DataSourceCredentials(ConfigurationManager.ConnectionStrings["BookContext"].ToString());
        sqlSource.Container = new DataContainer("Books");
        _searchClient.DataSources.CreateOrUpdate(sqlSource);
    }

    bool indexerMissing = !_searchClient.Indexers.Exists(indexerName);
    if (indexerMissing)
    {
        // The Name field is passed through the base64Decode mapping function.
        FieldMapping[] mappings = new[] { new FieldMapping("Name", FieldMappingFunction.Base64Decode()) };
        var booksIndexer = new Indexer(indexerName, dataSourceName, booksIndex.Name, fieldMappings: mappings);
        _searchClient.Indexers.Create(booksIndexer);
    }

    _indexClient = _searchClient.Indexes.GetClient(booksIndex.Name);
}
/// <summary>
/// Creates or updates the blob data source and the search index (concurrently), then creates
/// or updates an indexer that connects them, mapping the blob name and blob content onto
/// <c>IndexDocument.Id</c> and <c>IndexDocument.Content</c> and running on a five-minute schedule.
/// </summary>
public async Task EnsureIndexerCreatedAsync()
{
    var indexDefinition = new Index();
    indexDefinition.Name = ConfigurationReader.SearchIndexName;
    indexDefinition.Fields = FieldBuilder.BuildForType<IndexDocument>();

    var blobSource = new DataSource
    {
        Name = ConfigurationReader.SearchDataSourceName,
        Credentials = new DataSourceCredentials(ConfigurationReader.BlobStorageConnectionString),
        Type = DataSourceType.AzureBlob,
        Container = new DataContainer(ConfigurationReader.BlobStorageContainerName)
    };

    // Both resources must exist before the indexer that references them is created.
    await Task.WhenAll(
        ServiceClient.DataSources.CreateOrUpdateAsync(blobSource),
        ServiceClient.Indexes.CreateOrUpdateAsync(indexDefinition));

    // Id <- first "."-delimited token of the blob name.
    var idMapping = new FieldMapping
    {
        SourceFieldName = "metadata_storage_name", // source field name for azure blob name
        TargetFieldName = nameof(IndexDocument.Id),
        MappingFunction = new FieldMappingFunction
        {
            Name = "extractTokenAtPosition",
            Parameters = new Dictionary<string, object>
            {
                { "delimiter", "." },
                { "position", 0 }
            }
        }
    };

    var contentMapping = new FieldMapping
    {
        SourceFieldName = "content", // source field name for azure blob content
        TargetFieldName = nameof(IndexDocument.Content)
    };

    var indexingParameters = new IndexingParameters();
    indexingParameters.DoNotFailOnUnsupportedContentType();
    indexingParameters.IndexFileNameExtensions(ConfigurationReader.SearchIndexerExtensions);
    indexingParameters.MaxFailedItems = -1;
    indexingParameters.MaxFailedItemsPerBatch = -1;

    var blobIndexer = new Indexer
    {
        Name = ConfigurationReader.SearchIndexerName,
        DataSourceName = blobSource.Name,
        TargetIndexName = indexDefinition.Name,
        Parameters = indexingParameters,
        FieldMappings = new List<FieldMapping> { contentMapping, idMapping },
        Schedule = new IndexingSchedule(TimeSpan.FromMinutes(5))
    };

    await ServiceClient.Indexers.CreateOrUpdateAsync(blobIndexer);
}
/// <summary>
/// Returns the indexer called <paramref name="name"/>. When no such indexer exists it is
/// created, started once, and the created definition is returned.
/// </summary>
/// <param name="name">Name of the indexer to look up or create.</param>
/// <param name="datasourceName">Name of the data source the new indexer reads from.</param>
/// <param name="indexName">Name of the index the new indexer writes to.</param>
/// <param name="mapping">
/// Field mappings for a newly created indexer. When null or empty, a single mapping that
/// base64-encodes <c>metadata_storage_path</c> is used instead.
/// </param>
/// <returns>The existing indexer, or the newly created one.</returns>
public async Task<Indexer> CreateIndexerIfNotExists(string name, string datasourceName, string indexName, FieldMapping[] mapping)
{
    if (await SearchServiceClient.Indexers.ExistsAsync(name))
    {
        return await SearchServiceClient.Indexers.GetAsync(name);
    }

    // Use the caller's mappings; previously this argument was ignored and the default below was always applied.
    List<FieldMapping> fieldMappings = mapping != null && mapping.Length > 0
        ? new List<FieldMapping>(mapping)
        : new List<FieldMapping>
        {
            new FieldMapping("metadata_storage_path", FieldMappingFunction.Base64Encode()) //key cannot be an url therefore Encode it.
        };

    // Stay asynchronous end to end: no Task.Run(...).Result and no Thread.Sleep, which
    // blocked a thread and wrapped failures in AggregateException.
    Indexer indexer = await SearchServiceClient.Indexers.CreateOrUpdateAsync(
        new Indexer(name, datasourceName, indexName, fieldMappings: fieldMappings));

    // NOTE(review): the one-second pauses are kept from the original; presumably they give
    // the service time to register the indexer and start the run - confirm they are needed.
    await Task.Delay(1000);
    await SearchServiceClient.Indexers.RunAsync(name);
    await Task.Delay(1000);

    return indexer;
}
/// <summary>
/// Builds (without sending to the service) an indexer definition with a generated name,
/// a daily schedule and one field mapping per mapping function.
/// </summary>
/// <returns>The new indexer definition.</returns>
public Indexer CreateTestIndexer()
{
    var indexer = new Indexer(SearchTestUtilities.GenerateName(), DataSourceName, TargetIndexName);

    // startTime is left unset: it is an absolute time that must be within 24 hours of the
    // current time, which does not work with recorded mock payloads.
    indexer.Schedule = new IndexingSchedule(interval: TimeSpan.FromDays(1));

    // Cover every field mapping function, even where it makes no sense for the test DB.
    indexer.FieldMappings = new[]
    {
        new FieldMapping("feature_class", FieldMappingFunction.Base64Encode()),
        new FieldMapping("state_alpha", "state"),
        new FieldMapping("county_name", FieldMappingFunction.ExtractTokenAtPosition(" ", 0)),
        new FieldMapping("elev_in_m", "elevation"),
        new FieldMapping("map_name", FieldMappingFunction.Base64Decode()),
        new FieldMapping("history", FieldMappingFunction.JsonArrayToStringCollection())
    };

    return indexer;
}
/// <summary>
/// Initializes a new instance of the FieldMapping class from all three of its settings.
/// </summary>
/// <param name="sourceFieldName">Value stored in <c>SourceFieldName</c>.</param>
/// <param name="targetFieldName">Value stored in <c>TargetFieldName</c>.</param>
/// <param name="mappingFunction">Value stored in <c>MappingFunction</c>.</param>
internal FieldMapping(string sourceFieldName, string targetFieldName, FieldMappingFunction mappingFunction)
{
    this.SourceFieldName = sourceFieldName;
    this.TargetFieldName = targetFieldName;
    this.MappingFunction = mappingFunction;
}
/// <summary>
/// Builds an indexer named "&lt;index name&gt;-indexer" for the given data source, skillset and
/// index, deletes any indexer that already has that name, and creates the new one.
/// </summary>
/// <param name="indexerClient">Client used to look up, delete and create the indexer.</param>
/// <param name="dataSource">Data source connection whose name the indexer references.</param>
/// <param name="skillSet">Skillset whose name the indexer references.</param>
/// <param name="index">Target index; its name also determines the indexer name and description.</param>
/// <returns>The locally built indexer definition.</returns>
public static async Task<SearchIndexer> CreateIndexerAsync(SearchIndexerClient indexerClient, SearchIndexerDataSourceConnection dataSource, SearchIndexerSkillset skillSet, SearchIndex index)
{
    var parameters = new IndexingParameters
    {
        MaxFailedItems = -1,
        MaxFailedItemsPerBatch = -1,
        IndexingParametersConfiguration = new IndexingParametersConfiguration
        {
            DataToExtract = BlobIndexerDataToExtract.ContentAndMetadata,
            ParsingMode = BlobIndexerParsingMode.Text
        }
    };

    var indexer = new SearchIndexer($"{index.Name}-indexer", dataSource.Name, index.Name)
    {
        Description = $"{index.Name} Indexer",
        SkillsetName = skillSet.Name,
        Parameters = parameters
    };

    // Only metadata_storage_path is mapped here (onto the field of the same name),
    // run through base64Encode with useHttpServerUtilityUrlTokenEncode = true.
    var pathMapping = new FieldMapping("metadata_storage_path")
    {
        TargetFieldName = "metadata_storage_path",
        MappingFunction = new FieldMappingFunction("base64Encode")
        {
            Parameters = { { "useHttpServerUtilityUrlTokenEncode", true } }
        }
    };
    indexer.FieldMappings.Add(pathMapping);

    try
    {
        // GetIndexerAsync throws a 404 when the indexer is absent, which skips the delete.
        await indexerClient.GetIndexerAsync(indexer.Name);
        await indexerClient.DeleteIndexerAsync(indexer.Name);
    }
    catch (RequestFailedException ex) when (ex.Status == 404)
    {
        // Nothing to delete: the indexer does not exist yet.
    }

    try
    {
        await indexerClient.CreateIndexerAsync(indexer);
    }
    catch (RequestFailedException ex)
    {
        Console.WriteLine("Failed to create the indexer\n Exception message: {0}\n", ex.Message);
        ExitProgram("Cannot continue without creating an indexer");
    }

    return indexer;
}
/// <summary>
/// Creates or updates the blob storage data source, recreates the
/// "hotelreviews-blob-indexer" indexer against it, and starts an indexer run.
/// </summary>
/// <param name="indexName">Name of the index the indexer writes to.</param>
/// <param name="searchService">Client used for all data source and indexer calls.</param>
/// <param name="skillSet">Skillset whose name the indexer references.</param>
private static async Task CreateAndRunBlobIndexer(string indexName, SearchServiceClient searchService, Skillset skillSet)
{
    // CreateOrUpdate is used so that an existing data source is kept but picks up
    // a changed connection string.
    DataSource blobSource = DataSource.AzureBlobStorage(
        name: configuration["BlobStorageAccountName"],
        storageConnectionString: configuration["BlobStorageConnectionString"],
        containerName: "qnateam7container");
    await searchService.DataSources.CreateOrUpdateAsync(blobSource);

    Console.WriteLine("Creating Blob Storage indexer...\n");

    IDictionary<string, object> indexerConfig = new Dictionary<string, object>
    {
        { "dataToExtract", "contentAndMetadata" },
        { "imageAction", "generateNormalizedImages" }
    };

    // Blob metadata/content -> index fields; the base64-encoded storage path becomes "id".
    var sourceMappings = new List<FieldMapping>
    {
        new FieldMapping("metadata_storage_path", "id", FieldMappingFunction.Base64Encode()),
        new FieldMapping("metadata_storage_path", "url"),
        new FieldMapping("metadata_storage_name", "file_name"),
        new FieldMapping("content", "content"),
        new FieldMapping("metadata_storage_size", "size"),
        new FieldMapping("metadata_storage_last_modified", "last_modified")
    };

    // Enrichment outputs -> index fields. The keyPhrases mapping
    // (/document/pages/*/keyPhrases/* -> keyPhrases) is currently disabled.
    var enrichmentMappings = new List<FieldMapping>
    {
        new FieldMapping(sourceFieldName: "/document/content/persons/*", targetFieldName: "persons"),
        new FieldMapping(sourceFieldName: "/document/sentiment", targetFieldName: "sentiment"),
        new FieldMapping(sourceFieldName: "/document/merged_text", targetFieldName: "merged_text"),
        new FieldMapping(sourceFieldName: "/document/greeting", targetFieldName: "greeting"),
        new FieldMapping(sourceFieldName: "/document/top_10_words", targetFieldName: "top_10_words")
    };

    var indexingParameters = new IndexingParameters(
        maxFailedItems: -1,
        maxFailedItemsPerBatch: -1,
        configuration: indexerConfig);

    Indexer blobIndexer = new Indexer(
        name: "hotelreviews-blob-indexer",
        dataSourceName: blobSource.Name,
        targetIndexName: indexName,
        fieldMappings: sourceMappings,
        outputFieldMappings: enrichmentMappings,
        skillsetName: skillSet.Name,
        parameters: indexingParameters,
        schedule: new IndexingSchedule(TimeSpan.FromDays(1)));

    // An existing indexer is deleted (rather than reset) before being created again.
    if (await searchService.Indexers.ExistsAsync(blobIndexer.Name))
    {
        await searchService.Indexers.DeleteAsync(blobIndexer.Name);
    }

    await searchService.Indexers.CreateOrUpdateAsync(blobIndexer);

    Console.WriteLine("Running Blob Storage indexer...\n");

    try
    {
        await searchService.Indexers.RunAsync(blobIndexer.Name);
    }
    catch (CloudException e) when (e.Response.StatusCode == (HttpStatusCode)429)
    {
        // HTTP 429 is reported and swallowed; any other failure propagates.
        Console.WriteLine("Failed to run indexer: {0}", e.Response.Content);
    }
}
/// <summary>
/// Builds the "demoindexer" indexer for the given data source, skillset and index,
/// deletes any indexer that already has that name, and creates the new one.
/// </summary>
/// <param name="indexerClient">Client used to look up, delete and create the indexer.</param>
/// <param name="dataSource">Data source connection whose name the indexer references.</param>
/// <param name="skillSet">Skillset whose name the indexer references.</param>
/// <param name="index">Index whose name the indexer targets.</param>
/// <returns>The locally built indexer definition.</returns>
private static SearchIndexer CreateDemoIndexer(SearchIndexerClient indexerClient, SearchIndexerDataSourceConnection dataSource, SearchIndexerSkillset skillSet, SearchIndex index)
{
    var parameters = new IndexingParameters
    {
        MaxFailedItems = -1,
        MaxFailedItemsPerBatch = -1,
        Configuration =
        {
            { "dataToExtract", "contentAndMetadata" },
            { "imageAction", "generateNormalizedImages" }
        }
    };

    var indexer = new SearchIndexer("demoindexer", dataSource.Name, index.Name)
    {
        Description = "Demo Indexer",
        SkillsetName = skillSet.Name,
        Parameters = parameters
    };

    // Source fields: the storage path (base64Encode with
    // useHttpServerUtilityUrlTokenEncode = true) becomes "id"; content maps straight through.
    var idMapping = new FieldMapping("metadata_storage_path")
    {
        TargetFieldName = "id",
        MappingFunction = new FieldMappingFunction("base64Encode")
        {
            Parameters = { { "useHttpServerUtilityUrlTokenEncode", true } }
        }
    };
    indexer.FieldMappings.Add(idMapping);
    indexer.FieldMappings.Add(new FieldMapping("content") { TargetFieldName = "content" });

    // Enrichment outputs mapped onto index fields.
    FieldMapping[] enrichmentMappings =
    {
        new FieldMapping("/document/pages/*/organizations/*") { TargetFieldName = "organizations" },
        new FieldMapping("/document/pages/*/keyPhrases/*") { TargetFieldName = "keyPhrases" },
        new FieldMapping("/document/languageCode") { TargetFieldName = "languageCode" }
    };
    foreach (FieldMapping enrichmentMapping in enrichmentMappings)
    {
        indexer.OutputFieldMappings.Add(enrichmentMapping);
    }

    try
    {
        // GetIndexer throws a 404 when the indexer is absent, which skips the delete.
        indexerClient.GetIndexer(indexer.Name);
        indexerClient.DeleteIndexer(indexer.Name);
    }
    catch (RequestFailedException ex) when (ex.Status == 404)
    {
        // Nothing to delete: the indexer does not exist yet.
    }

    try
    {
        indexerClient.CreateIndexer(indexer);
    }
    catch (RequestFailedException ex)
    {
        Console.WriteLine("Failed to create the indexer\n Exception message: {0}\n", ex.Message);
        ExitProgram("Cannot continue without creating an indexer");
    }

    return indexer;
}
/// <summary>
/// Initializes a new instance of the FieldMapping class whose target field has the
/// same name as its source field.
/// </summary>
/// <param name="sourceFieldName">The name of the field in the data source; also used as the target field name.</param>
/// <param name="mappingFunction">A function to apply to each source field value before indexing.</param>
public FieldMapping(string sourceFieldName, FieldMappingFunction mappingFunction)
    : this(sourceFieldName: sourceFieldName, targetFieldName: sourceFieldName, mappingFunction: mappingFunction)
{
    // All initialization happens in the three-argument constructor.
}
/// <summary>
/// Sample entry point: configures logging and blob storage, uploads the documents found in
/// the configured folder, creates the index, data source and indexer for the container,
/// and finally logs the results of a sample search.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static async Task Main(string[] args)
{
    var services = new ServiceCollection();
    services.AddLogging(l =>
    {
        l.AddConsole();
    });

    var builder = new ConfigurationBuilder()
        .AddJsonFile("appsettings.json", false, true) //load base settings
        .AddJsonFile("appsettings.local.json", true, true); //load local settings
    var configuration = builder.Build();

    services.AddBlobStorage(new BlobSettings(
        configuration.GetValue<string>("BlobStorage:ConnectionString"),
        configuration.GetValue<string>("BlobStorage:SearchServiceName"),
        configuration.GetValue<string>("BlobStorage:ApiKey")));

    IServiceProvider provider = services.BuildServiceProvider();
    var storage = provider.GetService<IIndexedBlobStorage>();
    var logger = provider.GetService<ILoggerFactory>().CreateLogger("info");

    // 1. create container
    var container = storage.GetOrCreateContainer(configuration.GetValue<string>("BlobStorage:Container"));
    var indexName = $"{container.Name}-index";

    // 2. create blobs with documents
    var documents = new DirectoryInfo(configuration.GetValue<string>("AppSettings:DocumentFolder"))
        .GetFiles("*.*", SearchOption.AllDirectories)
        .Where(d => d.Extension == ".docx" || d.Extension == ".doc" || d.Extension == ".pdf");

    // Await every upload. The previous List.ForEach(async ...) produced async-void lambdas,
    // so the following steps could run before the uploads finished and upload failures
    // could not be observed here.
    foreach (var document in documents)
    {
        await container.UploadDocument(document.FullName, metaData: new List<KeyValuePair<string, string>>
        {
            new KeyValuePair<string, string>("subject", "math")
        });
    }

    // 3. add index
    var index = await container.GetOrCreateIndex(new Index(indexName, new List<Field>())
        .AddDefaultWordFields("nl")
        .AddField(new Field("subject", AnalyzerName.NlLucene)
        {
            Type = DataType.String,
            IsFilterable = true
        }));

    // 4. add datasource + indexer
    var datasource = await container.CreateDatasourceIfNotExists($"{container.Name}-datasource");
    await datasource.CreateIndexerIfNotExists(
        $"{container.Name}-indexer",
        $"{container.Name}-datasource",
        index.Name,
        new List<FieldMapping>
        {
            new FieldMapping("metadata_storage_path", FieldMappingFunction.Base64Encode()) //key cannot be an url therefore Encode it.
        }.ToArray());

    // 5. Search for words within a subject.
    var searchResult = container.Search(indexName, "Something to search for");
    searchResult.ForEach(s => logger.LogInformation($"result found: {s}"));
}