/// <summary>
/// Creates an indexer whose field mappings use every field mapping function (with and without
/// optional parameters), reads it back from the service, and asserts that the returned
/// definition equals the one that was sent.
/// </summary>
public void CanRoundtripIndexerWithFieldMappingFunctions()
{
    Run(() =>
    {
        // Exercise every mapping function and parameter combination; the mappings are not
        // expected to be meaningful for the data in the test DB.
        var mappings = new FieldMapping[]
        {
            new FieldMapping("feature_id", "a", FieldMappingFunction.Base64Encode()),
            new FieldMapping("feature_id", "b", FieldMappingFunction.Base64Encode(useHttpServerUtilityUrlTokenEncode: true)),
            new FieldMapping("feature_id", "c", FieldMappingFunction.ExtractTokenAtPosition(delimiter: " ", position: 0)),
            new FieldMapping("feature_id", "d", FieldMappingFunction.Base64Decode()),
            new FieldMapping("feature_id", "e", FieldMappingFunction.Base64Decode(useHttpServerUtilityUrlTokenDecode: false)),
            new FieldMapping("feature_id", "f", FieldMappingFunction.JsonArrayToStringCollection())
        };

        Indexer expectedIndexer = new Indexer(SearchTestUtilities.GenerateName(), Data.DataSourceName, Data.TargetIndexName)
        {
            FieldMappings = mappings
        };

        SearchServiceClient searchClient = Data.GetSearchServiceClient();

        // The target fields must exist in the index before the field mappings can reference them.
        Index index = searchClient.Indexes.Get(Data.TargetIndexName);
        string[] targetFieldNames = { "a", "b", "c", "d", "e", "f" };
        IEnumerable<Field> addedFields = targetFieldNames.Select(fieldName => new Field(fieldName, DataType.String));
        index.Fields = index.Fields.Concat(addedFields).ToList();
        searchClient.Indexes.CreateOrUpdate(index);

        // Round-trip: create the indexer, fetch it by name, and compare.
        searchClient.Indexers.Create(expectedIndexer);
        Indexer actualIndexer = searchClient.Indexers.Get(expectedIndexer.Name);

        AssertIndexersEqual(expectedIndexer, actualIndexer);
    });
}
/// <summary>
/// Returns the indexer called <paramref name="name"/> if the service already has one; otherwise
/// creates it, triggers a run, and returns the created definition.
/// </summary>
/// <param name="name">Name of the indexer to look up or create.</param>
/// <param name="datasourceName">Name of the data source the new indexer reads from.</param>
/// <param name="indexName">Name of the index the new indexer writes to.</param>
/// <param name="mapping">
/// Field mappings for the new indexer. When null or empty, a single mapping that Base64-encodes
/// <c>metadata_storage_path</c> is used instead.
/// </param>
/// <returns>The existing indexer, or the indexer definition returned by the create call.</returns>
/// <remarks>
/// Service failures now surface as the original exception on the returned task rather than
/// wrapped in an <see cref="AggregateException"/> produced by blocking on <c>.Result</c>.
/// </remarks>
public async Task<Indexer> CreateIndexerIfNotExists(string name, string datasourceName, string indexName, FieldMapping[] mapping)
{
    if (await SearchServiceClient.Indexers.ExistsAsync(name))
    {
        return await SearchServiceClient.Indexers.GetAsync(name);
    }

    // Honour the caller's mappings; previously this parameter was silently ignored.
    FieldMapping[] fieldMappings = mapping != null && mapping.Length > 0
        ? mapping
        : new[]
        {
            // The key cannot be a URL, therefore encode it.
            new FieldMapping("metadata_storage_path", FieldMappingFunction.Base64Encode())
        };

    Indexer indexer = await SearchServiceClient.Indexers.CreateOrUpdateAsync(
        new Indexer(name, datasourceName, indexName, fieldMappings: fieldMappings));

    // Same one-second pauses as before, but without blocking a thread.
    await Task.Delay(1000);
    await SearchServiceClient.Indexers.RunAsync(name);
    await Task.Delay(1000);

    return indexer;
}
/// <summary>
/// Builds an indexer definition with a generated name that targets <c>DataSourceName</c> and
/// <c>TargetIndexName</c>, runs on a one-day interval, and carries one field mapping per
/// mapping function.
/// </summary>
/// <returns>A new, not yet created, indexer definition.</returns>
public Indexer CreateTestIndexer()
{
    var indexer = new Indexer(SearchTestUtilities.GenerateName(), DataSourceName, TargetIndexName);

    // startTime is deliberately left unset: it is an absolute time that must be within 24 hours
    // of the current time, which does not play well with recorded mock payloads.
    indexer.Schedule = new IndexingSchedule(interval: TimeSpan.FromDays(1));

    // Cover all the field mapping functions, even where they make no sense for the test DB.
    indexer.FieldMappings = new FieldMapping[]
    {
        new FieldMapping("feature_class", FieldMappingFunction.Base64Encode()),
        new FieldMapping("state_alpha", "state"),
        new FieldMapping("county_name", FieldMappingFunction.ExtractTokenAtPosition(" ", 0)),
        new FieldMapping("elev_in_m", "elevation"),
        new FieldMapping("map_name", FieldMappingFunction.Base64Decode()),
        new FieldMapping("history", FieldMappingFunction.JsonArrayToStringCollection())
    };

    return indexer;
}
/// <summary>
/// Creates or updates the blob storage data source, recreates the
/// <c>hotelreviews-blob-indexer</c> indexer against <paramref name="indexName"/> with the given
/// skillset, and starts an indexer run.
/// </summary>
/// <param name="indexName">Name of the index the indexer writes to.</param>
/// <param name="searchService">Client used for all data source and indexer calls.</param>
/// <param name="skillSet">Skillset whose name is attached to the indexer.</param>
private static async Task CreateAndRunBlobIndexer(string indexName, SearchServiceClient searchService, Skillset skillSet)
{
    DataSource blobSource = DataSource.AzureBlobStorage(
        name: configuration["BlobStorageAccountName"],
        storageConnectionString: configuration["BlobStorageConnectionString"],
        containerName: "qnateam7container");

    // An existing data source is updated rather than deleted, so a changed connection
    // string is picked up.
    await searchService.DataSources.CreateOrUpdateAsync(blobSource);

    Console.WriteLine("Creating Blob Storage indexer...\n");

    IDictionary<string, object> indexerConfiguration = new Dictionary<string, object>
    {
        { "dataToExtract", "contentAndMetadata" },
        { "imageAction", "generateNormalizedImages" }
    };

    // Blob properties -> index fields. The storage path is Base64-encoded into "id" and is
    // also copied unencoded into "url".
    var sourceMappings = new List<FieldMapping>
    {
        new FieldMapping("metadata_storage_path", "id", FieldMappingFunction.Base64Encode()),
        new FieldMapping("metadata_storage_path", "url"),
        new FieldMapping("metadata_storage_name", "file_name"),
        new FieldMapping("content", "content"),
        new FieldMapping("metadata_storage_size", "size"),
        new FieldMapping("metadata_storage_last_modified", "last_modified")
    };

    // Enrichment outputs -> index fields. A mapping from "/document/pages/*/keyPhrases/*" to
    // "keyPhrases" existed only as commented-out code and is not part of this list.
    var enrichmentMappings = new List<FieldMapping>
    {
        new FieldMapping(sourceFieldName: "/document/content/persons/*", targetFieldName: "persons"),
        new FieldMapping(sourceFieldName: "/document/sentiment", targetFieldName: "sentiment"),
        new FieldMapping(sourceFieldName: "/document/merged_text", targetFieldName: "merged_text"),
        new FieldMapping(sourceFieldName: "/document/greeting", targetFieldName: "greeting"),
        new FieldMapping(sourceFieldName: "/document/top_10_words", targetFieldName: "top_10_words")
    };

    var indexingParameters = new IndexingParameters(
        maxFailedItems: -1,
        maxFailedItemsPerBatch: -1,
        configuration: indexerConfiguration);

    var blobIndexer = new Indexer(
        name: "hotelreviews-blob-indexer",
        dataSourceName: blobSource.Name,
        targetIndexName: indexName,
        fieldMappings: sourceMappings,
        outputFieldMappings: enrichmentMappings,
        skillsetName: skillSet.Name,
        parameters: indexingParameters,
        schedule: new IndexingSchedule(TimeSpan.FromDays(1)));

    // An indexer with this name is deleted (not reset) before being created again.
    if (await searchService.Indexers.ExistsAsync(blobIndexer.Name))
    {
        await searchService.Indexers.DeleteAsync(blobIndexer.Name);
    }

    await searchService.Indexers.CreateOrUpdateAsync(blobIndexer);

    Console.WriteLine("Running Blob Storage indexer...\n");

    try
    {
        await searchService.Indexers.RunAsync(blobIndexer.Name);
    }
    catch (CloudException e) when ((int)e.Response.StatusCode == 429)
    {
        // Only an HTTP 429 response is handled here; any other failure propagates.
        Console.WriteLine("Failed to run indexer: {0}", e.Response.Content);
    }
}
/// <summary>
/// Sample entry point: wires up logging, configuration and blob storage, uploads the documents
/// found in the configured folder, creates the index, data source and indexer for the
/// container, and logs the results of a sample search.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static async Task Main(string[] args)
{
    var services = new ServiceCollection();
    services.AddLogging(l =>
    {
        l.AddConsole();
    });

    var builder = new ConfigurationBuilder()
        .AddJsonFile("appsettings.json", false, true)        // load base settings
        .AddJsonFile("appsettings.local.json", true, true);  // load local settings
    var configuration = builder.Build();

    services.AddBlobStorage(new BlobSettings(
        configuration.GetValue<string>("BlobStorage:ConnectionString"),
        configuration.GetValue<string>("BlobStorage:SearchServiceName"),
        configuration.GetValue<string>("BlobStorage:ApiKey")));

    IServiceProvider provider = services.BuildServiceProvider();
    var storage = provider.GetService<IIndexedBlobStorage>();
    var logger = provider.GetService<ILoggerFactory>().CreateLogger("info");

    // 1. Create the container.
    var container = storage.GetOrCreateContainer(configuration.GetValue<string>("BlobStorage:Container"));
    var indexName = $"{container.Name}-index";

    // 2. Create blobs from the documents.
    var documents = new DirectoryInfo(configuration.GetValue<string>("AppSettings:DocumentFolder"))
        .GetFiles("*.*", SearchOption.AllDirectories)
        .Where(d => d.Extension == ".docx" || d.Extension == ".doc" || d.Extension == ".pdf");

    // Await each upload. The previous List.ForEach(async ...) created async-void lambdas, so
    // the later steps could start before the uploads finished and upload failures could not
    // be observed by this method.
    foreach (var document in documents)
    {
        await container.UploadDocument(document.FullName, metaData: new List<KeyValuePair<string, string>>
        {
            new KeyValuePair<string, string>("subject", "math")
        });
    }

    // 3. Add the index.
    var index = await container.GetOrCreateIndex(new Index(indexName, new List<Field>())
        .AddDefaultWordFields("nl")
        .AddField(new Field("subject", AnalyzerName.NlLucene)
        {
            Type = DataType.String,
            IsFilterable = true
        }));

    // 4. Add the data source and the indexer.
    var datasource = await container
        .CreateDatasourceIfNotExists($"{container.Name}-datasource");

    await datasource.CreateIndexerIfNotExists($"{container.Name}-indexer", $"{container.Name}-datasource", index.Name,
        new List<FieldMapping>
        {
            // The key cannot be a URL, therefore encode it.
            new FieldMapping("metadata_storage_path", FieldMappingFunction.Base64Encode())
        }.ToArray());

    // 5. Search for words within a subject.
    var searchResult = container.Search(indexName, "Something to search for");
    searchResult.ForEach(s => logger.LogInformation($"result found: {s}"));
}