Beispiel #1
0
        // Round-trip test: creates an indexer that uses each field mapping function, reads it back
        // from the service and checks that the returned definition equals the one that was sent.
        public void CanRoundtripIndexerWithFieldMappingFunctions() =>
        Run(() =>
        {
            var expected = new Indexer(SearchTestUtilities.GenerateName(), Data.DataSourceName, Data.TargetIndexName);

            // Exercise all the field mapping functions and their parameters, even where they make
            // no sense for the contents of the test DB.
            expected.FieldMappings = new[]
            {
                new FieldMapping("feature_id", "a", FieldMappingFunction.Base64Encode()),
                new FieldMapping("feature_id", "b", FieldMappingFunction.Base64Encode(useHttpServerUtilityUrlTokenEncode: true)),
                new FieldMapping("feature_id", "c", FieldMappingFunction.ExtractTokenAtPosition(delimiter: " ", position: 0)),
                new FieldMapping("feature_id", "d", FieldMappingFunction.Base64Decode()),
                new FieldMapping("feature_id", "e", FieldMappingFunction.Base64Decode(useHttpServerUtilityUrlTokenDecode: false)),
                new FieldMapping("feature_id", "f", FieldMappingFunction.JsonArrayToStringCollection())
            };

            SearchServiceClient client = Data.GetSearchServiceClient();

            // The target fields must exist in the index before field mappings can reference them.
            Index targetIndex = client.Indexes.Get(Data.TargetIndexName);
            var extendedFields = targetIndex.Fields.ToList();
            foreach (string targetField in new[] { "a", "b", "c", "d", "e", "f" })
            {
                extendedFields.Add(new Field(targetField, DataType.String));
            }

            targetIndex.Fields = extendedFields;
            client.Indexes.CreateOrUpdate(targetIndex);

            client.Indexers.Create(expected);

            Indexer actual = client.Indexers.Get(expected.Name);
            AssertIndexersEqual(expected, actual);
        });
Beispiel #2
0
        /// <summary>
        /// Creates the "books" index, the "booksdatasource" Azure SQL data source and the
        /// "booksindexer" indexer when they do not exist yet, then stores an index client for
        /// the "books" index in <c>_indexClient</c>.
        /// </summary>
        private static void IndexBooks()
        {
            const string dataSourceName = "booksdatasource";
            const string indexerName    = "booksindexer";

            // Index schema is derived from the Book model type.
            var booksIndex = new Index
            {
                Name   = "books",
                Fields = FieldBuilder.BuildForType<Book>()
            };

            bool indexExists = _searchClient.Indexes.Exists(booksIndex.Name);
            if (!indexExists)
            {
                _searchClient.Indexes.Create(booksIndex);
            }

            bool dataSourceExists = _searchClient.DataSources.Exists(dataSourceName);
            if (!dataSourceExists)
            {
                // Connection string comes from the "BookContext" entry of the application configuration.
                var booksSource = new DataSource
                {
                    Name        = dataSourceName,
                    Type        = DataSourceType.AzureSql,
                    Credentials = new DataSourceCredentials(ConfigurationManager.ConnectionStrings["BookContext"].ToString()),
                    Container   = new DataContainer("Books")
                };
                _searchClient.DataSources.CreateOrUpdate(booksSource);
            }

            bool indexerExists = _searchClient.Indexers.Exists(indexerName);
            if (!indexerExists)
            {
                // The "Name" source field is passed through the base64Decode mapping function.
                FieldMapping[] mappings = { new FieldMapping("Name", FieldMappingFunction.Base64Decode()) };
                var booksIndexer        = new Indexer(indexerName, dataSourceName, booksIndex.Name, fieldMappings: mappings);
                _searchClient.Indexers.Create(booksIndexer);
            }

            _indexClient = _searchClient.Indexes.GetClient(booksIndex.Name);
        }
        /// <summary>
        /// Creates or updates the blob data source, the search index and the indexer that
        /// connects them, using the names and settings exposed by <c>ConfigurationReader</c>.
        /// </summary>
        /// <returns>A task that completes once the indexer has been created or updated.</returns>
        public async Task EnsureIndexerCreatedAsync()
        {
            // Index schema is derived from the IndexDocument model type.
            var index = new Index
            {
                Name   = ConfigurationReader.SearchIndexName,
                Fields = FieldBuilder.BuildForType<IndexDocument>()
            };

            var blobSource = new DataSource
            {
                Name        = ConfigurationReader.SearchDataSourceName,
                Credentials = new DataSourceCredentials(ConfigurationReader.BlobStorageConnectionString),
                Type        = DataSourceType.AzureBlob,
                Container   = new DataContainer(ConfigurationReader.BlobStorageContainerName)
            };

            // Data source and index do not depend on each other, so both requests run concurrently.
            Task dataSourceRequest = ServiceClient.DataSources.CreateOrUpdateAsync(blobSource);
            Task indexRequest      = ServiceClient.Indexes.CreateOrUpdateAsync(index);
            await Task.WhenAll(dataSourceRequest, indexRequest);

            // Document id = first "."-delimited token of the blob name.
            var idMapping = new FieldMapping
            {
                SourceFieldName = "metadata_storage_name", // source field name for azure blob name
                TargetFieldName = nameof(IndexDocument.Id),
                MappingFunction = new FieldMappingFunction
                {
                    Name       = "extractTokenAtPosition",
                    Parameters = new Dictionary<string, object>
                    {
                        { "delimiter", "." },
                        { "position", 0 }
                    }
                }
            };

            var contentMapping = new FieldMapping
            {
                SourceFieldName = "content", // source field name for azure blob content
                TargetFieldName = nameof(IndexDocument.Content)
            };

            string indexerName = ConfigurationReader.SearchIndexerName;

            var indexingParameters = new IndexingParameters();
            indexingParameters.DoNotFailOnUnsupportedContentType();
            indexingParameters.IndexFileNameExtensions(ConfigurationReader.SearchIndexerExtensions);
            indexingParameters.MaxFailedItems         = -1;
            indexingParameters.MaxFailedItemsPerBatch = -1;

            var indexer = new Indexer
            {
                Name            = indexerName,
                DataSourceName  = blobSource.Name,
                TargetIndexName = index.Name,
                Parameters      = indexingParameters,
                FieldMappings   = new List<FieldMapping> { contentMapping, idMapping },
                Schedule        = new IndexingSchedule(TimeSpan.FromMinutes(5))
            };

            await ServiceClient.Indexers.CreateOrUpdateAsync(indexer);
        }
 /// <summary>
 /// Returns the indexer called <paramref name="name"/>. When no such indexer exists it is
 /// created first, then started, with a one-second pause before and after the run request.
 /// </summary>
 /// <param name="name">Name of the indexer to look up or create.</param>
 /// <param name="datasourceName">Name of the data source the new indexer reads from.</param>
 /// <param name="indexName">Name of the index the new indexer writes to.</param>
 /// <param name="mapping">
 /// Field mappings for the new indexer. When null or empty, a single mapping that
 /// base64-encodes "metadata_storage_path" is used instead.
 /// </param>
 /// <returns>The existing indexer, or the newly created one.</returns>
 /// <remarks>
 /// The creation path is awaited end to end instead of blocking a thread-pool thread on
 /// <c>Task.Run(...).Result</c>, so service failures surface as the original exception
 /// rather than wrapped in an <see cref="AggregateException"/>.
 /// </remarks>
 public async Task<Indexer> CreateIndexerIfNotExists(string name, string datasourceName, string indexName, FieldMapping[] mapping)
 {
     if (await SearchServiceClient.Indexers.ExistsAsync(name))
     {
         return await SearchServiceClient.Indexers.GetAsync(name);
     }

     // Honour the caller's mappings; fall back to the previous hard-coded default otherwise.
     FieldMapping[] fieldMappings = mapping != null && mapping.Length > 0
         ? mapping
         : new[]
         {
             new FieldMapping("metadata_storage_path", FieldMappingFunction.Base64Encode()) // key cannot be a url, therefore encode it.
         };

     Indexer indexer = await SearchServiceClient.Indexers.CreateOrUpdateAsync(
         new Indexer(name, datasourceName, indexName, fieldMappings: fieldMappings));

     // Same pauses as before, but without blocking a thread while waiting.
     await Task.Delay(1000);
     await SearchServiceClient.Indexers.RunAsync(name);
     await Task.Delay(1000);

     return indexer;
 }
 /// <summary>
 /// Builds an indexer definition with a generated name, a daily schedule and one field
 /// mapping per mapping function, targeting <c>DataSourceName</c> and <c>TargetIndexName</c>.
 /// </summary>
 /// <returns>The indexer definition; nothing is sent to the service here.</returns>
 public Indexer CreateTestIndexer()
 {
     var indexer = new Indexer(SearchTestUtilities.GenerateName(), DataSourceName, TargetIndexName);

     // startTime is deliberately left unset: it is an absolute time that must lie within
     // 24 hours of the current time, which does not work with recorded mock payloads.
     indexer.Schedule = new IndexingSchedule(interval: TimeSpan.FromDays(1));

     // Cover every field mapping function, even where it makes no sense for the test DB.
     indexer.FieldMappings = new FieldMapping[]
     {
         new FieldMapping("feature_class", FieldMappingFunction.Base64Encode()),
         new FieldMapping("state_alpha", "state"),
         new FieldMapping("county_name", FieldMappingFunction.ExtractTokenAtPosition(" ", 0)),
         new FieldMapping("elev_in_m", "elevation"),
         new FieldMapping("map_name", FieldMappingFunction.Base64Decode()),
         new FieldMapping("history", FieldMappingFunction.JsonArrayToStringCollection())
     };

     return indexer;
 }
Beispiel #6
0
 /// <summary>
 /// Initializes a new instance of the FieldMapping class from its three components.
 /// </summary>
 /// <param name="sourceFieldName">Value stored in <c>SourceFieldName</c>.</param>
 /// <param name="targetFieldName">Value stored in <c>TargetFieldName</c>.</param>
 /// <param name="mappingFunction">Value stored in <c>MappingFunction</c>.</param>
 internal FieldMapping(string sourceFieldName, string targetFieldName, FieldMappingFunction mappingFunction)
 {
     this.SourceFieldName = sourceFieldName;
     this.TargetFieldName = targetFieldName;
     this.MappingFunction = mappingFunction;
 }
Beispiel #7
0
        /// <summary>
        /// Builds an indexer named "&lt;index name&gt;-indexer" for the given data source, skillset and
        /// index, deletes any indexer that already has that name, and creates the new one.
        /// </summary>
        /// <param name="indexerClient">Client used to look up, delete and create the indexer.</param>
        /// <param name="dataSource">Data source connection whose name the indexer references.</param>
        /// <param name="skillSet">Skillset whose name the indexer references.</param>
        /// <param name="index">Target index; its name also determines the indexer name and description.</param>
        /// <returns>The locally built indexer definition (not the service response).</returns>
        public static async Task<SearchIndexer> CreateIndexerAsync(SearchIndexerClient indexerClient, SearchIndexerDataSourceConnection dataSource, SearchIndexerSkillset skillSet, SearchIndex index)
        {
            // -1 for both limits, plus text parsing of blob content and metadata.
            var parameters = new IndexingParameters
            {
                MaxFailedItems                  = -1,
                MaxFailedItemsPerBatch          = -1,
                IndexingParametersConfiguration = new IndexingParametersConfiguration
                {
                    DataToExtract = BlobIndexerDataToExtract.ContentAndMetadata,
                    ParsingMode   = BlobIndexerParsingMode.Text
                }
            };

            var indexer = new SearchIndexer(index.Name + "-indexer", dataSource.Name, index.Name)
            {
                Description  = index.Name + " Indexer",
                SkillsetName = skillSet.Name,
                Parameters   = parameters
            };

            // Map metadata_storage_path onto itself through base64Encode with URL-token encoding.
            var encodePath = new FieldMappingFunction("base64Encode");
            encodePath.Parameters.Add("useHttpServerUtilityUrlTokenEncode", true);

            var pathMapping = new FieldMapping("metadata_storage_path")
            {
                TargetFieldName = "metadata_storage_path",
                MappingFunction = encodePath
            };
            indexer.FieldMappings.Add(pathMapping);

            // Disabled mappings, kept for reference:
            //indexer.FieldMappings.Add(new FieldMapping("metadata_storage_name")
            //{
            //    TargetFieldName = "FileName"
            //});

            //indexer.FieldMappings.Add(new FieldMapping("content")
            //{
            //    TargetFieldName = "Content"
            //});

            //indexer.OutputFieldMappings.Add(new FieldMapping("/document/pages/*/organizations/*")
            //{
            //    TargetFieldName = "organizations"
            //});
            //indexer.OutputFieldMappings.Add(new FieldMapping("/document/pages/*/keyPhrases/*")
            //{
            //    TargetFieldName = "keyPhrases"
            //});
            //indexer.OutputFieldMappings.Add(new FieldMapping("/document/languageCode")
            //{
            //    TargetFieldName = "languageCode"
            //});

            // Remove a previous indexer of the same name so the create call below starts clean.
            try
            {
                await indexerClient.GetIndexerAsync(indexer.Name);

                await indexerClient.DeleteIndexerAsync(indexer.Name);
            }
            catch (RequestFailedException notFound) when (notFound.Status == 404)
            {
                // A 404 means there was no indexer with this name, so there is nothing to delete.
            }

            try
            {
                await indexerClient.CreateIndexerAsync(indexer);
            }
            catch (RequestFailedException createFailure)
            {
                Console.WriteLine("Failed to create the indexer\n Exception message: {0}\n", createFailure.Message);
                ExitProgram("Cannot continue without creating an indexer");
            }

            return indexer;
        }
        /// <summary>
        /// Creates or updates the blob storage data source, recreates the
        /// "hotelreviews-blob-indexer" indexer against <paramref name="indexName"/> and starts it.
        /// </summary>
        /// <param name="indexName">Name of the index the indexer writes to.</param>
        /// <param name="searchService">Service client used for all data source and indexer calls.</param>
        /// <param name="skillSet">Skillset whose name is attached to the indexer.</param>
        /// <returns>A task that completes after the run request has been issued.</returns>
        private static async Task CreateAndRunBlobIndexer(string indexName, SearchServiceClient searchService, Skillset skillSet)
        {
            DataSource blobSource = DataSource.AzureBlobStorage(
                name: configuration["BlobStorageAccountName"],
                storageConnectionString: configuration["BlobStorageConnectionString"],
                containerName: "qnateam7container");

            // An existing blob data source is not deleted; CreateOrUpdate refreshes it in place,
            // which also picks up a changed connection string.
            await searchService.DataSources.CreateOrUpdateAsync(blobSource);

            Console.WriteLine("Creating Blob Storage indexer...\n");

            IDictionary<string, object> indexerConfiguration = new Dictionary<string, object>
            {
                { "dataToExtract", "contentAndMetadata" },
                { "imageAction", "generateNormalizedImages" }
            };

            // Blob metadata/content -> index fields. The blob path is base64-encoded into "id"
            // and additionally copied as-is into "url".
            var fieldMappings = new List<FieldMapping>
            {
                new FieldMapping("metadata_storage_path", "id", FieldMappingFunction.Base64Encode()),
                new FieldMapping("metadata_storage_path", "url"),
                new FieldMapping("metadata_storage_name", "file_name"),
                new FieldMapping("content", "content"),
                new FieldMapping("metadata_storage_size", "size"),
                new FieldMapping("metadata_storage_last_modified", "last_modified")
            };

            // Enrichment tree paths -> index fields.
            var outputFieldMappings = new List<FieldMapping>
            {
                new FieldMapping(sourceFieldName: "/document/content/persons/*", targetFieldName: "persons"),
                // Disabled: new FieldMapping(sourceFieldName: "/document/pages/*/keyPhrases/*", targetFieldName: "keyPhrases"),
                new FieldMapping(sourceFieldName: "/document/sentiment", targetFieldName: "sentiment"),
                new FieldMapping(sourceFieldName: "/document/merged_text", targetFieldName: "merged_text"),
                new FieldMapping(sourceFieldName: "/document/greeting", targetFieldName: "greeting"),
                new FieldMapping(sourceFieldName: "/document/top_10_words", targetFieldName: "top_10_words")
            };

            var indexingParameters = new IndexingParameters(
                maxFailedItems: -1,
                maxFailedItemsPerBatch: -1,
                configuration: indexerConfiguration);

            var blobIndexer = new Indexer(
                name: "hotelreviews-blob-indexer",
                dataSourceName: blobSource.Name,
                targetIndexName: indexName,
                fieldMappings: fieldMappings,
                outputFieldMappings: outputFieldMappings,
                skillsetName: skillSet.Name,
                parameters: indexingParameters,
                schedule: new IndexingSchedule(TimeSpan.FromDays(1)));

            // An existing indexer of the same name is deleted and recreated.
            // (Alternative left disabled: await searchService.Indexers.ResetAsync(blobIndexer.Name);)
            if (await searchService.Indexers.ExistsAsync(blobIndexer.Name))
            {
                await searchService.Indexers.DeleteAsync(blobIndexer.Name);
            }

            await searchService.Indexers.CreateOrUpdateAsync(blobIndexer);

            Console.WriteLine("Running Blob Storage indexer...\n");

            try
            {
                await searchService.Indexers.RunAsync(blobIndexer.Name);
            }
            catch (CloudException throttled) when (throttled.Response.StatusCode == (HttpStatusCode)429)
            {
                // HTTP 429 on the run request is reported and otherwise ignored.
                Console.WriteLine("Failed to run indexer: {0}", throttled.Response.Content);
            }
        }
        /// <summary>
        /// Builds the "demoindexer" indexer for the given data source, skillset and index,
        /// deletes any indexer that already has that name, and creates the new one.
        /// </summary>
        /// <param name="indexerClient">Client used to look up, delete and create the indexer.</param>
        /// <param name="dataSource">Data source connection whose name the indexer references.</param>
        /// <param name="skillSet">Skillset whose name the indexer references.</param>
        /// <param name="index">Index whose name the indexer targets.</param>
        /// <returns>The locally built indexer definition (not the service response).</returns>
        private static SearchIndexer CreateDemoIndexer(SearchIndexerClient indexerClient, SearchIndexerDataSourceConnection dataSource, SearchIndexerSkillset skillSet, SearchIndex index)
        {
            var parameters = new IndexingParameters
            {
                MaxFailedItems         = -1,
                MaxFailedItemsPerBatch = -1,
            };
            parameters.Configuration.Add("dataToExtract", "contentAndMetadata");
            parameters.Configuration.Add("imageAction", "generateNormalizedImages");

            var indexer = new SearchIndexer("demoindexer", dataSource.Name, index.Name)
            {
                Description  = "Demo Indexer",
                SkillsetName = skillSet.Name,
                Parameters   = parameters
            };

            // Blob path -> "id" through base64Encode with URL-token encoding.
            var encodePath = new FieldMappingFunction("base64Encode");
            encodePath.Parameters.Add("useHttpServerUtilityUrlTokenEncode", true);

            var idMapping = new FieldMapping("metadata_storage_path")
            {
                TargetFieldName = "id",
                MappingFunction = encodePath
            };
            var contentMapping = new FieldMapping("content") { TargetFieldName = "content" };

            indexer.FieldMappings.Add(idMapping);
            indexer.FieldMappings.Add(contentMapping);

            // Enrichment paths -> index fields.
            var organizations = new FieldMapping("/document/pages/*/organizations/*") { TargetFieldName = "organizations" };
            var keyPhrases    = new FieldMapping("/document/pages/*/keyPhrases/*") { TargetFieldName = "keyPhrases" };
            var languageCode  = new FieldMapping("/document/languageCode") { TargetFieldName = "languageCode" };

            indexer.OutputFieldMappings.Add(organizations);
            indexer.OutputFieldMappings.Add(keyPhrases);
            indexer.OutputFieldMappings.Add(languageCode);

            // Remove a previous indexer of the same name so the create call below starts clean.
            try
            {
                indexerClient.GetIndexer(indexer.Name);
                indexerClient.DeleteIndexer(indexer.Name);
            }
            catch (RequestFailedException notFound) when (notFound.Status == 404)
            {
                // A 404 means there was no indexer with this name, so there is nothing to delete.
            }

            try
            {
                indexerClient.CreateIndexer(indexer);
            }
            catch (RequestFailedException createFailure)
            {
                Console.WriteLine("Failed to create the indexer\n Exception message: {0}\n", createFailure.Message);
                ExitProgram("Cannot continue without creating an indexer");
            }

            return indexer;
        }
 /// <summary>
 /// Initializes a new instance of the FieldMapping class whose target field name is the
 /// same as its source field name.
 /// </summary>
 /// <param name="sourceFieldName">The name of the field in the data source; also passed as the target field name.</param>
 /// <param name="mappingFunction">A function to apply to each source field value before indexing.</param>
 public FieldMapping(string sourceFieldName, FieldMappingFunction mappingFunction)
     : this(sourceFieldName, sourceFieldName, mappingFunction)
 {
     // All initialization happens in the three-argument constructor.
 }
Beispiel #11
0
        /// <summary>
        /// Sample entry point: uploads the documents found in the configured folder to a blob
        /// container, makes sure an index, data source and indexer exist for that container,
        /// then runs a search and logs each result.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <returns>A task that completes when the search results have been logged.</returns>
        static async Task Main(string[] args)
        {
            var services = new ServiceCollection();

            services.AddLogging(l =>
            {
                l.AddConsole();
            });
            var builder = new ConfigurationBuilder()
                          .AddJsonFile("appsettings.json", false, true)       //load base settings
                          .AddJsonFile("appsettings.local.json", true, true); //load local settings
            var configuration = builder.Build();

            services.AddBlobStorage(new BlobSettings(
                                        configuration.GetValue<string>("BlobStorage:ConnectionString"),
                                        configuration.GetValue<string>("BlobStorage:SearchServiceName"),
                                        configuration.GetValue<string>("BlobStorage:ApiKey")));
            IServiceProvider provider = services.BuildServiceProvider();
            var storage = provider.GetService<IIndexedBlobStorage>();
            var logger  = provider.GetService<ILoggerFactory>().CreateLogger("info");

            // 1. create container
            var container = storage.GetOrCreateContainer(configuration.GetValue<string>("BlobStorage:Container"));
            var indexName = $"{container.Name}-index";

            // 2. create blobs with documents
            // Each upload is started as a real Task and all of them are awaited. The previous
            // List.ForEach(async ...) produced async void lambdas, so Main continued (and could
            // reach indexing/search) before the uploads finished, and upload failures could not
            // be observed.
            var uploads = new DirectoryInfo(configuration.GetValue<string>("AppSettings:DocumentFolder"))
                          .GetFiles("*.*", SearchOption.AllDirectories)
                          .Where(d => d.Extension == ".docx" || d.Extension == ".doc" || d.Extension == ".pdf")
                          .Select(async file =>
                          {
                              await container.UploadDocument(file.FullName, metaData: new List<KeyValuePair<string, string>>
                              {
                                  new KeyValuePair<string, string>("subject", "math")
                              });
                          })
                          .ToList(); // materialize so every upload is started before waiting

            await Task.WhenAll(uploads);

            // 3. add index
            var index = await container.GetOrCreateIndex(
                new Index(indexName, new List<Field>())
                .AddDefaultWordFields("nl")
                .AddField(new Field("subject", AnalyzerName.NlLucene)
                {
                    Type         = DataType.String,
                    IsFilterable = true
                }));

            // 4. add datasource + indexer
            var datasource = await container
                             .CreateDatasourceIfNotExists($"{container.Name}-datasource");

            await datasource.CreateIndexerIfNotExists($"{container.Name}-indexer", $"{container.Name}-datasource", index.Name, new List<FieldMapping>
            {
                new FieldMapping("metadata_storage_path", FieldMappingFunction.Base64Encode())     //key cannot be an url therefore Encode it.
            }.ToArray());

            // 5. Search for words within a subject.
            var searchResult = container.Search(indexName, "Something to search for");

            searchResult.ForEach(s => logger.LogInformation($"result found: {s}"));
        }