/// <summary>
/// Indexes <paramref name="content"/> through a freshly configured <c>ExtractorHost</c> and
/// asserts that a "REGEX" search for <paramref name="keywordTest"/> yields at least one result.
/// </summary>
/// <param name="content">Indexing requests handed to the agent's <c>IndexDocuments</c> call.</param>
/// <param name="keywordTest">Search text passed to the "REGEX" <c>TextSearch</c>.</param>
/// <param name="cache">
/// Cache to index into. When <c>null</c>, a new <c>DefaultIntermediateCacheProvider</c> is created.
/// The cache is cleared before indexing and is also cast to <c>ISearchProvider</c>, so the cast
/// fails at run time for a cache that does not implement that interface.
/// </param>
private static void ExecuteExtractiontest(IEnumerable<TextIndexingRequest> content, string keywordTest, ITextCache cache = null)
{
    var host = new ExtractorHost();

    // Generic-type registrations.
    host.RegisterScopedExtractor<PassthroughExtractor>();
    host.RegisterScopedExtractor<DefaultTdfExtractor>();

    // Testing instance approach
    host.RegisterScopedExtractor(new DefaultHtmlExtractor());

    // Testing factory approach
    host.RegisterScopedExtractor(p => new DefaultXmlExtractor(p));

    host.Initialize();

    cache = cache ?? new DefaultIntermediateCacheProvider();
    cache.Clear();

    // The same object is used as both the text cache and the search provider.
    var agent = new IndexerAgent(host, cache, (ISearchProvider)cache);

    // This helper is synchronous, so the indexing and search tasks are blocked on in turn.
    var agentWorker = agent.IndexDocuments(CancellationToken.None, content);
    agentWorker.GetAwaiter().GetResult();

    var resultsWorker = agent.Search(
        CancellationToken.None,
        new TextSearch(CultureInfo.InvariantCulture, "REGEX", keywordTest));
    var results = resultsWorker.GetAwaiter().GetResult().ToArray();

    Console.WriteLine(results.Length);
    Assert.IsTrue(results.Length > 0);
}
/// <summary>
/// Sends a "text/plain" request through a host that has the passthrough and HTML extractors
/// registered, then checks that the extraction points contain each expected marker.
/// </summary>
public void ExtractorHostTestTextPlain()
{
    var extractorHost = new ExtractorHost();
    extractorHost.RegisterScopedExtractor<PassthroughExtractor>();
    extractorHost.RegisterScopedExtractor<DefaultHtmlExtractor>();
    extractorHost.Initialize();

    using (extractorHost.BeginServiceScope(out var scopedExtractor))
    {
        var requestDetails = new IndexingRequestDetails(
            CultureInfo.InvariantCulture,
            Encoding.Unicode,
            "text/plain",
            string.Empty,
            string.Empty);

        var extraction = scopedExtractor.ExtractText(requestDetails, GetTestObjectStream());

        // Each marker must appear in at least one extracted segment; checked in this order.
        var expectedMarkers = new[] { "1", "2", "3", "_", "4", "html" };
        foreach (var marker in expectedMarkers)
        {
            Assert.IsTrue(extraction.ExtractionPointDetails.Any(point => point.Segment.Contains(marker)));
        }
    }
}
/// <summary>
/// Creates an agent that keeps references to the supplied extractor host, text cache and
/// search provider. The arguments are stored as given; no validation is performed here.
/// </summary>
/// <param name="extractorHost">Stored in <c>_extractorHost</c>.</param>
/// <param name="cache">Stored in <c>_cache</c>.</param>
/// <param name="searchProvider">Stored in <c>_searchProvider</c>.</param>
public IndexerAgent(ExtractorHost extractorHost, ITextCache cache, ISearchProvider searchProvider)
{
    this._searchProvider = searchProvider;
    this._cache = cache;
    this._extractorHost = extractorHost;
}