Beispiel #1
0
        /// <summary>
        /// Shared test driver: builds an extractor host, indexes <paramref name="content"/>
        /// through an <c>IndexerAgent</c>, then searches for <paramref name="keywordTest"/>
        /// and asserts that at least one result comes back.
        /// </summary>
        /// <param name="content">Indexing requests handed to the agent.</param>
        /// <param name="keywordTest">Search expression passed to the <c>TextSearch</c> query.</param>
        /// <param name="cache">
        /// Cache to index into; when <c>null</c> a <c>DefaultIntermediateCacheProvider</c> is created.
        /// The cache is cleared before use and is also cast to <c>ISearchProvider</c>, so the
        /// supplied instance must implement that interface as well.
        /// </param>
        private static void ExecuteExtractiontest(IEnumerable <TextIndexingRequest> content,
                                                  string keywordTest,
                                                  ITextCache cache = null)
        {
            var extractorHost = new ExtractorHost();

            // Registration by type.
            extractorHost.RegisterScopedExtractor <PassthroughExtractor>();
            extractorHost.RegisterScopedExtractor <DefaultTdfExtractor>();
            // Registration by pre-built instance.
            extractorHost.RegisterScopedExtractor(new DefaultHtmlExtractor());
            // Registration by factory delegate.
            extractorHost.RegisterScopedExtractor(provider => new DefaultXmlExtractor(provider));
            extractorHost.Initialize();

            if (cache == null)
            {
                cache = new DefaultIntermediateCacheProvider();
            }

            // Start from an empty cache so earlier runs cannot satisfy the search.
            cache.Clear();

            var searchProvider = (ISearchProvider)cache;
            var indexer = new IndexerAgent(extractorHost, cache, searchProvider);

            // The helper is synchronous, so block until indexing has completed.
            indexer.IndexDocuments(CancellationToken.None, content)
                   .GetAwaiter()
                   .GetResult();

            // NOTE(review): "REGEX" presumably selects the regex search mode - confirm against TextSearch.
            var query = new TextSearch(CultureInfo.InvariantCulture, "REGEX", keywordTest);
            var matches = indexer.Search(CancellationToken.None, query)
                                 .GetAwaiter()
                                 .GetResult()
                                 .ToArray();

            Console.WriteLine(matches.Length);
            Assert.IsTrue(matches.Length > 0);
        }
        /// <summary>
        /// Registers the passthrough and HTML extractors, extracts the test object stream as
        /// <c>text/plain</c>, and asserts that each expected fragment appears in at least one
        /// extracted segment.
        /// </summary>
        public void ExtractorHostTestTextPlain()
        {
            var extractorHost = new ExtractorHost();

            extractorHost.RegisterScopedExtractor <PassthroughExtractor>();
            extractorHost.RegisterScopedExtractor <DefaultHtmlExtractor>();
            extractorHost.Initialize();

            using (extractorHost.BeginServiceScope(out var extractor))
            {
                var plainTextDetails = new IndexingRequestDetails(CultureInfo.InvariantCulture,
                                                                  Encoding.Unicode,
                                                                  "text/plain",
                                                                  string.Empty,
                                                                  string.Empty);
                var source = GetTestObjectStream();
                var extraction = extractor.ExtractText(plainTextDetails, source);

                // Checked in this order; the first missing fragment fails the test.
                var expectedFragments = new[] { "1", "2", "3", "_", "4", "html" };
                foreach (var fragment in expectedFragments)
                {
                    Assert.IsTrue(extraction.ExtractionPointDetails.Any(point => point.Segment.Contains(fragment)));
                }
            }
        }
 /// <summary>
 /// Creates an agent that stores the supplied collaborators in its private fields.
 /// </summary>
 /// <param name="extractorHost">Host assigned to <c>_extractorHost</c>.</param>
 /// <param name="cache">Text cache assigned to <c>_cache</c>.</param>
 /// <param name="searchProvider">Search provider assigned to <c>_searchProvider</c>.</param>
 public IndexerAgent(
     ExtractorHost extractorHost,
     ITextCache cache,
     ISearchProvider searchProvider)
 {
     // No validation is performed here; null arguments are stored as given.
     this._extractorHost = extractorHost;
     this._cache = cache;
     this._searchProvider = searchProvider;
 }