/// <summary>
/// Indexes the triples of the embedded configuration graph in batches of 100, flushing the indexer after
/// each batch, and checks that a subject taken from the batch can be found by searching the index.
/// </summary>
public void FullTextIncrementalIndexingLucene1()
{
    //Lucene Index
    Directory dir = new RAMDirectory();
    LuceneSubjectsIndexer indexer = new LuceneSubjectsIndexer(dir, new StandardAnalyzer(LuceneTestHarness.LuceneVersion), new DefaultIndexSchema());

    try
    {
        //Test Graph
        Graph g = new Graph();
        g.LoadFromEmbeddedResource("VDS.RDF.Configuration.configuration.ttl");

        //Try indexing in 100 Triple chunks
        String searchTerm = "http";
        for (int i = 0; i < g.Triples.Count; i += 100)
        {
            //Index the Triples
            List<Triple> ts = g.Triples.Skip(i).Take(100).ToList();
            foreach (Triple triple in ts)
            {
                indexer.Index(triple);
            }
            indexer.Flush();

            //Now do a search to check some of those triples got indexed
            //Pick the subject of the first triple in the batch whose object is a literal containing the search term
            INode targetNode = ts.Where(t => t.Object.NodeType == NodeType.Literal && t.Object.ToString().Contains(searchTerm))
                                 .Select(t => t.Subject)
                                 .FirstOrDefault();
            if (targetNode == null)
            {
                //Nothing in this batch matches the search term so there is nothing to verify
                continue;
            }

            Console.WriteLine("Picked " + targetNode.ToString(this._formatter) + " as search target with search term '" + searchTerm + "'");

            LuceneSearchProvider searcher = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, dir);
            try
            {
                //Materialise once so that printing and asserting look at the same results
                //and the search is not re-run for each enumeration
                List<IFullTextSearchResult> results = searcher.Match(searchTerm).ToList();
                foreach (IFullTextSearchResult r in results)
                {
                    Console.WriteLine("Got result " + r.Node.ToString(this._formatter) + " with score " + r.Score);
                }
                Assert.IsTrue(results.Any(r => r.Node.Equals(targetNode)), "Did not find expected node in search results");
            }
            finally
            {
                //Dispose even when the assertion fails
                searcher.Dispose();
            }
            Console.WriteLine();
        }
    }
    finally
    {
        //Dispose the indexer whether or not the test passed
        indexer.Dispose();
    }
}
/// <summary>
/// Checks that adding a graph to a <c>FullTextIndexedDataset</c> makes its subjects searchable and that
/// removing the graph again takes them back out of the search results.
/// </summary>
public void FullTextDatasetLucene1()
{
    //Lucene Index
    Directory dir = new RAMDirectory();
    LuceneSubjectsIndexer indexer = new LuceneSubjectsIndexer(dir, new StandardAnalyzer(LuceneTestHarness.LuceneVersion), new DefaultIndexSchema());

    try
    {
        //Test Dataset
        InMemoryDataset memData = new InMemoryDataset();
        FullTextIndexedDataset dataset = new FullTextIndexedDataset(memData, indexer, false);

        //Test Graph
        Graph g = new Graph();
        g.LoadFromEmbeddedResource("VDS.RDF.Configuration.configuration.ttl");

        dataset.AddGraph(g);
        Assert.True(dataset.HasGraph(g.BaseUri), "Graph should exist in dataset");

        //Now do a search to check all the triples got indexed
        String searchTerm = "http";
        //Snapshot the matching triples now so that both checks below use the same set
        //regardless of anything that happens to the graph when it is removed from the dataset
        List<Triple> searchTriples = g.Triples
            .Where(t => t.Object.NodeType == NodeType.Literal && t.Object.ToString().Contains(searchTerm))
            .ToList();

        LuceneSearchProvider searcher = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, dir);
        try
        {
            //The search term never changes so run the search once per phase rather than once per triple
            List<IFullTextSearchResult> results = searcher.Match(searchTerm).ToList();
            foreach (Triple searchTriple in searchTriples)
            {
                INode targetNode = searchTriple.Subject;
                Assert.True(results.Any(r => r.Node.Equals(targetNode)), "Did not find expected node " + targetNode.ToString(this._formatter) + " in search results using search term '" + searchTerm + "' (found " + results.Count + " results)");
                Console.WriteLine();
            }

            //Now remove the Graph
            dataset.RemoveGraph(g.BaseUri);

            //Repeat the search to check all the triples got unindexed
            results = searcher.Match(searchTerm).ToList();
            foreach (Triple searchTriple in searchTriples)
            {
                INode targetNode = searchTriple.Subject;
                Assert.False(results.Any(r => r.Node.Equals(targetNode)), "Found unexpected node " + targetNode.ToString(this._formatter) + " in search results using search term '" + searchTerm + "' (found " + results.Count + " results)");
                Console.WriteLine();
            }
        }
        finally
        {
            //Dispose even when an assertion fails
            searcher.Dispose();
        }
    }
    finally
    {
        //Dispose the indexer whether or not the test passed
        indexer.Dispose();
    }
}
/// <summary>
/// Creates a subjects indexer over the shared test harness index, indexes the test data with it and
/// disposes of the indexer afterwards.
/// </summary>
public void FullTextIndexCreationLuceneSubjects()
{
    //If construction fails there is nothing to dispose, so the indexer is created outside the try
    IFullTextIndexer subjectsIndexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    try
    {
        subjectsIndexer.Index(this.GetTestData());
    }
    finally
    {
        subjectsIndexer.Dispose();
    }
}
/// <summary>
/// Serializes the configuration of a <c>LuceneSubjectsIndexer</c> into a graph and checks that loading
/// the serialized node back through the <c>ConfigurationLoader</c> yields an indexer of the same type.
/// </summary>
public void FullTextConfigSerializeIndexerLuceneSubjects()
{
    ConfigurationSerializationContext context = new ConfigurationSerializationContext();
    INode obj = context.Graph.CreateBlankNode();
    context.NextSubject = obj;

    LuceneSubjectsIndexer indexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    try
    {
        indexer.SerializeConfiguration(context);
    }
    finally
    {
        //The indexer is opened on the shared harness index, so dispose of it even if serialization fails
        indexer.Dispose();
    }

    TestTools.ShowGraph(context.Graph);

    //Load the object back from the serialized configuration and check its type
    ConfigurationLoader.AutoConfigureObjectFactories(context.Graph);
    Object temp = ConfigurationLoader.LoadObject(context.Graph, obj);
    Assert.IsTrue(temp is LuceneSubjectsIndexer, "Should have returned a LuceneSubjectsIndexer instance");
    Assert.IsTrue(temp is IFullTextIndexer, "Should have returned a IFullTextIndexer instance");
}
/// <summary>
/// Indexes the test data into the shared harness index, then searches for "http" with a score threshold
/// of 1.0 and a limit of 5, asserting that every result meets the threshold and that no more than 5
/// results come back.
/// </summary>
public void FullTextIndexSearchLuceneSubjectsWithLimitAndThreshold()
{
    const double scoreThreshold = 1.0d;
    const int maxResults = 5;

    //Phase 1: populate the index
    IFullTextIndexer writer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    try
    {
        writer.Index(this.GetTestData());
    }
    finally
    {
        writer.Dispose();
    }

    //Phase 2: search it
    IFullTextSearchProvider search = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
    try
    {
        NTriplesFormatter nodeFormatter = new NTriplesFormatter();

        int seen = 0;
        foreach (IFullTextSearchResult hit in search.Match("http", scoreThreshold, maxResults))
        {
            seen += 1;
            Console.WriteLine(hit.Node.ToString(nodeFormatter) + " - Scores " + hit.Score);
            Assert.IsTrue(hit.Score >= scoreThreshold, "Score should be higher than desired threshold");
        }
        Assert.IsTrue(seen <= maxResults, "Should be a max of 5 results");
    }
    finally
    {
        search.Dispose();
    }
}
/// <summary>
/// Indexes the test data into the shared harness index, then searches for "http" and prints each
/// result together with its score.
/// </summary>
public void FullTextIndexSearchLuceneSubjects()
{
    //Phase 1: populate the index
    IFullTextIndexer writer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    try
    {
        writer.Index(this.GetTestData());
    }
    finally
    {
        writer.Dispose();
    }

    //Phase 2: search it and report what came back
    IFullTextSearchProvider search = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
    try
    {
        NTriplesFormatter nodeFormatter = new NTriplesFormatter();
        foreach (IFullTextSearchResult hit in search.Match("http"))
        {
            Console.WriteLine(hit.Node.ToString(nodeFormatter) + " - Scores " + hit.Score);
        }
    }
    finally
    {
        search.Dispose();
    }
}
/// <summary>
/// Checks that indexing and then unindexing the test data leaves the shared harness index returning the
/// same number of results for "http" as it did before the data was indexed.
/// </summary>
public void FullTextIndexDestructionLuceneSubjects()
{
    IFullTextIndexer indexer = null;
    try
    {
        LuceneSearchProvider provider = null;

        //Baseline count before indexing; a failure to search at this point is treated as zero results
        int origCount;
        try
        {
            provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
            origCount = provider.Match("http").Count();
        }
        catch
        {
            origCount = 0;
        }
        finally
        {
            //Dispose exactly once and clear the reference so that a later finally block
            //cannot dispose of this provider a second time
            if (provider != null)
            {
                provider.Dispose();
                provider = null;
            }
        }
        Console.WriteLine("Prior to indexing search returns " + origCount + " result(s)");

        //Index the test data
        indexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
        IGraph g = this.GetTestData();
        indexer.Index(g);
        indexer.Dispose();
        indexer = null;

        //Count after indexing; this value is only reported, so a failed search is again treated as zero
        int currCount;
        try
        {
            provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
            currCount = provider.Match("http").Count();
        }
        catch
        {
            currCount = 0;
        }
        finally
        {
            if (provider != null)
            {
                provider.Dispose();
                provider = null;
            }
        }
        Console.WriteLine("After indexing search returns " + currCount + " result(s)");

        //Unindex the same data again
        indexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
        indexer.Unindex(g);
        indexer.Dispose();
        indexer = null;

        //Count after unindexing; failures here are not swallowed because this value is asserted on
        try
        {
            provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
            currCount = provider.Match("http").Count();
            Console.WriteLine("After unindexing search returns " + currCount + " result(s)");
        }
        finally
        {
            if (provider != null)
            {
                provider.Dispose();
                provider = null;
            }
        }

        Assert.Equal(origCount, currCount);
    }
    finally
    {
        //Only non-null here if the test failed between creating an indexer and disposing of it
        if (indexer != null)
        {
            indexer.Dispose();
        }
        LuceneTestHarness.Index.Dispose();
    }
}