/// <summary>
/// Indexes the configuration graph in batches of 100 triples and, after each batch has been flushed,
/// checks that a subject taken from that batch is returned by a full text search
/// </summary>
public void FullTextIncrementalIndexingLucene1()
{
    //Lucene Index
    Directory dir = new RAMDirectory();
    LuceneSubjectsIndexer indexer = new LuceneSubjectsIndexer(dir, new StandardAnalyzer(LuceneTestHarness.LuceneVersion), new DefaultIndexSchema());
    try
    {
        //Test Graph
        Graph g = new Graph();
        g.LoadFromEmbeddedResource("VDS.RDF.Configuration.configuration.ttl");

        //Try indexing in 100 Triple chunks
        String searchTerm = "http";
        for (int i = 0; i < g.Triples.Count; i += 100)
        {
            //Index the Triples
            List<Triple> ts = g.Triples.Skip(i).Take(100).ToList();
            foreach (Triple t in ts)
            {
                indexer.Index(t);
            }
            indexer.Flush();

            //Now do a search to check some of those triples got indexed
            //Pick the subject of the first triple in the batch whose literal object contains the search term
            INode targetNode = ts.Where(t => t.Object.NodeType == NodeType.Literal && t.Object.ToString().Contains(searchTerm))
                                 .Select(t => t.Subject)
                                 .FirstOrDefault();
            if (targetNode == null)
            {
                //Nothing in this batch can match the search term so there is nothing to verify
                continue;
            }

            Console.WriteLine("Picked " + targetNode.ToString(this._formatter) + " as search target with search term '" + searchTerm + "'");

            LuceneSearchProvider searcher = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, dir);
            try
            {
                //Materialise the results once since they are both printed and asserted on
                List<IFullTextSearchResult> results = searcher.Match(searchTerm).ToList();
                foreach (IFullTextSearchResult r in results)
                {
                    Console.WriteLine("Got result " + r.Node.ToString(this._formatter) + " with score " + r.Score);
                }
                Assert.IsTrue(results.Any(r => r.Node.Equals(targetNode)), "Did not find expected node in search results");
            }
            finally
            {
                //Release the searcher even when the assertion fails
                searcher.Dispose();
            }
            Console.WriteLine();
        }
    }
    finally
    {
        //Release the indexer even when a batch fails
        indexer.Dispose();
    }
}
/// <summary>
/// Adds a graph to a full text indexed dataset and checks its subjects can be found by searching,
/// then removes the graph and checks those subjects are no longer returned
/// </summary>
public void FullTextDatasetLucene1()
{
    //Lucene Index
    Directory dir = new RAMDirectory();
    LuceneSubjectsIndexer indexer = new LuceneSubjectsIndexer(dir, new StandardAnalyzer(LuceneTestHarness.LuceneVersion), new DefaultIndexSchema());
    try
    {
        //Test Dataset
        InMemoryDataset memData = new InMemoryDataset();
        FullTextIndexedDataset dataset = new FullTextIndexedDataset(memData, indexer, false);

        //Test Graph
        Graph g = new Graph();
        g.LoadFromEmbeddedResource("VDS.RDF.Configuration.configuration.ttl");

        dataset.AddGraph(g);
        Assert.True(dataset.HasGraph(g.BaseUri), "Graph should exist in dataset");

        //Now do a search to check all the triples got indexed
        String searchTerm = "http";
        //Snapshot the triples up front so both passes below check exactly the same set,
        //regardless of what removing the graph from the dataset does to the graph
        List<Triple> searchTriples = g.Triples.Where(t => t.Object.NodeType == NodeType.Literal && t.Object.ToString().Contains(searchTerm)).ToList();

        LuceneSearchProvider searcher = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, dir);
        try
        {
            foreach (Triple searchTriple in searchTriples)
            {
                INode targetNode = searchTriple.Subject;
                //Materialise once; the results feed both the check and the failure message
                List<IFullTextSearchResult> results = searcher.Match(searchTerm).ToList();
                Assert.True(results.Any(r => r.Node.Equals(targetNode)), "Did not find expected node " + targetNode.ToString(this._formatter) + " in search results using search term '" + searchTerm + "' (found " + results.Count + " results)");
                Console.WriteLine();
            }

            //Now remove the Graph
            dataset.RemoveGraph(g.BaseUri);

            //Repeat the search to check all the triples got unindexed
            foreach (Triple searchTriple in searchTriples)
            {
                INode targetNode = searchTriple.Subject;
                List<IFullTextSearchResult> results = searcher.Match(searchTerm).ToList();
                Assert.False(results.Any(r => r.Node.Equals(targetNode)), "Found unexpected node " + targetNode.ToString(this._formatter) + " in search results using search term '" + searchTerm + "' (found " + results.Count + " results)");
                Console.WriteLine();
            }
        }
        finally
        {
            //Release the searcher even when an assertion fails
            searcher.Dispose();
        }
    }
    finally
    {
        //Release the indexer even when an assertion fails
        indexer.Dispose();
    }
}
/// <summary>
/// Indexes the test data with a subjects indexer built from the shared test harness index, analyzer and schema
/// </summary>
public void FullTextIndexCreationLuceneSubjects()
{
    //The using block disposes the indexer whether or not indexing succeeds
    using (LuceneSubjectsIndexer subjectsIndexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema))
    {
        subjectsIndexer.Index(this.GetTestData());
    }
}
/// <summary>
/// Parses a small N-Quads dataset into a triple store and builds an in-memory subjects index over every graph in that store
/// </summary>
public FullTextGraphScopingTests()
{
    //NOTE(review): the statements in this literal are separated by spaces as it appears here;
    //N-Quads is normally written one statement per line - confirm the literal is as intended
    String data = @"<http://x> <http://p> ""This is sample text"" <http://g1> . <http://y> <http://p> ""This is sample text"" <http://g2> . <http://y> <http://p> ""Additional sample"" <http://g2> .";

    //Load the data
    this._store = new TripleStore();
    StringParser.ParseDataset(this._store, data, new NQuadsParser());

    //Index each graph of the store into a fresh in-memory index
    this._index = new RAMDirectory();
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer(LucUtil.Version.LUCENE_30);
    DefaultIndexSchema defaultSchema = new DefaultIndexSchema();
    LuceneSubjectsIndexer subjectsIndexer = new LuceneSubjectsIndexer(this._index, standardAnalyzer, defaultSchema);
    try
    {
        foreach (IGraph graph in this._store.Graphs)
        {
            subjectsIndexer.Index(graph);
        }
    }
    finally
    {
        subjectsIndexer.Dispose();
    }
}
/// <summary>
/// Serializes the configuration of a subjects indexer and checks that loading that configuration back
/// yields a LuceneSubjectsIndexer
/// </summary>
public void FullTextConfigSerializeIndexerLuceneSubjects()
{
    LuceneSubjectsIndexer indexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    ConfigurationSerializationContext context;
    INode obj;
    try
    {
        context = new ConfigurationSerializationContext();
        obj = context.Graph.CreateBlankNode();
        context.NextSubject = obj;
        indexer.SerializeConfiguration(context);
    }
    finally
    {
        //Dispose the original indexer before loading from configuration, even if serialization fails
        indexer.Dispose();
    }

    TestTools.ShowGraph(context.Graph);

    ConfigurationLoader.AutoConfigureObjectFactories(context.Graph);
    Object temp = ConfigurationLoader.LoadObject(context.Graph, obj);
    try
    {
        Assert.IsTrue(temp is LuceneSubjectsIndexer, "Should have returned a LuceneSubjectsIndexer instance");
        Assert.IsTrue(temp is IFullTextIndexer, "Should have returned a IFullTextIndexer instance");
    }
    finally
    {
        //The loaded object is a second indexer instance, release it as well
        IDisposable loaded = temp as IDisposable;
        if (loaded != null)
        {
            loaded.Dispose();
        }
    }
}
/// <summary>
/// Indexes the test data and then searches for "http" with a score threshold of 1.0 and a limit of 5,
/// checking that every result meets the threshold and that no more than 5 results come back
/// </summary>
public void FullTextIndexSearchLuceneSubjectsWithLimitAndThreshold()
{
    //Build the index; the using block disposes the indexer whether or not indexing succeeds
    using (LuceneSubjectsIndexer subjectsIndexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema))
    {
        subjectsIndexer.Index(this.GetTestData());
    }

    //Search the index
    IFullTextSearchProvider searchProvider = null;
    try
    {
        searchProvider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
        NTriplesFormatter ntriples = new NTriplesFormatter();

        int seen = 0;
        foreach (IFullTextSearchResult hit in searchProvider.Match("http", 1.0d, 5))
        {
            seen += 1;
            Console.WriteLine(hit.Node.ToString(ntriples) + " - Scores " + hit.Score);
            Assert.IsTrue(hit.Score >= 1.0d, "Score should be higher than desired threshold");
        }
        Assert.IsTrue(seen <= 5, "Should be a max of 5 results");
    }
    finally
    {
        if (searchProvider != null)
        {
            searchProvider.Dispose();
        }
    }
}
/// <summary>
/// Indexes the test data and then prints every result of a search for "http" along with its score
/// </summary>
public void FullTextIndexSearchLuceneSubjects()
{
    //Build the index; the using block disposes the indexer whether or not indexing succeeds
    using (LuceneSubjectsIndexer subjectsIndexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema))
    {
        subjectsIndexer.Index(this.GetTestData());
    }

    //Search the index
    IFullTextSearchProvider searchProvider = null;
    try
    {
        searchProvider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
        NTriplesFormatter ntriples = new NTriplesFormatter();

        foreach (IFullTextSearchResult hit in searchProvider.Match("http"))
        {
            Console.WriteLine(hit.Node.ToString(ntriples) + " - Scores " + hit.Score);
        }
    }
    finally
    {
        if (searchProvider != null)
        {
            searchProvider.Dispose();
        }
    }
}
/// <summary>
/// Tries to load a full text query object (index schema, optimiser, Lucene indexer, Lucene search provider,
/// analyzer or directory) based on information from the Configuration Graph
/// </summary>
/// <param name="g">Configuration Graph</param>
/// <param name="objNode">Object Node</param>
/// <param name="targetType">Target Type</param>
/// <param name="obj">Output Object, left as null when nothing could be loaded</param>
/// <returns>True if an object was created, false otherwise</returns>
/// <exception cref="DotNetRdfConfigurationException">
/// Thrown when a known optimiser, indexer or search provider type is requested but a required property
/// is missing or points to an object of the wrong type
/// </exception>
public bool TryLoadObject(IGraph g, INode objNode, Type targetType, out object obj)
{
    obj = null;

    //Property nodes from the full text configuration namespace
    INode indexProperty = g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "index"));
    INode searcherProperty = g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "searcher"));
    INode analyzerProperty = g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "analyzer"));
    INode schemaProperty = g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "schema"));
    INode versionProperty = g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "version"));

    //Always check for the version, 2900 is passed as the fallback value
    int luceneVer = ConfigurationLoader.GetConfigurationInt32(g, objNode, versionProperty, 2900);

    String typeName = targetType.FullName;
    switch (typeName)
    {
        case DefaultIndexSchema:
            obj = new DefaultIndexSchema();
            break;

        case FullTextOptimiser:
            //Need to get the Search Provider
            INode providerNode = ConfigurationLoader.GetConfigurationNode(g, objNode, searcherProperty);
            if (providerNode == null)
            {
                throw new DotNetRdfConfigurationException("Unable to load the Full Text Optimiser specified by the Node '" + objNode.ToString() + "' as there was no value specified for the required dnr-ft:searcher property");
            }
            IFullTextSearchProvider optimiserProvider = ConfigurationLoader.LoadObject(g, providerNode) as IFullTextSearchProvider;
            if (optimiserProvider == null)
            {
                throw new DotNetRdfConfigurationException("Unable to load the Full Text Optimiser specified by the Node '" + objNode.ToString() + "' as the value specified for the dnr-ft:searcher property pointed to an object which could not be loaded as a type that implements the required IFullTextSearchProvider interface");
            }
            obj = new FullTextOptimiser(optimiserProvider);
            break;

        case LuceneObjectsIndexer:
        case LucenePredicatesIndexer:
        case LuceneSubjectsIndexer:
        case LuceneSearchProvider:
            //For any Lucene Indexer/Search Provider need to know the Index, Analyzer and Schema to be used

            //First the Index, which is required
            INode indexNode = ConfigurationLoader.GetConfigurationNode(g, objNode, indexProperty);
            if (indexNode == null)
            {
                throw new DotNetRdfConfigurationException("Unable to load the Lucene Indexer specified by the Node '" + objNode.ToString() + "' as there was no value specified for the required dnr-ft:index property");
            }
            Directory luceneIndex = ConfigurationLoader.LoadObject(g, indexNode) as Directory;
            if (luceneIndex == null)
            {
                throw new DotNetRdfConfigurationException("Unable to load the Lucene Indexer specified by the Node '" + objNode.ToString() + "' as the value given for the dnr-ft:index property pointed to an Object which could not be loaded as a type that derives from the required Lucene.Net.Store.Directory type");
            }

            //Next the Analyzer (assume Standard if none specified)
            INode analyzerNode = ConfigurationLoader.GetConfigurationNode(g, objNode, analyzerProperty);
            Object loadedAnalyzer = (analyzerNode == null)
                ? (Object)new StandardAnalyzer(this.GetLuceneVersion(luceneVer))
                : ConfigurationLoader.LoadObject(g, analyzerNode);
            Analyzer luceneAnalyzer = loadedAnalyzer as Analyzer;
            if (luceneAnalyzer == null)
            {
                throw new DotNetRdfConfigurationException("Unable to load the Lucene Indexer specified by the Node '" + objNode.ToString() + "' as the value given for the dnr-ft:analyzer property pointed to an Object which could not be loaded as a type that derives from the required Lucene.Net.Analysis.Analyzer type");
            }

            //Finally the Schema (assume Default if none specified)
            INode schemaNode = ConfigurationLoader.GetConfigurationNode(g, objNode, schemaProperty);
            Object loadedSchema = (schemaNode == null)
                ? (Object)new DefaultIndexSchema()
                : ConfigurationLoader.LoadObject(g, schemaNode);
            IFullTextIndexSchema indexSchema = loadedSchema as IFullTextIndexSchema;
            if (indexSchema == null)
            {
                throw new DotNetRdfConfigurationException("Unable to load the Lucene Indexer specified by the Node '" + objNode.ToString() + "' as the value given for the dnr-ft:schema property pointed to an Object which could not be loaded as a type that implements the required IFullTextIndexSchema interface");
            }

            //Now we can create the Object
            if (typeName == LuceneObjectsIndexer)
            {
                obj = new LuceneObjectsIndexer(luceneIndex, luceneAnalyzer, indexSchema);
            }
            else if (typeName == LucenePredicatesIndexer)
            {
                obj = new LucenePredicatesIndexer(luceneIndex, luceneAnalyzer, indexSchema);
            }
            else if (typeName == LuceneSubjectsIndexer)
            {
                obj = new LuceneSubjectsIndexer(luceneIndex, luceneAnalyzer, indexSchema);
            }
            else if (typeName == LuceneSearchProvider)
            {
                //Before the Search Provider has been loaded determine whether we need to carry out auto-indexing
                List<INode> indexSources = ConfigurationLoader.GetConfigurationData(g, objNode, g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "buildIndexFor"))).ToList();
                if (indexSources.Count > 0)
                {
                    //If there are sources to index ensure we have an indexer to index with
                    INode builderNode = ConfigurationLoader.GetConfigurationNode(g, objNode, g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "buildIndexWith")));
                    if (builderNode == null)
                    {
                        throw new DotNetRdfConfigurationException("Unable to load the Lucene Search Provider specified by the Node '" + objNode.ToString() + "' as there were values specified for the dnr-ft:buildIndexFor property but no dnr-ft:buildIndexWith property was found");
                    }
                    IFullTextIndexer builder = ConfigurationLoader.LoadObject(g, builderNode) as IFullTextIndexer;
                    if (builder == null)
                    {
                        throw new DotNetRdfConfigurationException("Unable to load the Lucene Search Provider specified by the Node '" + objNode.ToString() + "' as the value given for the dnr-ft:buildIndexWith property pointed to an Object which could not be loaded as a type that implements the required IFullTextIndexer interface");
                    }

                    try
                    {
                        //For Each Source load it and Index it
                        foreach (INode sourceNode in indexSources)
                        {
                            Object loadedSource = ConfigurationLoader.LoadObject(g, sourceNode);
                            ISparqlDataset sourceDataset = loadedSource as ISparqlDataset;
                            ITripleStore sourceStore = loadedSource as ITripleStore;
                            IGraph sourceGraph = loadedSource as IGraph;

                            //Checked in this order: dataset, then store, then graph
                            if (sourceDataset != null)
                            {
                                builder.Index(sourceDataset);
                            }
                            else if (sourceStore != null)
                            {
                                foreach (IGraph storeGraph in sourceStore.Graphs)
                                {
                                    builder.Index(storeGraph);
                                }
                            }
                            else if (sourceGraph != null)
                            {
                                builder.Index(sourceGraph);
                            }
                            else
                            {
                                throw new DotNetRdfConfigurationException("Unable to load the Lucene Search Provider specified by the Node '" + objNode.ToString() + "' as a value given for the dnr-ft:buildIndexFor property ('" + sourceNode.ToString() + "') pointed to an Object which could not be loaded as a type that implements one of the required interfaces: IGraph, ITripleStore or ISparqlDataset");
                            }
                        }
                    }
                    finally
                    {
                        builder.Dispose();
                    }
                }

                //Then we actually load the Search Provider
                obj = new LuceneSearchProvider(this.GetLuceneVersion(luceneVer), luceneIndex, luceneAnalyzer, indexSchema);
            }
            break;

        default:
            try
            {
                if (this._luceneAnalyzerType.IsAssignableFrom(targetType))
                {
                    //Use the constructor taking a Lucene version when the analyzer has one
                    bool hasVersionConstructor = targetType.GetConstructor(new Type[] { typeof(LucVersion) }) != null;
                    obj = hasVersionConstructor
                        ? Activator.CreateInstance(targetType, new Object[] { this.GetLuceneVersion(luceneVer) })
                        : Activator.CreateInstance(targetType);
                }
                else if (this._luceneDirectoryType.IsAssignableFrom(targetType))
                {
                    String path = ConfigurationLoader.GetConfigurationString(g, objNode, ConfigurationLoader.CreateConfigurationNode(g, ConfigurationLoader.PropertyFromFile));
                    if (path == null)
                    {
                        obj = Activator.CreateInstance(targetType);
                    }
                    else
                    {
                        try
                        {
                            obj = Activator.CreateInstance(targetType, new Object[] { path });
                        }
                        catch
                        {
                            //No usable constructor taking the path, fall back to a static Open method if the type has one
                            MethodInfo openMethod = targetType.GetMethod("Open", new Type[] { typeof(DirInfo) });
                            if (openMethod != null)
                            {
                                obj = openMethod.Invoke(null, new Object[] { new DirInfo(ConfigurationLoader.ResolvePath(path)) });
                            }
                        }
                    }

                    //Ensure the Index if necessary
                    if (obj != null && ConfigurationLoader.GetConfigurationBoolean(g, objNode, g.CreateUriNode(new Uri(FullTextHelper.FullTextConfigurationNamespace + "ensureIndex")), false))
                    {
                        IndexWriter writer = new IndexWriter((Directory)obj, new StandardAnalyzer(this.GetLuceneVersion(luceneVer)));
                        writer.Close();
                    }
                }
            }
            catch
            {
                //Since we know we don't allow loading of all analyzers and directories we allow for users to inject other object factories
                //which may know how to load those specific instances
                obj = null;
            }
            break;
    }

    return obj != null;
}
/// <summary>
/// Checks that indexing the test data and then unindexing it returns a search for "http" to the
/// number of results it gave before indexing
/// </summary>
public void FullTextIndexDestructionLuceneSubjects()
{
    IFullTextIndexer indexer = null;
    try
    {
        LuceneSearchProvider provider = null;

        //Count results before indexing
        int origCount;
        try
        {
            provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
            origCount = provider.Match("http").Count();
        }
        catch
        {
            //Best effort: a failed search here is treated as zero results
            //(presumably because the index may not be searchable before anything has been indexed)
            origCount = 0;
        }
        finally
        {
            //Dispose exactly once and clear the reference so a later failure cannot dispose it again
            if (provider != null)
            {
                provider.Dispose();
                provider = null;
            }
        }
        Console.WriteLine("Prior to indexing search returns " + origCount + " result(s)");

        //Index the test data
        indexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
        IGraph g = this.GetTestData();
        indexer.Index(g);
        indexer.Dispose();
        indexer = null;

        //Count results after indexing
        int currCount;
        try
        {
            provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
            currCount = provider.Match("http").Count();
        }
        catch
        {
            //Best effort: this count is only reported on the console
            currCount = 0;
        }
        finally
        {
            if (provider != null)
            {
                provider.Dispose();
                provider = null;
            }
        }
        Console.WriteLine("After indexing search returns " + currCount + " result(s)");

        //Unindex the test data
        indexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
        indexer.Unindex(g);
        indexer.Dispose();
        indexer = null;

        //Count results after unindexing, failures here are not swallowed since this count is asserted on
        try
        {
            provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
            currCount = provider.Match("http").Count();
            Console.WriteLine("After unindexing search returns " + currCount + " result(s)");
        }
        finally
        {
            if (provider != null)
            {
                provider.Dispose();
                provider = null;
            }
        }

        Assert.Equal(origCount, currCount);
    }
    finally
    {
        if (indexer != null)
        {
            indexer.Dispose();
        }
        LuceneTestHarness.Index.Dispose();
    }
}