/// <summary>
/// Strips the full text indexing wrapper from the current dataset (when present), releases the
/// optimiser, search provider, indexer and index that went with it, and rebuilds the query processor.
/// </summary>
private void DisableFullTextIndex()
{
    WebDemandDataset webDemand = this._dataset as WebDemandDataset;
    if (webDemand != null)
    {
        //Only unwrap when the dataset directly beneath the web demand wrapper is full text indexed
        if (webDemand.UnderlyingDataset is FullTextIndexedDataset)
        {
            //Expose the inner dataset, strip the indexing from it via the recursive call,
            //then put a web demand wrapper back around whatever remains
            this._dataset = webDemand.UnderlyingDataset;
            this.DisableFullTextIndex();
            this._dataset = new WebDemandDataset(this._dataset);
        }
    }
    else
    {
        FullTextIndexedDataset indexed = this._dataset as FullTextIndexedDataset;
        if (indexed != null)
        {
            //Unregister and drop the optimiser first
            SparqlOptimiser.RemoveOptimiser(this._ftOptimiser);
            this._ftOptimiser = null;

            //Release the search provider
            this._ftSearcher.Dispose();
            this._ftSearcher = null;

            //Fall back to the dataset that was being indexed
            this._dataset = indexed.UnderlyingDataset;

            //Release the indexer and then the index itself
            this._ftIndexer.Dispose();
            this._ftIndexer = null;
            this._ftIndex.Dispose();
            this._ftIndex = null;
        }
    }

    //Always rebuild the processor against whatever dataset is now current
    this._processor = new LeviathanQueryProcessor(this._dataset);
}
/// <summary>
/// Creates a Full Text Optimiser.
/// </summary>
/// <param name="provider">Full Text Search Provider.</param>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="provider"/> is null.</exception>
public FullTextOptimiser(IFullTextSearchProvider provider)
{
    if (provider == null)
    {
        //The single-string ArgumentNullException constructor interprets its argument as the
        //parameter name, so use the (paramName, message) overload to report both correctly
        throw new ArgumentNullException("provider", "Full Text Search Provider cannot be null");
    }
    this._provider = provider;
}
/// <summary>
/// Creates a new Full Text Operator, storing every supplied argument on the instance
/// </summary>
/// <param name="provider">Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
/// <param name="matchVar">Match Variable</param>
/// <param name="scoreVar">Score Variable</param>
/// <param name="searchTerm">Search Term</param>
/// <param name="limit">Limit</param>
/// <param name="scoreThreshold">Score Threshold</param>
public BaseFullTextOperator(
    IFullTextSearchProvider provider,
    ISparqlAlgebra algebra,
    PatternItem matchVar,
    PatternItem scoreVar,
    PatternItem searchTerm,
    int limit,
    double scoreThreshold)
{
    //Provider and the algebra this operator wraps
    _provider = provider;
    InnerAlgebra = algebra;

    //Pattern items describing what to match and where to bind
    _matchVar = matchVar;
    _scoreVar = scoreVar;
    _searchTerm = searchTerm;

    //Result restrictions
    _limit = limit;
    _scoreThreshold = scoreThreshold;
}
/// <summary>
/// Indexes the test data with a predicates indexer, then searches for "http" using a score
/// threshold of 1.0 and a limit of 5, checking both restrictions are respected.
/// </summary>
public void FullTextIndexSearchLucenePredicatesWithLimitAndThreshold()
{
    //Build the index, always releasing the indexer afterwards
    IFullTextIndexer indexer = new LucenePredicatesIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    try
    {
        indexer.Index(this.GetTestData());
    }
    finally
    {
        indexer.Dispose();
    }

    //Search the index, always releasing the provider afterwards
    IFullTextSearchProvider provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
    try
    {
        NTriplesFormatter formatter = new NTriplesFormatter();
        int count = 0;
        foreach (IFullTextSearchResult result in provider.Match("http", 1.0d, 5))
        {
            count++;
            Console.WriteLine(result.Node.ToString(formatter) + " - Scores " + result.Score);
            Assert.IsTrue(result.Score >= 1.0d, "Score should be higher than desired threshold");
        }
        Assert.IsTrue(count <= 5, "Should be a max of 5 results");
    }
    finally
    {
        provider.Dispose();
    }
}
/// <summary>
/// Ensures the current dataset is wrapped in a full text indexed dataset, creating the in-memory
/// index, indexer, search provider and optimiser when needed, and rebuilds the query processor.
/// </summary>
private void EnableFullTextIndex()
{
    if (this._dataset is FullTextIndexedDataset)
    {
        //Nothing to do
    }
    else if (this._dataset is WebDemandDataset)
    {
        //Web Demand needs to go around Full Text as we want to index on demand loaded content,
        //so index the underlying dataset via the recursive call and then re-apply the wrapper
        WebDemandDataset ds = (WebDemandDataset)this._dataset;
        this._dataset = ds.UnderlyingDataset;
        this.EnableFullTextIndex();
        this._dataset = new WebDemandDataset(this._dataset);
    }
    else
    {
        //Create and ensure index ready for use
        this._ftIndex = new RAMDirectory();
        var writer = new IndexWriter(this._ftIndex, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED);
        writer.Dispose();

        //Create Indexer and wrap dataset
        //The dataset cannot be a WebDemandDataset at this point because that case is handled by
        //the branch above, so the previously duplicated (unreachable) check has been removed
        this._ftIndexer = new LuceneObjectsIndexer(this._ftIndex, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), new DefaultIndexSchema());
        this._dataset = new FullTextIndexedDataset(this._dataset, this._ftIndexer, true);

        //Create and Register Optimizer
        //NOTE(review): the search provider is created with LUCENE_29 while the index is written
        //with LUCENE_30 analyzers - confirm whether this version mismatch is intentional
        this._ftSearcher = new LuceneSearchProvider(Lucene.Net.Util.Version.LUCENE_29, this._ftIndex);
        this._ftOptimiser = new FullTextOptimiser(this._ftSearcher);
        SparqlOptimiser.AddOptimiser(this._ftOptimiser);
    }

    //Always rebuild the processor against whatever dataset is now current
    this._processor = new LeviathanQueryProcessor(this._dataset);
}
/// <summary>
/// Indexes the test data with a subjects indexer, then searches for "http" using a score
/// threshold of 0.75, checking every returned result meets that threshold.
/// </summary>
public void FullTextIndexSearchLuceneSubjectsWithThreshold()
{
    //Build the index, always releasing the indexer afterwards
    IFullTextIndexer indexer = new LuceneSubjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    try
    {
        indexer.Index(this.GetTestData());
    }
    finally
    {
        indexer.Dispose();
    }

    //Search the index, always releasing the provider afterwards
    IFullTextSearchProvider provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
    try
    {
        NTriplesFormatter formatter = new NTriplesFormatter();
        foreach (IFullTextSearchResult hit in provider.Match("http", 0.75d))
        {
            Console.WriteLine(hit.Node.ToString(formatter) + " - Scores " + hit.Score);
            Assert.IsTrue(hit.Score >= 0.75d, "Score should be higher than desired threshold");
        }
    }
    finally
    {
        provider.Dispose();
    }
}
/// <summary>
/// Indexes the test data with an objects indexer, then searches for "http" and prints
/// each result together with its score.
/// </summary>
public void FullTextIndexSearchLuceneObjects()
{
    //Build the index, always releasing the indexer afterwards
    IFullTextIndexer indexer = new LuceneObjectsIndexer(LuceneTestHarness.Index, LuceneTestHarness.Analyzer, LuceneTestHarness.Schema);
    try
    {
        indexer.Index(this.GetTestData());
    }
    finally
    {
        indexer.Dispose();
    }

    //Search the index, always releasing the provider afterwards
    IFullTextSearchProvider provider = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, LuceneTestHarness.Index);
    try
    {
        NTriplesFormatter formatter = new NTriplesFormatter();
        foreach (IFullTextSearchResult hit in provider.Match("http"))
        {
            Console.WriteLine(hit.Node.ToString(formatter) + " - Scores " + hit.Score);
        }
    }
    finally
    {
        provider.Dispose();
    }
}
/// <summary>
/// Creates a new Full Text Operator with a result limit and no score variable
/// </summary>
/// <remarks>
/// Delegates to the seven argument constructor, passing null for the score variable and
/// Double.NaN for the score threshold.
/// </remarks>
/// <param name="provider">Full Text Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
/// <param name="matchVar">Match Variable</param>
/// <param name="searchTerm">Search Term</param>
/// <param name="limit">Result Limit</param>
public FullTextMatch(
    IFullTextSearchProvider provider,
    ISparqlAlgebra algebra,
    PatternItem matchVar,
    PatternItem searchTerm,
    int limit)
    : this(provider, algebra, matchVar, null, searchTerm, limit, double.NaN)
{
}
/// <summary>
/// Creates a new Full Text Operator with a result limit
/// </summary>
/// <remarks>
/// Delegates to the seven argument constructor, passing Double.NaN for the score threshold.
/// </remarks>
/// <param name="provider">Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
/// <param name="matchVar">Match Variable</param>
/// <param name="scoreVar">Score Variable</param>
/// <param name="searchTerm">Search Term</param>
/// <param name="limit">Limit</param>
public BaseFullTextOperator(
    IFullTextSearchProvider provider,
    ISparqlAlgebra algebra,
    PatternItem matchVar,
    PatternItem scoreVar,
    PatternItem searchTerm,
    int limit)
    : this(provider, algebra, matchVar, scoreVar, searchTerm, limit, double.NaN)
{
}
/// <summary>
/// Creates a new Full Text Operator with a score variable
/// </summary>
/// <remarks>
/// Delegates to the seven argument constructor, passing -1 for the limit and
/// Double.NaN for the score threshold.
/// </remarks>
/// <param name="provider">Full Text Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
/// <param name="matchVar">Match Variable</param>
/// <param name="scoreVar">Score Variable</param>
/// <param name="searchTerm">Search Term</param>
public FullTextMatch(
    IFullTextSearchProvider provider,
    ISparqlAlgebra algebra,
    PatternItem matchVar,
    PatternItem scoreVar,
    PatternItem searchTerm)
    : this(provider, algebra, matchVar, scoreVar, searchTerm, -1, double.NaN)
{
}
/// <summary>
/// Creates a new Full Text Operator from the complete set of arguments
/// </summary>
/// <remarks>
/// Forwards every argument unchanged to the base class constructor.
/// </remarks>
/// <param name="provider">Full Text Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
/// <param name="matchVar">Match Variable</param>
/// <param name="scoreVar">Score Variable</param>
/// <param name="searchTerm">Search Term</param>
/// <param name="limit">Result Limit</param>
/// <param name="scoreThreshold">Score Threshold</param>
public FullTextMatch(
    IFullTextSearchProvider provider,
    ISparqlAlgebra algebra,
    PatternItem matchVar,
    PatternItem scoreVar,
    PatternItem searchTerm,
    int limit,
    double scoreThreshold)
    : base(provider, algebra, matchVar, scoreVar, searchTerm, limit, scoreThreshold)
{
}
/// <summary>
/// Creates a new Full Text Operator with a score threshold
/// </summary>
/// <remarks>
/// Delegates to the seven argument constructor, passing -1 for the limit.
/// </remarks>
/// <param name="provider">Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
/// <param name="matchVar">Match Variable</param>
/// <param name="scoreVar">Score Variable</param>
/// <param name="searchTerm">Search Term</param>
/// <param name="scoreThreshold">Score Threshold</param>
public BaseFullTextOperator(
    IFullTextSearchProvider provider,
    ISparqlAlgebra algebra,
    PatternItem matchVar,
    PatternItem scoreVar,
    PatternItem searchTerm,
    double scoreThreshold)
    : this(provider, algebra, matchVar, scoreVar, searchTerm, -1, scoreThreshold)
{
}
/// <summary>
/// Gets the Full Text Results for a specific search query, restricted to a maximum number of results
/// </summary>
/// <param name="graphUris">Graph URIs</param>
/// <param name="provider">Search Provider</param>
/// <param name="search">Search Query</param>
/// <param name="limit">Result Limit</param>
/// <returns>
/// The results of the provider's Match call; the configured score threshold is passed along
/// whenever one has been set on this instance
/// </returns>
protected virtual IEnumerable<IFullTextSearchResult> GetResults(IEnumerable<Uri> graphUris, IFullTextSearchProvider provider, string search, int limit)
{
    //No threshold configured so only the limit applies
    if (!this._threshold.HasValue)
    {
        return provider.Match(graphUris, search, limit);
    }

    //Use a Score Threshold as well as the limit
    return provider.Match(graphUris, search, this._threshold.Value, limit);
}
/// <summary>
/// Creates a new Full Text Operator with a score threshold and no score variable
/// </summary>
/// <remarks>
/// Delegates to the seven argument constructor, passing null for the score variable and
/// -1 for the limit.
/// </remarks>
/// <param name="provider">Full Text Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
/// <param name="matchVar">Match Variable</param>
/// <param name="searchTerm">Search Term</param>
/// <param name="scoreThreshold">Score Threshold</param>
public FullTextMatch(
    IFullTextSearchProvider provider,
    ISparqlAlgebra algebra,
    PatternItem matchVar,
    PatternItem searchTerm,
    double scoreThreshold)
    : this(provider, algebra, matchVar, null, searchTerm, -1, scoreThreshold)
{
}
/// <summary>
/// Creates a Full Text Optimiser
/// </summary>
/// <param name="provider">Full Text Search Provider</param>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="provider"/> is null</exception>
public FullTextOptimiser(IFullTextSearchProvider provider)
{
    //The single-string ArgumentNullException constructor interprets its argument as the
    //parameter name, so use the (paramName, message) overload to report both correctly
    if (provider == null) throw new ArgumentNullException("provider", "Full Text Search Provider cannot be null");
    this._provider = provider;
}
/// <summary>
/// Builds a configuration graph describing an index, analyzer, schema, searcher, a graph to index
/// and an indexer, loads the searcher through the configuration loader and checks that searching
/// for "http" yields at least one result from the auto-indexed data.
/// </summary>
public void FullTextConfigSearchProviderLuceneWithBuildIndex()
{
    IGraph g = this.GetBaseGraph();

    //Predicates used by every configuration object below
    var rdfType = g.CreateUriNode("rdf:type");
    var dnrType = g.CreateUriNode("dnr:type");

    //Index configuration
    INode indexObj = g.CreateBlankNode();
    g.Assert(indexObj, rdfType, g.CreateUriNode("dnr-ft:Index"));
    g.Assert(indexObj, dnrType, g.CreateLiteralNode("Lucene.Net.Store.RAMDirectory, Lucene.Net"));
    g.Assert(indexObj, g.CreateUriNode("dnr-ft:ensureIndex"), (true).ToLiteral(g));

    //Analyzer configuration
    INode analyzerObj = g.CreateBlankNode();
    g.Assert(analyzerObj, rdfType, g.CreateUriNode("dnr-ft:Analyzer"));
    g.Assert(analyzerObj, dnrType, g.CreateLiteralNode("Lucene.Net.Analysis.Standard.StandardAnalyzer, Lucene.Net"));

    //Schema configuration
    INode schemaObj = g.CreateBlankNode();
    g.Assert(schemaObj, rdfType, g.CreateUriNode("dnr-ft:Schema"));
    g.Assert(schemaObj, dnrType, g.CreateLiteralNode("VDS.RDF.Query.FullText.Schema.DefaultIndexSchema, dotNetRDF.Query.FullText"));

    //Searcher configuration which ties all the above together
    INode searcherObj = g.CreateBlankNode();
    g.Assert(searcherObj, rdfType, g.CreateUriNode("dnr-ft:Searcher"));
    g.Assert(searcherObj, dnrType, g.CreateLiteralNode("VDS.RDF.Query.FullText.Search.Lucene.LuceneSearchProvider, dotNetRDF.Query.FullText"));
    g.Assert(searcherObj, g.CreateUriNode("dnr-ft:index"), indexObj);
    g.Assert(searcherObj, g.CreateUriNode("dnr-ft:analyzer"), analyzerObj);
    g.Assert(searcherObj, g.CreateUriNode("dnr-ft:schema"), schemaObj);

    //The Graph we want to get auto-indexed
    INode graphObj = g.CreateBlankNode();
    g.Assert(graphObj, rdfType, g.CreateUriNode("dnr:Graph"));
    g.Assert(graphObj, g.CreateUriNode("dnr:fromEmbedded"), g.CreateLiteralNode("VDS.RDF.Configuration.configuration.ttl"));

    //The Indexer for use by the auto-indexing
    INode indexerObj = g.CreateBlankNode();
    g.Assert(indexerObj, rdfType, g.CreateUriNode("dnr-ft:Indexer"));
    g.Assert(indexerObj, dnrType, g.CreateLiteralNode("VDS.RDF.Query.FullText.Indexing.Lucene.LuceneSubjectsIndexer, dotNetRDF.Query.FullText"));
    g.Assert(indexerObj, g.CreateUriNode("dnr-ft:index"), indexObj);
    g.Assert(indexerObj, g.CreateUriNode("dnr-ft:analyzer"), analyzerObj);
    g.Assert(indexerObj, g.CreateUriNode("dnr-ft:schema"), schemaObj);

    //Properties indicating that we want auto-indexing and what to index
    g.Assert(searcherObj, g.CreateUriNode("dnr-ft:buildIndexFor"), graphObj);
    g.Assert(searcherObj, g.CreateUriNode("dnr-ft:buildIndexWith"), indexerObj);

    TestTools.ShowGraph(g);

    //Load the searcher from the configuration and check what came back
    ConfigurationLoader.AddObjectFactory(this._factory);
    Object loaded = ConfigurationLoader.LoadObject(g, searcherObj);
    Assert.IsTrue(loaded is LuceneSearchProvider, "Should have returned a LuceneSearchProvider Instance");
    Assert.IsTrue(loaded is IFullTextSearchProvider, "Should have returned a IFullTextSearchProvider Instance");

    //Finally check that auto-indexing has worked OK
    IFullTextSearchProvider provider = (IFullTextSearchProvider)loaded;
    try
    {
        int hits = 0;
        foreach (IFullTextSearchResult result in provider.Match("http"))
        {
            Console.WriteLine(result.Node.ToString() + " - " + result.Score.ToString());
            hits++;
        }
        Assert.IsTrue(hits > 0, "Expected 1 or more result due to the auto-indexed data");
    }
    finally
    {
        provider.Dispose();
    }
}
/// <summary>
/// Evaluates the property function
/// </summary>
/// <param name="context">Evaluation Context</param>
/// <returns>
/// The input multiset unchanged when it is null or empty, otherwise the output multiset populated
/// with the accepted full text search results
/// </returns>
/// <exception cref="FullTextQueryException">
/// Thrown if no search provider is available from the context, if the score item is not an unbound
/// variable, or if the search term is not a constant literal
/// </exception>
public BaseMultiset Evaluate(SparqlEvaluationContext context)
{
    //Nothing can be matched against a null or empty input so hand it straight back
    if (context.InputMultiset is NullMultiset || context.InputMultiset.IsEmpty)
    {
        return context.InputMultiset;
    }

    //Retrieve the full text search provider from the context
    IFullTextSearchProvider provider = context[FullTextHelper.ContextKey] as IFullTextSearchProvider;
    if (provider == null)
    {
        throw new FullTextQueryException("No Full Text Search Provider is available, please ensure you attach a FullTextQueryOptimiser to your query");
    }

    //Work out whether the match item is a variable and, if so, whether the input already binds it
    String matchVar = this._matchVar.VariableName;
    bool matchIsBound = matchVar != null && context.InputMultiset.ContainsVariable(matchVar);

    //The limit can be handed to the provider only when a limit is in use and the
    //match variable is a variable that is not already bound
    bool applyLimitDirect = this._limit.HasValue && this._limit.Value > -1 && matchVar != null && !matchIsBound;

    //A constant match item is extracted now, an already bound variable has its existing values collected
    INode matchConstant = null;
    HashSet<INode> existing = null;
    if (matchVar == null)
    {
        matchConstant = ((NodeMatchPattern)this._matchVar).Node;
    }
    else if (matchIsBound)
    {
        existing = new HashSet<INode>(context.InputMultiset.Sets.Select(s => s[matchVar]).Where(n => n != null));
    }

    //A score item, when supplied, must be a variable that the input does not already bind
    String scoreVar = null;
    if (this._scoreVar != null)
    {
        scoreVar = this._scoreVar.VariableName;
        if (scoreVar == null)
        {
            throw new FullTextQueryException("Queries using full text search that wish to return result scores must provide a variable");
        }
        if (context.InputMultiset.ContainsVariable(scoreVar))
        {
            throw new FullTextQueryException("Queries using full text search that wish to return result scores must use an unbound variable to do so");
        }
    }

    //The search text must be a constant literal rather than a variable
    if (this._searchVar.VariableName != null)
    {
        throw new FullTextQueryException("Queries using full text search must provide a constant value for the search term");
    }
    INode searchNode = ((NodeMatchPattern)this._searchVar).Node;
    if (searchNode.NodeType != NodeType.Literal)
    {
        throw new FullTextQueryException("Queries using full text search must use a literal value for the search term");
    }
    String search = ((ILiteralNode)searchNode).Value;

    //Determine which graphs we are operating over
    IEnumerable<Uri> graphUris = context.Data.ActiveGraphUris;

    //Ask the provider for results, passing the limit along only when that is permitted
    context.OutputMultiset = new Multiset();
    IEnumerable<IFullTextSearchResult> results;
    if (applyLimitDirect)
    {
        results = this.GetResults(graphUris, provider, search, this._limit.Value);
    }
    else
    {
        results = this.GetResults(graphUris, provider, search);
    }

    //NOTE(review): the local limit is tested after each result has been processed, so a limit
    //of 0 can still admit one result - confirm whether 0 is a possible value here
    bool applyLimitLocally = !applyLimitDirect && this._limit > -1;
    int accepted = 0;
    foreach (IFullTextSearchResult result in results)
    {
        bool accept;
        if (matchConstant != null)
        {
            //Only the constant itself is acceptable
            accept = result.Node.Equals(matchConstant);
        }
        else if (existing != null)
        {
            //Only nodes already bound to the match variable are acceptable
            accept = existing.Contains(result.Node);
        }
        else
        {
            //Otherwise all results are acceptable
            accept = true;
        }

        if (accept)
        {
            accepted++;
            context.OutputMultiset.Add(result.ToSet(matchVar, scoreVar));
        }

        //Apply the limit locally if necessary
        if (applyLimitLocally && accepted >= this._limit)
        {
            break;
        }
    }

    return context.OutputMultiset;
}
/// <summary>
/// Creates a new Full Text Query algebra that wraps the given algebra
/// </summary>
/// <param name="searchProvider">Search Provider</param>
/// <param name="algebra">Inner Algebra</param>
public FullTextQuery(IFullTextSearchProvider searchProvider, ISparqlAlgebra algebra)
{
    //Keep hold of the provider and expose the wrapped algebra
    _provider = searchProvider;
    InnerAlgebra = algebra;
}