/// <summary>
/// Advances <c>mDocIndex</c> to the next document that is not deleted, has a stored
/// <c>LuceneIndexBuilder.FIELD_PATH</c> field, and whose path passes both the extension
/// filter and the directory filter.
/// </summary>
/// <returns>The index of the matching document, or <c>NO_MORE_DOCS</c> when none is left.</returns>
public override int NextDoc()
{
    // Only the path field is loaded for each candidate document.
    var onlyPath = new Lucene.Net.Documents.MapFieldSelector(LuceneIndexBuilder.FIELD_PATH);

    while (mDocIndex < mDocCount - 1)
    {
        mDocIndex++;

        // Deleted documents are never candidates.
        if (mIndexReader.IsDeleted(mDocIndex))
        {
            continue;
        }

        var storedPath = mIndexReader.Document(mDocIndex, onlyPath).GetField(LuceneIndexBuilder.FIELD_PATH);
        if (storedPath == null)
        {
            continue;
        }

        string candidate = storedPath.StringValue;

        // The directory filter is only consulted once the extension filter has accepted the path.
        if (!MatchesExtensionFilter(candidate))
        {
            continue;
        }

        if (MatchesDirectoryFilter(candidate))
        {
            return mDocIndex;
        }
    }

    return NO_MORE_DOCS;
}
// Runs an already-assembled RDF query against the primary index (and the secondary
// index when BuildSearchers provides one) and builds one Hit per matching primary document.
//
//   query       - the query whose parts are handed to AssembleQuery
//   pred_type   - property type stamped on generated properties
//   predicate   - property key; when null and field_value is given, the fields of each
//                 matching document are scanned with FieldIsPredicate instead
//   field_value - the object value of the triple, or null
//   text_cache  - passed to GetTextLinks for the "TextLinks" predicate
//
// Returns null when AssembleQuery produces no primary required queries or when
// BuildSearchers fails; otherwise an ArrayList of Hit.
//
// NOTE(review): the visible DoRDFQuery passes String.Empty (not null) for an unspecified
// predicate, so the "predicate == null" branch below is only taken by callers that pass
// null explicitly - confirm which representation is intended.
private ICollection DoLowLevelRDFQuery (Query query,
					PropertyType pred_type,
					string predicate,
					string field_value,
					TextCache text_cache)
{
	Stopwatch total, a, b, c, d, e, f;

	total = new Stopwatch ();
	a = new Stopwatch ();
	b = new Stopwatch ();
	c = new Stopwatch ();
	d = new Stopwatch ();
	e = new Stopwatch ();
	f = new Stopwatch ();

	total.Start ();
	a.Start ();

	// Assemble all of the parts into a bunch of Lucene queries

	ArrayList primary_required_part_queries;
	ArrayList secondary_required_part_queries;

	LNS.BooleanQuery primary_prohibited_part_query;
	LNS.BooleanQuery secondary_prohibited_part_query;

	AndHitFilter all_hit_filters;

	// Only the out parameters are used here; the returned term list is not needed.
	AssembleQuery (query,
		       null,
		       null,
		       out primary_required_part_queries,
		       out secondary_required_part_queries,
		       out primary_prohibited_part_query,
		       out secondary_prohibited_part_query,
		       out all_hit_filters);

	a.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);

	// If we have no required parts, give up.
	if (primary_required_part_queries == null)
		return null;

	b.Start ();

	//
	// Now that we have all of these nice queries, let's execute them!
	//

	// Create the searchers that we will need.

	IndexReader primary_reader;
	LNS.IndexSearcher primary_searcher;
	IndexReader secondary_reader;
	LNS.IndexSearcher secondary_searcher;

	if (! BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher))
		return null;

	b.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);

	ArrayList hits;
	TermDocs secondary_term_docs = null;

	// From here on the searchers are held; the finally block releases them (and the
	// term enumerator) even when one of the steps below throws.
	try {
		// Build whitelists and blacklists for search subsets.
		c.Start ();

		// Possibly create our whitelists from the search subset.
		LuceneBitArray primary_whitelist, secondary_whitelist;
		CreateQueryWhitelists (null,
				       primary_searcher,
				       secondary_searcher,
				       primary_prohibited_part_query,
				       secondary_prohibited_part_query,
				       out primary_whitelist,
				       out secondary_whitelist);

		c.Stop ();
		if (Debug)
			Log.Debug ("###### {0}: Whitelists and blacklists built in {1}", IndexName, c);

		// Now run the low level queries against our indexes.
		d.Start ();

		BetterBitArray primary_matches = null;

		if (primary_required_part_queries != null) {

			if (secondary_searcher != null)
				primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
									      secondary_searcher,
									      primary_required_part_queries,
									      secondary_required_part_queries,
									      primary_whitelist,
									      secondary_whitelist);
			else
				primary_matches = DoRequiredQueries (primary_searcher,
								     primary_required_part_queries,
								     primary_whitelist);
		}

		d.Stop ();
		if (Debug)
			Logger.Log.Debug ("###### {0}: Low-level queries finished in {1} and returned {2} matches", IndexName, d, primary_matches.TrueCount);

		e.Start ();

		hits = new ArrayList (primary_matches.TrueCount);

		if (secondary_searcher != null)
			secondary_term_docs = secondary_searcher.Reader.TermDocs ();

		FieldSelector fields = null;
		if (predicate != null)
			fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", PropertyToFieldName (pred_type, predicate)});

		for (int match_index = primary_matches.GetNextTrueIndex (0);
		     match_index < primary_matches.Count;
		     match_index = primary_matches.GetNextTrueIndex (++ match_index)) {

			Document doc;

			// If we have a HitFilter, apply it.
			// RDF FIXME: Ignore Hit Filter for now

			// If predicate was not specified but object was specified,
			// then figure out the right predicate
			if (predicate == null && field_value != null) {
				Hit hit = new Hit ();
				doc = primary_searcher.Doc (match_index);
				hit.Uri = GetUriFromDocument (doc);
				hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));

				bool found_matching_predicate = AddMatchingPredicateProperties (hit, doc, pred_type, predicate, field_value);

				// Now get the matching predicate from the secondary index.
				// A separate variable is used so that a missed lookup does not
				// fall back to the primary document and add its properties twice.
				Document secondary_doc = null;
				if (secondary_searcher != null) {
					Term term = new Term ("Uri", doc.Get ("Uri"));
					secondary_term_docs.Seek (term);
					if (secondary_term_docs.Next ())
						secondary_doc = secondary_searcher.Doc (secondary_term_docs.Doc ());
				}

				if (secondary_doc != null
				    && AddMatchingPredicateProperties (hit, secondary_doc, pred_type, predicate, field_value))
					found_matching_predicate = true;

				if (! found_matching_predicate) {
					// No matching predicate found
					// This means some unstored field matched the query
					// FIXME: Add a synthetic property #text
					hit.AddProperty (Property.New ("#text", field_value));
				}

				hits.Add (hit);
			} else if (predicate == "TextLinks") {
				// Special treatment: TextLinks is not stored but can be queried
				doc = primary_searcher.Doc (match_index, fields_timestamp_uri);
				Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
				if (field_value != null)
					hit.AddProperty (Property.New ("TextLinks", field_value));
				else {
					foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
						hit.AddProperty (text_link_property);
				}
				hits.Add (hit);
			} else {
				doc = primary_searcher.Doc (match_index, fields);
				Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
				// Properties carrying the queried key get their value replaced by field_value.
				// NOTE(review): field_value may be null on this path, which overwrites the
				// value with null - confirm that is intended.
				foreach (Property prop in hit.Properties) {
					if (prop.Key == predicate)
						prop.Value = field_value;
				}

				hits.Add (hit);
			}
		}

		e.Stop ();

		if (Debug)
			Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);
	} finally {
		//
		// Finally, we clean up after ourselves.
		//

		f.Start ();

		// Release the term enumerator before the searchers it was obtained from.
		if (secondary_term_docs != null)
			secondary_term_docs.Close ();

		CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);

		f.Stop ();
	}

	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers released in {1}", IndexName, f);

	total.Stop ();
	if (Debug) {
		Log.Debug ("###### {0}: Query time breakdown:", IndexName);
		Log.Debug ("###### {0}: Build queries {1,6} ({2:0.0}%)", IndexName, a, 100 * a.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Got readers {1,6} ({2:0.0}%)", IndexName, b, 100 * b.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Whitelists {1,6} ({2:0.0}%)", IndexName, c, 100 * c.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Queries {1,6} ({2:0.0}%)", IndexName, d, 100 * d.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Gen'd Results {1,6} ({2:0.0}%)", IndexName, e, 100 * e.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Reader cleanup {1,6} ({2:0.0}%)", IndexName, f, 100 * f.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: TOTAL {1,6}", IndexName, total);

		Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
	}

	return hits;
}

// Adds one property (pred_type, predicate, field_value) to hit for every field of doc
// that FieldIsPredicate accepts for field_value. Returns true if at least one was added.
// NOTE(review): the only caller passes a null predicate, so the generated properties
// have a null Key; the field that matched is not used to name the property - confirm.
private bool AddMatchingPredicateProperties (Hit hit,
					     Document doc,
					     PropertyType pred_type,
					     string predicate,
					     string field_value)
{
	bool added = false;

	foreach (Field field in doc.Fields ()) {
		if (! FieldIsPredicate (field, field_value))
			continue;

		Property prop = new Property ();
		prop.Type = pred_type;
		prop.Key = predicate;
		prop.Value = field_value;
		hit.AddProperty (prop);
		added = true;
	}

	return added;
}
///////// RDF fu ///////////////////////////////////////////////

// Answers an RDF (subject, predicate, object) query and returns a collection of hits.
// Each of the three parts is either given or String.Empty, which yields eight cases;
// all but two of them add query parts and delegate to DoLowLevelRDFQuery.
//
// HitFilter and UriFilter are ignored for now.
// They will come into play in the final FetchDocument part.
// FIXME: Should RDFQuery do any query mapping using backend_query_part_hook ?
// I think it should not. QueryPart hooks are for human beings, RDF is for softwares.
public ICollection DoRDFQuery (Query _query, TextCache text_cache)
{
	RDFQuery query = (RDFQuery) _query;

	string subject = query.SubjectString;
	string predicate = query.Predicate;
	PropertyType pred_type = query.PredicateType;
	string _object = query.Object;

	if (Debug)
		Logger.Log.Debug ("###### {0}: Starting low-level queries '{1}' : '{4}:{2}' = '{3}'", IndexName, subject, predicate, _object, pred_type);

	// Evaluate "is this part unspecified?" once per part instead of in every case.
	bool no_subject = (subject == String.Empty);
	bool no_predicate = (predicate == String.Empty);
	bool no_object = (_object == String.Empty);

	// ******** 8 cases **********

	// (-, -, -) : return all uris, each decorated with its text links
	if (no_subject && no_predicate && no_object) {
		ICollection all_hits = GetAllHitsByUri ().Values;
		foreach (Hit each_hit in all_hits) {
			foreach (Property link in GetTextLinks (each_hit.Uri, text_cache))
				each_hit.AddProperty (link);
		}
		return all_hits;
	}

	// (-, -, o) : normal query
	if (no_subject && no_predicate && ! no_object) {
		QueryPart_Text text_part = new QueryPart_Text ();
		text_part.Text = _object;
		text_part.SearchFullText = false; // We only search properties in RDF query
		query.AddPart (text_part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	// (-, p, -) : uris of all documents with this property
	if (no_subject && ! no_predicate && no_object) {
		QueryPart_Property has_property_part = new QueryPart_Property ();
		has_property_part.Type = PropertyType.Internal;
		has_property_part.Key = "Properties";
		has_property_part.Value = PropertyToFieldName (pred_type, predicate);
		query.AddPart (has_property_part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, null, text_cache);
	}

	// (-, p, o) : property query
	if (no_subject && ! no_predicate && ! no_object) {
		QueryPart_Property property_part = new QueryPart_Property ();
		property_part.Type = pred_type;
		property_part.Key = predicate;
		property_part.Value = _object;
		query.AddPart (property_part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	// (s, -, -) : return if the URI exists
	if (! no_subject && no_predicate && no_object) {
		QueryPart_Uri subject_part = new QueryPart_Uri ();
		subject_part.Uri = new Uri (subject, true); // better be URI!
		query.AddPart (subject_part);
		// FIXME: Which properties to return in the hit? All or none ?
		return DoLowLevelRDFQuery (query, pred_type, predicate, null, text_cache);
	}

	// (s, -, o) : normal query in the document with this URI
	if (! no_subject && no_predicate && ! no_object) {
		QueryPart_Uri subject_part = new QueryPart_Uri ();
		subject_part.Uri = new Uri (subject, true); // better be URI!
		query.AddPart (subject_part);

		QueryPart_Text text_part = new QueryPart_Text ();
		text_part.Text = _object;
		text_part.SearchFullText = false; // We only search properties in RDF query
		query.AddPart (text_part);

		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	// (s, p, -) : return URI if the document with this URI contains this property
	if (! no_subject && ! no_predicate && no_object) {
		ArrayList subject_uris = new ArrayList (1);
		subject_uris.Add (new Uri (subject, true));

		string stored_field = PropertyToFieldName (pred_type, predicate);
		FieldSelector wanted_fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", stored_field });

		ICollection uri_hits = GetHitsForUris (subject_uris, wanted_fields);

		// Text links are fetched through GetTextLinks rather than the field selector.
		if (predicate == "TextLinks") {
			foreach (Hit each_hit in uri_hits) {
				foreach (Property link in GetTextLinks (each_hit.Uri, text_cache))
					each_hit.AddProperty (link);
			}
		}

		return uri_hits;
	}

	// (s, p, o) : property query in the document with this URI
	if (! no_subject && ! no_predicate && ! no_object) {
		QueryPart_Uri subject_part = new QueryPart_Uri ();
		subject_part.Uri = new Uri (subject, true); // better be URI!
		query.AddPart (subject_part);

		QueryPart_Property property_part = new QueryPart_Property ();
		property_part.Type = pred_type;
		property_part.Key = predicate;
		property_part.Value = _object;
		query.AddPart (property_part);

		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	// The eight cases above are exhaustive.
	throw new Exception ("Never reaches");
}
// The remoting-based helpers below are disabled; they are kept as commented-out code.
//private Lucene.Net.Search.Searchable GetRemote()
//{
//    try
//    {
//        return LookupRemote();
//    }
//    catch (System.Exception)
//    {
//        StartServer();
//        return LookupRemote();
//    }
//}

//private Lucene.Net.Search.Searchable LookupRemote()
//{
//    return (Lucene.Net.Search.Searchable) Activator.GetObject(typeof(Lucene.Net.Search.Searchable), @"http://localhost:1099/Searchable");
//}

//[SetUp]
//public void StartServer()
//{
//    try
//    {
//        System.Runtime.Remoting.Channels.ChannelServices.RegisterChannel(new System.Runtime.Remoting.Channels.Http.HttpChannel(1099), false);
//    }
//    catch (System.Net.Sockets.SocketException ex)
//    {
//        if (ex.ErrorCode == 10048)
//            return; // EADDRINUSE?
//        throw ex;
//    }

//    // construct an index
//    RAMDirectory indexStore = new RAMDirectory();
//    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
//    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
//    doc.Add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
//    doc.Add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
//    writer.AddDocument(doc);
//    writer.Optimize();
//    writer.Close();

//    // publish it
//    Lucene.Net.Search.Searchable local = new IndexSearcher(indexStore);
//    RemoteSearchable impl = new RemoteSearchable(local);
//    System.Runtime.Remoting.RemotingServices.Marshal(impl, "Searchable");
//}

/// <summary>
/// Runs <paramref name="query"/> through a <c>MultiSearcher</c> wrapping the searchable returned by
/// <c>GetRemote()</c> and asserts that exactly one two-field document is found, then re-loads
/// document 0 through two different field selectors and asserts that only one field comes back.
/// </summary>
/// <param name="query">The query expected to match the single published document.</param>
/// <remarks>
/// NOTE(review): <c>GetRemote()</c> appears only in commented-out form above; confirm it is
/// defined elsewhere, otherwise this method does not compile.
/// </remarks>
private void Search(Query query)
{
    // try to search the published index
    Lucene.Net.Search.Searchable[] searchables = new Lucene.Net.Search.Searchable[] { GetRemote() };
    Searcher searcher = new MultiSearcher(searchables);
    Hits result = searcher.Search(query);

    Assert.AreEqual(1, result.Length());
    Document document = result.Doc(0);
    Assert.IsTrue(document != null, "document is null and it shouldn't be");
    // Expected value goes first so a failure reports the right "expected"/"actual" values.
    Assert.AreEqual("test text", document.Get("test"));
    Assert.IsTrue(document.GetFields().Count == 2, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 2);

    // Load only the "other" field through a set-based selector.
    System.Collections.Hashtable ftl = new System.Collections.Hashtable();
    ftl.Add("other", "other");
    FieldSelector fs = new SetBasedFieldSelector(ftl, new System.Collections.Hashtable());
    document = searcher.Doc(0, fs);
    Assert.IsTrue(document != null, "document is null and it shouldn't be");
    Assert.IsTrue(document.GetFields().Count == 1, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 1);

    // Same check through a map-based selector.
    fs = new MapFieldSelector(new System.String[] { "other" });
    document = searcher.Doc(0, fs);
    Assert.IsTrue(document != null, "document is null and it shouldn't be");
    Assert.IsTrue(document.GetFields().Count == 1, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 1);
}