public override int NextDoc()
                    {
                        var pathSelector = new Lucene.Net.Documents.MapFieldSelector(LuceneIndexBuilder.FIELD_PATH);

                        while (mDocIndex < mDocCount - 1)
                        {
                            ++mDocIndex;
                            // Skip deleted files.
                            if (!mIndexReader.IsDeleted(mDocIndex))
                            {
                                var document  = mIndexReader.Document(mDocIndex, pathSelector);
                                var fieldPath = document.GetField(LuceneIndexBuilder.FIELD_PATH);
                                if (fieldPath != null)
                                {
                                    string path = fieldPath.StringValue;
                                    // Return only, if matches file extension and directory filter
                                    if (MatchesExtensionFilter(path) && MatchesDirectoryFilter(path))
                                    {
                                        return(mDocIndex);
                                    }
                                }
                            }
                        }
                        return(NO_MORE_DOCS);
                    }
Ejemplo n.º 2
0
		private ICollection DoLowLevelRDFQuery (Query query,
							PropertyType pred_type,
							string predicate,
							string field_value,
							TextCache text_cache)
		{

			Stopwatch total, a, b, c, d, e, f;

			total = new Stopwatch ();
			a = new Stopwatch ();
			b = new Stopwatch ();
			c = new Stopwatch ();
			d = new Stopwatch ();
			e = new Stopwatch ();
			f = new Stopwatch ();

			total.Start ();
			a.Start ();

			// Assemble all of the parts into a bunch of Lucene queries

			ArrayList primary_required_part_queries;
			ArrayList secondary_required_part_queries;

			LNS.BooleanQuery primary_prohibited_part_query;
			LNS.BooleanQuery secondary_prohibited_part_query;

			AndHitFilter all_hit_filters;

			ArrayList term_list;

			// Assemble all of the parts into a bunch of Lucene queries

			term_list = AssembleQuery (query,
				null,
				null,
				out primary_required_part_queries,
				out secondary_required_part_queries,
				out primary_prohibited_part_query,
				out secondary_prohibited_part_query,
				out all_hit_filters);

			a.Stop ();
			if (Debug)
				Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);

			// If we have no required parts, give up.
			if (primary_required_part_queries == null)
				return null;

			b.Start ();
			
			//
			// Now that we have all of these nice queries, let's execute them!
			//

			// Create the searchers that we will need.

			IndexReader primary_reader;
			LNS.IndexSearcher primary_searcher;
			IndexReader secondary_reader;
			LNS.IndexSearcher secondary_searcher;

			// Create the searchers that we will need.
			if (! BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher))
				return null;

			b.Stop ();
			if (Debug)
				Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);

			// Build whitelists and blacklists for search subsets.
			c.Start ();
			
			// Possibly create our whitelists from the search subset.
			LuceneBitArray primary_whitelist, secondary_whitelist;
			CreateQueryWhitelists (null,
				primary_searcher,
				secondary_searcher,
				primary_prohibited_part_query,
				secondary_prohibited_part_query,
				out primary_whitelist,
				out secondary_whitelist);

			c.Stop ();
			if (Debug)
				Log.Debug ("###### {0}: Whitelists and blacklists built in {1}", IndexName, c);

			// Now run the low level queries against our indexes.
			d.Start ();

			BetterBitArray primary_matches = null;

			if (primary_required_part_queries != null) {

				if (secondary_searcher != null)
					primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
										      secondary_searcher,
										      primary_required_part_queries,
										      secondary_required_part_queries,
										      primary_whitelist,
										      secondary_whitelist);
				else
					primary_matches = DoRequiredQueries (primary_searcher,
									     primary_required_part_queries,
									     primary_whitelist);

			} 

			d.Stop ();
			if (Debug)
				Logger.Log.Debug ("###### {0}: Low-level queries finished in {1} and returned {2} matches", IndexName, d, primary_matches.TrueCount);

			e.Start ();

			int count = 0;
			Document doc;
			ArrayList hits = new ArrayList (primary_matches.TrueCount);

			TermDocs secondary_term_docs = null;
			if (secondary_searcher != null)
				secondary_term_docs = secondary_searcher.Reader.TermDocs ();
		
			FieldSelector fields = null;
			if (predicate != null)
				fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", PropertyToFieldName (pred_type, predicate)});

			for (int match_index = primary_matches.GetNextTrueIndex (0);
			     match_index < primary_matches.Count; 
			     match_index = primary_matches.GetNextTrueIndex (++ match_index)) {

				count++;

				// If we have a HitFilter, apply it.
				// RDF FIXME: Ignore Hit Filter for now

				// If predicate was not specified but object was specified,
				// then figure out the right predicate
				if (predicate == null && field_value != null) {
					Hit hit = new Hit ();
					doc = primary_searcher.Doc (match_index);
					hit.Uri = GetUriFromDocument (doc);
					hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));

					bool found_matching_predicate = false;

					foreach (Field field in doc.Fields ()) {
						if (! FieldIsPredicate (field, field_value))
							continue;

						Property prop = new Property ();
						prop.Type = pred_type;
						prop.Key = predicate;
						prop.Value = field_value;
						hit.AddProperty (prop);

						found_matching_predicate = true;
					}

					// Now get the matching predicate from the secondary index
					if (secondary_searcher == null) {
						doc = null;
					} else {
						Term term = new Term ("Uri", doc.Get ("Uri"));
						secondary_term_docs.Seek (term);
						if (secondary_term_docs.Next ())
							doc = secondary_searcher.Doc (secondary_term_docs.Doc ());
					}

					if (doc != null) {
						foreach (Field field in doc.Fields ()) {
							if (! FieldIsPredicate (field, field_value))
								continue;

							Property prop = new Property ();
							prop.Type = pred_type;
							prop.Key = predicate;
							prop.Value = field_value;
							hit.AddProperty (prop);

							found_matching_predicate = true;
						}
					}

					if (! found_matching_predicate) {
						// No matching predicate found
						// This means some unstored field matched the query
						// FIXME: Add a synthetic property #text
						hit.AddProperty (Property.New ("#text", field_value));
					}
					
					hits.Add (hit);
				} else if (predicate == "TextLinks") {
					// Special treatment: TextLinks is not stored but can be queried
					doc = primary_searcher.Doc (match_index, fields_timestamp_uri);
					Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
					if (field_value != null)
						hit.AddProperty (Property.New ("TextLinks", field_value));
					else {
						foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
							hit.AddProperty (text_link_property);
					}
					hits.Add (hit);
				} else {
					doc = primary_searcher.Doc (match_index, fields);
					Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
					foreach (Property prop in hit.Properties) {
						if (prop.Key == predicate)
							prop.Value = field_value;
					}

					hits.Add (hit);
				}
			}

			e.Stop ();

			if (Debug)
				Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);

			//
			// Finally, we clean up after ourselves.
			//

			f.Start ();
			CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);
			f.Stop ();
			
			if (Debug)
				Log.Debug ("###### {0}: Readers/searchers released in {1}", IndexName, f);

			total.Stop ();
			if (Debug) {
				Log.Debug ("###### {0}: Query time breakdown:", IndexName);
				Log.Debug ("###### {0}:    Build queries {1,6} ({2:0.0}%)", IndexName, a, 100 * a.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:      Got readers {1,6} ({2:0.0}%)", IndexName, b, 100 * b.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:       Whitelists {1,6} ({2:0.0}%)", IndexName, c, 100 * c.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:          Queries {1,6} ({2:0.0}%)", IndexName, d, 100 * d.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:    Gen'd Results {1,6} ({2:0.0}%)", IndexName, e, 100 * e.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:   Reader cleanup {1,6} ({2:0.0}%)", IndexName, f, 100 * f.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:            TOTAL {1,6}", IndexName, total);

				Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
			}

			return hits;
		}
Ejemplo n.º 3
0
		///////// RDF fu ///////////////////////////////////////////////

		// Returns a collection of Uris
		// HitFilter and UriFilter are ignored for now
		// They will come into play in the final FetchDocument part
		// FIXME: Should RDFQuery do any query mapping using backend_query_part_hook ?
		// I think it should not. QueryPart hooks are for human beings, RDF is for softwares.
		public ICollection DoRDFQuery (Query _query, TextCache text_cache)
		{
			RDFQuery query = (RDFQuery) _query;

			string subject, predicate, _object;
			PropertyType pred_type;

			subject = query.SubjectString;
			predicate = query.Predicate;
			pred_type = query.PredicateType;
			_object = query.Object;

			if (Debug)
				Logger.Log.Debug ("###### {0}: Starting low-level queries '{1}' : '{4}:{2}' = '{3}'", IndexName, subject, predicate, _object, pred_type);

			// ******** 8 cases **********

			// Return all uris
			if (subject == String.Empty && predicate == String.Empty && _object == String.Empty) {
				ICollection hits = GetAllHitsByUri ().Values;
				foreach (Hit hit in hits)
					foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
						hit.AddProperty (text_link_property);
				return hits;
			}

			// Normal query
			if (subject == String.Empty && predicate == String.Empty && _object != String.Empty) {
				QueryPart_Text part = new QueryPart_Text ();
				part.Text = _object;
				part.SearchFullText = false; // We only search properties in RDF query
				query.AddPart (part);
				return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
			}

			// Return uris for all documents with this property
			if (subject == String.Empty && predicate != String.Empty && _object == String.Empty) {
				string field_name = PropertyToFieldName (pred_type, predicate);

				QueryPart_Property part = new QueryPart_Property ();
				part.Type = PropertyType.Internal;
				part.Key = "Properties";
				part.Value = field_name;
				query.AddPart (part);

				return DoLowLevelRDFQuery (query, pred_type, predicate, null, text_cache);
			}

			// Property query
			if (subject == String.Empty && predicate != String.Empty && _object != String.Empty) {
				QueryPart_Property part = new QueryPart_Property ();
				part.Type = pred_type;
				part.Key = predicate;
				part.Value = _object;
				query.AddPart (part);
				return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
			}

			// Return if the URI exists
			if (subject != String.Empty && predicate == String.Empty && _object == String.Empty) {
				QueryPart_Uri part = new QueryPart_Uri ();
				part.Uri = new Uri (subject, true); // better be URI!
				query.AddPart (part);
				// FIXME: Which properties to return in the hit? All or none ?
				return DoLowLevelRDFQuery (query, pred_type, predicate, null, text_cache);
			}

			// Normal query in the document with this URI
			if (subject != String.Empty && predicate == String.Empty && _object != String.Empty) {
				QueryPart_Uri uri_part = new QueryPart_Uri ();
				uri_part.Uri = new Uri (subject, true); // better be URI!
				query.AddPart (uri_part);

				QueryPart_Text part = new QueryPart_Text ();
				part.Text = _object;
				part.SearchFullText = false; // We only search properties in RDF query
				query.AddPart (part);

				return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
			}

			// Return URI if the document with this URI contains this property
			if (subject != String.Empty && predicate != String.Empty && _object == String.Empty) {
				ArrayList returned_uris = new ArrayList (1);

				ArrayList uri_list = new ArrayList (1);
				uri_list.Add (new Uri (subject, true));

				string field_name = PropertyToFieldName (pred_type, predicate);
				FieldSelector fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", field_name });
				ICollection hits = GetHitsForUris (uri_list, fields);
				if (predicate == "TextLinks") {
					foreach (Hit hit in hits)
						foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
							hit.AddProperty (text_link_property);
				}

				return hits;
			}

			// Property query in the document with this URI
			if (subject != String.Empty && predicate != String.Empty && _object != String.Empty) {
				QueryPart_Uri uri_part = new QueryPart_Uri ();
				uri_part.Uri = new Uri (subject, true); // better be URI!
				query.AddPart (uri_part);

				QueryPart_Property part = new QueryPart_Property ();
				part.Type = pred_type;
				part.Key = predicate;
				part.Value = _object;
				query.AddPart (part);

				return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
			}

			throw new Exception ("Never reaches");
		}
		//private Lucene.Net.Search.Searchable GetRemote()
		//{
		//    try
		//    {
		//        return LookupRemote();
		//    }
		//    catch (System.Exception)
		//    {
		//        StartServer();
		//        return LookupRemote();
		//    }
		//}
		
		//private  Lucene.Net.Search.Searchable LookupRemote()
		//{
		//    return (Lucene.Net.Search.Searchable) Activator.GetObject(typeof(Lucene.Net.Search.Searchable), @"http://localhost:1099/Searchable");
		//}
		
		//[SetUp]
		//public void StartServer()
		//{
		//    try
		//    {
		//        System.Runtime.Remoting.Channels.ChannelServices.RegisterChannel(new System.Runtime.Remoting.Channels.Http.HttpChannel(1099), false);
		//    }
		//    catch (System.Net.Sockets.SocketException ex)
		//    {
		//        if (ex.ErrorCode == 10048)
		//            return;     // EADDRINUSE?
		//        throw ex;
		//    }

		//    // construct an index
		//    RAMDirectory indexStore = new RAMDirectory();
		//    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true);
		//    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
		//    doc.Add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
		//    doc.Add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
		//    writer.AddDocument(doc);
		//    writer.Optimize();
		//    writer.Close();
			
		//    // publish it
		//    Lucene.Net.Search.Searchable local = new IndexSearcher(indexStore);
		//    RemoteSearchable impl = new RemoteSearchable(local);
		//    System.Runtime.Remoting.RemotingServices.Marshal(impl, "Searchable");
		//}
		
		private void  Search(Query query)
		{
			// try to search the published index
			Lucene.Net.Search.Searchable[] searchables = new Lucene.Net.Search.Searchable[]{GetRemote()};
			Searcher searcher = new MultiSearcher(searchables);
			Hits result = searcher.Search(query);
			
			Assert.AreEqual(1, result.Length());
			Document document = result.Doc(0);
			Assert.IsTrue(document != null, "document is null and it shouldn't be");
			Assert.AreEqual(document.Get("test"), "test text");
			Assert.IsTrue(document.GetFields().Count == 2, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 2);
			System.Collections.Hashtable ftl = new System.Collections.Hashtable();
			ftl.Add("other", "other");
			FieldSelector fs = new SetBasedFieldSelector(ftl, new System.Collections.Hashtable());
			document = searcher.Doc(0, fs);
			Assert.IsTrue(document != null, "document is null and it shouldn't be");
			Assert.IsTrue(document.GetFields().Count == 1, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 1);
			fs = new MapFieldSelector(new System.String[]{"other"});
			document = searcher.Doc(0, fs);
			Assert.IsTrue(document != null, "document is null and it shouldn't be");
			Assert.IsTrue(document.GetFields().Count == 1, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 1);
		}