コード例 #1
0
ファイル: LuceneCommon.cs プロジェクト: zweib730/beagrep
 // Walks every field of the given Lucene document, asks
 // GetPropertyFromDocument to turn it into a Property, and attaches
 // each non-null result to the hit.  Fields for which
 // GetPropertyFromDocument returns null are skipped.
 static protected void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
 {
         foreach (Field field in doc.Fields ()) {
                 Property converted = GetPropertyFromDocument (field, doc, from_primary_index);
                 if (converted == null)
                         continue;

                 hit.AddProperty (converted);
         }
 }
コード例 #2
0
		// Runs a low-level RDF-style query against the primary (and, when
		// available, secondary) Lucene index and returns the matching hits.
		//
		//   query       - the query handed to AssembleQuery to build the Lucene queries
		//   pred_type   - property type assigned to the properties created here
		//   predicate   - property key to look for; may be null
		//   field_value - value to look for; may be null
		//   text_cache  - passed to GetTextLinks for the "TextLinks" predicate
		//
		// Returns null if AssembleQuery produced no primary required-part
		// queries or if BuildSearchers failed; otherwise an ArrayList of Hit.
		//
		// The readers/searchers obtained from BuildSearchers are always handed
		// back through CloseSearchers, even if running the query throws.
		private ICollection DoLowLevelRDFQuery (Query query,
							PropertyType pred_type,
							string predicate,
							string field_value,
							TextCache text_cache)
		{
			// One stopwatch per phase; only reported when Debug is set.
			Stopwatch total = new Stopwatch ();
			Stopwatch a = new Stopwatch ();
			Stopwatch b = new Stopwatch ();
			Stopwatch c = new Stopwatch ();
			Stopwatch d = new Stopwatch ();
			Stopwatch e = new Stopwatch ();
			Stopwatch f = new Stopwatch ();

			total.Start ();
			a.Start ();

			// Assemble all of the parts into a bunch of Lucene queries

			ArrayList primary_required_part_queries;
			ArrayList secondary_required_part_queries;

			LNS.BooleanQuery primary_prohibited_part_query;
			LNS.BooleanQuery secondary_prohibited_part_query;

			// Required by the AssembleQuery signature; hit filters are
			// currently ignored for RDF queries (see the FIXME below).
			AndHitFilter all_hit_filters;

			// The value returned by AssembleQuery is not used by this method.
			AssembleQuery (query,
				null,
				null,
				out primary_required_part_queries,
				out secondary_required_part_queries,
				out primary_prohibited_part_query,
				out secondary_prohibited_part_query,
				out all_hit_filters);

			a.Stop ();
			if (Debug)
				Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);

			// If we have no required parts, give up.
			if (primary_required_part_queries == null)
				return null;

			b.Start ();

			//
			// Now that we have all of these nice queries, let's execute them!
			//

			// Create the searchers that we will need.

			IndexReader primary_reader;
			LNS.IndexSearcher primary_searcher;
			IndexReader secondary_reader;
			LNS.IndexSearcher secondary_searcher;

			if (! BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher))
				return null;

			b.Stop ();
			if (Debug)
				Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);

			ArrayList hits;
			TermDocs secondary_term_docs = null;

			try {
				// Build whitelists and blacklists for search subsets.
				c.Start ();

				// Possibly create our whitelists from the search subset.
				LuceneBitArray primary_whitelist, secondary_whitelist;
				CreateQueryWhitelists (null,
					primary_searcher,
					secondary_searcher,
					primary_prohibited_part_query,
					secondary_prohibited_part_query,
					out primary_whitelist,
					out secondary_whitelist);

				c.Stop ();
				if (Debug)
					Log.Debug ("###### {0}: Whitelists and blacklists built in {1}", IndexName, c);

				// Now run the low level queries against our indexes.
				// primary_required_part_queries is known to be non-null here
				// because of the early return above.
				d.Start ();

				BetterBitArray primary_matches;

				if (secondary_searcher != null)
					primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
										      secondary_searcher,
										      primary_required_part_queries,
										      secondary_required_part_queries,
										      primary_whitelist,
										      secondary_whitelist);
				else
					primary_matches = DoRequiredQueries (primary_searcher,
									     primary_required_part_queries,
									     primary_whitelist);

				d.Stop ();
				if (Debug)
					Logger.Log.Debug ("###### {0}: Low-level queries finished in {1} and returned {2} matches", IndexName, d, primary_matches.TrueCount);

				e.Start ();

				hits = new ArrayList (primary_matches.TrueCount);

				if (secondary_searcher != null)
					secondary_term_docs = secondary_searcher.Reader.TermDocs ();

				// When a predicate is given, only load the fields we need.
				FieldSelector fields = null;
				if (predicate != null)
					fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", PropertyToFieldName (pred_type, predicate)});

				for (int match_index = primary_matches.GetNextTrueIndex (0);
				     match_index < primary_matches.Count;
				     match_index = primary_matches.GetNextTrueIndex (++ match_index)) {

					// If we have a HitFilter, apply it.
					// RDF FIXME: Ignore Hit Filter for now

					if (predicate == null && field_value != null) {
						// Predicate was not specified but object was specified:
						// figure out the right predicate from the stored fields.
						Hit hit = new Hit ();
						Document primary_doc = primary_searcher.Doc (match_index);
						hit.Uri = GetUriFromDocument (primary_doc);
						hit.Timestamp = StringFu.StringToDateTime (primary_doc.Get ("Timestamp"));

						bool found_matching_predicate =
							AddRDFPredicateMatches (hit, primary_doc, pred_type, predicate, field_value);

						// Now get the matching predicate from the secondary index.
						// secondary_doc stays null unless the secondary index really
						// has a document for this Uri, so the primary document is
						// never scanned a second time.
						Document secondary_doc = null;
						if (secondary_searcher != null) {
							Term term = new Term ("Uri", primary_doc.Get ("Uri"));
							secondary_term_docs.Seek (term);
							if (secondary_term_docs.Next ())
								secondary_doc = secondary_searcher.Doc (secondary_term_docs.Doc ());
						}

						if (secondary_doc != null
						    && AddRDFPredicateMatches (hit, secondary_doc, pred_type, predicate, field_value))
							found_matching_predicate = true;

						if (! found_matching_predicate) {
							// No matching predicate found
							// This means some unstored field matched the query
							// FIXME: Add a synthetic property #text
							hit.AddProperty (Property.New ("#text", field_value));
						}

						hits.Add (hit);
					} else if (predicate == "TextLinks") {
						// Special treatment: TextLinks is not stored but can be queried
						Document doc = primary_searcher.Doc (match_index, fields_timestamp_uri);
						Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
						if (field_value != null)
							hit.AddProperty (Property.New ("TextLinks", field_value));
						else {
							foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
								hit.AddProperty (text_link_property);
						}
						hits.Add (hit);
					} else {
						Document doc = primary_searcher.Doc (match_index, fields);
						Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
						// NOTE(review): field_value may be null on this path, in which
						// case the value of every property keyed by predicate is
						// replaced with null -- confirm this is intended.
						foreach (Property prop in hit.Properties) {
							if (prop.Key == predicate)
								prop.Value = field_value;
						}

						hits.Add (hit);
					}
				}

				e.Stop ();

				if (Debug)
					Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);
			} finally {
				//
				// Finally, we clean up after ourselves.  This runs on the
				// exception path too, so the readers/searchers are not leaked.
				//

				f.Start ();
				if (secondary_term_docs != null)
					secondary_term_docs.Close ();
				CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);
				f.Stop ();
			}

			if (Debug)
				Log.Debug ("###### {0}: Readers/searchers released in {1}", IndexName, f);

			total.Stop ();
			if (Debug) {
				Log.Debug ("###### {0}: Query time breakdown:", IndexName);
				Log.Debug ("###### {0}:    Build queries {1,6} ({2:0.0}%)", IndexName, a, 100 * a.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:      Got readers {1,6} ({2:0.0}%)", IndexName, b, 100 * b.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:       Whitelists {1,6} ({2:0.0}%)", IndexName, c, 100 * c.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:          Queries {1,6} ({2:0.0}%)", IndexName, d, 100 * d.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:    Gen'd Results {1,6} ({2:0.0}%)", IndexName, e, 100 * e.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:   Reader cleanup {1,6} ({2:0.0}%)", IndexName, f, 100 * f.ElapsedTime / total.ElapsedTime);
				Log.Debug ("###### {0}:            TOTAL {1,6}", IndexName, total);

				Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
			}

			return hits;
		}

		// Adds one property to the hit for every field of doc that
		// FieldIsPredicate accepts for field_value.  Returns true if at
		// least one property was added.
		//
		// NOTE(review): the key of each added property is the predicate
		// argument, which DoLowLevelRDFQuery only passes as null -- confirm
		// whether the key should be derived from the matching field instead.
		private bool AddRDFPredicateMatches (Hit hit,
						     Document doc,
						     PropertyType pred_type,
						     string predicate,
						     string field_value)
		{
			bool added_any = false;

			foreach (Field field in doc.Fields ()) {
				if (! FieldIsPredicate (field, field_value))
					continue;

				Property prop = new Property ();
				prop.Type = pred_type;
				prop.Key = predicate;
				prop.Value = field_value;
				hit.AddProperty (prop);

				added_any = true;
			}

			return added_any;
		}