/// <summary>
/// Copies the properties stored in a Lucene document onto a hit.
/// Every field of <paramref name="doc"/> is run through
/// GetPropertyFromDocument; fields for which it returns null are skipped.
/// </summary>
/// <param name="hit">The hit that receives the extracted properties.</param>
/// <param name="doc">The Lucene document whose fields are scanned.</param>
/// <param name="from_primary_index">Forwarded to GetPropertyFromDocument;
/// tells it which index the document came from.</param>
protected static void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
{
	foreach (Field f in doc.Fields ()) {
		// Not every stored field maps to a property; a null return
		// means this field carries no property and is ignored.
		Property prop = GetPropertyFromDocument (f, doc, from_primary_index);
		if (prop != null)
			hit.AddProperty (prop);
	}
}
/// <summary>
/// Executes an RDF-style query against the primary (and optionally
/// secondary) Lucene indexes and returns the matching hits.
/// </summary>
/// <param name="query">The query whose parts are assembled into Lucene queries.</param>
/// <param name="pred_type">Property type used when synthesizing properties for matches.</param>
/// <param name="predicate">Predicate (property key) to query; may be null, in which
/// case the matching predicate is discovered from the document's fields.</param>
/// <param name="field_value">Object value to match; may be null when only the
/// predicate is constrained.</param>
/// <param name="text_cache">Used to resolve "TextLinks" properties, which are
/// queryable but not stored in the index.</param>
/// <returns>An ArrayList of Hit objects, or null if the query has no required
/// parts or the searchers could not be built.</returns>
private ICollection DoLowLevelRDFQuery (Query query,
					PropertyType pred_type,
					string predicate,
					string field_value,
					TextCache text_cache)
{
	// Stopwatches for the per-phase timing breakdown logged at the end:
	// a = query assembly, b = searcher setup, c = whitelists,
	// d = low-level queries, e = hit generation, f = cleanup.
	Stopwatch total, a, b, c, d, e, f;
	total = new Stopwatch ();
	a = new Stopwatch ();
	b = new Stopwatch ();
	c = new Stopwatch ();
	d = new Stopwatch ();
	e = new Stopwatch ();
	f = new Stopwatch ();

	total.Start ();

	a.Start ();
	ArrayList primary_required_part_queries;
	ArrayList secondary_required_part_queries;
	LNS.BooleanQuery primary_prohibited_part_query;
	LNS.BooleanQuery secondary_prohibited_part_query;
	AndHitFilter all_hit_filters;
	ArrayList term_list;

	// Assemble all of the parts into a bunch of Lucene queries.
	term_list = AssembleQuery (query, null, null,
				   out primary_required_part_queries,
				   out secondary_required_part_queries,
				   out primary_prohibited_part_query,
				   out secondary_prohibited_part_query,
				   out all_hit_filters);
	a.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);

	// If we have no required parts, give up.
	if (primary_required_part_queries == null)
		return null;

	b.Start ();

	//
	// Now that we have all of these nice queries, let's execute them!
	//

	IndexReader primary_reader;
	LNS.IndexSearcher primary_searcher;
	IndexReader secondary_reader;
	LNS.IndexSearcher secondary_searcher;

	// Create the searchers that we will need.  On failure, bail out;
	// BuildSearchers owns any partially-created state in that case.
	if (! BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher))
		return null;
	b.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);

	// Build whitelists and blacklists for search subsets.
	c.Start ();

	// Possibly create our whitelists from the search subset.
	// The prohibited-part queries act as blacklists folded into the
	// whitelist bit arrays.
	LuceneBitArray primary_whitelist, secondary_whitelist;
	CreateQueryWhitelists (null,
			       primary_searcher,
			       secondary_searcher,
			       primary_prohibited_part_query,
			       secondary_prohibited_part_query,
			       out primary_whitelist,
			       out secondary_whitelist);
	c.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Whitelists and blacklists built in {1}", IndexName, c);

	// Now run the low level queries against our indexes.
	d.Start ();

	BetterBitArray primary_matches = null;

	// NOTE(review): primary_required_part_queries was already checked for
	// null above, so this condition is always true here; primary_matches
	// is therefore always assigned before the TrueCount uses below.
	if (primary_required_part_queries != null) {
		if (secondary_searcher != null)
			primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
								      secondary_searcher,
								      primary_required_part_queries,
								      secondary_required_part_queries,
								      primary_whitelist,
								      secondary_whitelist);
		else
			primary_matches = DoRequiredQueries (primary_searcher,
							     primary_required_part_queries,
							     primary_whitelist);
	}

	d.Stop ();
	if (Debug)
		Logger.Log.Debug ("###### {0}: Low-level queries finished in {1} and returned {2} matches", IndexName, d, primary_matches.TrueCount);

	e.Start ();

	// count is only incremented in the match loop; it is never read
	// afterwards (presumably left over from debugging).
	int count = 0;
	Document doc;

	ArrayList hits = new ArrayList (primary_matches.TrueCount);

	TermDocs secondary_term_docs = null;
	if (secondary_searcher != null)
		secondary_term_docs = secondary_searcher.Reader.TermDocs ();

	// When a predicate is given, only load the Uri, Timestamp and the
	// predicate's own field from each document instead of the whole doc.
	FieldSelector fields = null;
	if (predicate != null)
		fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", PropertyToFieldName (pred_type, predicate)});

	// Walk the set bits of the match bit-array; GetNextTrueIndex skips
	// directly from one match to the next.
	for (int match_index = primary_matches.GetNextTrueIndex (0);
	     match_index < primary_matches.Count;
	     match_index = primary_matches.GetNextTrueIndex (++ match_index)) {
		count++;

		// If we have a HitFilter, apply it.
		// RDF FIXME: Ignore Hit Filter for now

		// If predicate was not specified but object was specified,
		// then figure out the right predicate
		if (predicate == null && field_value != null) {
			Hit hit = new Hit ();
			doc = primary_searcher.Doc (match_index);
			hit.Uri = GetUriFromDocument (doc);
			hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));

			bool found_matching_predicate = false;

			// Scan the primary document's fields for ones whose
			// value matches field_value.
			foreach (Field field in doc.Fields ()) {
				if (! FieldIsPredicate (field, field_value))
					continue;

				// NOTE(review): predicate is null in this branch,
				// so prop.Key is set to null here — confirm this is
				// what downstream consumers expect.
				Property prop = new Property ();
				prop.Type = pred_type;
				prop.Key = predicate;
				prop.Value = field_value;
				hit.AddProperty (prop);

				found_matching_predicate = true;
			}

			// Now get the matching predicate from the secondary index.
			// doc is reused to hold the secondary document (or null if
			// there is no secondary index / no matching document).
			if (secondary_searcher == null) {
				doc = null;
			} else {
				Term term = new Term ("Uri", doc.Get ("Uri"));
				secondary_term_docs.Seek (term);
				if (secondary_term_docs.Next ())
					doc = secondary_searcher.Doc (secondary_term_docs.Doc ());
			}

			if (doc != null) {
				foreach (Field field in doc.Fields ()) {
					if (! FieldIsPredicate (field, field_value))
						continue;

					Property prop = new Property ();
					prop.Type = pred_type;
					prop.Key = predicate;
					prop.Value = field_value;
					hit.AddProperty (prop);

					found_matching_predicate = true;
				}
			}

			if (! found_matching_predicate) {
				// No matching predicate found
				// This means some unstored field matched the query
				// FIXME: Add a synthetic property #text
				hit.AddProperty (Property.New ("#text", field_value));
			}

			hits.Add (hit);
		} else if (predicate == "TextLinks") {
			// Special treatment: TextLinks is not stored but can be queried
			// NOTE(review): fields_timestamp_uri is not declared in this
			// method — presumably a class-level FieldSelector; verify.
			doc = primary_searcher.Doc (match_index, fields_timestamp_uri);
			Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
			if (field_value != null)
				hit.AddProperty (Property.New ("TextLinks", field_value));
			else {
				// No specific link value requested: pull all text
				// links for this Uri out of the text cache.
				foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
					hit.AddProperty (text_link_property);
			}
			hits.Add (hit);
		} else {
			// Predicate was specified (and is not TextLinks): load only
			// the selected fields and rewrite the matching property.
			doc = primary_searcher.Doc (match_index, fields);
			Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);
			foreach (Property prop in hit.Properties) {
				// NOTE(review): this overwrites the stored value with
				// field_value even when field_value is null — confirm
				// that nulling the property value here is intended.
				if (prop.Key == predicate)
					prop.Value = field_value;
			}

			hits.Add (hit);
		}
	}

	e.Stop ();

	if (Debug)
		Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);

	//
	// Finally, we clean up after ourselves.
	//

	f.Start ();
	CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);
	f.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers released in {1}", IndexName, f);

	total.Stop ();

	if (Debug) {
		Log.Debug ("###### {0}: Query time breakdown:", IndexName);
		Log.Debug ("###### {0}: Build queries {1,6} ({2:0.0}%)", IndexName, a, 100 * a.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Got readers {1,6} ({2:0.0}%)", IndexName, b, 100 * b.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Whitelists {1,6} ({2:0.0}%)", IndexName, c, 100 * c.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Queries {1,6} ({2:0.0}%)", IndexName, d, 100 * d.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Gen'd Results {1,6} ({2:0.0}%)", IndexName, e, 100 * e.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: Reader cleanup {1,6} ({2:0.0}%)", IndexName, f, 100 * f.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}: TOTAL {1,6}", IndexName, total);

		Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
	}

	return hits;
}