static private LNS.Query GetDateRangeQuery (QueryPart_DateRange part, out HitFilter hit_filter)
{
	string field_name;
	if (part.Key == QueryPart_DateRange.AllPropertiesKey)
		field_name = TypeToWildcardField (PropertyType.Date);
	else if (part.Key == QueryPart_DateRange.TimestampKey)
		field_name = "Timestamp";
	else
		field_name = PropertyToFieldName (PropertyType.Date, part.Key);

	// FIXME: We could optimize this and reduce the size of our range
	// queries if we actually knew the min and max dates that appear in
	// any properties in the index.  We would need to inspect the index to
	// determine that at start-up, and then track it as new documents
	// get added to the index.
	if (part.StartDate < lower_bound)
		part.StartDate = lower_bound;
	if (part.EndDate > upper_bound || part.EndDate == DateTime.MinValue)
		part.EndDate = upper_bound;

	// Swap the start and end dates if they come in reversed.
	if (part.StartDate > part.EndDate) {
		DateTime swap;
		swap = part.StartDate;
		part.StartDate = part.EndDate;
		part.EndDate = swap;
	}

	// Set up our hit filter to cull out the bad dates.
	DateRangeHitFilter drhf;
	drhf = new DateRangeHitFilter ();
	drhf.Key = part.Key;
	drhf.StartDate = part.StartDate;
	drhf.EndDate = part.EndDate;
	hit_filter = new HitFilter (drhf.HitFilter);

	Logger.Log.Debug ("Building new date range query");
	Logger.Log.Debug ("Start: {0}", part.StartDate);
	Logger.Log.Debug ("End: {0}", part.EndDate);

	int y1, m1, d1, y2, m2, d2;
	y1 = part.StartDate.Year;
	m1 = part.StartDate.Month;
	d1 = part.StartDate.Day;
	y2 = part.EndDate.Year;
	m2 = part.EndDate.Month;
	d2 = part.EndDate.Day;

	LNS.BooleanQuery top_level_query;
	top_level_query = new LNS.BooleanQuery ();

	// A special case: both the start and the end of our range fall
	// in the same month.
	if (y1 == y2 && m1 == m2) {
		LNS.Query ym_query;
		ym_query = NewYearMonthQuery (field_name, y1, m1);

		// If our range only covers a part of the month, do a range query on the days.
		if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
			LNS.BooleanQuery sub_query;
			sub_query = new LNS.BooleanQuery ();
			sub_query.Add (ym_query, LNS.BooleanClause.Occur.MUST);
			sub_query.Add (NewDayQuery (field_name, d1, d2), LNS.BooleanClause.Occur.MUST);
			top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
		} else {
			top_level_query.Add (ym_query, LNS.BooleanClause.Occur.SHOULD);
		}
	} else {
		// Handle a partial month at the beginning of our range.
		if (d1 > 1) {
			LNS.BooleanQuery sub_query;
			sub_query = new LNS.BooleanQuery ();
			sub_query.Add (NewYearMonthQuery (field_name, y1, m1), LNS.BooleanClause.Occur.MUST);
			sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), LNS.BooleanClause.Occur.MUST);
			top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);

			++m1;
			if (m1 == 13) {
				m1 = 1;
				++y1;
			}
		}

		// And likewise, handle a partial month at the end of our range.
		if (d2 < DateTime.DaysInMonth (y2, m2)) {
			LNS.BooleanQuery sub_query;
			sub_query = new LNS.BooleanQuery ();
			sub_query.Add (NewYearMonthQuery (field_name, y2, m2), LNS.BooleanClause.Occur.MUST);
			sub_query.Add (NewDayQuery (field_name, 1, d2), LNS.BooleanClause.Occur.MUST);
			top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);

			--m2;
			if (m2 == 0) {
				m2 = 12;
				--y2;
			}
		}

		// Generate the query for the "middle" of our period, if it is non-empty.
		if (y1 < y2 || ((y1 == y2) && m1 <= m2))
			top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
					     LNS.BooleanClause.Occur.SHOULD);
	}

	return top_level_query;
}
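// A worked example of the decomposition above (a hedged sketch, not part of
// the original source): the clause structure GetDateRangeQuery would build
// for the hypothetical range 5 May 2007 .. 20 Aug 2007.  It uses the same
// NewYearMonthQuery/NewDayQuery helpers the method itself calls.
static LNS.Query ExampleDateRangeDecomposition (string field_name)
{
	LNS.BooleanQuery q = new LNS.BooleanQuery ();

	// Partial month at the start: May 2007, days 5..31.
	LNS.BooleanQuery head = new LNS.BooleanQuery ();
	head.Add (NewYearMonthQuery (field_name, 2007, 5), LNS.BooleanClause.Occur.MUST);
	head.Add (NewDayQuery (field_name, 5, 31), LNS.BooleanClause.Occur.MUST);
	q.Add (head, LNS.BooleanClause.Occur.SHOULD);

	// Whole months in the middle: June and July 2007.
	q.Add (NewYearMonthQuery (field_name, 2007, 6, 2007, 7), LNS.BooleanClause.Occur.SHOULD);

	// Partial month at the end: August 2007, days 1..20.
	LNS.BooleanQuery tail = new LNS.BooleanQuery ();
	tail.Add (NewYearMonthQuery (field_name, 2007, 8), LNS.BooleanClause.Occur.MUST);
	tail.Add (NewDayQuery (field_name, 1, 20), LNS.BooleanClause.Occur.MUST);
	q.Add (tail, LNS.BooleanClause.Occur.SHOULD);

	return q;
}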
static protected void QueryPartToQuery (QueryPart abstract_part,
					bool only_build_primary_query,
					ArrayList term_list,
					QueryPartHook query_part_hook,
					out LNS.Query primary_query,
					out LNS.Query secondary_query,
					out HitFilter hit_filter)
{
	primary_query = null;
	secondary_query = null;

	// By default, we assume that our Lucene queries will return exactly the
	// matching set of objects.  We need to set the hit filter if further
	// refinement of the search results is required.  (As in the case of
	// date range queries, for example.)  We essentially have to do this
	// to make OR queries work correctly.
	hit_filter = true_hit_filter;

	if (abstract_part == null)
		return;

	// The exception is when dealing with a prohibited part.  Just return
	// null for the hit filter in that case.  This works since
	// prohibited parts are not allowed inside of OR queries.
	if (abstract_part.Logic == QueryPartLogic.Prohibited)
		hit_filter = null;

	// Run the backend hook first.
	// This gives the backend a chance to modify or create new queries
	// based on backend-specific properties.
	if (query_part_hook != null)
		abstract_part = query_part_hook (abstract_part);

	if (abstract_part == null)
		return;

	if (abstract_part is QueryPart_Text) {
		QueryPart_Text part = (QueryPart_Text) abstract_part;

		if (! (part.SearchFullText || part.SearchTextProperties))
			return;

		LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
		LNS.BooleanQuery s_query = new LNS.BooleanQuery ();

		bool added_subquery = false;

		if (part.SearchFullText) {
			LNS.Query subquery;
			subquery = StringToQuery ("Text", part.Text, term_list);
			if (subquery != null) {
				p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
				added_subquery = true;
			}

			// FIXME: HotText is ignored for now!
			// subquery = StringToQuery ("HotText", part.Text);
			// if (subquery != null) {
			//	p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
			//	added_subquery = true;
			// }
		}

		if (part.SearchTextProperties) {
			LNS.Query subquery;
			subquery = StringToQuery ("PropertyText", part.Text, term_list);
			if (subquery != null) {
				p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
				// Properties can live in either index
				if (! only_build_primary_query)
					s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
				added_subquery = true;
			}

			// The "added_subquery" check is to handle the situation where
			// part of the text is a stop word.  Normally, a search for
			// "hello world" would break down into this query:
			//
			// (Text:hello OR PropertyText:hello OR PropertyKeyword:hello)
			// AND (Text:world OR PropertyText:world OR PropertyKeyword:world)
			//
			// This fails with stop words, though.  Let's assume that "world"
			// is a stop word.  You would end up with:
			//
			// (Text:hello OR PropertyText:hello OR PropertyKeyword:hello)
			// AND (PropertyKeyword:world)
			//
			// Which is not what we want.  We'd want to match documents that
			// had only "hello" without also having a keyword "world".  In
			// this case, don't create the PropertyKeyword part of the query,
			// since it would be included in the larger set if it weren't
			// required anyway.
			if (added_subquery) {
				Term term;
				term = new Term ("PropertyKeyword", part.Text.ToLower ()); // make sure text is lowercased

				// FIXME: Terms are already added to term_list, but they may
				// have been tokenized.  The term here is the non-tokenized
				// version.  Should this be added to term_list?
				// term_list is used to calculate scores.
				if (term_list != null)
					term_list.Add (term);

				subquery = new LNS.TermQuery (term);
				p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
				// Properties can live in either index
				if (! only_build_primary_query)
					s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
			} else {
				// Reset these so we return a null query
				p_query = null;
				s_query = null;
			}
		}

		primary_query = p_query;
		if (! only_build_primary_query)
			secondary_query = s_query;

		return;
	}

	if (abstract_part is QueryPart_Wildcard) {
		QueryPart_Wildcard part = (QueryPart_Wildcard) abstract_part;

		LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
		LNS.BooleanQuery s_query = new LNS.BooleanQuery ();

		Term term;
		LNS.Query subquery;

		// Lower case the terms for searching
		string query_string_lower = part.QueryString.ToLower ();

		// Search text content
		if (! part.PropertyOnly) {
			term = new Term ("Text", query_string_lower);
			subquery = new LNS.WildcardQuery (term);
			p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
			term_list.Add (term);
		}

		// Search text properties
		term = new Term ("PropertyText", query_string_lower);
		subquery = new LNS.WildcardQuery (term);
		p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
		// Properties can live in either index
		if (! only_build_primary_query)
			s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
		term_list.Add (term);

		if (! part.PropertyOnly) {
			// Search property keywords
			term = new Term ("PropertyKeyword", query_string_lower);
			term_list.Add (term);
			subquery = new LNS.WildcardQuery (term);
			p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
			// Properties can live in either index
			if (! only_build_primary_query)
				s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
		}

		primary_query = p_query;
		if (! only_build_primary_query)
			secondary_query = s_query;

		return;
	}

	if (abstract_part is QueryPart_DateRange) {
		QueryPart_DateRange part = (QueryPart_DateRange) abstract_part;

		// FIXME: We don't handle prohibited queries with sub-date
		// accuracy.  For example, if we say we prohibit matches
		// between 5 May 2007 at 2 PM and 8 May at 5 AM, we'll
		// miss any matches that happen between midnight and 2 PM
		// on 5 May 2007 and between midnight and 5 AM on 8 May.
		primary_query = GetDateRangeQuery (part, out hit_filter);
		// Date properties can live in either index
		if (! only_build_primary_query && primary_query != null)
			secondary_query = primary_query.Clone () as LNS.Query;

		return;
	}

	if (abstract_part is QueryPart_Or) {
		QueryPart_Or part = (QueryPart_Or) abstract_part;

		// Assemble a new BooleanQuery combining all of the sub-parts.
		LNS.BooleanQuery p_query;
		p_query = new LNS.BooleanQuery ();

		LNS.BooleanQuery s_query = null;
		if (! only_build_primary_query)
			s_query = new LNS.BooleanQuery ();

		primary_query = p_query;
		secondary_query = s_query;

		OrHitFilter or_hit_filter = null;

		foreach (QueryPart sub_part in part.SubParts) {
			LNS.Query p_subq, s_subq;
			HitFilter sub_hit_filter; // FIXME: This is (and must be) ignored

			// FIXME: Any subpart in an OR which has a hit filter won't work
			// correctly, because we can't tell which part of an OR we matched
			// against to filter correctly.  This affects date range queries.
			QueryPartToQuery (sub_part, only_build_primary_query,
					  term_list, query_part_hook,
					  out p_subq, out s_subq, out sub_hit_filter);
			if (p_subq != null)
				p_query.Add (p_subq, LNS.BooleanClause.Occur.SHOULD);
			if (s_subq != null)
				s_query.Add (s_subq, LNS.BooleanClause.Occur.SHOULD);
			if (sub_hit_filter != null) {
				if (or_hit_filter == null)
					or_hit_filter = new OrHitFilter ();
				or_hit_filter.Add (sub_hit_filter);
			}
		}

		if (or_hit_filter != null)
			hit_filter = new HitFilter (or_hit_filter.HitFilter);

		return;
	}

	if (abstract_part is QueryPart_Uri) {
		QueryPart_Uri part = (QueryPart_Uri) abstract_part;

		// Do a term query on the Uri field.
		// This is probably less efficient than using a TermEnum,
		// but it is required for the query API, where the uri query
		// can be part of a prohibited query or a boolean OR query.
		Term term;
		term = new Term ("Uri", UriFu.UriToEscapedString (part.Uri));
		if (term_list != null)
			term_list.Add (term);
		primary_query = new LNS.TermQuery (term);

		// Query only the primary index
		return;
	}

	if (abstract_part is QueryPart_Property) {
		QueryPart_Property part = (QueryPart_Property) abstract_part;

		string field_name;
		if (part.Key == QueryPart_Property.AllProperties)
			field_name = TypeToWildcardField (part.Type);
		else
			field_name = PropertyToFieldName (part.Type, part.Key);

		// Details of the conversion here depend on BeagrepAnalyzer.TokenStream.
		if (part.Type == PropertyType.Text)
			primary_query = StringToQuery (field_name, part.Value, term_list);
		else {
			Term term;

			// FIXME: Handle date queries for other date fields
			if (part.Type == PropertyType.Internal ||
			    field_name.StartsWith ("prop:k:" + Property.PrivateNamespace))
				term = new Term (field_name, part.Value);
			else
				term = new Term (field_name, part.Value.ToLower ());

			if (term_list != null)
				term_list.Add (term);

			primary_query = new LNS.TermQuery (term);
		}

		// Properties can live in either index
		if (! only_build_primary_query && primary_query != null)
			secondary_query = primary_query.Clone () as LNS.Query;

		return;
	}

	throw new Exception ("Unhandled QueryPart type! " + abstract_part.ToString ());
}
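// A minimal usage sketch (hedged; not from the original source): building
// the primary/secondary Lucene queries for a single free-text part.  The
// part contents here are hypothetical.
static void ExampleTextPartToQuery ()
{
	QueryPart_Text part = new QueryPart_Text ();
	part.Text = "hello";
	part.SearchFullText = true;
	part.SearchTextProperties = true;

	ArrayList term_list = new ArrayList ();
	LNS.Query primary, secondary;
	HitFilter filter;

	QueryPartToQuery (part, false, term_list, null,
			  out primary, out secondary, out filter);

	// primary is roughly:
	//   Text:hello OR PropertyText:hello OR PropertyKeyword:hello
	// secondary holds only the property clauses, since full text
	// lives only in the primary index.
}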
public void Add (HitFilter hit_filter)
{
	all.Add (hit_filter);
}
public NotHitFilter (HitFilter original)
{
	this.original = original;
}
public void Add (HitFilter hit_filter)
{
	if (hit_filter == true_hit_filter)
		contains_known_true = true;
	all.Add (hit_filter);
}
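// The filter evaluation bodies are not part of this excerpt.  A hedged
// sketch of how OrHitFilter.HitFilter presumably evaluates, given the Add
// above: any accepting member accepts the hit, and a known-true member
// short-circuits everything.  (Assumed semantics, not the verified
// implementation; NotHitFilter would likewise just invert its wrapped
// filter.)
public bool HitFilter (Hit hit)
{
	if (contains_known_true)
		return true;
	foreach (HitFilter hf in all)
		if (hf (hit))
			return true;
	return false;
}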
////////////////////////////////////////////////////////////////

// search_subset_uris is a list of Uris that this search should be
// limited to.
public void DoQuery (Query query, IQueryResult result,
		     ICollection search_subset_uris, // should be internal uris
		     QueryPartHook query_part_hook,
		     HitFilter hit_filter)
{
	if (Debug)
		Logger.Log.Debug ("###### {0}: Starting low-level queries", IndexName);

	Stopwatch total, a, b, c, d, e, f;

	total = new Stopwatch ();
	a = new Stopwatch ();
	b = new Stopwatch ();
	c = new Stopwatch ();
	d = new Stopwatch ();
	e = new Stopwatch ();
	f = new Stopwatch ();

	total.Start ();

	a.Start ();

	ArrayList primary_required_part_queries;
	ArrayList secondary_required_part_queries;

	LNS.BooleanQuery primary_prohibited_part_query;
	LNS.BooleanQuery secondary_prohibited_part_query;

	AndHitFilter all_hit_filters;

	ArrayList term_list;

	// Assemble all of the parts into a bunch of Lucene queries
	term_list = AssembleQuery (query, query_part_hook, hit_filter,
				   out primary_required_part_queries,
				   out secondary_required_part_queries,
				   out primary_prohibited_part_query,
				   out secondary_prohibited_part_query,
				   out all_hit_filters);

	a.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);

	// If we have no required parts, give up.
	if (primary_required_part_queries == null)
		return;

	b.Start ();

	//
	// Now that we have all of these nice queries, let's execute them!
	//

	IndexReader primary_reader;
	LNS.IndexSearcher primary_searcher;
	IndexReader secondary_reader;
	LNS.IndexSearcher secondary_searcher;

	// Create the searchers that we will need.
	if (! BuildSearchers (out primary_reader, out primary_searcher,
			      out secondary_reader, out secondary_searcher))
		return;

	b.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);

	// Build whitelists and blacklists for search subsets.
	c.Start ();

	// Possibly create our whitelists from the search subset.
	LuceneBitArray primary_whitelist, secondary_whitelist;
	CreateQueryWhitelists (search_subset_uris,
			       primary_searcher, secondary_searcher,
			       primary_prohibited_part_query,
			       secondary_prohibited_part_query,
			       out primary_whitelist, out secondary_whitelist);

	c.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Whitelists and blacklists built in {1}", IndexName, c);

	// Now run the low level queries against our indexes.
	d.Start ();

	BetterBitArray primary_matches = null;

	if (primary_required_part_queries != null) {
		if (secondary_searcher != null)
			primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
								      secondary_searcher,
								      primary_required_part_queries,
								      secondary_required_part_queries,
								      primary_whitelist,
								      secondary_whitelist);
		else
			primary_matches = DoRequiredQueries (primary_searcher,
							     primary_required_part_queries,
							     primary_whitelist);
	}

	d.Stop ();
	if (Debug)
		Logger.Log.Debug ("###### {0}: Low-level queries finished in {1}", IndexName, d);

	e.Start ();

	// Only generate results if we got some matches
	if (primary_matches != null && primary_matches.ContainsTrue ()) {
		GenerateQueryResults (primary_reader, secondary_reader,
				      primary_matches, result, term_list,
				      query.MaxHits,
				      new HitFilter (all_hit_filters.HitFilter),
				      IndexName);
	}

	e.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);

	//
	// Finally, we clean up after ourselves.
	//

	f.Start ();
	CloseSearchers (primary_reader, primary_searcher,
			secondary_reader, secondary_searcher);
	f.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers released in {1}", IndexName, f);

	total.Stop ();
	if (Debug) {
		Log.Debug ("###### {0}: Query time breakdown:", IndexName);
		Log.Debug ("###### {0}:   Build queries  {1,6} ({2:0.0}%)", IndexName, a, 100 * a.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Got readers    {1,6} ({2:0.0}%)", IndexName, b, 100 * b.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Whitelists     {1,6} ({2:0.0}%)", IndexName, c, 100 * c.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Queries        {1,6} ({2:0.0}%)", IndexName, d, 100 * d.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Gen'd Results  {1,6} ({2:0.0}%)", IndexName, e, 100 * e.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Reader cleanup {1,6} ({2:0.0}%)", IndexName, f, 100 * f.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   TOTAL          {1,6}", IndexName, total);

		Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
	}
}
private static ArrayList FindRecentResults (IndexReader primary_reader,
					    IndexReader secondary_reader,
					    BetterBitArray primary_matches,
					    Dictionary<int, Hit> hits_by_id,
					    int max_results,
					    ref int total_number_of_matches,
					    HitFilter hit_filter,
					    string index_name)
{
	Stopwatch b = new Stopwatch ();
	b.Start ();

	int count = 0;
	Document doc;

	ArrayList all_docs = null;
	TopScores top_docs = null;
	TermDocs term_docs = null;

	if (primary_matches.TrueCount > max_results)
		top_docs = new TopScores (max_results);
	else
		all_docs = new ArrayList (primary_matches.TrueCount);

	if (secondary_reader != null)
		term_docs = secondary_reader.TermDocs ();

	for (int match_index = primary_matches.Count; ; match_index --) {
		// Walk across the matches backwards, since newer
		// documents are more likely to be at the end of
		// the index.
		match_index = primary_matches.GetPreviousTrueIndex (match_index);
		if (match_index < 0)
			break;

		count++;

		doc = primary_reader.Document (match_index, fields_timestamp_uri);

		// Check the timestamp: if we have already reached our
		// limit, we might be able to reject it immediately.
		string timestamp_str;
		long timestamp_num = 0;

		timestamp_str = doc.Get ("Timestamp");
		if (timestamp_str == null) {
			Logger.Log.Warn ("No timestamp on {0}!", GetUriFromDocument (doc));
		} else {
			timestamp_num = Int64.Parse (timestamp_str);
			if (top_docs != null && ! top_docs.WillAccept (timestamp_num))
				continue;
		}

		// Get the actual hit now.
		// doc was created with only two fields, so first get the
		// complete Lucene document for the primary document.
		// Also run our hit_filter now, if we have one.  Since we
		// insist on returning the max_results most recent hits,
		// any filtering should happen now and not later.
		Hit hit = CreateHit (primary_reader.Document (match_index), secondary_reader, term_docs);
		if (hit_filter != null && ! hit_filter (hit)) {
			if (Debug)
				Log.Debug ("Filtered out {0}", hit.Uri);
			total_number_of_matches --;
			continue;
		}

		hits_by_id [match_index] = hit;

		// Add the document to the appropriate data structure.
		// We use the timestamp_num as the score, so high
		// scores correspond to more-recent timestamps.
		if (all_docs != null)
			all_docs.Add (hit);
		else
			top_docs.Add (timestamp_num, hit);
	}

	if (term_docs != null)
		term_docs.Close ();

	b.Stop ();

	if (Debug)
		Log.Debug (">>> {0}: Instantiated and scanned {1} documents in {2}", index_name, count, b);

	if (all_docs != null) {
		// Sort results before sending
		all_docs.Sort ();
		return all_docs;
	} else {
		return top_docs.TopScoringObjects;
	}
}
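// TopScores itself is not shown in this excerpt.  A minimal sketch of the
// behavior FindRecentResults relies on, assuming a simple bounded collection
// kept sorted in descending score order (the real implementation likely
// differs):
class TopScoresSketch {
	private long [] scores;
	private object [] objs;
	private int count = 0;

	public TopScoresSketch (int capacity)
	{
		scores = new long [capacity];
		objs = new object [capacity];
	}

	// True if an entry with this score could still make the cut.
	public bool WillAccept (long score)
	{
		return count < scores.Length || score > scores [count - 1];
	}

	public void Add (long score, object obj)
	{
		if (! WillAccept (score))
			return;
		if (count < scores.Length)
			count++;
		// Shift lower-scored entries down, dropping the lowest
		// when the collection is already full.
		int i = count - 1;
		while (i > 0 && scores [i - 1] < score) {
			scores [i] = scores [i - 1];
			objs [i] = objs [i - 1];
			i--;
		}
		scores [i] = score;
		objs [i] = obj;
	}

	public ArrayList TopScoringObjects {
		get {
			ArrayList list = new ArrayList (count);
			for (int i = 0; i < count; i++)
				list.Add (objs [i]);
			return list;
		}
	}
}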
////////////////////////////////////////////////////////////////

// Returns the list of terms in the query
private ArrayList AssembleQuery (Query query,
				 QueryPartHook query_part_hook,
				 HitFilter hit_filter,
				 out ArrayList primary_required_part_queries,
				 out ArrayList secondary_required_part_queries,
				 out LNS.BooleanQuery primary_prohibited_part_query,
				 out LNS.BooleanQuery secondary_prohibited_part_query,
				 out AndHitFilter all_hit_filters)
{
	primary_required_part_queries = null;
	secondary_required_part_queries = null;
	primary_prohibited_part_query = null;
	secondary_prohibited_part_query = null;

	all_hit_filters = new AndHitFilter ();
	if (hit_filter != null)
		all_hit_filters.Add (hit_filter);

	ArrayList term_list = new ArrayList ();

	foreach (QueryPart part in query.Parts) {
		LNS.Query primary_part_query;
		LNS.Query secondary_part_query;
		HitFilter part_hit_filter;

		QueryPartToQuery (part, false, // we want both primary and secondary queries
				  part.Logic == QueryPartLogic.Required ? term_list : null,
				  query_part_hook,
				  out primary_part_query,
				  out secondary_part_query,
				  out part_hit_filter);

		if (primary_part_query == null)
			continue;

		switch (part.Logic) {
		case QueryPartLogic.Required:
			if (primary_required_part_queries == null) {
				primary_required_part_queries = new ArrayList ();
				secondary_required_part_queries = new ArrayList ();
			}
			primary_required_part_queries.Add (primary_part_query);
			secondary_required_part_queries.Add (secondary_part_query);

			if (part_hit_filter != null)
				all_hit_filters.Add (part_hit_filter);

			break;

		case QueryPartLogic.Prohibited:
			if (primary_prohibited_part_query == null)
				primary_prohibited_part_query = new LNS.BooleanQuery ();
			primary_prohibited_part_query.Add (primary_part_query, LNS.BooleanClause.Occur.SHOULD);

			if (secondary_part_query != null) {
				if (secondary_prohibited_part_query == null)
					secondary_prohibited_part_query = new LNS.BooleanQuery ();
				secondary_prohibited_part_query.Add (secondary_part_query, LNS.BooleanClause.Occur.SHOULD);
			}

			if (part_hit_filter != null) {
				NotHitFilter nhf;
				nhf = new NotHitFilter (part_hit_filter);
				all_hit_filters.Add (new HitFilter (nhf.HitFilter));
			}

			break;
		}
	}

	return term_list;
}
// There are two ways we can determine the max_results
// most recent items:
//
// One is to instantiate Lucene documents for each of
// the document IDs in primary_matches.  This is a
// fairly expensive operation.
//
// The other is to walk through the list of all
// document IDs in descending time order.  This is
// a less expensive operation, but adds up over time
// on large data sets.
//
// We can walk about 2.5 docs for every Document we
// instantiate.  So what we'll do, if we have more
// matches than available hits, is walk (m * 1.25)
// docs to see if we can fill out the top 100 hits.
// If not, we'll fall back to creating documents
// for all of them.

private static ArrayList ScanRecentDocs (IndexReader primary_reader,
					 IndexReader secondary_reader,
					 BetterBitArray primary_matches,
					 Dictionary<int, Hit> hits_by_id,
					 int max_results,
					 ref int total_number_of_matches,
					 HitFilter hit_filter,
					 string index_name)
{
	Stopwatch a = new Stopwatch ();
	a.Start ();

	TermDocs docs = primary_reader.TermDocs ();
	TermEnum enumerator = primary_reader.Terms (new Term ("InvertedTimestamp", String.Empty));
	ArrayList results = new ArrayList (max_results);
	int docs_found = 0;
	int docs_walked = 0;
	int hit_filter_removed = 0;
	int max_docs = (int) (primary_matches.TrueCount * 1.25);

	Term term;
	TermDocs secondary_term_docs = null;
	if (secondary_reader != null)
		secondary_term_docs = secondary_reader.TermDocs ();

	do {
		term = enumerator.Term ();

		if (term.Field () != "InvertedTimestamp")
			break;

		docs.Seek (enumerator);

		while (docs.Next ()
		       && docs_found < max_results
		       && docs_walked < max_docs) {
			int doc_id = docs.Doc ();

			if (primary_matches.Get (doc_id)) {
				Document doc = primary_reader.Document (doc_id);
				Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs);

				// If we have a HitFilter, apply it.
				if (hit_filter != null && ! hit_filter (hit)) {
					if (Debug)
						Log.Debug ("Filtered out {0}", hit.Uri);
					hit_filter_removed ++;
					continue;
				}
				hits_by_id [doc_id] = hit;
				// Add the result, last modified first
				results.Add (hit);
				docs_found++;
			}

			docs_walked++;
		}
	} while (enumerator.Next ()
		 && docs_found < max_results
		 && docs_walked < max_docs);

	docs.Close ();
	enumerator.Close ();
	if (secondary_term_docs != null)
		secondary_term_docs.Close ();

	// If we've found all the docs we can return in a subset,
	// fantastic: we've probably short-circuited a slow search.
	if (docs_found != max_results) {
		// Bad luck!  Not all docs were found.
		// Start afresh, this time traversing all results.
		results = null;
	} else {
		// Adjust total_number_of_matches.  We need to do this to avoid scenarios like the
		// following: max_hits = 100.  Matched 100 results.  But the hit filter removed 30,
		// so 70 results will be returned.  We want to avoid saying "Showing top 70 of 100".
		// Note that since we are not passing every document in the index through the
		// hit_filter, when we say "Showing top 100 of 1234", the 1234 could actually be
		// much less.  But since max_hits was 100, that will not mislead the user.
		total_number_of_matches -= hit_filter_removed;
	}

	a.Stop ();
	if (Debug) {
		Log.Debug (">>> {0}: Walked {1} items, populated an enum with {2} items in {3}", index_name, docs_walked, docs_found, a);

		if (docs_found == max_results)
			Log.Debug (">>> {0}: Successfully short circuited timestamp ordering!", index_name);
	}

	return results;
}
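// A worked example of the walk budget above (hedged; the match count is
// hypothetical).  The 1.25 multiplier appears chosen so that the walk costs
// about half as much as instantiating a Document for every match, given the
// ~2.5 walked docs per instantiated Document noted in the header comment.
static int ExampleWalkBudget ()
{
	int true_count = 10000;                   // primary_matches.TrueCount
	int max_docs = (int) (true_count * 1.25); // walk at most 12,500 postings
	// If max_results hits are not found within max_docs walked docs,
	// ScanRecentDocs returns null and FindRecentResults runs instead.
	return max_docs;
}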
private static void GenerateQueryResults (IndexReader primary_reader,
					  IndexReader secondary_reader,
					  BetterBitArray primary_matches,
					  IQueryResult result,
					  ICollection query_term_list,
					  int max_results,
					  HitFilter hit_filter,
					  string index_name)
{
	int num_hits;

	if (Debug)
		Logger.Log.Debug (">>> {0}: Initially handed {1} matches", index_name, primary_matches.TrueCount);

	if (primary_matches.TrueCount <= max_results) {
		if (Debug)
			Logger.Log.Debug (">>> {0}: Initial count is within our limit of {1}", index_name, max_results);
		num_hits = primary_matches.TrueCount;
	} else {
		if (Debug)
			Logger.Log.Debug (">>> {0}: Number of hits is capped at {1}", index_name, max_results);
		num_hits = max_results;
	}

	Stopwatch total, d, e;
	total = new Stopwatch ();
	d = new Stopwatch ();
	e = new Stopwatch ();

	total.Start ();

	ArrayList final_list_of_hits = null;

	// This is used only for scoring
	Dictionary<int, Hit> hits_by_id = new Dictionary<int, Hit> (num_hits);

	int total_number_of_matches = primary_matches.TrueCount;

	if (primary_matches.TrueCount > max_results)
		final_list_of_hits = ScanRecentDocs (primary_reader,
						     secondary_reader,
						     primary_matches,
						     hits_by_id,
						     max_results,
						     ref total_number_of_matches,
						     hit_filter,
						     index_name);

	if (final_list_of_hits == null)
		final_list_of_hits = FindRecentResults (primary_reader,
							secondary_reader,
							primary_matches,
							hits_by_id,
							max_results,
							ref total_number_of_matches,
							hit_filter,
							index_name);

	d.Start ();

	ScoreHits (hits_by_id, primary_reader, query_term_list);
	hits_by_id = null;

	d.Stop ();

	if (Debug)
		Log.Debug (">>> {0}: Scored hits in {1}", index_name, d);

	e.Start ();

	// 25 hits seems to be the sweet spot: anything lower and
	// serialization overhead gets us; anything higher takes
	// longer to send out.
	const int MAX_QUEUED_HITS = 25;

	// Break up the hits into reasonably sized chunks for
	// sending over the wire.
	for (int i = 0; i < final_list_of_hits.Count; ++i) {
		// Flush our hits
		if (i > 0 && i % MAX_QUEUED_HITS == 0) {
			result.Add (final_list_of_hits.GetRange (0, MAX_QUEUED_HITS));
			final_list_of_hits.RemoveRange (0, MAX_QUEUED_HITS);
			i -= MAX_QUEUED_HITS;
		}
	}

	// Flush the remaining hits
	result.Add (final_list_of_hits, total_number_of_matches);
	final_list_of_hits = null;

	e.Stop ();

	if (Debug)
		Log.Debug (">>> {0}: Hit filters executed and results sent in {1}", index_name, e);

	total.Stop ();
	if (Debug) {
		Logger.Log.Debug (">>> {0}: GenerateQueryResults time statistics:", index_name);
		//Logger.Log.Debug (">>> {0}:   Short circuit  {1,6} ({2:0.0}%)", index_name, a == null ? "N/A" : a.ToString (), a == null ? 0.0 : 100 * a.ElapsedTime / total.ElapsedTime);
		//Logger.Log.Debug (">>> {0}:   Create docs    {1,6} ({2:0.0}%)", index_name, b, 100 * b.ElapsedTime / total.ElapsedTime);
		//Logger.Log.Debug (">>> {0}:   Hit assembly   {1,6} ({2:0.0}%)", index_name, c, 100 * c.ElapsedTime / total.ElapsedTime);
		Logger.Log.Debug (">>> {0}:   Scored hits    {1,6} ({2:0.0}%)", index_name, d, 100 * d.ElapsedTime / total.ElapsedTime);
		Logger.Log.Debug (">>> {0}:   Results sent   {1,6} ({2:0.0}%)", index_name, e, 100 * e.ElapsedTime / total.ElapsedTime);
		Logger.Log.Debug (">>> {0}:   TOTAL          {1,6}", index_name, total);
	}
}
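// The flush loop above mutates the list while iterating, which works but is
// easy to misread.  An equivalent, more direct formulation of the same
// chunking (a hedged sketch, using the same result.Add overloads as above):
static void ExampleFlushHits (IQueryResult result, ArrayList final_list_of_hits, int total_number_of_matches)
{
	const int MAX_QUEUED_HITS = 25;

	// Send full chunks of MAX_QUEUED_HITS hits...
	while (final_list_of_hits.Count > MAX_QUEUED_HITS) {
		result.Add (final_list_of_hits.GetRange (0, MAX_QUEUED_HITS));
		final_list_of_hits.RemoveRange (0, MAX_QUEUED_HITS);
	}

	// ...then flush whatever remains along with the match count.
	result.Add (final_list_of_hits, total_number_of_matches);
}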