Exemplo n.º 1
0
                // search_subset_uris is a list of Uris that this search should be
                // limited to.
                // Translates one abstract QueryPart into the Lucene queries that
                // implement it.
                //
                //   abstract_part            - the part to translate.  A null part produces
                //                              null queries and the default hit filter.
                //   only_build_primary_query - when true, secondary_query is always null.
                //   term_list                - optional (may be null).  Every Term created
                //                              here is appended to it, and it is handed on
                //                              to StringToQuery.
                //   query_part_hook          - optional backend hook, run before translation.
                //                              It may return a replacement part, or null to
                //                              drop the part entirely.
                //   primary_query            - resulting query for the primary index, or
                //                              null if there is nothing to search for.
                //   secondary_query          - resulting query for the secondary index, or
                //                              null.  Uri parts never produce one.
                //   hit_filter               - true_hit_filter by default; null for
                //                              prohibited parts; whatever GetDateRangeQuery
                //                              yields for date ranges; a combined OrHitFilter
                //                              for OR parts.
                //
                // Throws Exception if the part is of a QueryPart type that is not
                // handled below.
                static protected void QueryPartToQuery (QueryPart     abstract_part,
                                                        bool          only_build_primary_query,
                                                        ArrayList     term_list,
                                                        QueryPartHook query_part_hook,
                                                        out LNS.Query primary_query,
                                                        out LNS.Query secondary_query,
                                                        out HitFilter hit_filter)
                {
                        primary_query = null;
                        secondary_query = null;

                        // By default, we assume that our lucene queries will return exactly the
                        // matching set of objects.  We need to set the hit filter if further
                        // refinement of the search results is required.  (As in the case of
                        // date range queries, for example.)  We essentially have to do this
                        // to make OR queries work correctly.
                        hit_filter = true_hit_filter;

                        // Nothing to translate.  This check must come before any member
                        // access on abstract_part, otherwise a null part would throw a
                        // NullReferenceException instead of returning null queries.
                        if (abstract_part == null)
                                return;

                        // The exception is when dealing with a prohibited part.  Just return
                        // null for the hit filter in that case.  This works since
                        // prohibited parts are not allowed inside of OR queries.
                        if (abstract_part.Logic == QueryPartLogic.Prohibited)
                                hit_filter = null;

                        // Run the backend hook first.
                        // This gives the backend a chance to modify the part or create
                        // new queries based on backend specific properties.
                        if (query_part_hook != null)
                                abstract_part = query_part_hook (abstract_part);

                        // The hook is allowed to drop the part.
                        if (abstract_part == null)
                                return;

                        if (abstract_part is QueryPart_Text) {
                                QueryPart_Text part = (QueryPart_Text) abstract_part;

                                if (! (part.SearchFullText || part.SearchTextProperties))
                                        return;

                                LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
                                LNS.BooleanQuery s_query = new LNS.BooleanQuery ();

                                bool added_subquery = false;

                                if (part.SearchFullText) {
                                        LNS.Query subquery;
                                        subquery = StringToQuery ("Text", part.Text, term_list);
                                        if (subquery != null) {
                                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
                                                added_subquery = true;
                                        }

                                        // FIXME: HotText is ignored for now!
                                        // subquery = StringToQuery ("HotText", part.Text);
                                        // if (subquery != null) {
                                        //    p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
                                        //    added_subquery = true;
                                        // }
                                }

                                if (part.SearchTextProperties) {
                                        LNS.Query subquery;
                                        subquery = StringToQuery ("PropertyText", part.Text, term_list);
                                        if (subquery != null) {
                                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
                                                // Properties can live in either index
                                                if (! only_build_primary_query)
                                                        s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
                                                added_subquery = true;
                                        }

                                        // The "added_subquery" check is to handle the situation where
                                        // a part of the text is a stop word.  Normally, a search for
                                        // "hello world" would break down into this query:
                                        //
                                        // (Text:hello OR PropertyText:hello OR PropertyKeyword:hello)
                                        // AND (Text:world OR PropertyText:world OR PropertyKeyword:world)
                                        //
                                        // This fails with stop words, though.  Let's assume that "world"
                                        // is a stop word.  You would end up with:
                                        //
                                        // (Text:hello OR PropertyText:hello OR PropertyKeyword:hello)
                                        // AND (PropertyKeyword:world)
                                        //
                                        // Which is not what we want.  We'd want to match documents that
                                        // had only "hello" without also having a keyword "world".  In
                                        // this case, don't create the PropertyKeyword part of the query,
                                        // since it would be included in the larger set if it weren't
                                        // required anyway.
                                        if (added_subquery) {
                                                Term term;
                                                term = new Term ("PropertyKeyword", part.Text.ToLower ()); // make sure text is lowercased
                                                // FIXME: terms are already added in term_list. But they may have been tokenized
                                                // The term here is non-tokenized version. Should this be added to term_list ?
                                                // term_list is used to calculate scores
                                                if (term_list != null)
                                                        term_list.Add (term);
                                                subquery = new LNS.TermQuery (term);
                                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
                                                // Properties can live in either index
                                                if (! only_build_primary_query)
                                                        s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
                                        } else {
                                                // Reset these so we return a null query
                                                p_query = null;
                                                s_query = null;
                                        }
                                }

                                // NOTE(review): when only SearchFullText is set and StringToQuery
                                // produced nothing, p_query is an empty BooleanQuery here rather
                                // than null.  Left as-is because callers are not visible from
                                // this method -- confirm whether null would be preferable.
                                primary_query = p_query;
                                if (! only_build_primary_query)
                                        secondary_query = s_query;

                                return;
                        }

                        if (abstract_part is QueryPart_Wildcard) {
                                QueryPart_Wildcard part = (QueryPart_Wildcard) abstract_part;

                                LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
                                LNS.BooleanQuery s_query = new LNS.BooleanQuery ();

                                Term term;
                                LNS.Query subquery;

                                // Lower case the terms for searching
                                string query_string_lower = part.QueryString.ToLower ();

                                // Search text content
                                if (! part.PropertyOnly) {
                                        term = new Term ("Text", query_string_lower);
                                        subquery = new LNS.WildcardQuery (term);
                                        p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
                                        // term_list is optional, as in the other branches
                                        if (term_list != null)
                                                term_list.Add (term);
                                }

                                // Search text properties
                                term = new Term ("PropertyText", query_string_lower);
                                subquery = new LNS.WildcardQuery (term);
                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
                                // Properties can live in either index
                                if (! only_build_primary_query)
                                        s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
                                if (term_list != null)
                                        term_list.Add (term);

                                if (! part.PropertyOnly) {
                                        // Search property keywords
                                        term = new Term ("PropertyKeyword", query_string_lower);
                                        if (term_list != null)
                                                term_list.Add (term);
                                        subquery = new LNS.WildcardQuery (term);
                                        p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
                                        // Properties can live in either index
                                        if (! only_build_primary_query)
                                                s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
                                }

                                primary_query = p_query;
                                if (! only_build_primary_query)
                                        secondary_query = s_query;

                                return;
                        }

                        if (abstract_part is QueryPart_DateRange) {

                                QueryPart_DateRange part = (QueryPart_DateRange) abstract_part;

                                // FIXME: We don't handle prohibited queries with sub-date
                                // accuracy.  For example, if we say we prohibit matches
                                // between 5 May 2007 at 2 PM and 8 May at 5 AM, we'll
                                // miss any matches that happen between midnight and 2 PM
                                // on 5 May 2007 and between midnight and 5 AM on 8 May.

                                // Note that this overwrites whatever hit filter was chosen above.
                                primary_query = GetDateRangeQuery (part, out hit_filter);
                                // Date properties can live in either index
                                if (! only_build_primary_query && primary_query != null)
                                        secondary_query = primary_query.Clone () as LNS.Query;

                                return;
                        }

                        if (abstract_part is QueryPart_Or) {
                                QueryPart_Or part = (QueryPart_Or) abstract_part;

                                // Assemble a new BooleanQuery combining all of the sub-parts.
                                LNS.BooleanQuery p_query;
                                p_query = new LNS.BooleanQuery ();

                                LNS.BooleanQuery s_query = null;
                                if (! only_build_primary_query)
                                        s_query = new LNS.BooleanQuery ();

                                primary_query = p_query;
                                secondary_query = s_query;

                                OrHitFilter or_hit_filter = null;

                                foreach (QueryPart sub_part in part.SubParts) {
                                        LNS.Query p_subq, s_subq;
                                        HitFilter sub_hit_filter; // FIXME: This is (and must be) ignored
                                        // FIXME: Any subpart in an OR which has a hit filter won't work
                                        // correctly, because we can't tell which part of an OR we matched
                                        // against to filter correctly.  This affects date range queries.
                                        QueryPartToQuery (sub_part, only_build_primary_query,
                                                          term_list, query_part_hook,
                                                          out p_subq, out s_subq, out sub_hit_filter);
                                        if (p_subq != null)
                                                p_query.Add (p_subq, LNS.BooleanClause.Occur.SHOULD);
                                        // s_query is null when only the primary query was requested;
                                        // guard so a stray secondary sub-query can never dereference it.
                                        if (s_subq != null && s_query != null)
                                                s_query.Add (s_subq, LNS.BooleanClause.Occur.SHOULD);
                                        if (sub_hit_filter != null) {
                                                if (or_hit_filter == null)
                                                        or_hit_filter = new OrHitFilter ();
                                                or_hit_filter.Add (sub_hit_filter);
                                        }
                                }

                                if (or_hit_filter != null)
                                        hit_filter = new HitFilter (or_hit_filter.HitFilter);

                                return;
                        }

                        if (abstract_part is QueryPart_Uri) {
                                QueryPart_Uri part = (QueryPart_Uri) abstract_part;

                                // Do a term query on the Uri field.
                                // This is probably less efficient than using a TermEnum;
                                // but this is required for the query API where the uri query
                                // can be part of a prohibited query or a boolean or query.
                                Term term;
                                term = new Term ("Uri", UriFu.UriToEscapedString (part.Uri));
                                if (term_list != null)
                                        term_list.Add (term);
                                primary_query = new LNS.TermQuery (term);

                                // Query only the primary index
                                return;
                        }

                        if (abstract_part is QueryPart_Property) {
                                QueryPart_Property part = (QueryPart_Property) abstract_part;

                                string field_name;
                                if (part.Key == QueryPart_Property.AllProperties)
                                        field_name = TypeToWildcardField (part.Type);
                                else
                                        field_name = PropertyToFieldName (part.Type, part.Key);

                                // Details of the conversion here depends on BeagrepAnalyzer::TokenStream
                                if (part.Type == PropertyType.Text)
                                        primary_query = StringToQuery (field_name, part.Value, term_list);
                                else {
                                        Term term;
                                        // FIXME: Handle date queries for other date fields
                                        // Internal properties and private-namespace keywords keep
                                        // their case; every other value is lowercased.
                                        if (part.Type == PropertyType.Internal || field_name.StartsWith ("prop:k:" + Property.PrivateNamespace))
                                                term = new Term (field_name, part.Value);
                                        else
                                                term = new Term (field_name, part.Value.ToLower ());
                                        if (term_list != null)
                                                term_list.Add (term);
                                        primary_query = new LNS.TermQuery (term);
                                }

                                // Properties can live in either index
                                if (! only_build_primary_query && primary_query != null)
                                        secondary_query = primary_query.Clone () as LNS.Query;

                                return;
                        }

                        throw new Exception ("Unhandled QueryPart type! " + abstract_part.ToString ());
                }