Exemplo n.º 1
0
        /// <summary>
        /// Wraps a Lucene boolean query holding one MUST_NOT wildcard clause in a
        /// <c>LuceneBoolQuery</c>, runs it through <c>LuceneVisitor</c> and then
        /// <c>ElasticSearchDSLVisitor</c>, and hands back the generated KustoQL.
        /// </summary>
        /// <returns>The <c>KustoQL</c> value of the converted bool query.</returns>
        public string Visit_WithValidBoolQuery_ReturnsSuccess()
        {
            var luceneNetBoolQuery = new Lucene.Net.Search.BooleanQuery();

            // Single prohibited wildcard clause: field "*", pattern "Lo*d?n".
            var wildcardTerm = new Lucene.Net.Index.Term("*", "Lo*d?n");
            var prohibitedClause = new Lucene.Net.Search.BooleanClause(
                new Lucene.Net.Search.WildcardQuery(wildcardTerm),
                Lucene.Net.Search.Occur.MUST_NOT);
            luceneNetBoolQuery.Clauses.Add(prohibitedClause);

            var boolQuery = new LuceneBoolQuery();
            boolQuery.LuceneQuery = luceneNetBoolQuery;

            // First pass: the Lucene visitor populates ESQuery on the wrapper.
            new LuceneVisitor().Visit(boolQuery);

            var es = boolQuery.ESQuery;
            Assert.NotNull(es);

            // Second pass: the DSL visitor fills in KustoQL on the bool query.
            var dslVisitor = new ElasticSearchDSLVisitor(SchemaRetrieverMock.CreateMockSchemaRetriever());
            var esBoolQuery = (BoolQuery)es;
            dslVisitor.Visit(esBoolQuery);

            return esBoolQuery.KustoQL;
        }
        /// <summary>
        /// Parses one query string per field and combines the results into a single
        /// <see cref="BooleanQuery"/>. Use this if you need to specify certain fields as
        /// required, and others as prohibited.
        /// </summary>
        /// <remarks>
        /// Usage:
        /// <code>
        /// String[] queries = { "query1", "query2", "query3" };
        /// String[] fields = { "filename", "contents", "description" };
        /// int[] flags = { MultiFieldQueryParser.NORMAL_FIELD,
        ///                 MultiFieldQueryParser.REQUIRED_FIELD,
        ///                 MultiFieldQueryParser.PROHIBITED_FIELD };
        /// Parse(queries, fields, flags, analyzer);
        /// </code>
        /// The code above would construct a query:
        /// <code>
        /// (filename:query1) +(contents:query2) -(description:query3)
        /// </code>
        /// A flag equal to REQUIRED_FIELD yields a MUST clause, PROHIBITED_FIELD a MUST_NOT
        /// clause, and any other value a SHOULD clause. A field whose parsed query is null or
        /// an empty <see cref="BooleanQuery"/> contributes no clause.
        /// Deprecated: use the overload taking <c>BooleanClause.Occur[]</c> flags instead.
        /// </remarks>
        /// <param name="queries">Query strings to parse, one per field.</param>
        /// <param name="fields">Fields to search on.</param>
        /// <param name="flags">Flags describing the fields.</param>
        /// <param name="analyzer">Analyzer to use.</param>
        /// <returns>The combined boolean query.</returns>
        /// <exception cref="ParseException">If query parsing fails.</exception>
        /// <exception cref="TokenMgrError">If query parsing fails.</exception>
        /// <exception cref="System.ArgumentException">If the lengths of the queries, fields, and flags arrays differ.</exception>
        public static Query Parse(System.String[] queries, System.String[] fields, int[] flags, Analyzer analyzer)
        {
            if (queries.Length != fields.Length || queries.Length != flags.Length)
            {
                throw new System.ArgumentException("queries, fields, and flags arrays have different lengths");
            }

            BooleanQuery bQuery = new BooleanQuery();

            for (int i = 0; i < fields.Length; i++)
            {
                QueryParser qp = new QueryParser(fields[i], analyzer);
                Query       q  = qp.Parse(queries[i]);

                // A sub-query that parsed to nothing (null, or a BooleanQuery without clauses)
                // must not be attached: it would add a clause with no content to the result.
                if (q == null || (q is BooleanQuery && ((BooleanQuery)q).GetClauses().Length == 0))
                {
                    continue;
                }

                switch (flags[i])
                {
                case REQUIRED_FIELD:
                    bQuery.Add(q, BooleanClause.Occur.MUST);
                    break;

                case PROHIBITED_FIELD:
                    bQuery.Add(q, BooleanClause.Occur.MUST_NOT);
                    break;

                default:
                    // Every other flag value is treated as an optional field.
                    bQuery.Add(q, BooleanClause.Occur.SHOULD);
                    break;
                }
            }

            return bQuery;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Rewrites this query by rewriting a <c>RegexQuery</c> over the same term and turning
        /// every resulting term clause into a <c>SpanTermQuery</c> inside one <c>SpanOrQuery</c>.
        /// </summary>
        /// <param name="reader">Index reader handed to <c>RegexQuery.Rewrite</c>.</param>
        /// <returns>A <c>SpanOrQuery</c> carrying the boost of the rewritten regex query.</returns>
        public override Query Rewrite(IndexReader reader)
        {
            Query rewrittenRegex = new RegexQuery(term).Rewrite(reader);

            // The rewrite result is expected to be a BooleanQuery (RegexQuery rewrites via
            // MultiTermQuery); the cast throws if that ever stops being true.
            BooleanClause[] termClauses = ((BooleanQuery)rewrittenRegex).GetClauses();
            SpanQuery[]     spanTerms   = new SpanQuery[termClauses.Length];

            int slot = 0;
            foreach (BooleanClause termClause in termClauses)
            {
                // Each clause is expected to wrap a TermQuery; keep its term and boost.
                TermQuery termQuery = (TermQuery)termClause.GetQuery();
                SpanQuery spanTerm  = new SpanTermQuery(termQuery.GetTerm());

                spanTerm.SetBoost(termQuery.GetBoost());
                spanTerms[slot++] = spanTerm;
            }

            SpanOrQuery spanOr = new SpanOrQuery(spanTerms);
            spanOr.SetBoost(rewrittenRegex.GetBoost());
            return spanOr;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Appends an optional (SHOULD) term clause to <paramref name="q"/>.
        /// </summary>
        /// <param name="q">Boolean query receiving the clause.</param>
        /// <param name="k">Field name of the term.</param>
        /// <param name="v">Text of the term.</param>
        /// <param name="boost">Boost applied to the term query before it is added.</param>
        private static void  Add(BooleanQuery q, System.String k, System.String v, float boost)
        {
            Term  fieldTerm   = new Term(k, v);
            Query boostedTerm = new TermQuery(fieldTerm);
            boostedTerm.SetBoost(boost);

            BooleanClause optionalClause = new BooleanClause(boostedTerm, BooleanClause.Occur.SHOULD);
            q.Add(optionalClause);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Round-trips a <c>BooleanQuery</c> through <c>BinaryFormatter</c> and verifies that the
        /// deserialized copy equals the original and produces the same hit count when searched.
        /// </summary>
        public void TestBooleanQuerySerialization()
        {
            Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();

            lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Lucene.Net.Search.BooleanClause.Occur.MUST);

            // NOTE(review): BinaryFormatter is unsafe for untrusted input; it is used here only
            // because binary serialization of the query is the behavior under test.
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            Lucene.Net.Search.BooleanQuery lucQuery2;

            // 'using' releases the stream even when (de)serialization throws.
            using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
            {
                bf.Serialize(ms, lucQuery);
                ms.Seek(0, System.IO.SeekOrigin.Begin);
                lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
            }

            Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");

            int hitCount;
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount = searcher.Search(lucQuery, 20).totalHits;
            }
            finally
            {
                searcher.Close();
            }

            // A fresh searcher is opened for the deserialized query; it is closed as well so
            // the test does not leave an open searcher behind.
            int hitCount2;
            searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount2 = searcher.Search(lucQuery2, 20).totalHits;
            }
            finally
            {
                searcher.Close();
            }

            Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts");
        }
Exemplo n.º 6
0
 /// <summary>
 /// Verifies that <c>QueryBuilder.CreateBooleanQuery</c> with <c>Occur.MUST</c> turns the text
 /// "foo bar" into a boolean query that requires both terms on "field".
 /// </summary>
 public virtual void TestBooleanMust()
 {
     BooleanQuery bothRequired = new BooleanQuery();
     foreach (string text in new string[] { "foo", "bar" })
     {
         bothRequired.Add(new TermQuery(new Term("field", text)), BooleanClause.Occur.MUST);
     }

     QueryBuilder queryBuilder = new QueryBuilder(new MockAnalyzer(Random()));
     var built = queryBuilder.CreateBooleanQuery("field", "foo bar", BooleanClause.Occur.MUST);

     Assert.AreEqual(bothRequired, built);
 }
Exemplo n.º 7
0
        /// <summary>
        /// Requires both "sevento" and "sevenly" on "field" and passes an empty list of expected
        /// document ids to <c>CheckHits</c>.
        /// </summary>
        public virtual void  TestBoolean2()
        {
            BooleanQuery bothRequired = new BooleanQuery();
            foreach (System.String text in new System.String[] { "sevento", "sevenly" })
            {
                bothRequired.Add(new TermQuery(new Term("field", text)), Occur.MUST);
            }

            int[] expectedDocs = new int[0];
            CheckHits(bothRequired, expectedDocs);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Requires both "seventy" and "seven" on "field" and passes the expected document ids
        /// to <c>CheckHits</c>.
        /// </summary>
        public virtual void  TestBoolean()
        {
            BooleanQuery bothRequired = new BooleanQuery();
            foreach (System.String text in new System.String[] { "seventy", "seven" })
            {
                bothRequired.Add(new TermQuery(new Term("field", text)), Occur.MUST);
            }

            int[] expectedDocs = new int[]
            {
                77, 777, 177, 277, 377, 477, 577, 677, 770, 771,
                772, 773, 774, 775, 776, 778, 779, 877, 977
            };
            CheckHits(bothRequired, expectedDocs);
        }
 /// <summary>
 /// Unwraps a <c>BooleanQuery</c> and forwards the query of each clause to <c>GetTerms</c>.
 /// Clauses marked MUST_NOT are skipped unless <paramref name="prohibited"/> is true.
 /// </summary>
 /// <remarks>
 /// ExtractTerms is currently the only query-independent way to introspect a query, but it
 /// only yields the terms, not the boost each term carries. "Container" queries such as
 /// BooleanQuery therefore have to be unwrapped to reach the boost held by each child.
 /// Background discussion:
 /// http://www.gossamer-threads.com/lists/lucene/java-dev/34208?search_string=introspection;#34208
 /// Since Query objects expose no common accessor for child queries, query-specific
 /// unwrapping such as this method is used instead.
 /// </remarks>
 /// <param name="query">Boolean query whose clauses are visited.</param>
 /// <param name="terms">Collection passed through to <c>GetTerms</c>.</param>
 /// <param name="prohibited">When true, MUST_NOT clauses are visited as well.</param>
 /// <param name="fieldName">Field name passed through to <c>GetTerms</c>.</param>
 private static void  GetTermsFromBooleanQuery(BooleanQuery query, System.Collections.Hashtable terms, bool prohibited, System.String fieldName)
 {
     foreach (BooleanClause clause in query.GetClauses())
     {
         // The occur flag is only consulted when prohibited clauses are to be left out.
         bool visitClause = prohibited || clause.GetOccur() != BooleanClause.Occur.MUST_NOT;
         if (visitClause)
         {
             GetTerms(clause.GetQuery(), terms, prohibited, fieldName);
         }
     }
 }
Exemplo n.º 10
0
        /// <summary>
        /// Parses one query string against every given field and ORs the per-field results.
        /// If x fields are specified, this effectively constructs:
        /// <code>
        /// (field1:query) (field2:query) (field3:query)...(fieldx:query)
        /// </code>
        /// </summary>
        /// <remarks>
        /// Deprecated: use the single-argument <c>Parse(String)</c> instead, but note that it
        /// returns a different query when all terms are required: its query expects all terms
        /// no matter in which field they occur, whereas the query built by this (deprecated)
        /// method expects all terms in all fields at the same time.
        /// </remarks>
        /// <param name="query">Query string to parse.</param>
        /// <param name="fields">Fields to search on.</param>
        /// <param name="analyzer">Analyzer to use.</param>
        /// <returns>A boolean query with one SHOULD clause per field.</returns>
        /// <exception cref="ParseException">If query parsing fails.</exception>
        /// <exception cref="TokenMgrError">If query parsing fails.</exception>
        public static Query Parse(System.String query, System.String[] fields, Analyzer analyzer)
        {
            BooleanQuery combined = new BooleanQuery();

            foreach (System.String field in fields)
            {
                Query perField = Parse(query, field, analyzer);
                combined.Add(perField, BooleanClause.Occur.SHOULD);
            }

            return combined;
        }
Exemplo n.º 11
0
 /// <summary>
 /// Searches for a document whose "FileName" and "LastWriteTime" terms both match the
 /// values carried by <paramref name="officeData"/>.
 /// </summary>
 /// <param name="officeData">Supplies the <c>FileName</c> and <c>LastWriteTime</c> values to look up.</param>
 /// <returns><c>false</c> when the search reports zero total hits; otherwise <c>true</c>.</returns>
 public bool CheckDocExist(OfficeData officeData)
 {
     Lucene.Net.Search.Query byFileName = new Lucene.Net.Search.TermQuery(
         new Lucene.Net.Index.Term("FileName", officeData.FileName));
     Lucene.Net.Search.Query byLastWriteTime = new Lucene.Net.Search.TermQuery(
         new Lucene.Net.Index.Term("LastWriteTime", officeData.LastWriteTime));

     // Both terms are mandatory.
     Lucene.Net.Search.BooleanQuery bothMatch = new Lucene.Net.Search.BooleanQuery();
     bothMatch.Add(byFileName, Lucene.Net.Search.Occur.MUST);
     bothMatch.Add(byLastWriteTime, Lucene.Net.Search.Occur.MUST);

     return searcher.Search(bothMatch, 2).TotalHits != 0;
 }
Exemplo n.º 12
0
        /// <summary>
        /// Lowers the maximum boolean clause count to 2 and verifies that parsing a three-term
        /// query raises <c>ParseException</c>.
        /// </summary>
        public virtual void  TestBooleanQuery()
        {
            BooleanQuery.SetMaxClauseCount(2);
            try
            {
                QueryParsers.QueryParser.Parse("one two three", "Field", new WhitespaceAnalyzer());
                Assert.Fail("ParseException expected due to too many boolean clauses");
            }
            catch (Lucene.Net.QueryParsers.ParseException)
            {
                // too many boolean clauses, so ParseException is expected
            }
            finally
            {
                // The clause limit is a static setting: restore it even when the assertion
                // above fails or an unexpected exception escapes, so it cannot leak into
                // whatever runs afterwards.
                BooleanQuery.SetMaxClauseCount(originalMaxClauses);
            }
        }
Exemplo n.º 13
0
		/// <summary>
		/// Runs <c>QueryTest</c> for every combination of fields f1..f4 with values v1 and v2,
		/// then for a boolean query requiring f1:v1 and f4:v1.
		/// </summary>
		public virtual void  TestQueries()
		{
			System.String[] fieldNames = new System.String[] { "f1", "f2", "f3", "f4" };
			System.String[] fieldValues = new System.String[] { "v1", "v2" };

			// Field-major order: f1:v1, f1:v2, f2:v1, ...
			foreach (System.String fieldName in fieldNames)
			{
				foreach (System.String fieldValue in fieldValues)
				{
					QueryTest(new TermQuery(new Term(fieldName, fieldValue)));
				}
			}

			BooleanQuery bothRequired = new BooleanQuery();
			bothRequired.Add(new TermQuery(new Term("f1", "v1")), Occur.MUST);
			bothRequired.Add(new TermQuery(new Term("f4", "v1")), Occur.MUST);
			QueryTest(bothRequired);
		}
Exemplo n.º 14
0
        /// <summary>
        /// Parses a single query string against each of the given fields and combines the
        /// per-field results into one <see cref="BooleanQuery"/>. Use this if you need to
        /// specify certain fields as required, and others as prohibited.
        /// </summary>
        /// <remarks>
        /// Usage:
        /// <code>
        /// String[] fields = {"filename", "contents", "description"};
        /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
        ///                                BooleanClause.Occur.MUST,
        ///                                BooleanClause.Occur.MUST_NOT};
        /// MultiFieldQueryParser.Parse("query", fields, flags, analyzer);
        /// </code>
        /// The code above would construct a query:
        /// <code>
        /// (filename:query) +(contents:query) -(description:query)
        /// </code>
        /// A field whose parsed query is null or an empty <see cref="BooleanQuery"/>
        /// contributes no clause.
        /// </remarks>
        /// <param name="query">Query string to parse.</param>
        /// <param name="fields">Fields to search on.</param>
        /// <param name="flags">Flags describing the fields.</param>
        /// <param name="analyzer">Analyzer to use.</param>
        /// <returns>The combined boolean query.</returns>
        /// <exception cref="ParseException">If query parsing fails.</exception>
        /// <exception cref="System.ArgumentException">If the length of the fields array differs from the length of the flags array.</exception>
        public static Query Parse(System.String query, System.String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
        {
            if (fields.Length != flags.Length)
            {
                throw new System.ArgumentException("fields.length != flags.length");
            }

            BooleanQuery bQuery = new BooleanQuery();

            for (int i = 0; i < fields.Length; i++)
            {
                QueryParser qp = new QueryParser(fields[i], analyzer);
                Query       q  = qp.Parse(query);

                // Only attach sub-queries that actually contain something: a null result or a
                // BooleanQuery without clauses would add a clause with no content.
                if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
                {
                    bQuery.Add(q, flags[i]);
                }
            }

            return bQuery;
        }
Exemplo n.º 15
0
        /// <summary>
        /// Parses one query string per field and combines the results into a single
        /// <see cref="BooleanQuery"/>. Use this if you need to specify certain fields as
        /// required, and others as prohibited.
        /// </summary>
        /// <remarks>
        /// Usage:
        /// <code>
        /// String[] query = {"query1", "query2", "query3"};
        /// String[] fields = {"filename", "contents", "description"};
        /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
        ///                                BooleanClause.Occur.MUST,
        ///                                BooleanClause.Occur.MUST_NOT};
        /// MultiFieldQueryParser.Parse(query, fields, flags, analyzer);
        /// </code>
        /// The code above would construct a query:
        /// <code>
        /// (filename:query1) +(contents:query2) -(description:query3)
        /// </code>
        /// A field whose parsed query is null or an empty <see cref="BooleanQuery"/>
        /// contributes no clause.
        /// </remarks>
        /// <param name="queries">Query strings to parse, one per field.</param>
        /// <param name="fields">Fields to search on.</param>
        /// <param name="flags">Flags describing the fields.</param>
        /// <param name="analyzer">Analyzer to use.</param>
        /// <returns>The combined boolean query.</returns>
        /// <exception cref="ParseException">If query parsing fails.</exception>
        /// <exception cref="System.ArgumentException">If the lengths of the queries, fields, and flags arrays differ.</exception>
        public static Query Parse(System.String[] queries, System.String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
        {
            if (queries.Length != fields.Length || queries.Length != flags.Length)
            {
                throw new System.ArgumentException("queries, fields, and flags arrays have different lengths");
            }

            BooleanQuery bQuery = new BooleanQuery();

            for (int i = 0; i < fields.Length; i++)
            {
                QueryParser qp = new QueryParser(fields[i], analyzer);
                Query       q  = qp.Parse(queries[i]);

                // Only attach sub-queries that actually contain something: a null result or a
                // BooleanQuery without clauses would add a clause with no content.
                if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
                {
                    bQuery.Add(q, flags[i]);
                }
            }

            return bQuery;
        }
Exemplo n.º 16
0
        /// <summary>
        /// Exercises <c>QueryTest</c> with a term query for each of f1..f4 paired with v1 and v2,
        /// followed by a boolean query that requires both f1:v1 and f4:v1.
        /// </summary>
        public virtual void  TestQueries()
        {
            System.String[] names  = { "f1", "f2", "f3", "f4" };
            System.String[] values = { "v1", "v2" };

            // Same order as listing them by hand: all values of f1, then f2, and so on.
            for (int n = 0; n < names.Length; n++)
            {
                for (int v = 0; v < values.Length; v++)
                {
                    QueryTest(new TermQuery(new Term(names[n], values[v])));
                }
            }

            BooleanQuery conjunction = new BooleanQuery();
            conjunction.Add(new TermQuery(new Term("f1", "v1")), Occur.MUST);
            conjunction.Add(new TermQuery(new Term("f4", "v1")), Occur.MUST);

            QueryTest(conjunction);
        }
        /// <summary>
        /// Searches the no-payload field for documents that must contain "zero" and must not
        /// contain "foo", expects exactly one hit, and checks the collected hits against
        /// document 0.
        /// </summary>
        public virtual void  TestNoPayload()
        {
            BoostingTermQuery required   = new BoostingTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"));
            BoostingTermQuery prohibited = new BoostingTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"));

            BooleanQuery query = new BooleanQuery();
            query.Add(new BooleanClause(required, BooleanClause.Occur.MUST));
            query.Add(new BooleanClause(prohibited, BooleanClause.Occur.MUST_NOT));

            TopDocs hits = searcher.Search(query, null, 100);

            Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
            Assert.IsTrue(hits.TotalHits == 1, "hits Size: " + hits.TotalHits + " is not: " + 1);

            // The single expected hit is document 0.
            int[] expectedDocs = new int[] { 0 };
            CheckHits.CheckHitCollector(query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, expectedDocs);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Serializes a two-clause <c>BooleanQuery</c> with <c>BinaryFormatter</c>, deserializes
        /// it again, and verifies the copy equals the original (see issue LUCENENET-170).
        /// </summary>
        public void Test_Util_Parameter()
        {
            Lucene.Net.Search.BooleanQuery queryPreSerialized = new Lucene.Net.Search.BooleanQuery();
            queryPreSerialized.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("country", "Russia")), Lucene.Net.Search.BooleanClause.Occur.MUST);
            queryPreSerialized.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("country", "France")), Lucene.Net.Search.BooleanClause.Occur.MUST);

            // NOTE(review): BinaryFormatter is unsafe for untrusted input; it is used here only
            // because binary serialization of the query is the behavior under test.
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            Lucene.Net.Search.BooleanQuery queryPostSerialized;

            // 'using' releases the stream even when serialization or deserialization throws.
            using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
            {
                // serialize
                serializer.Serialize(memoryStream, queryPreSerialized);

                // rewind and deserialize
                memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
                queryPostSerialized = (Lucene.Net.Search.BooleanQuery)serializer.Deserialize(memoryStream);
            }

            Assert.AreEqual(queryPreSerialized, queryPostSerialized, "See the issue: LUCENENET-170");
        }
Exemplo n.º 19
0
        /// <summary>
        /// Parses one query string per field and combines the results into a single
        /// <see cref="BooleanQuery"/>. Use this if you need to specify certain fields as
        /// required, and others as prohibited.
        /// </summary>
        /// <remarks>
        /// Usage:
        /// <code>
        /// String[] query = {"query1", "query2", "query3"};
        /// String[] fields = {"filename", "contents", "description"};
        /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
        ///                                BooleanClause.Occur.MUST,
        ///                                BooleanClause.Occur.MUST_NOT};
        /// MultiFieldQueryParser.Parse(matchVersion, query, fields, flags, analyzer);
        /// </code>
        /// The code above would construct a query:
        /// <code>
        /// (filename:query1) +(contents:query2) -(description:query3)
        /// </code>
        /// A field whose parsed query is null or an empty <see cref="BooleanQuery"/>
        /// contributes no clause.
        /// </remarks>
        /// <param name="matchVersion">Lucene version to match; this is passed through to QueryParser.</param>
        /// <param name="queries">Query strings to parse, one per field.</param>
        /// <param name="fields">Fields to search on.</param>
        /// <param name="flags">Flags describing the fields.</param>
        /// <param name="analyzer">Analyzer to use.</param>
        /// <returns>The combined boolean query.</returns>
        /// <exception cref="ParseException">If query parsing fails.</exception>
        /// <exception cref="System.ArgumentException">If the lengths of the queries, fields, and flags arrays differ.</exception>
        public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
        {
            if (queries.Length != fields.Length || queries.Length != flags.Length)
            {
                throw new System.ArgumentException("queries, fields, and flags arrays have different lengths");
            }

            BooleanQuery bQuery = new BooleanQuery();

            for (int i = 0; i < fields.Length; i++)
            {
                QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
                Query       q  = qp.Parse(queries[i]);

                // Only attach sub-queries that actually contain something: a null result or a
                // BooleanQuery without clauses is skipped.
                if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
                {
                    bQuery.Add(q, flags[i]);
                }
            }

            return bQuery;
        }
Exemplo n.º 20
0
        /// <summary>
        /// Parses one query string per field and ORs the per-field results.
        /// If x fields are specified, this effectively constructs:
        /// <code>
        /// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
        /// </code>
        /// A field whose parsed query is null or an empty <see cref="BooleanQuery"/>
        /// contributes no clause.
        /// </summary>
        /// <param name="queries">Query strings to parse, one per field.</param>
        /// <param name="fields">Fields to search on.</param>
        /// <param name="analyzer">Analyzer to use.</param>
        /// <returns>A boolean query with at most one SHOULD clause per field.</returns>
        /// <exception cref="ParseException">If query parsing fails.</exception>
        /// <exception cref="System.ArgumentException">If the length of the queries array differs from the length of the fields array.</exception>
        public static Query Parse(System.String[] queries, System.String[] fields, Analyzer analyzer)
        {
            if (queries.Length != fields.Length)
            {
                throw new System.ArgumentException("queries.length != fields.length");
            }

            BooleanQuery combined = new BooleanQuery();

            for (int index = 0; index < fields.Length; index++)
            {
                Query parsed = new QueryParser(fields[index], analyzer).Parse(queries[index]);
                if (parsed == null)
                {
                    continue;
                }

                // A boolean query without any clause carries nothing worth adding.
                BooleanQuery nested = parsed as BooleanQuery;
                if (nested != null && nested.GetClauses().Length == 0)
                {
                    continue;
                }

                combined.Add(parsed, BooleanClause.Occur.SHOULD);
            }

            return combined;
        }
Exemplo n.º 21
0
        /// <summary>
        /// Builds the "more like this" query from a priority queue of scored terms.
        /// </summary>
        /// <remarks>
        /// Each popped entry is read as an object array: [0] term text, [1] field name,
        /// [2] score (float). When boosting is enabled, a term's boost is its score divided by
        /// the score of the first entry popped. The loop ends when the queue returns null, when
        /// the query rejects a clause with <c>TooManyClauses</c>, or when a positive
        /// <c>maxQueryTerms</c> has been reached.
        /// </remarks>
        /// <param name="q">Queue that is popped until it returns null or a limit is hit.</param>
        /// <returns>A boolean query with one SHOULD clause per accepted term.</returns>
        private Query CreateQuery(PriorityQueue q)
        {
            BooleanQuery likeThis   = new BooleanQuery();
            int          addedTerms = 0;
            float        topScore   = 0;

            for (System.Object entry = q.Pop(); entry != null; entry = q.Pop())
            {
                System.Object[] parts     = (System.Object[])entry;
                TermQuery       termQuery = new TermQuery(new Term((System.String)parts[1], (System.String)parts[0]));

                if (boost)
                {
                    float score = (float)((System.Single)parts[2]);

                    // The first popped entry defines the reference score for all boosts.
                    if (addedTerms == 0)
                    {
                        topScore = score;
                    }

                    termQuery.SetBoost(score / topScore);
                }

                try
                {
                    likeThis.Add(termQuery, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // Keep what has been collected so far.
                    break;
                }

                addedTerms++;
                if (maxQueryTerms > 0 && addedTerms >= maxQueryTerms)
                {
                    break;
                }
            }

            return likeThis;
        }
Exemplo n.º 22
0
        /// <summary>
        /// Simple similarity query generator.
        /// Takes every unique word of <paramref name="body"/> and forms a boolean query in which
        /// all words are optional. Use the result to query your <see cref="IndexSearcher"/> for
        /// similar documents. The only caveat is that the first hit returned <b>should be</b> your
        /// source document - you'll need to ignore that one.
        /// </summary>
        /// <remarks>
        /// So, if you have a code fragment like this:
        /// <code>
        /// Query q = FormSimilarQuery("I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
        /// </code>
        /// the query returned, in string form, will be <c>'(i use lucene to search fast searchers are good')</c>.
        /// <para>
        /// The philosophy behind this method is "two documents are similar if they share lots of words".
        /// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents
        /// a higher similarity score if they share more uncommon words.
        /// </para>
        /// <para>
        /// This method is fail-safe: if a long 'body' is passed in and
        /// <see cref="BooleanQuery.Add(BooleanClause)"/> (used internally) throws
        /// <see cref="BooleanQuery.TooManyClauses"/>, the query built up to that point is returned.
        /// </para>
        /// </remarks>
        /// <param name="body">The body of the document you want to find similar documents to.</param>
        /// <param name="a">The analyzer used to tokenize the body.</param>
        /// <param name="field">The field you want to search on, probably something like "contents" or "body".</param>
        /// <param name="stop">Optional set of stop words to ignore; may be null.</param>
        /// <returns>A query with all unique words in 'body'.</returns>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
        {
            TokenStream tokens = a.TokenStream(field, new System.IO.StringReader(body));

            BooleanQuery similar = new BooleanQuery();

            // Words already turned into a clause, so duplicates are added only once.
            System.Collections.Hashtable seenWords = new System.Collections.Hashtable();

            for (Lucene.Net.Analysis.Token token = tokens.Next(); token != null; token = tokens.Next())
            {
                System.String word = token.TermText();

                // Skip optional stop words first, then words that were handled before.
                bool isStopWord = stop != null && stop.Contains(word);
                if (isStopWord || seenWords.Contains(word))
                {
                    continue;
                }
                seenWords.Add(word, word);

                TermQuery wordQuery = new TermQuery(new Term(field, word));
                try
                {
                    similar.Add(wordQuery, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // fail-safe, just return what we have, not the end of the world
                    break;
                }
            }

            return similar;
        }
Exemplo n.º 23
0
		/// <summary>
		/// Searches the no-payload field for documents that must contain "zero" and must not
		/// contain "foo", expects exactly one hit, and checks the collected hits against
		/// document 0.
		/// </summary>
		public virtual void  TestNoPayload()
		{
			BoostingTermQuery mustHave = new BoostingTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"));
			BoostingTermQuery mustNotHave = new BoostingTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"));

			BooleanQuery query = new BooleanQuery();
			query.Add(new BooleanClause(mustHave, BooleanClause.Occur.MUST));
			query.Add(new BooleanClause(mustNotHave, BooleanClause.Occur.MUST_NOT));

			TopDocs hits = searcher.Search(query, null, 100);

			Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
			Assert.IsTrue(hits.totalHits == 1, "hits Size: " + hits.totalHits + " is not: " + 1);

			// The single expected hit is document 0.
			int[] expectedDocs = new int[] { 0 };
			CheckHits.CheckHitCollector(query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, expectedDocs);
		}
Exemplo n.º 24
0
		/// <summary>
		/// Parses a single query string against each of the given fields and combines the
		/// per-field results into one <see cref="BooleanQuery"/>. Use this if you need to
		/// specify certain fields as required, and others as prohibited.
		/// </summary>
		/// <remarks>
		/// Usage:
		/// <code>
		/// String[] fields = {"filename", "contents", "description"};
		/// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
		///                                BooleanClause.Occur.MUST,
		///                                BooleanClause.Occur.MUST_NOT};
		/// MultiFieldQueryParser.Parse("query", fields, flags, analyzer);
		/// </code>
		/// The code above would construct a query:
		/// <code>
		/// (filename:query) +(contents:query) -(description:query)
		/// </code>
		/// A field whose parsed query is null or an empty <see cref="BooleanQuery"/>
		/// contributes no clause.
		/// </remarks>
		/// <param name="query">Query string to parse.</param>
		/// <param name="fields">Fields to search on.</param>
		/// <param name="flags">Flags describing the fields.</param>
		/// <param name="analyzer">Analyzer to use.</param>
		/// <returns>The combined boolean query.</returns>
		/// <exception cref="ParseException">If query parsing fails.</exception>
		/// <exception cref="TokenMgrError">If query parsing fails.</exception>
		/// <exception cref="System.ArgumentException">If the length of the fields array differs from the length of the flags array.</exception>
		public static Query Parse(System.String query, System.String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
		{
			if (fields.Length != flags.Length)
			{
				throw new System.ArgumentException("fields.length != flags.length");
			}

			BooleanQuery bQuery = new BooleanQuery();
			for (int i = 0; i < fields.Length; i++)
			{
				QueryParser qp = new QueryParser(fields[i], analyzer);
				Query q = qp.Parse(query);

				// Only attach sub-queries that actually contain something: a null result or a
				// BooleanQuery without clauses would add a clause with no content.
				if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0))
				{
					bQuery.Add(q, flags[i]);
				}
			}
			return bQuery;
		}
Exemplo n.º 25
0
        /// <summary>
        /// Searches the student Lucene index for one page of hits, looks each hit up in the
        /// database and returns the resulting students serialized as a JSON array.
        /// </summary>
        /// <param name="schoolYear">School year. Parsed as an integer to choose the index folder and also used as the query text for the "YearId" field.</param>
        /// <param name="times">Query text matched against the "TestDayId" field.</param>
        /// <param name="schoolId">Not used by this method.</param>
        /// <param name="StudentName">Name searched as a quoted phrase in the "StudentName" field; "" or " " disables the name filter.</param>
        /// <param name="pIndex">Page number, parsed as an integer; page 1 covers hits 0-19. Values below 1 yield an empty result.</param>
        /// <returns>JSON text produced by <c>JavaScriptSerializer</c> for the list of <c>Student</c> objects on the requested page.</returns>
        public string GetStudentsByYearIdAndTimesIdAndSchoolIdAndStudentName(string schoolYear, string times, string schoolId, string StudentName, string pIndex)
        {
            const int pageSize = 20;

            int       pageIndex       = Int32.Parse(pIndex);
            ArrayList students        = new ArrayList();
            string    pathOfIndexFile = Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["StudentIndexing"].ToString());

            if (Int32.Parse(schoolYear) >= 2000)
            {
                pathOfIndexFile += "\\" + schoolYear + "\\Index";
            }

            // Strip embedded quotes, then quote the whole name so it is parsed as one phrase.
            string studentName = "\"" + StudentName.Replace("\"", "") + "\"";

            Lucene.Net.Search.IndexSearcher iSearcher = new Lucene.Net.Search.IndexSearcher(pathOfIndexFile);
            try
            {
                Lucene.Net.QueryParsers.QueryParser qYearParser = new Lucene.Net.QueryParsers.QueryParser("YearId", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
                Lucene.Net.Search.Query             iYearQuery  = qYearParser.Parse(schoolYear);

                Lucene.Net.QueryParsers.QueryParser qTestDayParser = new Lucene.Net.QueryParsers.QueryParser("TestDayId", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
                Lucene.Net.Search.Query             iTestDayQuery  = qTestDayParser.Parse(times);

                // Year and test day are always required; the name is required only when supplied.
                Lucene.Net.Search.BooleanQuery bQuery = new Lucene.Net.Search.BooleanQuery();
                bQuery.Add(iYearQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);
                bQuery.Add(iTestDayQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);

                if (StudentName != " " && StudentName != "")
                {
                    Lucene.Net.QueryParsers.QueryParser qStudentParser = new Lucene.Net.QueryParsers.QueryParser("StudentName", new Lucene.Net.Analysis.Standard.StandardAnalyzer());
                    Lucene.Net.Search.Query             iStudentQuery  = qStudentParser.Parse(studentName);
                    bQuery.Add(iStudentQuery, Lucene.Net.Search.BooleanClause.Occur.MUST);
                }

                Lucene.Net.Search.Hits iHits = iSearcher.Search(bQuery);

                int    hitCount = iHits.Length();
                string total    = hitCount.ToString();

                // Clamp the first index so a page number below 1 cannot produce a negative hit index.
                int firstHit = Math.Max(0, pageIndex * pageSize - pageSize);
                int lastHit  = Math.Min(pageIndex * pageSize, hitCount);

                using (System.Data.SqlClient.SqlConnection con = new System.Data.SqlClient.SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["PSCPortalConnectionString"].ConnectionString))
                {
                    con.Open();

                    //paging
                    for (int i = firstHit; i < lastHit; i++)
                    {
                        // Fetch the stored document once per hit instead of once per field.
                        Lucene.Net.Documents.Document hitDoc = iHits.Doc(i);

                        string yId       = hitDoc.Get("YearId");
                        string stuId     = hitDoc.Get("StudentID");
                        string testDayId = hitDoc.Get("TestDayId");

                        using (System.Data.SqlClient.SqlCommand com = new System.Data.SqlClient.SqlCommand())
                        {
                            com.Connection  = con;
                            com.CommandType = CommandType.Text;
                            com.CommandText = @"   select StudentTHPT.TotalMark,[RoundTotalMark],StudentTHPT.YearId,StudentTHPT.TestDayId,StudentId,FirstName+' '+MiddleName+' '+LastName as FullName,Sex,Birthday,MarkEncourage,Section.Name from StudentTHPT inner join Section on StudentTHPT.SectionId = Section.SectionId 
			                                    where StudentTHPT.YearId=@yearId and StudentTHPT.TestDayId=@timeId and StudentId = @studentId
                                           Order by LastName
	                                   "    ;
                            com.Parameters.Add("@yearId", SqlDbType.NChar);
                            com.Parameters["@yearId"].Value = yId;

                            com.Parameters.Add("@timeId", SqlDbType.NVarChar);
                            com.Parameters["@timeId"].Value = testDayId;

                            com.Parameters.Add("@studentId", SqlDbType.NVarChar);
                            com.Parameters["@studentId"].Value = stuId;

                            using (System.Data.SqlClient.SqlDataReader reader = com.ExecuteReader())
                            {
                                while (reader.Read())
                                {
                                    Student s = new Student {
                                        StudentId      = reader["StudentId"].ToString(),
                                        FullName       = reader["FullName"].ToString(),
                                        Birthday       = reader["Birthday"].ToString().Trim(),
                                        Total          = total,
                                        Section        = reader["Name"].ToString(),
                                        TotalMark      = reader["TotalMark"].ToString(),
                                        RoundTotalMark = reader["RoundTotalMark"].ToString()
                                    };
                                    students.Add(s);
                                }
                            }
                        }
                    }
                }
            }
            finally
            {
                // Release the index files even when parsing, searching or the database lookup throws.
                iSearcher.Close();
            }

            System.Web.Script.Serialization.JavaScriptSerializer serialize = new System.Web.Script.Serialization.JavaScriptSerializer();

            return serialize.Serialize(students);
        }
Exemplo n.º 26
0
		/// <summary> Collects terms from every clause of a BooleanQuery by handing each child query to GetTerms.
		/// extractTerms only exposes a flat list of terms for a query, not the boost attached to each term,
		/// so "container" queries such as BooleanQuery are unwrapped here to reach the boost info held in
		/// each child element.
		/// Background discussion:
		/// http://www.gossamer-threads.com/lists/lucene/java-dev/34208?search_string=introspection;#34208
		/// Because Query objects share no common way to reach their child queries, this is a
		/// query-specific implementation for accessing embedded query elements.
		/// MUST_NOT clauses are skipped unless <c>prohibited</c> is true.
		/// </summary>
		private static void  GetTermsFromBooleanQuery(BooleanQuery query, System.Collections.Hashtable terms, bool prohibited, System.String fieldName)
		{
			foreach (BooleanClause clause in query.GetClauses())
			{
				// Leave out prohibited clauses unless the caller asked for them as well.
				if (!prohibited && clause.GetOccur() == BooleanClause.Occur.MUST_NOT)
				{
					continue;
				}

				GetTerms(clause.GetQuery(), terms, prohibited, fieldName);
			}
		}
Exemplo n.º 27
0
        // Round-trips a BooleanQuery through binary serialization and checks that the
        // deserialized copy compares equal to the original (regression test for LUCENENET-170).
        public void Test_Util_Parameter()
        {
            Lucene.Net.Search.BooleanQuery queryPreSerialized = new Lucene.Net.Search.BooleanQuery();
            queryPreSerialized.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("country", "Russia")), Lucene.Net.Search.BooleanClause.Occur.MUST);
            queryPreSerialized.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("country", "France")), Lucene.Net.Search.BooleanClause.Occur.MUST);

            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            Lucene.Net.Search.BooleanQuery queryPostSerialized;

            // The using block releases the stream even if (de)serialization throws.
            using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
            {
                //now serialize it
                serializer.Serialize(memoryStream, queryPreSerialized);

                //now deserialize, reading from the start of what was just written
                memoryStream.Seek(0, System.IO.SeekOrigin.Begin);
                queryPostSerialized = (Lucene.Net.Search.BooleanQuery)serializer.Deserialize(memoryStream);
            }

            Assert.AreEqual(queryPreSerialized, queryPostSerialized, "See the issue: LUCENENET-170");
        }
Exemplo n.º 28
0
        // Indexes 30 documents that each carry a "noTf" field (created with OmitType) and a
        // "tf" field (created with NormalType), then checks that a phrase query on "noTf"
        // fails and that term/boolean queries run against both fields.
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));

            StringBuilder sb = new StringBuilder(265);
            string term = "term";
            for (int i = 0; i < 30; i++)
            {
                Document doc = new Document();
                // Document i contains "term" repeated i + 1 times.
                sb.Append(term).Append(" ");
                string content = sb.ToString();
                // Odd-numbered documents additionally contain "notf" in the noTf field...
                Field noTf = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), OmitType);
                doc.Add(noTf);

                // ...and even-numbered documents additionally contain "tf" in the tf field.
                Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), NormalType);
                doc.Add(tf);

                writer.AddDocument(doc);
                //System.out.println(d);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             */
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);
            searcher.Similarity = new SimpleSimilarity();

            Term a = new Term("noTf", term);
            Term b = new Term("tf", term);
            Term c = new Term("noTf", "notf");
            Term d = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d);

            // A phrase query over the "noTf" field is expected to throw.
            PhraseQuery pq = new PhraseQuery();
            pq.Add(a);
            pq.Add(c);
            try
            {
                searcher.Search(pq, 10);
                Assert.Fail("did not hit expected exception");
            }
            catch (Exception e)
            {
                // Note: this handler also receives the exception raised by Assert.Fail above;
                // that case ends in the "Expected an IAE" rethrow below.
                Exception cause = e;
                // If the searcher uses an executor service, the IAE is wrapped into other exceptions
                while (cause.InnerException != null)
                {
                    cause = cause.InnerException;
                }
                // The innermost exception must be an InvalidOperationException.
                if (!(cause is InvalidOperationException))
                {
                    throw new InvalidOperationException("Expected an IAE", e);
                } // else OK because positions are not indexed
            }

            // Each single-term search uses its own collector helper; their checks are defined elsewhere.
            searcher.Search(q1, new CountingHitCollectorAnonymousInnerClassHelper(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q2, new CountingHitCollectorAnonymousInnerClassHelper2(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q3, new CountingHitCollectorAnonymousInnerClassHelper3(this));
            //System.out.println(CountingHitCollector.getCount());

            searcher.Search(q4, new CountingHitCollectorAnonymousInnerClassHelper4(this));
            //System.out.println(CountingHitCollector.getCount());

            // noTf:term is in all 30 documents and tf:tf only in the 15 even-numbered ones,
            // so requiring both must produce 15 hits.
            BooleanQuery bq = new BooleanQuery();
            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new CountingHitCollectorAnonymousInnerClassHelper5(this));
            Assert.AreEqual(15, CountingHitCollector.Count);

            reader.Dispose();
            dir.Dispose();
        }
Exemplo n.º 29
0
 public virtual void  TestBoolean()
 {
     // Both terms are required in "field".
     TermQuery seventy = new TermQuery(new Term("field", "seventy"));
     TermQuery seven = new TermQuery(new Term("field", "seven"));

     BooleanQuery query = new BooleanQuery();
     query.Add(seventy, Occur.MUST);
     query.Add(seven, Occur.MUST);

     // Document ids expected to satisfy both clauses.
     int[] expectedDocs = new int[]{77, 777, 177, 277, 377, 477, 577, 677, 770, 771, 772, 773, 774, 775, 776, 778, 779, 877, 977};
     CheckHits(query, expectedDocs);
 }
Exemplo n.º 30
0
        /// <summary>
        /// Converts a query from this library's query model into the corresponding Lucene.Net query.
        /// Boolean queries are converted recursively, clause by clause.
        /// </summary>
        /// <param name="query">The query to convert; must not be null. It is not modified.</param>
        /// <returns>The Lucene.Net query, with its <c>Boost</c> set when <paramref name="query"/> specifies one.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="query"/> is of a type this method does not handle.</exception>
        /// <exception cref="InvalidOperationException">A boolean clause has an occur value other than Must, MustNot or Should.</exception>
        public static Lucene.Net.Search.Query ConvertQueryToLuceneQuery(Query query)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            Lucene.Net.Search.Query lQuery;

            // The order of the type tests is kept as-is in case some query types derive from others.
            if (query is MatchAllDocsQuery)
            {
                lQuery = new Lucene.Net.Search.MatchAllDocsQuery();
            }
            else if (query is TermQuery)
            {
                var termQuery = (TermQuery)query;
                var term      = Term.ConvertToLuceneTerm(termQuery.Term);

                lQuery = new Lucene.Net.Search.TermQuery(term);
            }
            else if (query is TermRangeQuery)
            {
                var termRangeQuery = (TermRangeQuery)query;

                lQuery = new Lucene.Net.Search.TermRangeQuery(termRangeQuery.FieldName,
                                                              termRangeQuery.LowerTerm,
                                                              termRangeQuery.UpperTerm,
                                                              termRangeQuery.LowerInclusive,
                                                              termRangeQuery.UpperInclusive);
            }
            else if (query is PhraseQuery)
            {
                var phraseQuery  = (PhraseQuery)query;
                var lPhraseQuery = new Lucene.Net.Search.PhraseQuery();
                foreach (var term in phraseQuery.Terms)
                {
                    lPhraseQuery.Add(Term.ConvertToLuceneTerm(term));
                }

                // Only override Lucene's slop when one was specified.
                if (phraseQuery.Slop.HasValue)
                {
                    lPhraseQuery.Slop = phraseQuery.Slop.Value;
                }

                lQuery = lPhraseQuery;
            }
            else if (query is PrefixQuery)
            {
                var prefixQuery = (PrefixQuery)query;
                var term        = Term.ConvertToLuceneTerm(prefixQuery.Term);

                lQuery = new Lucene.Net.Search.PrefixQuery(term);
            }
            else if (query is RegexQuery)
            {
                var regexQuery = (RegexQuery)query;
                var term       = Term.ConvertToLuceneTerm(regexQuery.Term);

                lQuery = new Contrib.Regex.RegexQuery(term);
            }
            else if (query is FuzzyQuery)
            {
                var fuzzyQuery = (FuzzyQuery)query;
                var term       = Term.ConvertToLuceneTerm(fuzzyQuery.Term);

                lQuery = new Lucene.Net.Search.FuzzyQuery(term);
            }
            else if (query is BooleanQuery)
            {
                var booleanQuery  = (BooleanQuery)query;
                var lBooleanQuery = new Lucene.Net.Search.BooleanQuery();
                foreach (var clause in booleanQuery.Clauses)
                {
                    // Convert the nested query first, then map its occur value.
                    var lNestedQuery = Query.ConvertQueryToLuceneQuery(clause.Query);
                    Lucene.Net.Search.Occur lOccur;
                    switch (clause.Occur)
                    {
                    case Occur.Must:
                        lOccur = Lucene.Net.Search.Occur.MUST;
                        break;

                    case Occur.MustNot:
                        lOccur = Lucene.Net.Search.Occur.MUST_NOT;
                        break;

                    case Occur.Should:
                        lOccur = Lucene.Net.Search.Occur.SHOULD;
                        break;

                    default:
                        throw new InvalidOperationException("Occur not implemented or defined.");
                    }

                    lBooleanQuery.Add(new Lucene.Net.Search.BooleanClause(lNestedQuery, lOccur));
                }

                // Only override Lucene's minimum-should-match when one was specified.
                if (booleanQuery.MinimumNumberShouldMatch.HasValue)
                {
                    lBooleanQuery.MinimumNumberShouldMatch = booleanQuery.MinimumNumberShouldMatch.Value;
                }

                lQuery = lBooleanQuery;
            }
            else if (query is WildcardQuery)
            {
                var wildcardQuery = (WildcardQuery)query;
                var lTerm         = Term.ConvertToLuceneTerm(wildcardQuery.Term);

                lQuery = new Lucene.Net.Search.WildcardQuery(lTerm);
            }
            else if (query is DoubleNumericRangeQuery)
            {
                var doubleNumericRangeQuery = (DoubleNumericRangeQuery)query;

                lQuery = Lucene.Net.Search.NumericRangeQuery.NewDoubleRange(
                    doubleNumericRangeQuery.FieldName,
                    doubleNumericRangeQuery.Min,
                    doubleNumericRangeQuery.Max,
                    doubleNumericRangeQuery.MinInclusive,
                    doubleNumericRangeQuery.MaxInclusive);
            }
            else if (query is FloatNumericRangeQuery)
            {
                var floatNumericRangeQuery = (FloatNumericRangeQuery)query;

                lQuery = Lucene.Net.Search.NumericRangeQuery.NewFloatRange(
                    floatNumericRangeQuery.FieldName,
                    floatNumericRangeQuery.Min,
                    floatNumericRangeQuery.Max,
                    floatNumericRangeQuery.MinInclusive,
                    floatNumericRangeQuery.MaxInclusive);
            }
            else if (query is IntNumericRangeQuery)
            {
                var intNumericRangeQuery = (IntNumericRangeQuery)query;

                lQuery = Lucene.Net.Search.NumericRangeQuery.NewIntRange(
                    intNumericRangeQuery.FieldName,
                    intNumericRangeQuery.Min,
                    intNumericRangeQuery.Max,
                    intNumericRangeQuery.MinInclusive,
                    intNumericRangeQuery.MaxInclusive);
            }
            else if (query is LongNumericRangeQuery)
            {
                var longNumericRangeQuery = (LongNumericRangeQuery)query;

                lQuery = Lucene.Net.Search.NumericRangeQuery.NewLongRange(
                    longNumericRangeQuery.FieldName,
                    longNumericRangeQuery.Min,
                    longNumericRangeQuery.Max,
                    longNumericRangeQuery.MinInclusive,
                    longNumericRangeQuery.MaxInclusive);
            }
            else if (query is QueryParserQuery)
            {
                var queryParserQuery = (QueryParserQuery)query;

                var queryParser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_30,
                                                                          queryParserQuery.DefaultField,
                                                                          new StandardAnalyzer(Version.LUCENE_30))
                {
                    AllowLeadingWildcard = queryParserQuery.AllowLeadingWildcard
                };

                lQuery = queryParser.Parse(queryParserQuery.Query);
            }
            else if (query is MultiFieldQueryParserQuery)
            {
                var multiFieldQueryParserQuery = (MultiFieldQueryParserQuery)query;

                // A missing field list is treated as empty, without writing back to the caller's object.
                var fieldNames = multiFieldQueryParserQuery.FieldNames == null
                                     ? new string[0]
                                     : multiFieldQueryParserQuery.FieldNames.ToArray();

                var queryParser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fieldNames, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));

                lQuery = queryParser.Parse(multiFieldQueryParserQuery.Query);
            }
            else
            {
                throw new ArgumentException(@"Unknown or invalid query object", "query");
            }

            // The boost applies to every query type, so it is copied once at the end.
            if (query.Boost.HasValue)
            {
                lQuery.Boost = query.Boost.Value;
            }

            return lQuery;
        }
Exemplo n.º 31
0
 public virtual void  TestBoolean2()
 {
     // Both terms are required in "field"; the test expects no document to match.
     TermQuery sevento = new TermQuery(new Term("field", "sevento"));
     TermQuery sevenly = new TermQuery(new Term("field", "sevenly"));

     BooleanQuery query = new BooleanQuery();
     query.Add(sevento, Occur.MUST);
     query.Add(sevenly, Occur.MUST);

     int[] expectedDocs = new int[]{};
     CheckHits(query, expectedDocs);
 }
Exemplo n.º 32
0
        // Hands the query of every clause of a BooleanQuery to GetTerms.
        // Clauses flagged as prohibited are skipped unless `prohibited` is true.
        private static void GetTermsFromBooleanQuery(BooleanQuery query, HashSetSupport terms, bool prohibited, string fieldName)
        {
            foreach (BooleanClause clause in query.GetClauses())
            {
                if (!prohibited && clause.prohibited)
                {
                    continue;
                }

                GetTerms(clause.query, terms, prohibited, fieldName);
            }
        }
Exemplo n.º 33
0
		// Indexes 30 documents that each carry a "noTf" field (term frequencies and positions
		// omitted) and a regular "tf" field, then runs term and boolean queries against both.
		public virtual void  TestBasic()
		{
			Directory dir = new MockRAMDirectory();
			Analyzer analyzer = new StandardAnalyzer();
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMergeFactor(2);
			writer.SetMaxBufferedDocs(2);
			writer.SetSimilarity(new SimpleSimilarity());
			
			
			System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
			System.String term = "term";
			for (int i = 0; i < 30; i++)
			{
				Document d = new Document();
				// Document i contains "term" repeated i + 1 times.
				sb.Append(term).Append(" ");
				System.String content = sb.ToString();
				// Odd-numbered documents additionally contain "notf" in the noTf field...
				Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
				noTf.SetOmitTermFreqAndPositions(true);
				d.Add(noTf);
				
				// ...and even-numbered documents additionally contain "tf" in the tf field.
				Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
				d.Add(tf);
				
				writer.AddDocument(d);
				//System.out.println(d);
			}
			
			writer.Optimize();
			// flush
			writer.Close();
			_TestUtil.CheckIndex(dir);
			
			/*
			* Verify the index
			*/
			Searcher searcher = new IndexSearcher(dir);
			searcher.SetSimilarity(new SimpleSimilarity());
			
			Term a = new Term("noTf", term);
			Term b = new Term("tf", term);
			Term c = new Term("noTf", "notf");
			Term d2 = new Term("tf", "tf");
			TermQuery q1 = new TermQuery(a);
			TermQuery q2 = new TermQuery(b);
			TermQuery q3 = new TermQuery(c);
			TermQuery q4 = new TermQuery(d2);
			
			
			// Each single-term search uses its own collector class; their checks are defined elsewhere.
			searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
			//System.out.println(CountingHitCollector.getCount());
			
			
			searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
			//System.out.println(CountingHitCollector.getCount());
			
			
			
			
			
			searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
			//System.out.println(CountingHitCollector.getCount());
			
			
			searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
			//System.out.println(CountingHitCollector.getCount());
			
			
			
			// noTf:term is in all 30 documents and tf:tf only in the 15 even-numbered ones,
			// so requiring both must produce 15 hits.
			BooleanQuery bq = new BooleanQuery();
			bq.Add(q1, Occur.MUST);
			bq.Add(q4, Occur.MUST);
			
			searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
			Assert.IsTrue(15 == CountingHitCollector.GetCount());
			
			searcher.Close();
			dir.Close();
		}
Exemplo n.º 34
0
        /// <summary> Suggest similar words (restricted or not to a field of a user index).</summary>
        /// <param name="word">the word you want a spell check done on
        /// </param>
        /// <param name="num_sug">the maximum number of suggested words
        /// </param>
        /// <param name="ir">the IndexReader of the user index (can be null, see <paramref name="field"/>)
        /// </param>
        /// <param name="field">the field of the user index: if both <paramref name="ir"/> and this are not null,
        /// the suggested words are restricted to the words present in this field ("restricted mode").
        /// </param>
        /// <param name="morePopular">return only the suggested words that are at least as frequent as the searched word
        /// (restricted mode only). When false and the word itself occurs in the field, the word is returned unchanged.
        /// </param>
        /// <exception cref="System.IO.IOException">may be raised by the underlying index access</exception>
        /// <returns> the sorted list of suggested words, ordered by these criteria:
        /// first the edit distance, second (restricted mode only) the popularity
        /// of the suggested word in the field of the user index
        /// </returns>
        public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
        {
            float min = this.minScore;
            TRStringDistance sd = new TRStringDistance(word);
            int lengthWord = word.Length;

            // Restricted mode needs both a reader and a field to look frequencies up in.
            bool restricted = ir != null && field != null;

            // The word's own frequency is needed even when morePopular is false: it decides
            // whether the word exists and can be returned as-is.
            int freq = restricted ? ir.DocFreq(new Term(field, word)) : 0;
            int goalFreq = morePopular ? freq : 0;
            if (!morePopular && freq > 0)
            {
                return new System.String[]{word}; // return the word if it exist in the index and i don't want a more popular word
            }

            BooleanQuery query = new BooleanQuery();

            for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
            {
                System.String key = "gram" + ng; // form key

                System.String[] grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                if (grams.Length == 0)
                {
                    continue; // hmm
                }

                if (bStart > 0)
                {
                    // should we boost prefixes?
                    Add(query, "start" + ng, grams[0], bStart); // matches start of word
                }
                if (bEnd > 0)
                {
                    // should we boost suffixes
                    Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
                }
                for (int i = 0; i < grams.Length; i++)
                {
                    Add(query, key, grams[i]);
                }
            }

            IndexSearcher searcher = new IndexSearcher(this.spellindex);
            try
            {
                Hits hits = searcher.Search(query);
                SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);

                int stop = Math.Min(hits.Length(), 10 * num_sug); // go thru more than 'maxr' matches in case the distance filter triggers
                SuggestWord sugword = new SuggestWord();
                for (int i = 0; i < stop; i++)
                {
                    sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word)

                    if (sugword.string_Renamed.Equals(word))
                    {
                        continue; // don't suggest a word for itself, that would be silly
                    }

                    //edit distance/normalize with the min word length
                    sugword.score = 1.0f - ((float) sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
                    if (sugword.score < min)
                    {
                        continue;
                    }

                    if (restricted)
                    {
                        // use the user index
                        sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
                        if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
                        {
                            // don't suggest a word that is not present in the field
                            continue;
                        }
                    }
                    sugqueue.Insert(sugword);
                    if (sugqueue.Size() == num_sug)
                    {
                        //if queue full , maintain the min score
                        min = ((SuggestWord) sugqueue.Top()).score;
                    }
                    // The queue keeps the inserted instance, so continue with a fresh one.
                    sugword = new SuggestWord();
                }

                // convert to array string; the array is filled from the last slot backwards
                System.String[] list = new System.String[sugqueue.Size()];
                for (int i = sugqueue.Size() - 1; i >= 0; i--)
                {
                    list[i] = ((SuggestWord) sugqueue.Pop()).string_Renamed;
                }

                return list;
            }
            finally
            {
                // Close the searcher even when searching or scoring throws.
                searcher.Close();
            }
        }
Exemplo n.º 35
0
 /// <summary> Appends an optional (SHOULD) term clause for field <c>k</c> with text <c>v</c>,
 /// boosted by <c>boost</c>, to the boolean query <c>q</c>.</summary>
 private static void  Add(BooleanQuery q, System.String k, System.String v, float boost)
 {
     Term term = new Term(k, v);
     Query boostedQuery = new TermQuery(term);
     boostedQuery.SetBoost(boost);

     BooleanClause optionalClause = new BooleanClause(boostedQuery, BooleanClause.Occur.SHOULD);
     q.Add(optionalClause);
 }
Exemplo n.º 36
0
		/// <summary>
		/// Parses one query string per field and combines the results into a single boolean query.
		/// Use this if you need to specify certain fields as required and others as prohibited.
		/// <para>Usage:</para>
		/// <code>
		/// String[] query = {"query1", "query2", "query3"};
		/// String[] fields = {"filename", "contents", "description"};
		/// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
		///                                BooleanClause.Occur.MUST,
		///                                BooleanClause.Occur.MUST_NOT};
		/// MultiFieldQueryParser.parse(query, fields, flags, analyzer);
		/// </code>
		/// <para>The code above would construct a query:</para>
		/// <code>
		/// (filename:query1) +(contents:query2) -(description:query3)
		/// </code>
		/// </summary>
		/// <param name="queries">Query strings to parse, one per field</param>
		/// <param name="fields">Fields to search on</param>
		/// <param name="flags">Flags describing the fields</param>
		/// <param name="analyzer">Analyzer to use</param>
		/// <returns>A boolean query holding one clause per field</returns>
		/// <exception cref="System.ArgumentException">if the lengths of the queries, fields and flags arrays differ</exception>
		/// <remarks>Parsing errors raised by the query parser are passed on to the caller.</remarks>
		public static Query Parse(System.String[] queries, System.String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
		{
			// All three arrays are walked in parallel, so they must be the same length.
			bool sameLength = queries.Length == fields.Length && queries.Length == flags.Length;
			if (!sameLength)
			{
				throw new System.ArgumentException("queries, fields, and flags array have have different length");
			}

			BooleanQuery combined = new BooleanQuery();
			for (int index = 0; index < fields.Length; index++)
			{
				// Parse the index-th query against the index-th field and add it with its flag.
				Query fieldQuery = new QueryParser(fields[index], analyzer).Parse(queries[index]);
				combined.Add(fieldQuery, flags[index]);
			}
			return combined;
		}
Exemplo n.º 37
0
 /// <summary> Simple similarity query generator.
 /// Takes every unique word of <c>body</c> and forms a boolean query in which all words are optional.
 /// You then run the returned query against your <c>IndexSearcher</c> to find similar documents.
 /// The only caveat is that the first hit returned <b>should be</b> your source document, which you
 /// will need to ignore.
 /// <para>
 /// So, if you have a code fragment like this:
 /// </para>
 /// <code>
 /// Query q = formSimilaryQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
 /// </code>
 /// <para>
 /// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good')</c>.
 /// </para>
 /// <para>
 /// The philosophy behind this method is "two documents are similar if they share lots of words".
 /// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
 /// similarity score if they share more uncommon words.
 /// </para>
 /// <para>
 /// This method is fail-safe: if a long 'body' is passed in and adding a clause to the internal
 /// BooleanQuery throws BooleanQuery.TooManyClauses, the query built so far is returned.
 /// </para>
 /// </summary>
 /// <param name="body">the body of the document you want to find similar documents to
 /// </param>
 /// <param name="a">the analyzer to use to parse the body
 /// </param>
 /// <param name="field">the field you want to search on, probably something like "contents" or "body"
 /// </param>
 /// <param name="stop">optional set of stop words to ignore
 /// </param>
 /// <returns> a query with all unique words in 'body'
 /// </returns>
 public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
 {
     TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
     BooleanQuery similar = new BooleanQuery();
     System.Collections.Hashtable seen = new System.Collections.Hashtable(); // words already added

     for (Lucene.Net.Analysis.Token token = ts.Next(); token != null; token = ts.Next())
     {
         System.String word = token.TermText();

         // skip optional stop words and words that were already added
         bool isStopWord = stop != null && stop.Contains(word);
         if (isStopWord || seen.Contains(word))
         {
             continue;
         }
         seen.Add(word, word);

         // add the word as an optional clause
         TermQuery wordQuery = new TermQuery(new Term(field, word));
         try
         {
             similar.Add(wordQuery, BooleanClause.Occur.SHOULD);
         }
         catch (BooleanQuery.TooManyClauses)
         {
             // fail-safe, just return what we have, not the end of the world
             break;
         }
     }
     return similar;
 }
Exemplo n.º 38
0
        /// <summary>
        /// Executes a paged search that requires both the parsed text query and an inclusive
        /// integer range on the date field, using the index reader supplied by the reader service.
        /// </summary>
        /// <param name="request">
        /// Search phrase plus optional StartDate/EndDate bounds (defaulting to 0 and int.MaxValue),
        /// PageLength (default 30) and PageIndex (default 0).
        /// </param>
        /// <param name="cancellationToken">Forwarded to the reader service while the reader is obtained.</param>
        /// <returns>
        /// A response with <c>IsValid = true</c> and the requested page of hits, or a response with
        /// <c>IsValid = false</c> and a single error message when any exception is thrown.
        /// </returns>
        public async Task <SearchQueryResponse> Handle(SearchQueryRequest request, CancellationToken cancellationToken)
        {
            try
            {
                // Ensures index backwards compatibility
                var AppLuceneVersion = LuceneVersion.LUCENE_48;

                // Uses the cached Lucene index. Because the reader is cached, disposing the reader
                // (and its directory) is the cache's responsibility, so nothing is disposed here.
                var readerWithMetadata = await _luceneReaderService.GetReader(cancellationToken);
                IndexReader reader = readerWithMetadata.Index;

                //create an analyzer to process the text
                var analyzer = new StandardAnalyzer(AppLuceneVersion);

                var pageLength = request.PageLength ?? 30;
                var pageIndex  = request.PageIndex ?? 0;

                var search_phrase = request.Query;

                // Missing bounds widen the range to everything representable.
                var startDateQuery_as_int32 = request.StartDate ?? 0;
                var endDateQuery_as_int32   = request.EndDate ?? int.MaxValue;

                //date filter (both ends inclusive)
                var date_query = Lucene.Net.Search.NumericRangeQuery.NewInt32Range(DateRangeByTerm, startDateQuery_as_int32, endDateQuery_as_int32, true, true);

                //text query
                var query_classic_parser           = new Lucene.Net.QueryParsers.Classic.QueryParser(LuceneVersion.LUCENE_48, SearchByTerm, analyzer);
                Lucene.Net.Search.Query text_query = query_classic_parser.Parse(search_phrase);

                //merging: a document has to satisfy both parts
                Lucene.Net.Search.BooleanQuery final_query = new Lucene.Net.Search.BooleanQuery();
                final_query.Add(text_query, Lucene.Net.Search.Occur.MUST);
                final_query.Add(date_query, Lucene.Net.Search.Occur.MUST);

                var searcher  = new Lucene.Net.Search.IndexSearcher(reader);
                var maxResult = pageLength * (pageIndex + 1) + 1; //need an extra one to determine if there is more

                _logger.LogDebug("Search criteria '{0}' with range ('{1}' to '{2}') with pageIndex {3} and pageLength {4}"
                                 , search_phrase
                                 , startDateQuery_as_int32, endDateQuery_as_int32
                                 , pageIndex, pageLength);

                var results = searcher.Search(final_query, maxResult);

                _logger.LogDebug("Search result has {0} records (before skip).", results.ScoreDocs.Length);

                var response = new SearchQueryResponse();
                response.IsValid      = true;
                response.PageIndex    = pageIndex;
                response.PageLength   = pageLength;
                response.IndexDateUtc = readerWithMetadata.LastIndexOffset;

                // Materialize once: the requested page plus, possibly, the one look-ahead hit.
                var lastPage = results.ScoreDocs.Skip(pageLength * pageIndex).ToArray();

                if (lastPage.Length == 0)
                {
                    response.Hits = new SearchHitEntry[0];
                }
                else
                {
                    response.HasMore = lastPage.Length > pageLength;   //if it has an extra one, then there is more

                    var records = new List <SearchHitEntry>();
                    foreach (var hit in lastPage.Take(pageLength))
                    {
                        var foundDoc = searcher.Doc(hit.Doc);

                        // A document without an "updated" field yields no date instead of failing the whole search.
                        var foundDocCreatedDate = foundDoc.GetField("updated")?.GetInt32Value();

                        DateTime parsedDate;

                        var searchResultEntry = new SearchHitEntry()
                        {
                            DocId = foundDoc.Get("doc_id"),
                            Text  = foundDoc.Get("content"),
                            Rank  = hit.Score,
                        };

                        // The stored integer is read as yyyyMMdd; the invariant culture keeps the
                        // round trip independent of the server's regional settings.
                        if (foundDocCreatedDate.HasValue && DateTime.TryParseExact(foundDocCreatedDate.Value.ToString(CultureInfo.InvariantCulture), "yyyyMMdd", CultureInfo.InvariantCulture,
                                                                                   DateTimeStyles.None, out parsedDate))
                        {
                            searchResultEntry.ModifiedDate = parsedDate;
                        }
                        else
                        {
                            searchResultEntry.ModifiedDate = null;
                        }
                        records.Add(searchResultEntry);
                    }
                    response.Hits = records.ToArray();
                }

                return(response);
            }
            catch (Exception ex)
            {
                // Every failure (including query parse errors) is reported through the response object.
                var result = new SearchQueryResponse();
                result.IsValid = false;
                result.Errors  = new List <string>();
                result.Errors.Add("Unexpected error occured: " + ex.Message);
                return(result);
            }
        }
Exemplo n.º 39
0
 public virtual void TestSynonyms()
 {
     // Expected result: one optional clause per synonym form of the input word.
     BooleanQuery expected = new BooleanQuery(true);
     foreach (string form in new string[] { "dogs", "dog" })
     {
         expected.Add(new TermQuery(new Term("field", form)), BooleanClause.Occur.SHOULD);
     }

     QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());

     // Boolean and phrase construction are both expected to produce the same query for a single word.
     Assert.AreEqual(expected, queryBuilder.CreateBooleanQuery("field", "dogs"));
     Assert.AreEqual(expected, queryBuilder.CreatePhraseQuery("field", "dogs"));

     // Asking for MUST as the operator is expected to leave the result unchanged.
     Assert.AreEqual(expected, queryBuilder.CreateBooleanQuery("field", "dogs", BooleanClause.Occur.MUST));
     Assert.AreEqual(expected, queryBuilder.CreatePhraseQuery("field", "dogs"));
 }
Exemplo n.º 40
0
		/// <summary>
		/// Parses one query string per field and combines the results into a single boolean
		/// query, attaching each parsed sub-query with the occurrence flag given for its field.
		/// Use this to mark certain fields as required and others as prohibited.
		/// <para>Usage:</para>
		/// <code>
		/// String[] query = {"query1", "query2", "query3"};
		/// String[] fields = {"filename", "contents", "description"};
		/// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
		///                                BooleanClause.Occur.MUST,
		///                                BooleanClause.Occur.MUST_NOT};
		/// MultiFieldQueryParser.parse(query, fields, flags, analyzer);
		/// </code>
		/// <para>The code above would construct a query:</para>
		/// <code>
		/// (filename:query1) +(contents:query2) -(description:query3)
		/// </code>
		/// </summary>
		/// <remarks>
		/// Sub-queries that parse to null or to a boolean query without clauses are left out.
		/// Failures raised by the underlying QueryParser are not caught here.
		/// </remarks>
		/// <param name="matchVersion">Lucene version to match; passed through to QueryParser.</param>
		/// <param name="queries">Query strings to parse, one per field.</param>
		/// <param name="fields">Fields to search on.</param>
		/// <param name="flags">Occurrence flag for each field.</param>
		/// <param name="analyzer">Analyzer to use.</param>
		/// <returns>The combined boolean query.</returns>
		/// <exception cref="System.ArgumentException">
		/// If the lengths of the queries, fields and flags arrays differ.
		/// </exception>
		public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
		{
			bool sameLength = queries.Length == fields.Length && queries.Length == flags.Length;
			if (!sameLength)
			{
				throw new System.ArgumentException("queries, fields, and flags array have have different length");
			}

			BooleanQuery combined = new BooleanQuery();
			for (int idx = 0; idx < fields.Length; idx++)
			{
				Query parsed = new QueryParser(matchVersion, fields[idx], analyzer).Parse(queries[idx]);
				if (parsed == null)
				{
					continue;
				}

				// A boolean query without clauses contributes nothing, so it is skipped.
				BooleanQuery parsedAsBoolean = parsed as BooleanQuery;
				if (parsedAsBoolean != null && parsedAsBoolean.GetClauses().Length == 0)
				{
					continue;
				}

				combined.Add(parsed, flags[idx]);
			}
			return combined;
		}
Exemplo n.º 41
0
        public virtual void TestMinShouldMatch()
        {
            BooleanQuery expected = new BooleanQuery();
            foreach (string word in new string[] { "one", "two", "three", "four" })
            {
                expected.Add(new TermQuery(new Term("field", word)), BooleanClause.Occur.SHOULD);
            }

            QueryBuilder builder = new QueryBuilder(new MockAnalyzer(Random()));

            // Row i lists the fractions that are expected to yield a minimum-should-match of i
            // for the four-word input.
            float[][] fractionsByMinimum = new float[][]
            {
                new float[] { 0.1f, 0.24f },
                new float[] { 0.25f, 0.49f },
                new float[] { 0.5f, 0.74f },
                new float[] { 0.75f, 0.99f },
            };

            for (int minimum = 0; minimum < fractionsByMinimum.Length; minimum++)
            {
                expected.MinimumNumberShouldMatch = minimum;
                foreach (float fraction in fractionsByMinimum[minimum])
                {
                    Assert.AreEqual(expected, builder.CreateMinShouldMatchQuery("field", "one two three four", fraction));
                }
            }
        }
Exemplo n.º 42
0
        /// <summary>
        /// Round-trips a boolean query through binary serialization and checks that the copy
        /// equals the original and produces the same hit count against the test directory.
        /// </summary>
        public void TestBooleanQuerySerialization()
        {
            Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();

            lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Lucene.Net.Search.BooleanClause.Occur.MUST);

            // NOTE(review): BinaryFormatter is the mechanism under test here; it must not be fed untrusted data.
            System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
            Lucene.Net.Search.BooleanQuery lucQuery2;
            using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
            {
                bf.Serialize(ms, lucQuery);
                ms.Seek(0, System.IO.SeekOrigin.Begin);
                lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
            }

            Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");

            // Each searcher is closed even when the search throws.
            int hitCount;
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount = searcher.Search(lucQuery, 20).totalHits;
            }
            finally
            {
                searcher.Close();
            }

            int hitCount2;
            searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                hitCount2 = searcher.Search(lucQuery2, 20).totalHits;
            }
            finally
            {
                // The second searcher used to be left open.
                searcher.Close();
            }

            Assert.AreEqual(hitCount, hitCount2,"Error in serialization - different hit counts");
        }
Exemplo n.º 43
0
        /// <summary> Suggest similar words (restricted or not to a field of a user index).</summary>
        /// <remarks>
        /// "Restricted mode" is active when both <paramref name="ir"/> and <paramref name="field"/>
        /// are non-null. Only in that mode is the user index consulted for term frequencies.
        /// When <paramref name="morePopular"/> is false and the word itself occurs in the field,
        /// the word is returned as the only suggestion.
        /// </remarks>
        /// <param name="word">String the word you want a spell check done on
        /// </param>
        /// <param name="num_sug">int the number of suggest words
        /// </param>
        /// <param name="ir">the indexReader of the user index (can be null see field param)
        /// </param>
        /// <param name="field">String the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field.
        /// </param>
        /// <param name="morePopular">boolean return only the suggest words that are more frequent than the searched word
        /// (only in restricted mode = (indexReader!=null and field!=null))
        /// </param>
        /// <returns> String[] the list of suggested words, filled from the back as entries are popped
        /// from the suggestion queue; the original contract describes the ordering as: first criteria
        /// the edit distance, second criteria (only in restricted mode) the popularity of the
        /// suggested word in the field of the user index
        /// </returns>
        public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
        {
            float            min = this.minScore;
            TRStringDistance sd  = new TRStringDistance(word);
            int lengthWord       = word.Length;

            // The frequency of the word itself is needed in both modes: as the bar to beat when
            // morePopular is set, and to short-circuit when the word already exists otherwise.
            // Previously it was only looked up when morePopular was true, which made the
            // early return below unreachable.
            bool restricted = ir != null && field != null;
            int  freq       = restricted ? ir.DocFreq(new Term(field, word)) : 0;
            int  goalFreq   = morePopular ? freq : 0;

            if (!morePopular && freq > 0)
            {
                return(new System.String[] { word }); // return the word if it exist in the index and i don't want a more popular word
            }

            BooleanQuery query = new BooleanQuery();

            System.String[] grams;
            System.String   key;

            for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
            {
                key = "gram" + ng;           // form key

                grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                if (grams.Length == 0)
                {
                    continue; // hmm
                }

                if (bStart > 0)
                {
                    // should we boost prefixes?
                    Add(query, "start" + ng, grams[0], bStart); // matches start of word
                }
                if (bEnd > 0)
                {
                    // should we boost suffixes
                    Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
                }
                for (int i = 0; i < grams.Length; i++)
                {
                    Add(query, key, grams[i]);
                }
            }

            IndexSearcher searcher = new IndexSearcher(this.spellindex);
            try
            {
                Hits             hits     = searcher.Search(query);
                SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);

                int         stop    = Math.Min(hits.Length(), 10 * num_sug); // go thru more than 'maxr' matches in case the distance filter triggers
                SuggestWord sugword = new SuggestWord();

                for (int i = 0; i < stop; i++)
                {
                    sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word)

                    if (sugword.string_Renamed.Equals(word))
                    {
                        continue; // don't suggest a word for itself, that would be silly
                    }

                    //edit distance/normalize with the min word length
                    sugword.score = 1.0f - ((float)sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
                    if (sugword.score < min)
                    {
                        continue;
                    }

                    if (restricted)
                    {
                        // use the user index
                        sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
                        if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
                        {
                            // don't suggest a word that is not present in the field
                            continue;
                        }
                    }
                    sugqueue.Insert(sugword);
                    if (sugqueue.Size() == num_sug)
                    {
                        //if queue full , maintain the min score
                        min = ((SuggestWord)sugqueue.Top()).score;
                    }

                    // The queued instance must not be reused; rejected candidates above simply
                    // overwrite the same instance on the next iteration.
                    sugword = new SuggestWord();
                }

                // convert to array string
                System.String[] list = new System.String[sugqueue.Size()];
                for (int i = sugqueue.Size() - 1; i >= 0; i--)
                {
                    list[i] = ((SuggestWord)sugqueue.Pop()).string_Renamed;
                }

                return(list);
            }
            finally
            {
                // Release the spell index searcher even when searching or scoring throws.
                searcher.Close();
            }
        }
Exemplo n.º 44
0
 /// <summary> Stores the current maximum clause count of BooleanQuery in originalMaxClauses.</summary>
 public virtual void  SetUp()
 {
     int currentLimit = BooleanQuery.GetMaxClauseCount();
     originalMaxClauses = currentLimit;
 }
Exemplo n.º 45
0
        public virtual void TestCJKTerm()
        {
            // Individual CJK characters are expected to come out as separate terms.
            SimpleCJKAnalyzer cjkAnalyzer = new SimpleCJKAnalyzer(this);

            BooleanQuery expected = new BooleanQuery();
            foreach (string character in new string[] { "中", "国" })
            {
                expected.Add(new TermQuery(new Term("field", character)), BooleanClause.Occur.SHOULD);
            }

            QueryBuilder queryBuilder = new QueryBuilder(cjkAnalyzer);
            Query actual = queryBuilder.CreateBooleanQuery("field", "中国");
            Assert.AreEqual(expected, actual);
        }
Exemplo n.º 46
0
 /// <summary> Sets the maximum clause count of BooleanQuery to the value held in originalMaxClauses.</summary>
 public virtual void  TearDown()
 {
     int savedLimit = originalMaxClauses;
     BooleanQuery.SetMaxClauseCount(savedLimit);
 }
Exemplo n.º 47
0
 /// <summary> Appends an optional (SHOULD) term clause to a boolean query.</summary>
 /// <param name="q">query that receives the clause</param>
 /// <param name="k">first argument of the Term, used as the field name</param>
 /// <param name="v">second argument of the Term, used as the term text</param>
 private static void  Add(BooleanQuery q, System.String k, System.String v)
 {
     Term term = new Term(k, v);
     TermQuery termQuery = new TermQuery(term);
     BooleanClause optionalClause = new BooleanClause(termQuery, BooleanClause.Occur.SHOULD);
     q.Add(optionalClause);
 }
Exemplo n.º 48
0
        /// <summary>
        /// Builds the "more like this" boolean query by draining a priority queue whose entries
        /// are object arrays: [0] term text, [1] field name, [2] score (Single).
        /// </summary>
        /// <param name="q">queue to drain; popping stops early once a limit is hit</param>
        /// <returns>a boolean query with one optional clause per consumed entry</returns>
        private Query CreateQuery(PriorityQueue q)
        {
            BooleanQuery result = new BooleanQuery();
            int added = 0;
            float topScore = 0;

            for (System.Object popped = q.Pop(); popped != null; popped = q.Pop())
            {
                System.Object[] entry = (System.Object[]) popped;
                System.String fieldName = (System.String) entry[1];
                System.String text = (System.String) entry[0];
                TermQuery termQuery = new TermQuery(new Term(fieldName, text));

                if (boost)
                {
                    float score = (float) ((System.Single) entry[2]);
                    if (added == 0)
                    {
                        // The first entry popped supplies the reference score for all boosts.
                        topScore = score;
                    }
                    termQuery.SetBoost(score / topScore);
                }

                try
                {
                    result.Add(termQuery, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // Clause limit reached: keep what has been added so far.
                    break;
                }

                added++;
                bool limitReached = maxQueryTerms > 0 && added >= maxQueryTerms;
                if (limitReached)
                {
                    break;
                }
            }

            return result;
        }
Exemplo n.º 49
0
 public virtual void TestCJKSynonymsOR2()
 {
     BooleanQuery expected = new BooleanQuery();
     expected.Add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);

     // Each of the two trailing characters is expected to expand into the same pair of variants.
     for (int position = 0; position < 2; position++)
     {
         BooleanQuery variants = new BooleanQuery(true);
         foreach (string variant in new string[] { "国", "國" })
         {
             variants.Add(new TermQuery(new Term("field", variant)), BooleanClause.Occur.SHOULD);
         }
         expected.Add(variants, BooleanClause.Occur.SHOULD);
     }

     QueryBuilder queryBuilder = new QueryBuilder(new MockCJKSynonymAnalyzer());
     Assert.AreEqual(expected, queryBuilder.CreateBooleanQuery("field", "中国国"));
 }
Exemplo n.º 50
0
		/// <summary>
		/// Parses the same query string against every given field and joins the per-field
		/// results as optional clauses. For x fields this effectively constructs:
		/// <code>
		/// (field1:query) (field2:query) (field3:query)...(fieldx:query)
		/// </code>
		/// </summary>
		/// <remarks>
		/// Deprecated: use Parse(String) instead, but note that it returns a different query when
		/// all terms are required: its query accepts all terms no matter in what field they
		/// occur, whereas the query built by this method expects all terms in all fields at the
		/// same time.
		/// Failures raised while parsing (ParseException, TokenMgrError) are not caught here.
		/// </remarks>
		/// <param name="query">Query string to parse.</param>
		/// <param name="fields">Fields to search on.</param>
		/// <param name="analyzer">Analyzer to use.</param>
		/// <returns>A boolean query holding one optional clause per field.</returns>
		public static Query Parse(System.String query, System.String[] fields, Analyzer analyzer)
		{
			BooleanQuery combined = new BooleanQuery();
			foreach (System.String fieldName in fields)
			{
				Query perField = Parse(query, fieldName, analyzer);
				combined.Add(perField, BooleanClause.Occur.SHOULD);
			}
			return combined;
		}
Exemplo n.º 51
0
        /// <summary>
        /// Simple similarity query generator: analyzes <paramref name="body"/> and forms a boolean
        /// query in which every unique word is an optional clause on <paramref name="field"/>.
        /// Run the result against your IndexSearcher to find similar documents. The one caveat is
        /// that the first hit returned should be your source document, which you then need to ignore.
        /// <para>So, for a code fragment like this:</para>
        /// <code>
        /// Query q = formSimilaryQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
        /// </code>
        /// <para>the query returned, in string form, will be
        /// <c>'(i use lucene to search fast searchers are good')</c>.</para>
        /// <para>The philosophy behind this method is "two documents are similar if they share lots
        /// of words". Behind the scenes, Lucene's scoring algorithm will tend to give two documents a
        /// higher similarity score if they share more uncommon words.</para>
        /// <para>This method is fail-safe: if a long body makes BooleanQuery.Add (used internally)
        /// throw BooleanQuery.TooManyClauses, the query as built so far is returned.</para>
        /// </summary>
        /// <param name="body">the body of the document you want to find similar documents to</param>
        /// <param name="a">the analyzer to use to parse the body</param>
        /// <param name="field">the field you want to search on, probably something like "contents" or "body"</param>
        /// <param name="stop">optional set of stop words to ignore; may be null</param>
        /// <returns> a query with all unique words in 'body'</returns>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
        {
            TokenStream stream = a.TokenStream(field, new System.IO.StringReader(body));
            TermAttribute termAttribute = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));

            BooleanQuery similar = new BooleanQuery();
            // Words that already produced a clause; duplicates are skipped.
            System.Collections.Hashtable seen = new System.Collections.Hashtable();

            while (stream.IncrementToken())
            {
                System.String word = termAttribute.Term();

                bool isStopWord = stop != null && stop.Contains(word);
                if (isStopWord || seen.Contains(word))
                {
                    continue;
                }
                seen.Add(word, word);

                try
                {
                    similar.Add(new TermQuery(new Term(field, word)), BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // Clause limit reached: return what has been built so far.
                    break;
                }
            }

            return similar;
        }
Exemplo n.º 52
0
		/// <summary>
		/// Parses one query string per field and combines the results, translating each integer
		/// flag into a clause occurrence. Use this to mark certain fields as required and others
		/// as prohibited.
		/// <para>Usage:</para>
		/// <code>
		/// String[] fields = { "filename", "contents", "description" };
		/// int[] flags = { MultiFieldQueryParser.NORMAL_FIELD,
		///                 MultiFieldQueryParser.REQUIRED_FIELD,
		///                 MultiFieldQueryParser.PROHIBITED_FIELD, };
		/// parse(query, fields, flags, analyzer);
		/// </code>
		/// <para>The code above would construct a query:</para>
		/// <code>
		/// (filename:query1) +(contents:query2) -(description:query3)
		/// </code>
		/// </summary>
		/// <remarks>
		/// REQUIRED_FIELD maps to MUST, PROHIBITED_FIELD to MUST_NOT and any other value to SHOULD.
		/// Deprecated: use Parse(String[], String[], BooleanClause.Occur[], Analyzer) instead.
		/// Failures raised while parsing (ParseException, TokenMgrError) are not caught here.
		/// </remarks>
		/// <param name="queries">Query strings to parse, one per field.</param>
		/// <param name="fields">Fields to search on.</param>
		/// <param name="flags">Flags describing the fields.</param>
		/// <param name="analyzer">Analyzer to use.</param>
		/// <returns>The combined boolean query.</returns>
		/// <exception cref="System.ArgumentException">
		/// If the lengths of the queries, fields and flags arrays differ.
		/// </exception>
		public static Query Parse(System.String[] queries, System.String[] fields, int[] flags, Analyzer analyzer)
		{
			bool sameLength = queries.Length == fields.Length && queries.Length == flags.Length;
			if (!sameLength)
			{
				throw new System.ArgumentException("queries, fields, and flags array have have different length");
			}

			BooleanQuery combined = new BooleanQuery();
			for (int idx = 0; idx < fields.Length; idx++)
			{
				Query parsed = new QueryParser(fields[idx], analyzer).Parse(queries[idx]);

				// Translate the legacy integer flag into the clause occurrence.
				int flag = flags[idx];
				BooleanClause.Occur occurrence;
				if (flag == REQUIRED_FIELD)
				{
					occurrence = BooleanClause.Occur.MUST;
				}
				else if (flag == PROHIBITED_FIELD)
				{
					occurrence = BooleanClause.Occur.MUST_NOT;
				}
				else
				{
					occurrence = BooleanClause.Occur.SHOULD;
				}

				combined.Add(parsed, occurrence);
			}
			return combined;
		}
		/// <summary>
		/// Parses one query string per field and joins the results as optional clauses.
		/// For x fields this effectively constructs:
		/// <code>
		/// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
		/// </code>
		/// </summary>
		/// <remarks>
		/// Sub-queries that parse to null or to a boolean query without clauses are left out.
		/// Failures raised by the underlying QueryParser are not caught here.
		/// </remarks>
		/// <param name="queries">Query strings to parse, one per field.</param>
		/// <param name="fields">Fields to search on.</param>
		/// <param name="analyzer">Analyzer to use.</param>
		/// <returns>The combined boolean query.</returns>
		/// <exception cref="System.ArgumentException">
		/// If the length of the queries array differs from the length of the fields array.
		/// </exception>
		public static Query Parse(System.String[] queries, System.String[] fields, Analyzer analyzer)
		{
			if (queries.Length != fields.Length)
			{
				throw new System.ArgumentException("queries.length != fields.length");
			}

			BooleanQuery combined = new BooleanQuery();
			for (int idx = 0; idx < fields.Length; idx++)
			{
				Query parsed = new QueryParser(fields[idx], analyzer).Parse(queries[idx]);
				if (parsed == null)
				{
					continue;
				}

				// A boolean query without clauses contributes nothing, so it is skipped.
				BooleanQuery parsedAsBoolean = parsed as BooleanQuery;
				if (parsedAsBoolean != null && parsedAsBoolean.GetClauses().Length == 0)
				{
					continue;
				}

				combined.Add(parsed, BooleanClause.Occur.SHOULD);
			}
			return combined;
		}
Exemplo n.º 54
0
        public virtual void  TestBasic()
        {
            Directory   directory   = new MockRAMDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            indexWriter.SetMergeFactor(2);
            indexWriter.SetMaxBufferedDocs(2);
            indexWriter.SetSimilarity(new SimpleSimilarity());

            // Index 30 documents. Document n carries n + 1 repetitions of "term" in both fields;
            // even-numbered documents get an extra " tf" in the "tf" field, odd-numbered ones an
            // extra " notf" in the "noTf" field.
            System.String             term    = "term";
            System.Text.StringBuilder growing = new System.Text.StringBuilder(265);
            for (int docNo = 0; docNo < 30; docNo++)
            {
                bool even = docNo % 2 == 0;

                growing.Append(term).Append(" ");
                System.String content = growing.ToString();

                Document doc = new Document();

                Field noTf = new Field("noTf", content + (even ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
                noTf.SetOmitTermFreqAndPositions(true);
                doc.Add(noTf);

                Field tf = new Field("tf", content + (even ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
                doc.Add(tf);

                indexWriter.AddDocument(doc);
            }

            indexWriter.Optimize();
            // Closing the writer flushes everything to the directory.
            indexWriter.Close();
            _TestUtil.CheckIndex(directory);

            // Verify the index.
            Searcher searcher = new IndexSearcher(directory);
            searcher.SetSimilarity(new SimpleSimilarity());

            TermQuery noTfTermQuery   = new TermQuery(new Term("noTf", term));
            TermQuery tfTermQuery     = new TermQuery(new Term("tf", term));
            TermQuery noTfMarkerQuery = new TermQuery(new Term("noTf", "notf"));
            TermQuery tfMarkerQuery   = new TermQuery(new Term("tf", "tf"));

            // Each collector class performs its own per-hit checks; the order of the searches is kept.
            searcher.Search(noTfTermQuery, new AnonymousClassCountingHitCollector(this));
            searcher.Search(tfTermQuery, new AnonymousClassCountingHitCollector1(this));
            searcher.Search(noTfMarkerQuery, new AnonymousClassCountingHitCollector2(this));
            searcher.Search(tfMarkerQuery, new AnonymousClassCountingHitCollector3(this));

            // Requiring "term" in noTf and "tf" in tf selects the 15 even-numbered documents.
            BooleanQuery both = new BooleanQuery();
            both.Add(noTfTermQuery, Occur.MUST);
            both.Add(tfMarkerQuery, Occur.MUST);

            searcher.Search(both, new AnonymousClassCountingHitCollector4(this));
            Assert.IsTrue(CountingHitCollector.GetCount() == 15);

            searcher.Close();
            directory.Close();
        }
Exemplo n.º 55
0
 /// <summary> Adds an optional (SHOULD) clause matching a single term to a boolean query.</summary>
 /// <param name="q">query that receives the clause</param>
 /// <param name="k">first argument of the Term, used as the field name</param>
 /// <param name="v">second argument of the Term, used as the term text</param>
 private static void  Add(BooleanQuery q, System.String k, System.String v)
 {
     TermQuery singleTerm = new TermQuery(new Term(k, v));
     BooleanClause clause = new BooleanClause(singleTerm, BooleanClause.Occur.SHOULD);
     q.Add(clause);
 }