Example #1
0
        public override Query Rewrite(IndexReader reader)
        {
            // Rewrite the wrapped regex first; MultiTermQuery.Rewrite (which
            // RegexQuery inherits) always yields a BooleanQuery.
            Query rewritten = new RegexQuery(term).Rewrite(reader);
            BooleanQuery boolQuery = (BooleanQuery)rewritten;

            BooleanClause[] boolClauses = boolQuery.GetClauses();
            SpanQuery[] spanClauses = new SpanQuery[boolClauses.Length];

            int idx = 0;
            foreach (BooleanClause boolClause in boolClauses)
            {
                // Every clause produced by RegexQuery.Rewrite is a TermQuery.
                TermQuery termQuery = (TermQuery)boolClause.GetQuery();

                SpanTermQuery spanTerm = new SpanTermQuery(termQuery.GetTerm());
                spanTerm.SetBoost(termQuery.GetBoost());
                spanClauses[idx++] = spanTerm;
            }

            // Union of the per-term span queries, preserving the overall boost.
            SpanOrQuery spanOr = new SpanOrQuery(spanClauses);
            spanOr.SetBoost(rewritten.GetBoost());
            return spanOr;
        }
Example #2
0
        public virtual void  TestGetValuesForIndexedDocument()
        {
            // Index one document, then verify the stored field values survive
            // a round-trip through the index via DoAssert.
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new StandardAnalyzer(), true);

            writer.AddDocument(MakeDocumentWithFields());
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            // search for something that does exists
            Query query = new TermQuery(new Term("keyword", "test1"));

            // ensure that queries return expected results without DateFilter first
            Hits hits = searcher.Search(query);

            Assert.AreEqual(1, hits.Length());

            try
            {
                DoAssert(hits.Doc(0), true);
            }
            catch (System.Exception e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                System.Console.Error.Write("\n");
                // BUGFIX: the original swallowed the exception here, so an
                // assertion failure inside DoAssert could never fail the test.
                // Rethrow so the test framework observes it.
                throw;
            }
            finally
            {
                // Always release the searcher, even on failure.
                searcher.Close();
            }
        }
Example #3
0
        /// <summary> Add a clause to a boolean query.</summary>
        /// <param name="q">The boolean query to extend.</param>
        /// <param name="k">Field name of the new term clause.</param>
        /// <param name="v">Term text of the new term clause.</param>
        /// <param name="boost">Boost applied to the new clause.</param>
        private static void  Add(BooleanQuery q, System.String k, System.String v, float boost)
        {
            TermQuery clause = new TermQuery(new Term(k, v));
            clause.SetBoost(boost);
            q.Add(new BooleanClause(clause, BooleanClause.Occur.SHOULD));
        }
Example #4
0
        public virtual void  TestAfterClose()
        {
            // An NRT reader obtained from a writer must stay usable after the
            // writer is closed, but Reopen must then fail.
            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);

            // create the index
            CreateIndexNoClose(false, "test", writer);

            IndexReader r = writer.GetReader();

            writer.Close();

            _TestUtil.CheckIndex(dir1);

            // reader should remain usable even after IndexWriter is closed:
            Assert.AreEqual(100, r.NumDocs());
            Query q = new TermQuery(new Term("indexname", "test"));

            Assert.AreEqual(100, new IndexSearcher(r).Search(q, 10).totalHits);

            // ...but reopening the reader must fail once the writer is closed.
            try
            {
                r.Reopen();
                Assert.Fail("failed to hit AlreadyClosedException");
            }
            catch (AlreadyClosedException)
            {
                // expected. FIX: dropped the unused 'ace' variable that caused
                // a CS0168 compiler warning.
            }
            r.Close();
            dir1.Close();
        }
        public virtual void TestDifferentNumResults()
        {
            // Exercise the facets collector with FacetRequests and differing numResults.
            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);

            FacetsCollector collector = new FacetsCollector();
            searcher.Search(new TermQuery(A), collector);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, collector);

            // CP_A: the aggregated value is expected to be -1; each child
            // label count must match the precomputed expectations.
            FacetResult children = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
            Assert.AreEqual(-1, (int)children.Value);
            foreach (LabelAndValue lv in children.LabelValues)
            {
                Assert.AreEqual(termExpectedCounts[CP_A + "/" + lv.label], lv.value);
            }

            // CP_B: both the aggregated value and the child counts must match.
            children = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
            Assert.AreEqual(termExpectedCounts[CP_B], children.Value);
            foreach (LabelAndValue lv in children.LabelValues)
            {
                Assert.AreEqual(termExpectedCounts[CP_B + "/" + lv.label], lv.value);
            }

            IOUtils.Close(indexReader, taxoReader);
        }
Example #6
0
        public virtual void  TestDuringAddDelete()
        {
            // Stress test: background threads add/delete documents while this
            // thread repeatedly reopens a near-real-time reader and searches it.
            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            writer.SetMergeFactor(2);

            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();

            IndexReader r = writer.GetReader();

            int   NUM_THREAD = 5;
            float SECONDS    = 3;

            // Deadline in milliseconds-since-epoch; both this loop and the
            // worker threads run until it passes.
            long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);

            // Thread-safe list collecting exceptions thrown by worker threads.
            System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));

            SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREAD];
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i] = new AnonymousClassThread1(endTime, writer, excs, this);
                threads[i].IsBackground = true;
                threads[i].Start();
            }

            int sum = 0;

            // Reopen + search until the deadline; hit counts accumulate so we
            // can later assert that at least one search saw matching docs.
            while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
            {
                IndexReader r2 = r.Reopen();
                if (r2 != r)
                {
                    // Reopen returned a fresh reader: release the old one.
                    r.Close();
                    r = r2;
                }
                Query q = new TermQuery(new Term("indexname", "test"));
                sum += new IndexSearcher(r).Search(q, 10).totalHits;
            }

            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i].Join();
            }
            // Searches must have found documents, and no worker may have failed.
            Assert.IsTrue(sum > 0);

            Assert.AreEqual(0, excs.Count);
            writer.Close();

            _TestUtil.CheckIndex(dir1);
            r.Close();
            dir1.Close();
        }
Example #7
0
        public virtual void  TestFieldSetValue()
        {
            // Reuse one Document instance, mutating its "id" field between adds.
            Field idField = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
            Document doc = new Document();
            doc.Add(idField);
            doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));

            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(doc);
            idField.SetValue("id2");
            writer.AddDocument(doc);
            idField.SetValue("id3");
            writer.AddDocument(doc);
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);
            Query query = new TermQuery(new Term("keyword", "test"));

            // ensure that queries return expected results without DateFilter first
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);

            // Record one bit per id value; all three must be observed.
            int seen = 0;
            for (int i = 0; i < 3; i++)
            {
                Field f = searcher.Doc(hits[i].doc).GetField("id");
                switch (f.StringValue())
                {
                    case "id1":
                        seen |= 1;
                        break;

                    case "id2":
                        seen |= 2;
                        break;

                    case "id3":
                        seen |= 4;
                        break;

                    default:
                        Assert.Fail("unexpected id field");
                        break;
                }
            }
            searcher.Close();
            dir.Close();
            Assert.AreEqual(7, seen, "did not see all IDs");
        }
Example #8
0
 /// <summary>Checks whether a document with this file name and last-write time is already indexed.</summary>
 /// <param name="officeData">Carries the FileName and LastWriteTime used as the lookup key.</param>
 /// <returns>true when at least one matching document exists in the index.</returns>
 public bool CheckDocExist(OfficeData officeData)
 {
     Lucene.Net.Search.Query query1 = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("FileName", officeData.FileName));
     Lucene.Net.Search.Query query2 = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("LastWriteTime", officeData.LastWriteTime));

     // Both terms must match the same document.
     Lucene.Net.Search.BooleanQuery combined = new Lucene.Net.Search.BooleanQuery();
     combined.Add(query1, Lucene.Net.Search.Occur.MUST);
     combined.Add(query2, Lucene.Net.Search.Occur.MUST);

     Lucene.Net.Search.TopDocs topDocs = searcher.Search(combined, 2);
     // Idiom: return the condition directly instead of if/return(true)/return(false).
     return topDocs.TotalHits > 0;
 }
Example #9
0
            override public void  Run()
            {
                // Stress worker: for numIteration rounds, open an IndexSearcher
                // over the shared directory, run a fixed TermQuery, and close the
                // searcher. Any exception sets hitException and stops the loop.
                IndexSearcher searcher = null;
                Query         query    = new TermQuery(new Term("content", "aaa"));

                for (int i = 0; i < this.numIteration; i++)
                {
                    try
                    {
                        searcher = new IndexSearcher(dir);
                    }
                    catch (System.Exception e)
                    {
                        // Opening the searcher failed: record and bail out.
                        hitException = true;
                        System.Console.Out.WriteLine("Stress Test Index Searcher: create hit unexpected exception: " + e.ToString());
                        System.Console.Out.WriteLine(e.StackTrace);
                        break;
                    }
                    if (searcher != null)
                    {
                        ScoreDoc[] hits = null;
                        try
                        {
                            hits = searcher.Search(query, null, 1000).scoreDocs;
                        }
                        catch (System.IO.IOException e)
                        {
                            // The search itself failed: record and bail out.
                            hitException = true;
                            System.Console.Out.WriteLine("Stress Test Index Searcher: search hit unexpected exception: " + e.ToString());
                            System.Console.Out.WriteLine(e.StackTrace);
                            break;
                        }
                        // System.out.println(hits.length() + " total results");
                        try
                        {
                            searcher.Close();
                        }
                        catch (System.IO.IOException e)
                        {
                            // Closing failed: record and bail out.
                            hitException = true;
                            System.Console.Out.WriteLine("Stress Test Index Searcher: close hit unexpected exception: " + e.ToString());
                            System.Console.Out.WriteLine(e.StackTrace);
                            break;
                        }
                        searcher = null;
                    }
                }
            }
Example #10
0
        /// <summary>Returns true when a document with the given "bctid" exists in the index.</summary>
        /// <param name="id">Brightcove video id stored in the "bctid" field.</param>
        /// <param name="index">The Lucene directory to search.</param>
        private bool videoExistsInIndex(string id, Lucene.Net.Store.Directory index)
        {
            Lucene.Net.Search.TermQuery termQuery    = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("bctid", id));
            Lucene.Net.Search.Searcher  termSearcher = new Lucene.Net.Search.IndexSearcher(index, true);
            try
            {
                // Only one hit is needed to prove existence.
                Lucene.Net.Search.TopScoreDocCollector termCollector = Lucene.Net.Search.TopScoreDocCollector.Create(1, true);
                termSearcher.Search(termQuery, termCollector);
                // Idiom: return the comparison directly instead of flag + if.
                return termCollector.TopDocs().TotalHits > 0;
            }
            finally
            {
                // BUGFIX: the original never closed the searcher, leaking the
                // underlying index resources on every call.
                termSearcher.Close();
            }
        }
Example #11
0
        /// <summary> Create the More like query from a PriorityQueue</summary>
        private Query CreateQuery(PriorityQueue q)
        {
            BooleanQuery query = new BooleanQuery();

            System.Object cur;
            int           qterms    = 0;
            float         bestScore = 0;

            // Each popped entry is an Object[] where, as used below, ar[0] is the
            // term text, ar[1] is the field name, and ar[2] is a float score.
            while (((cur = q.Pop()) != null))
            {
                System.Object[] ar = (System.Object[])cur;
                TermQuery       tq = new TermQuery(new Term((System.String)ar[1], (System.String)ar[0]));

                if (boost)
                {
                    if (qterms == 0)
                    {
                        // The first popped term establishes the normalization base.
                        bestScore = (float)((System.Single)ar[2]);
                    }
                    float myScore = (float)((System.Single)ar[2]);

                    // Boost each term relative to the first term's score.
                    tq.SetBoost(myScore / bestScore);
                }

                try
                {
                    query.Add(tq, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // Clause limit reached: return what has been built so far.
                    break;
                }

                qterms++;
                // Honor the configured cap on the number of query terms, if set.
                if (maxQueryTerms > 0 && qterms >= maxQueryTerms)
                {
                    break;
                }
            }

            return(query);
        }
Example #12
0
        /// <summary>Returns true when a document with the given "url" field is already in the index.</summary>
        /// <param name="url">The URL to look up.</param>
        public static bool PreviouslyIndexed(string url)
        {
            string indexFileLocation = indexDir;
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
            Lucene.Net.Search.Hits hits = null;
            try
            {
                Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", url));

                hits = searcher.Search(query);

            }
            catch (Exception ex)
            {
                // Best-effort: a failed search is treated as "not indexed", but
                // FIX: no longer swallowed silently.
                Console.Error.WriteLine("PreviouslyIndexed: search failed: " + ex);
            }
            finally
            {
                searcher.Close();
            }
            // BUGFIX: when the search threw, 'hits' stayed null and the original
            // crashed here with a NullReferenceException on hits.Length().
            return hits != null && hits.Length() > 0;
        }
Example #13
0
        public virtual void  TestGetValuesForIndexedDocument()
        {
            // Build a one-document index.
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            writer.AddDocument(MakeDocumentWithFields());
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            // search for something that does exists
            Query keywordQuery = new TermQuery(new Term("keyword", "test1"));

            // ensure that queries return expected results without DateFilter first
            ScoreDoc[] matches = searcher.Search(keywordQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(1, matches.Length);

            DoAssert(searcher.Doc(matches[0].doc), true);
            searcher.Close();
        }
Example #14
0
        /// <summary>Searches the index for projects matching <paramref name="s"/> in content/title (plus the fromUrl term).</summary>
        /// <param name="s">The search text.</param>
        /// <returns>Distinct matching items; empty on failure (best-effort).</returns>
        public static List<IndexedItem> SearchProjects(string s)
        {
            List<IndexedItem> retVal = new List<IndexedItem>();

            string indexFileLocation = indexDir;
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);

            try
            {
                // Combine the content, url and title term queries into one query.
                Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("content", s));
                query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", fromUrl)) });
                query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("title", s)) });

                //execute the query
                Lucene.Net.Search.Hits hits = searcher.Search(query);

                //iterate over the results.
                for (int i = 0; i < hits.Length(); i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    string article = doc.Get("content");
                    string title = doc.Get("title");
                    string url = doc.Get("url");
                    retVal.Add(new IndexedItem { Article = article, Href = url, Title = title });
                }
                foreach (IndexedItem ind in retVal)
                {
                    Console.WriteLine(ind.Href);
                }

                retVal = retVal.Distinct().ToList();
            }
            catch (Exception ex)
            {
                // FIX: the original empty 'catch { }' discarded every error
                // silently; keep the best-effort contract but report the failure.
                Console.Error.WriteLine("SearchProjects failed: " + ex);
            }
            finally
            {
                searcher.Close();
            }
            return retVal;
        }
Example #15
0
        void LUCENENET_100_ClientSearch()
        {
            try
            {
                // Connect to the remote searcher and wrap it in a MultiSearcher.
                Lucene.Net.Search.Searchable remote = (Lucene.Net.Search.Searchable)Activator.GetObject(typeof(Lucene.Net.Search.Searchable), @"tcp://localhost:38087/Searcher");
                Lucene.Net.Search.MultiSearcher multiSearcher = new Lucene.Net.Search.MultiSearcher(new Lucene.Net.Search.Searchable[] { remote });

                Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field1", "moon"));

                // Sort the results by the integer field "field2".
                Lucene.Net.Search.Sort sort = new Lucene.Net.Search.Sort();
                sort.SetSort(new Lucene.Net.Search.SortField("field2", Lucene.Net.Search.SortField.INT));

                Lucene.Net.Search.TopDocs docs = multiSearcher.Search(query, null, 100, sort);
            }
            catch (Exception ex)
            {
                // Surface the failure to the test driver via the shared field.
                LUCENENET_100_Exception = ex;
            }
            finally
            {
                LUCENENET_100_testFinished = true;
            }
        }
Example #16
0
        public string Visit_WithValidTermQuery_ReturnsValidReponse()
        {
            // Wrap a raw Lucene term query in the visitor-friendly wrapper.
            var termQuery = new LuceneTermQuery
            {
                LuceneQuery = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("City", "TelAviv")),
            };

            // Convert the Lucene query into an ES query via the Lucene visitor.
            termQuery.Accept(new LuceneVisitor());

            var esQuery = termQuery.ESQuery;
            Assert.NotNull(esQuery);

            // Translate the ES query into KustoQL with the root visitor helper.
            var rootVisitor = VisitorTestsUtils.CreateAndVisitRootVisitor();
            rootVisitor.Visit((QueryStringClause)esQuery);

            return ((QueryStringClause)esQuery).KustoQL;
        }
Example #17
0
        public string Visit_WithValidTermQuery_ReturnsValidReponse()
        {
            var luceneQuery = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("City", "TelAviv"));
            var wrapper = new LuceneTermQuery { LuceneQuery = luceneQuery };

            // Run the Lucene -> ES conversion.
            wrapper.Accept(new LuceneVisitor());
            var esClause = wrapper.ESQuery;
            Assert.NotNull(esClause);

            // Run the ES -> KustoQL conversion with a mocked schema retriever.
            var dslVisitor = new ElasticSearchDSLVisitor(SchemaRetrieverMock.CreateMockSchemaRetriever());
            dslVisitor.Visit((QueryStringClause)esClause);

            return ((QueryStringClause)esClause).KustoQL;
        }
Example #18
0
        /// <summary>Runs a term query for <paramref name="searchText"/> against the "content" field of the index at _Location.</summary>
        /// <param name="searchText">The term text to look up.</param>
        public void Query(string searchText)
        {
            //state the file location of the index
            DirectoryInfo directoryInfo = new DirectoryInfo(_Location);

            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.Open(directoryInfo);

            //create an index searcher that will perform the search
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
            try
            {
                //build a query object
                Lucene.Net.Index.Term   searchTerm = new Lucene.Net.Index.Term("content", searchText);
                Lucene.Net.Search.Query query      = new Lucene.Net.Search.TermQuery(searchTerm);

                //execute the query, keeping at most 100 docs
                Lucene.Net.Search.TopDocs hits = searcher.Search(query, null, 100);

                // BUGFIX: iterate over the docs actually returned. TotalHits is
                // the total number of matches in the index and can exceed the
                // 100 requested above, which made the original loop read past
                // the end of ScoreDocs.
                for (int i = 0; i < hits.ScoreDocs.Length; i++)
                {
                    Lucene.Net.Search.ScoreDoc doc = hits.ScoreDocs[i];
                }
            }
            finally
            {
                // FIX: release the searcher; the original leaked it.
                searcher.Close();
            }
        }
Example #19
0
        /// <summary> Simple similarity query generators.
        /// Takes every unique word and forms a boolean query where all words are optional.
        /// After you get this you'll use to to query your <see cref="IndexSearcher"/> for similar docs.
        /// The only caveat is the first hit returned <b>should be</b> your source document - you'll
        /// need to then ignore that.
        ///
        /// <p/>
        ///
        /// So, if you have a code fragment like this:
        /// <br/>
        /// <code>
        /// Query q = formSimilaryQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
        /// </code>
        ///
        /// <p/>
        /// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good')</c>.
        ///
        /// <p/>
        /// The philosophy behind this method is "two documents are similar if they share lots of words".
        /// Note that behind the scenes, Lucenes scoring algorithm will tend to give two documents a higher similarity score if the share more uncommon words.
        ///
        /// <P/>
        /// This method is fail-safe in that if a long 'body' is passed in and
        /// <see cref="BooleanQuery.Add(BooleanClause)"/> (used internally)
        /// throws
        /// <see cref="BooleanQuery.TooManyClauses"/>, the
        /// query as it is will be returned.
        ///
        ///
        ///
        ///
        ///
        /// </summary>
        /// <param name="body">the body of the document you want to find similar documents to
        /// </param>
        /// <param name="a">the analyzer to use to parse the body
        /// </param>
        /// <param name="field">the field you want to search on, probably something like "contents" or "body"
        /// </param>
        /// <param name="stop">optional set of stop words to ignore
        /// </param>
        /// <returns> a query with all unique words in 'body'
        /// </returns>
        /// <throws>  IOException this can't happen... </throws>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
        {
            // Tokenize the body text with the caller-supplied analyzer.
            TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));

            Lucene.Net.Analysis.Token t;
            BooleanQuery tmp = new BooleanQuery();

            System.Collections.Hashtable already = new System.Collections.Hashtable();             // ignore dups
            // One SHOULD clause per unique, non-stop token.
            while ((t = ts.Next()) != null)
            {
                System.String word = t.TermText();
                // ignore opt stop words
                if (stop != null && stop.Contains(word))
                {
                    continue;
                }
                // ignore dups
                if (already.Contains(word) == true)
                {
                    continue;
                }
                already.Add(word, word);
                // add to query
                TermQuery tq = new TermQuery(new Term(field, word));
                try
                {
                    tmp.Add(tq, BooleanClause.Occur.SHOULD);                     //false, false);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // fail-safe, just return what we have, not the end of the world
                    break;
                }
            }
            return(tmp);
        }
        public virtual void TestDifferentNumResults()
        {
            // test the collector w/ FacetRequests and different numResults
            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);

            FacetsCollector sfc = new FacetsCollector();
            TermQuery q = new TermQuery(A);
            searcher.Search(q, sfc);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);
            // Dimension CP_A: the aggregated Value is expected to be -1 here,
            // while each child label count must match the precomputed counts.
            FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
            Assert.AreEqual(-1, (int)result.Value);
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(termExpectedCounts[CP_A + "/" + labelValue.label], labelValue.value);
            }
            // Dimension CP_B: both the aggregated value and the per-label
            // counts must match the precomputed expectations.
            result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
            Assert.AreEqual(termExpectedCounts[CP_B], result.Value);
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(termExpectedCounts[CP_B + "/" + labelValue.label], labelValue.value);
            }

            IOUtils.Close(indexReader, taxoReader);
        }
Example #21
0
		public virtual void  TestGetValuesForIndexedDocument()
		{
			// Index a single document built by the shared helper.
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(MakeDocumentWithFields());
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			
			// search for something that does exists
			Query query = new TermQuery(new Term("keyword", "test1"));
			
			// ensure that queries return expected results without DateFilter first
			ScoreDoc[] found = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(1, found.Length);
			
			// Verify the stored field values round-tripped through the index.
			DoAssert(searcher.Doc(found[0].doc), true);
			searcher.Close();
		}
        public virtual void TestDuringAddIndexes_LuceneNet()
        {
            // Stress test: background threads run while this thread repeatedly
            // pulls a fresh NRT reader and checks the hit count never decreases.
            MockRAMDirectory dir1   = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            writer.SetMergeFactor(2);

            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();

            // Copies of the base index handed to the worker threads.
            Directory[] dirs = new Directory[10];
            for (int i = 0; i < 10; i++)
            {
                dirs[i] = new MockRAMDirectory(dir1);
            }

            IndexReader r = writer.GetReader();

            int   NUM_THREAD = 5;
            float SECONDS    = 3;

            // Deadline in milliseconds-since-epoch shared with the workers.
            long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);

            // Thread-safe list collecting exceptions thrown by worker threads.
            System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));

            SupportClass.ThreadClass[] threads = new SupportClass.ThreadClass[NUM_THREAD];
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this);
                threads[i].IsBackground = true;
                threads[i].Start();
            }

            int lastCount = 0;

            while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
            {
                using (IndexReader r2 = writer.GetReader())
                {
                    Query q     = new TermQuery(new Term("indexname", "test"));
                    int   count = new IndexSearcher(r2).Search(q, 10).TotalHits;
                    // With docs only being added, the hit count must be
                    // monotonically non-decreasing across reader generations.
                    Assert.IsTrue(count >= lastCount);
                    lastCount = count;
                }
            }

            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i].Join();
            }

            // No worker thread may have thrown.
            Assert.AreEqual(0, excs.Count);
            r.Close();
            Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
            writer.Close();

            _TestUtil.CheckIndex(dir1);

            dir1.Close();
        }
        public virtual void TestAfterClose()
        {
            // An NRT reader obtained from a writer must stay usable after the
            // writer is disposed, but OpenIfChanged must then fail.
            Directory dir1 = GetAssertNoDeletesDirectory(NewDirectory());
            IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // create the index
            CreateIndexNoClose(false, "test", writer);

            DirectoryReader r = writer.Reader;
            writer.Dispose();

            TestUtil.CheckIndex(dir1);

            // reader should remain usable even after IndexWriter is closed:
            Assert.AreEqual(100, r.NumDocs);
            Query q = new TermQuery(new Term("indexname", "test"));
            IndexSearcher searcher = NewSearcher(r);
            Assert.AreEqual(100, searcher.Search(q, 10).TotalHits);

            // ...but refreshing the reader must fail once the writer is gone.
            try
            {
                DirectoryReader.OpenIfChanged(r);
                Assert.Fail("failed to hit AlreadyClosedException");
            }
            catch (AlreadyClosedException)
            {
                // expected. FIX: dropped the unused 'ace' variable that caused
                // a CS0168 compiler warning.
            }
            r.Dispose();
            dir1.Dispose();
        }
		public virtual void  TestMaxSizeHighlightTruncates()
		{
			// Verify that SetMaxDocBytesToAnalyze caps how much text the
			// highlighter examines: matches beyond the limit are ignored.
			System.String goodWord = "goodtoken";
			System.String[] stopWords = new System.String[]{"stoppedtoken"};
			
			TermQuery query = new TermQuery(new Term("data", goodWord));
			SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
			Highlighter hg = new Highlighter(fm, new QueryScorer(query));
			// NullFragmenter: the analyzed text is returned as one fragment.
			hg.SetTextFragmenter(new NullFragmenter());
			
			System.String match = null;
			System.Text.StringBuilder sb = new System.Text.StringBuilder();
			sb.Append(goodWord);
			// Pad the text with a large run of stop words, pushing everything
			// after the first token well past the analyze limit set below.
			for (int i = 0; i < 10000; i++)
			{
				sb.Append(" ");
				sb.Append(stopWords[0]);
			}
			
			hg.SetMaxDocBytesToAnalyze(100);
			match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", sb.ToString());
			Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");
			
			//add another tokenized word to the overrall length - but set way beyond 
			//the length of text under consideration (after a large slug of stop words + whitespace)
			sb.Append(" ");
			sb.Append(goodWord);
			match = hg.GetBestFragment(new StandardAnalyzer(stopWords), "data", sb.ToString());
			Assert.IsTrue(match.Length < hg.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");
		}
Example #25
0
        public virtual void TestFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd)
        {
            // Index a single document: a Farsi token in "content" plus a plain
            // "body" field we can match with a simple TermQuery.
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document doc = new Document();
            doc.Add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES));
            doc.Add(new StringField("body", "body", Field.Store.YES));
            writer.AddDocument(doc);
            writer.Dispose();

            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);
            Query bodyQuery = new TermQuery(new Term("body", "body"));

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // index Term below should NOT be returned by a TermRangeFilter with a Farsi
            // Collator (or an Arabic one for the case when Farsi searcher not
            // supported).
            ScoreDoc[] firstRange = searcher.Search(bodyQuery, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).ScoreDocs;
            Assert.AreEqual(0, firstRange.Length, "The index Term should not be included.");

            // The second range, by contrast, must contain the indexed term.
            ScoreDoc[] secondRange = searcher.Search(bodyQuery, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).ScoreDocs;
            Assert.AreEqual(1, secondRange.Length, "The index Term should be included.");

            reader.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// Stress test: repeatedly refreshes a near-real-time reader while a
        /// background thread runs AddIndexes from ten copies of the base index.
        /// Asserts the visible hit count never decreases, that no background
        /// exception was recorded, and that no deleted files remain open.
        /// </summary>
        public virtual void TestDuringAddIndexes()
        {
            Directory dir1 = GetAssertNoDeletesDirectory(NewDirectory());
            IndexWriter writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));

            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();

            // Ten in-memory copies of the freshly committed index, used as the
            // AddIndexes sources by the background thread.
            Directory[] dirs = new Directory[10];
            for (int i = 0; i < 10; i++)
            {
                dirs[i] = new MockDirectoryWrapper(Random(), new RAMDirectory(dir1, NewIOContext(Random())));
            }

            DirectoryReader r = writer.Reader;

            const float SECONDS = 0.5f;

            long endTime = (long)(Environment.TickCount + 1000.0 * SECONDS);
            IList<Exception> excs = new SynchronizedCollection<Exception>();

            // Only one thread can addIndexes at a time, because
            // IndexWriter acquires a write lock in each directory:
            var threads = new ThreadClass[1];
            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new ThreadAnonymousInnerClassHelper(writer, dirs, endTime, excs);
                threads[i].SetDaemon(true);
                threads[i].Start();
            }

            // Poll with refreshed reader snapshots; since docs are only added,
            // the hit count must be monotonically non-decreasing.
            int lastCount = 0;
            while (Environment.TickCount < endTime)
            {
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
                Query q = new TermQuery(new Term("indexname", "test"));
                IndexSearcher searcher = NewSearcher(r);
                int count = searcher.Search(q, 10).TotalHits;
                Assert.IsTrue(count >= lastCount);
                lastCount = count;
            }

            for (int i = 0; i < threads.Length; i++)
            {
                threads[i].Join();
            }
            // final check
            DirectoryReader dr2 = DirectoryReader.OpenIfChanged(r);
            if (dr2 != null)
            {
                r.Dispose();
                r = dr2;
            }
            Query q2 = new TermQuery(new Term("indexname", "test"));
            IndexSearcher searcher_ = NewSearcher(r);
            int count_ = searcher_.Search(q2, 10).TotalHits;
            Assert.IsTrue(count_ >= lastCount);

            // No background thread may have recorded an exception.
            Assert.AreEqual(0, excs.Count);
            r.Dispose();
            if (dir1 is MockDirectoryWrapper)
            {
                ICollection<string> openDeletedFiles = ((MockDirectoryWrapper)dir1).OpenDeletedFiles;
                Assert.AreEqual(0, openDeletedFiles.Count, "openDeleted=" + openDeletedFiles);
            }

            writer.Dispose();

            dir1.Dispose();
        }
        /// <summary>
        /// A near-real-time reader obtained from a writer must remain usable
        /// after the writer is closed, but Reopen must then fail.
        /// </summary>
        public virtual void  TestAfterClose()
        {
            Directory dir1 = new MockRAMDirectory();
            var writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            writer.SetInfoStream(infoStream);

            // create the index, then grab an NRT reader before closing the writer
            CreateIndexNoClose(false, "test", writer);
            IndexReader reader = writer.GetReader();
            writer.Close();

            _TestUtil.CheckIndex(dir1);

            // reader should remain usable even after IndexWriter is closed:
            Assert.AreEqual(100, reader.NumDocs());
            Query query = new TermQuery(new Term("indexname", "test"));
            Assert.AreEqual(100, new IndexSearcher(reader).Search(query, 10).TotalHits);

            // Reopen requires a live writer, so it must now throw.
            Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "failed to hit AlreadyClosedException");

            reader.Close();
            dir1.Close();
        }
Example #28
0
		/// <summary>
		/// Indexes 30 documents of growing length into two fields — "noTf"
		/// (term freqs/positions omitted) and "tf" (kept) — then runs the
		/// counting hit collectors over four term queries and a conjunction.
		/// Fix: use Assert.AreEqual so a failure reports expected vs. actual
		/// instead of an opaque IsTrue(false).
		/// </summary>
		public virtual void  TestBasic()
		{
			Directory dir = new MockRAMDirectory();
			Analyzer analyzer = new StandardAnalyzer();
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMergeFactor(2);
			writer.SetMaxBufferedDocs(2);
			writer.SetSimilarity(new SimpleSimilarity());
			
			System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
			System.String term = "term";
			for (int i = 0; i < 30; i++)
			{
				Document d = new Document();
				sb.Append(term).Append(" ");
				System.String content = sb.ToString();
				// Odd-numbered docs get the " notf" marker in the omit-tf field...
				Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
				noTf.SetOmitTermFreqAndPositions(true);
				d.Add(noTf);
				
				// ...while even-numbered docs get the " tf" marker in the tf field.
				Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
				d.Add(tf);
				
				writer.AddDocument(d);
			}
			
			writer.Optimize();
			// flush
			writer.Close();
			_TestUtil.CheckIndex(dir);
			
			/*
			* Verify the index
			*/
			Searcher searcher = new IndexSearcher(dir);
			searcher.SetSimilarity(new SimpleSimilarity());
			
			Term a = new Term("noTf", term);
			Term b = new Term("tf", term);
			Term c = new Term("noTf", "notf");
			Term d2 = new Term("tf", "tf");
			TermQuery q1 = new TermQuery(a);
			TermQuery q2 = new TermQuery(b);
			TermQuery q3 = new TermQuery(c);
			TermQuery q4 = new TermQuery(d2);
			
			// Each collector checks scoring behaviour for its query internally.
			searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
			searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
			searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
			searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
			
			// Conjunction of q1 and q4: 15 documents are expected to match both.
			BooleanQuery bq = new BooleanQuery();
			bq.Add(q1, Occur.MUST);
			bq.Add(q4, Occur.MUST);
			
			searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
			// AreEqual reports expected/actual on failure, unlike IsTrue(15 == ...).
			Assert.AreEqual(15, CountingHitCollector.GetCount());
			
			searcher.Close();
			dir.Close();
		}
        /// <summary>
        /// Lucene.NET-specific variant of the AddIndexes stress test: instead of
        /// reopening one long-lived reader, a fresh writer.GetReader() snapshot is
        /// taken (and disposed) on every poll iteration while five background
        /// threads run AddIndexes concurrently.
        /// </summary>
        public virtual void TestDuringAddIndexes_LuceneNet()
        {
            MockRAMDirectory dir1 = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            writer.SetInfoStream(infoStream);
            writer.MergeFactor = 2;

            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();

            // Ten in-memory copies of the committed index to feed AddIndexes.
            Directory[] dirs = new Directory[10];
            for (int i = 0; i < 10; i++)
            {
                dirs[i] = new MockRAMDirectory(dir1);
            }

            IndexReader r = writer.GetReader();

            int NUM_THREAD = 5;
            float SECONDS = 3;

            long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
            // NOTE(review): the inner "new ArrayList(new ArrayList())" wrapping is
            // redundant but harmless; kept byte-identical here.
            System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));

            ThreadClass[] threads = new ThreadClass[NUM_THREAD];
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this);
                threads[i].IsBackground = true;
                threads[i].Start();
            }

            // Poll with a fresh NRT reader each pass; hit count must never shrink
            // because the background threads only ever add documents.
            int lastCount = 0;
            while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
            {
                using (IndexReader r2 = writer.GetReader())
                {
                    Query q = new TermQuery(new Term("indexname", "test"));
                    int count = new IndexSearcher(r2).Search(q, 10).TotalHits;
                    Assert.IsTrue(count >= lastCount);
                    lastCount = count;
                }
            }

            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i].Join();
            }

            // No background thread may have recorded an exception.
            Assert.AreEqual(0, excs.Count);
            r.Close();
            Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
            writer.Close();

            _TestUtil.CheckIndex(dir1);

            dir1.Close();
        }
 /// <summary>
 /// Stress test: reopens a near-real-time reader in a loop while five
 /// background threads concurrently add and delete documents. Asserts at
 /// least one search produced hits and no background exception was recorded.
 /// </summary>
 public virtual void  TestDuringAddDelete()
 {
     Directory dir1 = new MockRAMDirectory();
     IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
     writer.SetInfoStream(infoStream);
     writer.MergeFactor = 2;
     
     // create the index
     CreateIndexNoClose(false, "test", writer);
     writer.Commit();
     
     IndexReader r = writer.GetReader();
     
     int NUM_THREAD = 5;
     float SECONDS = 3;
     
     long endTime = (long) ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
     // NOTE(review): the inner "new ArrayList(new ArrayList())" wrapping is
     // redundant but harmless; kept byte-identical here.
     System.Collections.IList excs = (System.Collections.IList) System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));
     
     ThreadClass[] threads = new ThreadClass[NUM_THREAD];
     for (int i = 0; i < NUM_THREAD; i++)
     {
         threads[i] = new AnonymousClassThread1(endTime, writer, excs, this);
         threads[i].IsBackground = true;
         threads[i].Start();
     }
     
     // Reopen returns the same instance when nothing changed, hence the
     // reference comparison before disposing the old reader.
     int sum = 0;
     while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
     {
         IndexReader r2 = r.Reopen();
         if (r2 != r)
         {
             r.Close();
             r = r2;
         }
         Query q = new TermQuery(new Term("indexname", "test"));
         sum += new IndexSearcher(r).Search(q, 10).TotalHits;
     }
     
     for (int i = 0; i < NUM_THREAD; i++)
     {
         threads[i].Join();
     }
     // At least one poll must have seen matching documents.
     Assert.IsTrue(sum > 0);
     
     Assert.AreEqual(0, excs.Count);
     writer.Close();
     
     _TestUtil.CheckIndex(dir1);
     r.Close();
     dir1.Close();
 }
        /// <summary>
        /// Exercises the facets accumulator across a mix of segments: some with
        /// matching content, some with categories, some with neither. Only the
        /// segments that have both content and categories (3rd and 6th) should
        /// contribute to the counts for dimension "A".
        /// </summary>
        public virtual void TestSegmentsWithoutCategoriesOrResults()
        {
            // tests the accumulator when there are segments with no results
            var indexDir = NewDirectory();
            var taxoDir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            //iwc.MergePolicy = NoMergePolicy.INSTANCE; // prevent merges
            IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();
            indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
            indexTwoDocs(taxoWriter, indexWriter, null, true); // 2nd segment, with content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true); // 3rd segment ok
            indexTwoDocs(taxoWriter, indexWriter, null, false); // 4th segment, no content, or categories
            indexTwoDocs(taxoWriter, indexWriter, null, true); // 5th segment, with content, no categories
            indexTwoDocs(taxoWriter, indexWriter, config, true); // 6th segment, with content, with categories
            indexTwoDocs(taxoWriter, indexWriter, null, true); // 7th segment, with content, no categories
            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher indexSearcher = NewSearcher(indexReader);

            // search for "f:a", only segments 1 and 3 should match results
            Query q = new TermQuery(new Term("f", "a"));
            FacetsCollector sfc = new FacetsCollector();
            indexSearcher.Search(q, sfc);
            Facets facets = GetTaxonomyFacetCounts(taxoReader, config, sfc);
            FacetResult result = facets.GetTopChildren(10, "A");
            Assert.AreEqual(2, result.LabelValues.Length, "wrong number of children");
            // Two qualifying segments, two docs each => weight 2 per child.
            foreach (LabelAndValue labelValue in result.LabelValues)
            {
                Assert.AreEqual(2, (int)labelValue.value, "wrong weight for child " + labelValue.label);
            }

            IOUtils.Close(indexReader, taxoReader, indexDir, taxoDir);
        }
Example #32
0
        /// <summary>
        /// Indexes a single document and verifies its field values can be read
        /// back through a search hit via DoAssert.
        /// Fix: the original catch block swallowed every exception (including
        /// assertion failures thrown by DoAssert), so the test could never fail;
        /// the exception is now logged and rethrown with its stack trace intact.
        /// </summary>
        public virtual void  TestGetValuesForIndexedDocument()
        {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
            writer.AddDocument(MakeDocumentWithFields());
            writer.Close();

            Searcher searcher = new IndexSearcher(dir);

            // search for something that does exist
            Query query = new TermQuery(new Term("keyword", "test1"));

            // ensure that queries return expected results without DateFilter first
            Hits hits = searcher.Search(query);
            Assert.AreEqual(1, hits.Length());

            try
            {
                DoAssert(hits.Doc(0), true);
            }
            catch (System.Exception e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                System.Console.Error.Write("\n");
                // Rethrow so a DoAssert failure actually fails the test;
                // bare "throw;" preserves the original stack trace.
                throw;
            }
            finally
            {
                searcher.Close();
            }
        }
Example #33
0
        /// <summary>
        /// Indexes 30 documents of growing length into two fields — "noTf"
        /// (term freqs/positions omitted) and "tf" (kept) — then runs the
        /// counting hit collectors over four term queries and a conjunction.
        /// Fix: use Assert.AreEqual so a failure reports expected vs. actual
        /// instead of an opaque IsTrue(false).
        /// </summary>
        public virtual void  TestBasic()
        {
            Directory dir = new MockRAMDirectory();
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMergeFactor(2);
            writer.SetMaxBufferedDocs(2);
            writer.SetSimilarity(new SimpleSimilarity());

            System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
            System.String term = "term";
            for (int i = 0; i < 30; i++)
            {
                Document d = new Document();
                sb.Append(term).Append(" ");
                System.String content = sb.ToString();
                // Odd-numbered docs get the " notf" marker in the omit-tf field...
                Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED);
                noTf.SetOmitTermFreqAndPositions(true);
                d.Add(noTf);

                // ...while even-numbered docs get the " tf" marker in the tf field.
                Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED);
                d.Add(tf);

                writer.AddDocument(d);
            }

            writer.Optimize();
            // flush
            writer.Close();
            _TestUtil.CheckIndex(dir);

            /*
             * Verify the index
             */
            Searcher searcher = new IndexSearcher(dir);
            searcher.SetSimilarity(new SimpleSimilarity());

            Term a = new Term("noTf", term);
            Term b = new Term("tf", term);
            Term c = new Term("noTf", "notf");
            Term d2 = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d2);

            // Each collector checks scoring behaviour for its query internally.
            searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
            searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
            searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
            searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));

            // Conjunction of q1 and q4: 15 documents are expected to match both.
            BooleanQuery bq = new BooleanQuery();
            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
            // AreEqual reports expected/actual on failure, unlike IsTrue(15 == ...).
            Assert.AreEqual(15, CountingHitCollector.GetCount());

            searcher.Close();
            dir.Close();
        }
        /// <summary>
        /// Verifies that wrapping a query in a DrillDownQuery does not alter the
        /// scores of the matching documents.
        /// </summary>
        public virtual void TestScoring()
        {
            // verify that drill-down queries do not modify scores
            IndexSearcher searcher = NewSearcher(reader);

            var baselineScores = new float[reader.MaxDoc];
            Query baseQuery = new TermQuery(new Term("content", "foo"));

            // Record the score of every document matching the plain query.
            foreach (ScoreDoc hit in searcher.Search(baseQuery, reader.MaxDoc).ScoreDocs)
            {
                baselineScores[hit.Doc] = hit.Score;
            }

            // create a drill-down query with category "a", scores should not change
            var drillDown = new DrillDownQuery(config, baseQuery);
            drillDown.Add("a");
            foreach (ScoreDoc hit in searcher.Search(drillDown, reader.MaxDoc).ScoreDocs)
            {
                Assert.AreEqual(baselineScores[hit.Doc], hit.Score, 0f, "score of doc=" + hit.Doc + " modified");
            }
        }
        /// <summary>
        /// Verifies KeepLastNDeletionPolicy when commits come from both writers
        /// and readers: after N+1 add/delete commit rounds plus an optimize,
        /// only the most recent commits remain openable, and the hit counts of
        /// each surviving commit match the history when walking backwards.
        /// Fix: rethrow with bare "throw;" instead of "throw e;", which resets
        /// the exception's stack trace.
        /// </summary>
        public virtual void  TestKeepLastNDeletionPolicyWithReader()
        {
            int N = 10;

            for (int pass = 0; pass < 2; pass++)
            {
                // Alternate compound / non-compound file format between passes.
                bool useCompoundFile = (pass % 2) != 0;

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                Directory dir = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.UseCompoundFile = useCompoundFile;
                writer.Close();
                Term searchTerm = new Term("content", "aaa");
                Query query = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                    writer.UseCompoundFile = useCompoundFile;
                    for (int j = 0; j < 17; j++)
                    {
                        AddDoc(writer);
                    }
                    // this is a commit
                    writer.Close();
                    IndexReader reader = IndexReader.Open(dir, policy, false);
                    reader.DeleteDocument(3 * i + 1);
                    reader.SetNorm(4 * i + 1, "content", 2.0F);
                    IndexSearcher searcher = new IndexSearcher(reader);
                    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
                    // 17 docs added minus 1 deleted per round.
                    Assert.AreEqual(16 * (1 + i), hits.Length);
                    // this is a commit
                    reader.Close();
                    searcher.Close();
                }
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED);
                writer.UseCompoundFile = useCompoundFile;
                writer.Optimize();
                // this is a commit
                writer.Close();

                Assert.AreEqual(2 * (N + 2), policy.numOnInit);
                Assert.AreEqual(2 * (N + 2) - 1, policy.numOnCommit);

                IndexSearcher searcher2 = new IndexSearcher(dir, false);
                ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).ScoreDocs;
                Assert.AreEqual(176, hits2.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 176;

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir, true);

                        // Work backwards in commits on what the expected
                        // count should be.
                        searcher2 = new IndexSearcher(reader);
                        hits2 = searcher2.Search(query, null, 1000).ScoreDocs;
                        if (i > 1)
                        {
                            if (i % 2 == 0)
                            {
                                expectedCount += 1;
                            }
                            else
                            {
                                expectedCount -= 17;
                            }
                        }
                        Assert.AreEqual(expectedCount, hits2.Length);
                        searcher2.Close();
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last 5");
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // Only the oldest (already-deleted) commit may fail to open.
                        if (i != N)
                        {
                            // Bare rethrow preserves the original stack trace.
                            throw;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
        /// <summary>
        /// Legacy-API variant of the AddIndexes stress test: reopens one NRT
        /// reader while five background threads run AddIndexes from ten index
        /// copies, asserting the hit count is monotonically non-decreasing and
        /// no background exception was recorded.
        /// </summary>
        public virtual void  TestDuringAddIndexes()
        {
            MockRAMDirectory dir1   = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            writer.MergeFactor = 2;

            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();

            // Ten in-memory copies of the committed index to feed AddIndexes.
            Directory[] dirs = new Directory[10];
            for (int i = 0; i < 10; i++)
            {
                dirs[i] = new MockRAMDirectory(dir1);
            }

            IndexReader r = writer.GetReader();

            int   NUM_THREAD = 5;
            float SECONDS    = 3;

            long endTime = (long)((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);

            // NOTE(review): the inner "new ArrayList(new ArrayList())" wrapping is
            // redundant but harmless; kept byte-identical here.
            System.Collections.IList excs = (System.Collections.IList)System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));

            ThreadClass[] threads = new ThreadClass[NUM_THREAD];
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this);
                threads[i].IsBackground = true;
                threads[i].Start();
            }

            int lastCount = 0;

            // Reopen returns the same instance when nothing changed, hence the
            // reference comparison before closing the old reader.
            while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
            {
                IndexReader r2 = r.Reopen();
                if (r2 != r)
                {
                    r.Close();
                    r = r2;
                }
                Query q     = new TermQuery(new Term("indexname", "test"));
                int   count = new IndexSearcher(r).Search(q, 10).TotalHits;
                Assert.IsTrue(count >= lastCount);
                lastCount = count;
            }

            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i].Join();
            }

            // No background thread may have recorded an exception.
            Assert.AreEqual(0, excs.Count);
            r.Close();
            try
            {
                Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
            }
            catch
            {
                //DIGY:
                //I think this is an expected behaviour.
                //There isn't any pending files to be deleted after "writer.Close()".
                //But, since lucene.java's test case is designed that way
                //and I might be wrong, I will add a warning

                // Assert only in debug mode, so that CheckIndex is called during release.
#if DEBUG
                Assert.Inconclusive("", 0, dir1.GetOpenDeletedFiles().Count);
#endif
            }
            writer.Close();

            _TestUtil.CheckIndex(dir1);

            dir1.Close();
        }
Example #37
0
 /// <summary>
 /// Adds the query's term to <paramref name="terms"/> as a WeightedTerm carrying
 /// the query's boost, unless <paramref name="fieldName"/> is non-null and the
 /// term belongs to a different field (null means "collect from every field").
 /// </summary>
 private static void GetTermsFromTermQuery(TermQuery query, HashSetSupport terms, string fieldName)
 {
     // Hoist the term; the original called GetTerm() twice. The original also
     // compared via a redundant (string)(object) round-trip cast — plain
     // string equality is equivalent.
     Term term = query.GetTerm();
     if (fieldName == null || term.Field() == fieldName)
     {
         terms.Add(new WeightedTerm(query.GetBoost(), term.Text()));
     }
 }
Example #38
0
        /// <summary>
        /// Verifies KeepLastNDeletionPolicy when each round ends by re-opening
        /// the index in "create" mode (wiping it): the policy must still retain
        /// the last N commits, and walking the surviving commits backwards must
        /// reproduce the 0 / 16 / 17 hit-count cycle.
        /// Fix: rethrow with bare "throw;" instead of "throw e;", which resets
        /// the exception's stack trace.
        /// </summary>
        public virtual void  TestKeepLastNDeletionPolicyWithCreates()
        {
            int N = 10;

            for (int pass = 0; pass < 4; pass++)
            {
                // Four passes cover autoCommit on/off crossed with compound file on/off.
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                Directory dir = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                writer.SetMaxBufferedDocs(10);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.Close();
                Term searchTerm = new Term("content", "aaa");
                Query query = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                    writer.SetMaxBufferedDocs(10);
                    writer.SetUseCompoundFile(useCompoundFile);
                    for (int j = 0; j < 17; j++)
                    {
                        AddDoc(writer);
                    }
                    // this is a commit when autoCommit=false:
                    writer.Close();
                    IndexReader reader = IndexReader.Open(dir, policy);
                    reader.DeleteDocument(3);
                    reader.SetNorm(5, "content", 2.0F);
                    IndexSearcher searcher = new IndexSearcher(reader);
                    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
                    // 17 docs added minus the 1 just deleted.
                    Assert.AreEqual(16, hits.Length);
                    // this is a commit when autoCommit=false:
                    reader.Close();
                    searcher.Close();

                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                    // This will not commit: there are no changes
                    // pending because we opened for "create":
                    writer.Close();
                }

                Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
                if (!autoCommit)
                {
                    Assert.AreEqual(3 * (N + 1), policy.numOnCommit);
                }

                IndexSearcher searcher2 = new IndexSearcher(dir);
                ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).scoreDocs;
                // The final "create" wiped the index, so the head commit is empty.
                Assert.AreEqual(0, hits2.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and, I can open a reader on each:
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 0;

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir);

                        // Work backwards in commits on what the expected
                        // count should be.  Only check this in the
                        // autoCommit false case:
                        if (!autoCommit)
                        {
                            searcher2 = new IndexSearcher(reader);
                            hits2 = searcher2.Search(query, null, 1000).scoreDocs;
                            Assert.AreEqual(expectedCount, hits2.Length);
                            searcher2.Close();
                            if (expectedCount == 0)
                            {
                                expectedCount = 16;
                            }
                            else if (expectedCount == 16)
                            {
                                expectedCount = 17;
                            }
                            else if (expectedCount == 17)
                            {
                                expectedCount = 0;
                            }
                        }
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last " + N);
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // Only the oldest (already-deleted) commit may fail to open.
                        if (i != N)
                        {
                            // Bare rethrow preserves the original stack trace.
                            throw;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
Example #39
0
        /// <summary>
        /// A term query for "seventy" must hit exactly the documents listed
        /// (70-79 within each block of one hundred).
        /// </summary>
        public virtual void  TestTerm()
        {
            Query seventyQuery = new TermQuery(new Term("field", "seventy"));
            int[] expectedDocs = new int[] { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979 };
            CheckHits(seventyQuery, expectedDocs);
        }
        /// <summary>
        /// Modern-API stress test: refreshes an NRT reader in a loop while
        /// background threads concurrently add and delete documents. Asserts at
        /// least one search produced hits and no background exception occurred.
        /// </summary>
        public virtual void TestDuringAddDelete()
        {
            Directory dir1 = NewDirectory();
            var writer = new IndexWriter(dir1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));

            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();

            DirectoryReader r = writer.Reader;

            const float SECONDS = 0.5f;

            long endTime = (long)(Environment.TickCount + 1000.0 * SECONDS);
            IList<Exception> excs = new SynchronizedCollection<Exception>();

            var threads = new ThreadClass[NumThreads];
            for (int i = 0; i < NumThreads; i++)
            {
                threads[i] = new ThreadAnonymousInnerClassHelper2(writer, r, endTime, excs);
                threads[i].SetDaemon(true);
                threads[i].Start();
            }

            // OpenIfChanged returns null when nothing changed; only then keep
            // the old snapshot, otherwise swap in the fresh one.
            int sum = 0;
            while (Environment.TickCount < endTime)
            {
                DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                if (r2 != null)
                {
                    r.Dispose();
                    r = r2;
                }
                Query q = new TermQuery(new Term("indexname", "test"));
                IndexSearcher searcher = NewSearcher(r);
                sum += searcher.Search(q, 10).TotalHits;
            }

            for (int i = 0; i < NumThreads; i++)
            {
                threads[i].Join();
            }
            // at least search once
            DirectoryReader dr2 = DirectoryReader.OpenIfChanged(r);
            if (dr2 != null)
            {
                r.Dispose();
                r = dr2;
            }
            Query q2 = new TermQuery(new Term("indexname", "test"));
            IndexSearcher indSearcher = NewSearcher(r);
            sum += indSearcher.Search(q2, 10).TotalHits;
            Assert.IsTrue(sum > 0, "no documents found at all");

            // No background thread may have recorded an exception.
            Assert.AreEqual(0, excs.Count);
            writer.Dispose();

            r.Dispose();
            dir1.Dispose();
        }
Example #41
0
        /// <summary>A term that occurs in no indexed document must yield zero hits.</summary>
        public virtual void  TestTerm2()
        {
            // "seventish" never appears in the test corpus, so the expected hit set is empty.
            Query noMatch = new TermQuery(new Term("field", "seventish"));
            CheckHits(noMatch, new int[0]);
        }
		/// <summary>
		/// Highlighting with a NullFragmenter must wrap the matched term without
		/// shifting any of the surrounding text (guards against off-by-one offsets).
		/// </summary>
		public virtual void  TestOffByOne()
		{
			TermQuery helpQuery = new TermQuery(new Term("data", "help"));
			Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(helpQuery));
			highlighter.SetTextFragmenter(new NullFragmenter());
			
			System.String fragment = highlighter.GetBestFragment(new StandardAnalyzer(), "data", "help me [54-65]");
			Assert.AreEqual("<B>help</B> me [54-65]", fragment);
		}
Example #43
0
			// Stress loop: repeatedly open an IndexSearcher over the shared dir,
			// run a fixed term query, then close the searcher. Any unexpected
			// exception sets hitException and terminates the loop early.
			override public void  Run()
			{
				IndexSearcher searcher = null;
				Query query = new TermQuery(new Term("content", "aaa"));
				for (int i = 0; i < this.numIteration; i++)
				{
					// Phase 1: open the searcher.
					try
					{
						searcher = new IndexSearcher(dir);
					}
					catch (System.Exception e)
					{
						hitException = true;
						System.Console.Out.WriteLine("Stress Test Index Searcher: create hit unexpected exception: " + e.ToString());
						System.Console.Out.WriteLine(e.StackTrace);
						break;
					}
					if (searcher != null)
					{
						// Phase 2: run the query.
						ScoreDoc[] hits = null;
						try
						{
							hits = searcher.Search(query, null, 1000).scoreDocs;
						}
						catch (System.IO.IOException e)
						{
							hitException = true;
							System.Console.Out.WriteLine("Stress Test Index Searcher: search hit unexpected exception: " + e.ToString());
							System.Console.Out.WriteLine(e.StackTrace);
							break;
						}
						// System.out.println(hits.length() + " total results");
						// Phase 3: close the searcher so each iteration starts clean.
						try
						{
							searcher.Close();
						}
						catch (System.IO.IOException e)
						{
							hitException = true;
							System.Console.Out.WriteLine("Stress Test Index Searcher: close hit unexpected exception: " + e.ToString());
							System.Console.Out.WriteLine(e.StackTrace);
							break;
						}
						searcher = null;
					}
				}
			}
Example #44
0
 /// <summary>
 /// Stress loop: repeatedly opens a reader/searcher over Dir, runs a fixed
 /// term query, and disposes the reader. Any unexpected exception sets
 /// HitException and terminates the loop early.
 /// </summary>
 public override void Run()
 {
     Query probe = new TermQuery(new Term("content", "aaa"));
     for (int iter = 0; iter < this.NumIteration; iter++)
     {
         IndexReader ir;
         IndexSearcher s;
         // Phase 1: open the reader and wrap it in a searcher.
         try
         {
             ir = DirectoryReader.Open(Dir);
             s = NewSearcher(ir);
         }
         catch (Exception e)
         {
             HitException = true;
             Console.WriteLine("Stress Test Index Searcher: create hit unexpected exception: " + e.ToString());
             Console.Out.Write(e.StackTrace);
             break;
         }
         // Phase 2: run the query.
         try
         {
             s.Search(probe, null, 1000);
         }
         catch (IOException e)
         {
             HitException = true;
             Console.WriteLine("Stress Test Index Searcher: search hit unexpected exception: " + e.ToString());
             Console.Out.Write(e.StackTrace);
             break;
         }
         // System.out.println(hits.Length() + " total results");
         // Phase 3: release the reader so each iteration starts clean.
         try
         {
             ir.Dispose();
         }
         catch (IOException e)
         {
             HitException = true;
             Console.WriteLine("Stress Test Index Searcher: close hit unexpected exception: " + e.ToString());
             Console.Out.Write(e.StackTrace);
             break;
         }
     }
 }
Example #45
0
        /// <summary>
        /// Simple similarity query generator: takes every unique word in <paramref name="body"/>
        /// and forms a boolean query in which all words are optional (SHOULD clauses).
        /// Use the result to query an IndexSearcher for similar documents. Note that the
        /// first hit returned <b>should be</b> the source document itself - ignore it.
        ///
        /// <p>
        /// Example: <code>Query q = FormSimilarQuery("I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);</code>
        /// yields, in string form, <code>'(i use lucene to search fast searchers are good')</code>.
        ///
        /// <p>
        /// The philosophy behind this method is "two documents are similar if they share
        /// lots of words". Behind the scenes, Lucene's scoring tends to rank two documents
        /// as more similar when the shared words are uncommon.
        ///
        /// <p>
        /// This method is fail-safe: if a long 'body' causes the internal
        /// {@link BooleanQuery#add BooleanQuery.add()} to throw
        /// {@link org.apache.lucene.search.BooleanQuery.TooManyClauses BooleanQuery.TooManyClauses},
        /// the query accumulated so far is returned.
        /// </summary>
        /// <param name="body">the body of the document you want to find similar documents to
        /// </param>
        /// <param name="a">the analyzer to use to parse the body
        /// </param>
        /// <param name="field">the field you want to search on, probably something like "contents" or "body"
        /// </param>
        /// <param name="stop">optional set of stop words to ignore
        /// </param>
        /// <returns> a query with all unique words in 'body'
        /// </returns>
        /// <throws>  IOException this can't happen... </throws>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
        {
            TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
            try
            {
                TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

                BooleanQuery tmp = new BooleanQuery();
                System.Collections.Hashtable already = new System.Collections.Hashtable(); // ignore dups
                while (ts.IncrementToken())
                {
                    String word = termAtt.Term();
                    // ignore opt stop words
                    if (stop != null && stop.Contains(word))
                        continue;
                    // ignore dups
                    if (already.Contains(word))
                        continue;
                    already.Add(word, word);
                    // add to query
                    TermQuery tq = new TermQuery(new Term(field, word));
                    try
                    {
                        tmp.Add(tq, BooleanClause.Occur.SHOULD);
                    }
                    catch (BooleanQuery.TooManyClauses)
                    {
                        // fail-safe, just return what we have, not the end of the world
                        break;
                    }
                }
                return tmp;
            }
            finally
            {
                // Fix: the TokenStream was previously never closed, leaking the
                // analyzer's per-stream resources. Close it on every exit path.
                ts.Close();
            }
        }
Example #46
0
		/// <summary>
		/// Reusing a single Field instance via SetValue between AddDocument calls
		/// must index each value independently: all three ids must be searchable.
		/// </summary>
		public virtual void  TestFieldSetValue()
		{
			Field idField = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
			Document doc = new Document();
			doc.Add(idField);
			doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));
			
			// Index the same Document three times, mutating the id field in between.
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(doc);
			idField.SetValue("id2");
			writer.AddDocument(doc);
			idField.SetValue("id3");
			writer.AddDocument(doc);
			writer.Close();
			
			Searcher searcher = new IndexSearcher(dir);
			Query query = new TermQuery(new Term("keyword", "test"));
			
			// All three documents match the shared keyword field.
			ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
			Assert.AreEqual(3, hits.Length);
			
			// Accumulate a bitmask with one bit per distinct id value encountered.
			int seen = 0;
			for (int i = 0; i < 3; i++)
			{
				Document stored = searcher.Doc(hits[i].doc);
				switch (stored.GetField("id").StringValue())
				{
					case "id1":
						seen |= 1;
						break;
					case "id2":
						seen |= 2;
						break;
					case "id3":
						seen |= 4;
						break;
					default:
						Assert.Fail("unexpected id field");
						break;
				}
			}
			searcher.Close();
			dir.Close();
			Assert.AreEqual(7, seen, "did not see all IDs");
		}
Example #47
0
        /// <summary>
        /// Translates an abstraction-layer <c>Query</c> into the equivalent
        /// Lucene.Net query object, recursing through boolean sub-clauses.
        /// </summary>
        /// <param name="query">the wrapper query to convert; must not be null</param>
        /// <returns>the corresponding Lucene.Net query, with Boost applied when set</returns>
        /// <exception cref="ArgumentNullException">if <paramref name="query"/> is null</exception>
        /// <exception cref="ArgumentException">if the query type is not recognized</exception>
        public static Lucene.Net.Search.Query ConvertQueryToLuceneQuery(Query query)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            Lucene.Net.Search.Query lQuery;

            // Dispatch on the concrete wrapper type; each branch builds the
            // matching Lucene.Net query from the wrapper's properties.
            if (query is MatchAllDocsQuery)
            {
                var lMatchAllDocsQuery = new Lucene.Net.Search.MatchAllDocsQuery();
                lQuery = lMatchAllDocsQuery;
            }
            else if (query is TermQuery)
            {
                var termQuery = query as TermQuery;
                var term      = Term.ConvertToLuceneTerm(termQuery.Term);

                lQuery = new Lucene.Net.Search.TermQuery(term);
            }
            else if (query is TermRangeQuery)
            {
                var termRangeQuery  = query as TermRangeQuery;
                var lTermRangeQuery = new Lucene.Net.Search.TermRangeQuery(termRangeQuery.FieldName,
                                                                           termRangeQuery.LowerTerm,
                                                                           termRangeQuery.UpperTerm,
                                                                           termRangeQuery.LowerInclusive,
                                                                           termRangeQuery.UpperInclusive);

                lQuery = lTermRangeQuery;
            }
            else if (query is PhraseQuery)
            {
                var phraseQuery  = query as PhraseQuery;
                var lPhraseQuery = new Lucene.Net.Search.PhraseQuery();
                foreach (var term in phraseQuery.Terms)
                {
                    var lTerm = Term.ConvertToLuceneTerm(term);
                    lPhraseQuery.Add(lTerm);
                }

                if (phraseQuery.Slop.HasValue)
                {
                    lPhraseQuery.Slop = phraseQuery.Slop.Value;
                }

                lQuery = lPhraseQuery;
            }
            else if (query is PrefixQuery)
            {
                var prefixQuery  = query as PrefixQuery;
                var term         = Term.ConvertToLuceneTerm(prefixQuery.Term);
                var lPrefixQuery = new Lucene.Net.Search.PrefixQuery(term);

                lQuery = lPrefixQuery;
            }
            else if (query is RegexQuery)
            {
                var regexQuery  = query as RegexQuery;
                var term        = Term.ConvertToLuceneTerm(regexQuery.Term);
                var lRegexQuery = new Contrib.Regex.RegexQuery(term);

                lQuery = lRegexQuery;
            }
            else if (query is FuzzyQuery)
            {
                var fuzzyQuery  = query as FuzzyQuery;
                var term        = Term.ConvertToLuceneTerm(fuzzyQuery.Term);
                var lFuzzyQuery = new Lucene.Net.Search.FuzzyQuery(term);

                lQuery = lFuzzyQuery;
            }
            else if (query is BooleanQuery)
            {
                var booleanQuery  = query as BooleanQuery;
                var lBooleanQuery = new Lucene.Net.Search.BooleanQuery();
                // Recursively convert each nested clause, mapping the occur flag.
                foreach (var clause in booleanQuery.Clauses)
                {
                    var lNestedQuery = Query.ConvertQueryToLuceneQuery(clause.Query);
                    Lucene.Net.Search.Occur lOccur;
                    switch (clause.Occur)
                    {
                    case Occur.Must:
                        lOccur = Lucene.Net.Search.Occur.MUST;
                        break;

                    case Occur.MustNot:
                        lOccur = Lucene.Net.Search.Occur.MUST_NOT;
                        break;

                    case Occur.Should:
                        lOccur = Lucene.Net.Search.Occur.SHOULD;
                        break;

                    default:
                        throw new InvalidOperationException("Occur not implemented or defined.");
                    }

                    var lClause = new Lucene.Net.Search.BooleanClause(lNestedQuery, lOccur);
                    lBooleanQuery.Add(lClause);
                }

                if (booleanQuery.MinimumNumberShouldMatch.HasValue)
                {
                    lBooleanQuery.MinimumNumberShouldMatch = booleanQuery.MinimumNumberShouldMatch.Value;
                }

                lQuery = lBooleanQuery;
            }
            else if (query is WildcardQuery)
            {
                var wildcardQuery  = query as WildcardQuery;
                var lTerm          = Term.ConvertToLuceneTerm(wildcardQuery.Term);
                var lWildcardQuery = new Lucene.Net.Search.WildcardQuery(lTerm);

                lQuery = lWildcardQuery;
            }
            else if (query is DoubleNumericRangeQuery)
            {
                var doubleNumericRangeQuery = query as DoubleNumericRangeQuery;

                var ldoubleNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewDoubleRange(
                    doubleNumericRangeQuery.FieldName,
                    doubleNumericRangeQuery.Min,
                    doubleNumericRangeQuery.Max,
                    doubleNumericRangeQuery.MinInclusive,
                    doubleNumericRangeQuery.MaxInclusive);

                lQuery = ldoubleNumericRangeQuery;
            }
            else if (query is FloatNumericRangeQuery)
            {
                var floatNumericRangeQuery = query as FloatNumericRangeQuery;

                var lfloatNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewFloatRange(
                    floatNumericRangeQuery.FieldName,
                    floatNumericRangeQuery.Min,
                    floatNumericRangeQuery.Max,
                    floatNumericRangeQuery.MinInclusive,
                    floatNumericRangeQuery.MaxInclusive);

                lQuery = lfloatNumericRangeQuery;
            }
            else if (query is IntNumericRangeQuery)
            {
                var intNumericRangeQuery = query as IntNumericRangeQuery;

                var lintNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewIntRange(
                    intNumericRangeQuery.FieldName,
                    intNumericRangeQuery.Min,
                    intNumericRangeQuery.Max,
                    intNumericRangeQuery.MinInclusive,
                    intNumericRangeQuery.MaxInclusive);

                lQuery = lintNumericRangeQuery;
            }
            else if (query is LongNumericRangeQuery)
            {
                var longNumericRangeQuery = query as LongNumericRangeQuery;

                var llongNumericRangeQuery = Lucene.Net.Search.NumericRangeQuery.NewLongRange(
                    longNumericRangeQuery.FieldName,
                    longNumericRangeQuery.Min,
                    longNumericRangeQuery.Max,
                    longNumericRangeQuery.MinInclusive,
                    longNumericRangeQuery.MaxInclusive);

                lQuery = llongNumericRangeQuery;
            }
            else if (query is QueryParserQuery)
            {
                var queryParserQuery = query as QueryParserQuery;

                var queryParser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_30,
                                                                          queryParserQuery.DefaultField,
                                                                          new StandardAnalyzer(Version.LUCENE_30))
                {
                    AllowLeadingWildcard =
                        queryParserQuery.AllowLeadingWildcard
                };

                lQuery = queryParser.Parse(queryParserQuery.Query);
            }
            else if (query is MultiFieldQueryParserQuery)
            {
                var multiFieldQueryParserQuery = query as MultiFieldQueryParserQuery;

                if (multiFieldQueryParserQuery.FieldNames == null)
                {
                    multiFieldQueryParserQuery.FieldNames = new List <string>();
                }

                var queryParser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, multiFieldQueryParserQuery.FieldNames.ToArray(), new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));

                lQuery = queryParser.Parse(multiFieldQueryParserQuery.Query);
            }
            else
            {
                throw new ArgumentException(@"Unknown or invalid query object", "query");
            }

            // Carry over an explicit boost, regardless of the concrete query type.
            if (query.Boost.HasValue)
            {
                lQuery.Boost = query.Boost.Value;
            }

            return(lQuery);
        }
Example #48
0
        /// <summary> Create the More like query from a PriorityQueue</summary>
        /// <param name="q">queue of [word, fieldName, score] object arrays, best-scoring entry popped first</param>
        /// <returns>a BooleanQuery of optional (SHOULD) term clauses, boosted relative to the best score when boosting is enabled</returns>
        private Query CreateQuery(PriorityQueue q)
        {
            BooleanQuery query = new BooleanQuery();
            System.Object cur;
            int qterms = 0;
            float bestScore = 0;
			
            while (((cur = q.Pop()) != null))
            {
                // Each queue entry is [word, fieldName, score].
                System.Object[] ar = (System.Object[]) cur;
                TermQuery tq = new TermQuery(new Term((System.String) ar[1], (System.String) ar[0]));
				
                if (boost)
                {
                    // The first popped entry has the best score; normalize all
                    // subsequent boosts against it.
                    if (qterms == 0)
                    {
                        bestScore = (float) ((System.Single) ar[2]);
                    }
                    float myScore = (float) ((System.Single) ar[2]);
					
                    tq.SetBoost(myScore / bestScore);
                }
				
                try
                {
                    query.Add(tq, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // Fail-safe: return whatever clauses were accumulated so far.
                    break;
                }
				
                qterms++;
                if (maxQueryTerms > 0 && qterms >= maxQueryTerms)
                {
                    break;
                }
            }
			
            return query;
        }
        /// <summary>
        /// Indexes one document carrying a numeric and a binary doc-values field,
        /// each routed through its own DocValuesFormat via a custom codec, then
        /// verifies both values can be read back for every search hit.
        /// </summary>
        public virtual void TestTwoFieldsTwoFormats()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            DocValuesFormat fast = DocValuesFormat.ForName("Lucene45");
            DocValuesFormat slow = DocValuesFormat.ForName("Lucene45");
            iwc.SetCodec(new Lucene46CodecAnonymousInnerClassHelper(this, fast, slow));
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
            string text = "this is the text to be indexed. " + longTerm;
            doc.Add(NewTextField("fieldname", text, Field.Store.YES));
            doc.Add(new NumericDocValuesField("dv1", 5));
            doc.Add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
            iwriter.AddDocument(doc);
            iwriter.Dispose();

            // Now search the index:
            IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
            IndexSearcher isearcher = NewSearcher(ireader);

            Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
            Query query = new TermQuery(new Term("fieldname", "text"));
            TopDocs hits = isearcher.Search(query, null, 1);
            Assert.AreEqual(1, hits.TotalHits);
            BytesRef scratch = new BytesRef();
            // Iterate through the results:
            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
                Assert.AreEqual(text, hitDoc.Get("fieldname"));
                Debug.Assert(ireader.Leaves.Count == 1);
                // Both doc-values fields must round-trip through their formats.
                NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv1");
                Assert.AreEqual(5, dv.Get(hits.ScoreDocs[i].Doc));
                BinaryDocValues dv2 = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv2");
                dv2.Get(hits.ScoreDocs[i].Doc, scratch);
                Assert.AreEqual(new BytesRef("hello world"), scratch);
            }

            ireader.Dispose();
            directory.Dispose();
        }
        /// <summary>
        /// A DrillDownQuery layered on another DrillDownQuery (and on a plain
        /// term query) must intersect the hit sets of its base query and facet.
        /// </summary>
        public virtual void TestQueryImplicitDefaultParams()
        {
            IndexSearcher searcher = NewSearcher(reader);

            // Drill down on facet "a", then drill further into facet "b" using
            // the first query as the base: the intersection yields 5 documents.
            DrillDownQuery baseQuery = new DrillDownQuery(config);
            baseQuery.Add("a");
            DrillDownQuery drillB = new DrillDownQuery(config, baseQuery);
            drillB.Add("b");
            TopDocs topDocs = searcher.Search(drillB, 100);
            Assert.AreEqual(5, topDocs.TotalHits);

            // content:foo matches 50% of the docs and facet "b" 20%; together
            // they should select 10 documents (10%).
            Query fooQuery = new TermQuery(new Term("content", "foo"));
            DrillDownQuery fooAndB = new DrillDownQuery(config, fooQuery);
            fooAndB.Add("b");
            topDocs = searcher.Search(fooAndB, 100);
            Assert.AreEqual(10, topDocs.TotalHits);
        }
        /// <summary>
        /// Verifies RandomSamplingFacetsCollector: an empty result set produces no
        /// divide-by-zero, 100% sampling matches exact facet counts, and 10%
        /// sampling produces counts statistically close to the expected mean.
        /// </summary>
        public virtual void TestRandomSampling()
        {
            Directory dir = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            FacetsConfig config = new FacetsConfig();

            // Each doc carries an even/odd marker and a facet value i % 10.
            int numDocs = AtLeast(10000);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random();

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);
            IOUtils.Close(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());

            // There should be no divisions by zero
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.NotNull(collectRandomZeroResults.GetMatchingDocs);

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs)
            {
                Assert.AreEqual(0, doc.totalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i %
            // 10) are hits.
            // there is a REAL small chance that one of the 5 values will be missed when
            // sampling.
            // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
            // missing) ~ 10^-193
            // so that is probably not going to happen.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong()); // no sampling
            RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            FacetResult random10Result = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult = exactFacetCounts.GetTopChildren(10, "iMod10");

            // 100% sampling must agree exactly with the unsampled counts.
            Assert.AreEqual(random100Result, exactResult);

            // we should have five children, but there is a small chance we have less.
            // (see above).
            Assert.True(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.True(random10Result.ChildCount >= 1);

            // now calculate some statistics to determine if the sampled result is 'ok'.
            // because random sampling is used, the results will vary each time.
            int sum = 0;
            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.value;
            }
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;
            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // we query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2
            float targetMu = numDocs / (5.0f * 2.0f);

            // the average should be in the range and the standard deviation should not
            // be too great
            Assert.True(sigma < 200);
            Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
 /// <summary>Every sampled term must return at least one hit when searched.</summary>
 public virtual void TestRandomSearchPerformance()
 {
     var searcher = new IndexSearcher(Reader);
     foreach (Term sampled in SampleTerms)
     {
         TopDocs top = searcher.Search(new TermQuery(sampled), 10);
         Assert.IsTrue(top.TotalHits > 0);
     }
 }
		/// <summary>
		/// Exercises KeepLastNDeletionPolicy across repeated open-for-create /
		/// add / delete cycles: only the last N commit points may survive, each
		/// surviving commit must open cleanly with the expected doc count, and
		/// commits older than N must fail to open once their segments file is gone.
		/// </summary>
		public virtual void  TestKeepLastNDeletionPolicyWithCreates()
		{
			
			int N = 10;
			
			// 4 passes cover the autoCommit x useCompoundFile combinations.
			for (int pass = 0; pass < 4; pass++)
			{
				
				bool autoCommit = pass < 2;
				bool useCompoundFile = (pass % 2) > 0;
				
				KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
				
				Directory dir = new RAMDirectory();
				IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
				writer.SetMaxBufferedDocs(10);
				writer.SetUseCompoundFile(useCompoundFile);
				writer.Close();
				Term searchTerm = new Term("content", "aaa");
				Query query = new TermQuery(searchTerm);
				
				for (int i = 0; i < N + 1; i++)
				{
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
					writer.SetMaxBufferedDocs(10);
					writer.SetUseCompoundFile(useCompoundFile);
					for (int j = 0; j < 17; j++)
					{
						AddDoc(writer);
					}
					// this is a commit when autoCommit=false:
					writer.Close();
					IndexReader reader = IndexReader.Open(dir, policy);
					reader.DeleteDocument(3);
					reader.SetNorm(5, "content", 2.0F);
					IndexSearcher searcher = new IndexSearcher(reader);
					Hits hits = searcher.Search(query);
					// 17 added minus the 1 just deleted:
					Assert.AreEqual(16, hits.Length());
					// this is a commit when autoCommit=false:
					reader.Close();
					searcher.Close();
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
					// This will not commit: there are no changes
					// pending because we opened for "create":
					writer.Close();
				}
				
				Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
				if (autoCommit)
				{
					Assert.IsTrue(policy.numOnCommit > 3 * (N + 1) - 1);
				}
				else
				{
					Assert.AreEqual(2 * (N + 1), policy.numOnCommit);
				}
				
				// The latest commit was an open-for-create, so the index is empty.
				IndexSearcher searcher2 = new IndexSearcher(dir);
				Hits hits2 = searcher2.Search(query);
				Assert.AreEqual(0, hits2.Length());
				
				// Simplistic check: just verify only the past N segments_N's still
				// exist, and, I can open a reader on each:
				long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
				
				dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
				int expectedCount = 0;
				
				for (int i = 0; i < N + 1; i++)
				{
					try
					{
						IndexReader reader = IndexReader.Open(dir);
						
						// Work backwards in commits on what the expected
						// count should be.  Only check this in the
						// autoCommit false case:
						if (!autoCommit)
						{
							searcher2 = new IndexSearcher(reader);
							hits2 = searcher2.Search(query);
							Assert.AreEqual(expectedCount, hits2.Length());
							searcher2.Close();
							if (expectedCount == 0)
							{
								expectedCount = 16;
							}
							else if (expectedCount == 16)
							{
								expectedCount = 17;
							}
							else if (expectedCount == 17)
							{
								expectedCount = 0;
							}
						}
						reader.Close();
						if (i == N)
						{
							Assert.Fail("should have failed on commits before last " + N);
						}
					}
					catch (System.IO.IOException)
					{
						// Fix: rethrow with "throw;" (not "throw e;") so the
						// original stack trace is preserved.
						if (i != N)
						{
							throw;
						}
					}
					if (i < N)
					{
						dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
					}
					gen--;
				}
				
				dir.Close();
			}
		}
        /// <summary>
        /// Stress test: while background threads call AddIndexes against the same
        /// writer, the main thread repeatedly reopens the NRT reader and verifies
        /// the hit count never decreases. Finishes by checking for thread
        /// exceptions, leaked deleted files, and overall index consistency.
        /// </summary>
        public virtual void  TestDuringAddIndexes()
        {
            MockRAMDirectory dir1 = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            writer.SetInfoStream(infoStream);
            writer.MergeFactor = 2;
            
            // create the index
            CreateIndexNoClose(false, "test", writer);
            writer.Commit();
            
            // Each worker thread will add copies of this same index.
            Directory[] dirs = new Directory[10];
            for (int i = 0; i < 10; i++)
            {
                dirs[i] = new MockRAMDirectory(dir1);
            }
            
            IndexReader r = writer.GetReader();
            
            int NUM_THREAD = 5;
            float SECONDS = 3;
            
            long endTime = (long) ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 1000.0 * SECONDS);
            System.Collections.IList excs = (System.Collections.IList) System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(new System.Collections.ArrayList()));
            
            ThreadClass[] threads = new ThreadClass[NUM_THREAD];
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i] = new AnonymousClassThread(endTime, writer, dirs, excs, this);
                threads[i].IsBackground = true;
                threads[i].Start();
            }
            
            // Documents are only added, never deleted, so the count must be monotonic.
            int lastCount = 0;
            while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < endTime)
            {
                IndexReader r2 = r.Reopen();
                if (r2 != r)
                {
                    r.Close();
                    r = r2;
                }
                Query q = new TermQuery(new Term("indexname", "test"));
                int count = new IndexSearcher(r).Search(q, 10).TotalHits;
                Assert.IsTrue(count >= lastCount);
                lastCount = count;
            }
            
            for (int i = 0; i < NUM_THREAD; i++)
            {
                threads[i].Join();
            }
            
            Assert.AreEqual(0, excs.Count);
            r.Close();
            try
            {
                Assert.AreEqual(0, dir1.GetOpenDeletedFiles().Count);
            }
            catch
            {
                //DIGY: 
                //I think this is an expected behaviour.
                //There isn't any pending files to be deleted after "writer.Close()". 
                //But, since lucene.java's test case is designed that way
                //and I might be wrong, I will add a warning

                // Assert only in debug mode, so that CheckIndex is called during release.
#if DEBUG
                Assert.Inconclusive("", 0, dir1.GetOpenDeletedFiles().Count);
#endif 
            }
            writer.Close();
            
            _TestUtil.CheckIndex(dir1);
            
            dir1.Close();
        }
Example #55
0
        // Verifies locale-sensitive sorting: terms with accented characters are
        // indexed through four locale-specific collation analyzers (US, France,
        // Sweden, Denmark) and each locale's sort must yield the expected
        // tracer-document ordering.
        //
        // Copied (and slightly modified) from
        // Lucene.Net.Search.TestSort.testInternationalSort()
        //
        // TODO: this test is really fragile. there are already 3 different cases,
        // depending upon unicode version.
        public virtual void TestCollationKeySort(Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer, string usResult, string frResult, string svResult, string dkResult)
        {
            Directory indexStore = NewDirectory();
            IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            // document data:
            // the tracer field is used to determine which document was hit
            string[][] sortData = new string[][] { new string[] { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" }, new string[] { "B", "y", "HAT", "HAT", "HAT", "HAT" }, new string[] { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" }, new string[] { "D", "y", "HUT", "HUT", "HUT", "HUT" }, new string[] { "E", "x", "peach", "peach", "peach", "peach" }, new string[] { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" }, new string[] { "G", "x", "sin", "sin", "sin", "sin" }, new string[] { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" }, new string[] { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" }, new string[] { "J", "y", "HOT", "HOT", "HOT", "HOT" } };

            FieldType customType = new FieldType();
            customType.Stored = true;

            // Columns 2..5 of each row hold the locale-specific sort terms;
            // pair each column with its field name and analyzer so all rows
            // can be indexed by one data-driven loop.
            string[] localeFields = { "US", "France", "Sweden", "Denmark" };
            Analyzer[] localeAnalyzers = { usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer };

            foreach (string[] row in sortData)
            {
                Document doc = new Document();
                doc.Add(new Field("tracer", row[0], customType));
                doc.Add(new TextField("contents", row[1], Field.Store.NO));
                for (int col = 0; col < localeFields.Length; col++)
                {
                    string term = row[col + 2];
                    if (term != null)
                    {
                        doc.Add(new TextField(localeFields[col], localeAnalyzers[col].TokenStream(localeFields[col], new StringReader(term))));
                    }
                }
                writer.AddDocument(doc);
            }
            writer.ForceMerge(1);
            writer.Dispose();

            IndexReader reader = DirectoryReader.Open(indexStore);
            IndexSearcher searcher = new IndexSearcher(reader);

            Sort sort = new Sort();
            Query queryX = new TermQuery(new Term("contents", "x"));
            Query queryY = new TermQuery(new Term("contents", "y"));

            // Each locale sorts the accented terms differently; check the
            // expected tracer ordering for each one in turn.
            sort.SetSort(new SortField("US", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, usResult);

            sort.SetSort(new SortField("France", SortField.Type_e.STRING));
            AssertMatches(searcher, queryX, sort, frResult);

            sort.SetSort(new SortField("Sweden", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, svResult);

            sort.SetSort(new SortField("Denmark", SortField.Type_e.STRING));
            AssertMatches(searcher, queryY, sort, dkResult);

            reader.Dispose();
            indexStore.Dispose();
        }