private void CreateRandomIndexes(int maxSegments)
 {
     dir = NewDirectory();
     numDocs = AtLeast(150);
     int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
     ISet<string> randomTerms = new HashSet<string>();
     while (randomTerms.Count < numTerms)
     {
         randomTerms.Add(TestUtil.RandomSimpleString(Random()));
     }
     }
     terms = new List<string>(randomTerms);
     int seed = Random().Next();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
     iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);
     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = RandomDocument();
         iw.AddDocument(doc);
         if (i == numDocs / 2 || (i != numDocs - 1 && Random().Next(8) == 0))
         {
             iw.Commit();
         }
         if (Random().Next(15) == 0)
         {
             string term = RandomInts.RandomFrom(Random(), terms);
             iw.DeleteDocuments(new Term("s", term));
         }
     }
     reader = iw.Reader;
 }
Example #2
 public override void SetUp()
 {
     base.SetUp();
     PayloadHelper helper = new PayloadHelper();
     Searcher_Renamed = helper.SetUp(Random(), Similarity, 1000);
     IndexReader = Searcher_Renamed.IndexReader;
 }
        public void BeforeClass()
        {
            Dir = NewDirectory();
            Sdir1 = NewDirectory();
            Sdir2 = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone);

            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO));
                writer.AddDocument(doc);
                ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc);
            }
            writer.ForceMerge(1);
            swriter1.ForceMerge(1);
            swriter2.ForceMerge(1);
            writer.Dispose();
            swriter1.Dispose();
            swriter2.Dispose();

            Reader = DirectoryReader.Open(Dir);
            Searcher = NewSearcher(Reader);

            MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true);
            MultiSearcher = NewSearcher(MultiReader);

            MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true);
            MultiSearcherDupls = NewSearcher(MultiReaderDupls);
        }
Example #4
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.NO);
            doc.Add(field);

            // The Java original used new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)),
            // i.e. zero-padded three-digit terms. NumberFormatInfo.NumberDecimalDigits does not pad
            // integers, so a custom numeric format string is used to match.
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString("000", CultureInfo.InvariantCulture);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp searcher=" + Searcher);
            }
        }
Example #5
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.OmitNorms = true;
            Field field = NewField("field", "", customType);
            doc.Add(field);

            // The Java original used new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)),
            // i.e. zero-padded three-digit terms. NumberFormatInfo.NumberDecimalDigits does not pad
            // integers, so a custom numeric format string is used to match.
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString("000", CultureInfo.InvariantCulture);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
        public void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);

            Document doc = new Document();
            Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
            doc.Add(field);

            for (int i = 0; i < 5137; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            field.StringValue = "meaninglessnames";
            for (int i = 5138; i < 11377; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
 public override void SetUp()
 {
     base.SetUp();
     INDEX_SIZE = AtLeast(2000);
     Index = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Index);
     RandomGen random = new RandomGen(this, Random());
     for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if too low the
     {
         // problem doesn't show up
         Document doc = new Document();
         if ((i % 5) != 0) // some documents must not have an entry in the first
         {
             // sort field
             doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES));
         }
         if ((i % 7) == 0) // some documents to match the query (see below)
         {
             doc.Add(NewTextField("content", "test", Field.Store.YES));
         }
         // every document has a defined 'mandant' field
         doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
     Query = new TermQuery(new Term("content", "test"));
 }
 /// <summary>
 ///     Initializes a new instance of the <see cref="SearchResults" /> class.
 /// </summary>
 /// <param name="searcher">The searcher.</param>
 /// <param name="reader">The reader.</param>
 /// <param name="docs">The hits.</param>
 /// <param name="criteria">The criteria.</param>
 /// <param name="query">The query.</param>
 public LuceneSearchResults(
     Searcher searcher, IndexReader reader, TopDocs docs, ISearchCriteria criteria, Query query)
 {
     Results = new SearchResults(criteria, null);
     CreateDocuments(searcher, docs);
     CreateFacets(reader, query);
     CreateSuggestions(reader, criteria);
 }
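 // Hedged usage sketch: the constructor expects an already-executed query, so a caller
 // might look like this ("RecordsToRetrieve" is an assumed criteria member, not from
 // the source above).
 public LuceneSearchResults Search(Searcher searcher, IndexReader reader, ISearchCriteria criteria, Query query)
 {
     TopDocs docs = searcher.Search(query, criteria.RecordsToRetrieve);
     return new LuceneSearchResults(searcher, reader, docs, criteria, query);
 }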
 public static void AfterClass()
 {
     s = null;
     r.Dispose();
     r = null;
     Index.Dispose();
     Index = null;
 }
Example #10
 internal MatchAllScorer(MatchAllDocsQuery outerInstance, IndexReader reader, Bits liveDocs, Weight w, float score)
     : base(w)
 {
     this.OuterInstance = outerInstance;
     this.LiveDocs = liveDocs;
     this.Score_Renamed = score;
     MaxDoc = reader.MaxDoc;
 }
 public static void AfterClass()
 {
     Searcher = null;
     Reader.Dispose();
     Reader = null;
     Directory.Dispose();
     Directory = null;
 }
Example #12
        public static void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            for (int i = 0; i < DocFields.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, DocFields[i], Field.Store.NO));
                writer.AddDocument(doc);
            }
            writer.Dispose();
            LittleReader = DirectoryReader.Open(Directory);
            Searcher = NewSearcher(LittleReader);
            // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
            Searcher.Similarity = new DefaultSimilarity();

            // Make big index
            Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT));

            // First multiply small test index:
            MulFactor = 1;
            int docCount = 0;
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now copy index...");
            }
            do
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: cycle...");
                }
                Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT));
                RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2);
                w.AddIndexes(copy);
                docCount = w.MaxDoc();
                w.Dispose();
                MulFactor *= 2;
            } while (docCount < 3000);

            RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc_ = new Document();
            doc_.Add(NewTextField("field2", "xxx", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            doc_ = new Document();
            doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            Reader = riw.Reader;
            BigSearcher = NewSearcher(Reader);
            riw.Dispose();
        }
 public static void AfterClassDrillDownQueryTest()
 {
     IOUtils.Close(reader, taxo, dir, taxoDir);
     reader = null;
     taxo = null;
     dir = null;
     taxoDir = null;
     config = null;
 }
Example #14
 // TODO: this should be setUp()....
 public virtual void CreateDummySearcher()
 {
     // Create a dummy index with nothing in it.
     // this could possibly fail if Lucene starts checking for docid ranges...
     d = NewDirectory();
     IndexWriter iw = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
     iw.AddDocument(new Document());
     iw.Dispose();
     r = DirectoryReader.Open(d);
     s = NewSearcher(r);
 }
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
     Document doc = new Document();
     doc.Add(NewTextField(FN, "the quick brown fox jumps over the lazy ??? dog 493432 49344", Field.Store.NO));
     writer.AddDocument(doc);
     Reader = writer.Reader;
     writer.Dispose();
     Searcher = NewSearcher(Reader);
 }
 public override void SetUp()
 {
     base.SetUp();
     // create test index
     MDirectory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), MDirectory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity()));
     AddDocument(writer, "1", "I think it should work.");
     AddDocument(writer, "2", "I think it should work.");
     AddDocument(writer, "3", "I think it should work.");
     AddDocument(writer, "4", "I think it should work.");
     Reader = writer.Reader;
     writer.Dispose();
     Searcher = NewSearcher(Reader);
     Searcher.Similarity = new DefaultSimilarity();
 }
 public TaxonomyIndexArrays(IndexReader reader)
 {
     parents = new int[reader.MaxDoc];
     if (parents.Length > 0)
     {
         InitParents(reader, 0);
         // Starting Lucene 2.9, following the change LUCENE-1542, we can
         // no longer reliably read the parent "-1" (see comment in
         // LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
         // to fix this in indexing without breaking backward-compatibility
         // with existing indexes, so what we'll do instead is just
         // hard-code the parent of ordinal 0 to be -1, and assume (as is
         // indeed the case) that no other parent can be -1.
         parents[0] = TaxonomyReader.INVALID_ORDINAL;
     }
 }
        public void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            writer.AddDocument(Doc(new Field[] { GetField("id", "0"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "1"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "2"), GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "3"), GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"), GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "4"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"), GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones") }));
            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
        public TaxonomyIndexArrays(IndexReader reader, TaxonomyIndexArrays copyFrom)
        {
            Debug.Assert(copyFrom != null);

            // note that copyParents.length may be equal to reader.maxDoc(). this is not a bug
            // it may be caused if e.g. the taxonomy segments were merged, and so an updated
            // NRT reader was obtained, even though nothing was changed. this is not very likely
            // to happen.
            int[] copyParents = copyFrom.Parents;
            this.parents = new int[reader.MaxDoc];
            Array.Copy(copyParents, 0, parents, 0, copyParents.Length);
            InitParents(reader, copyParents.Length);

            if (copyFrom.initializedChildren)
            {
                InitChildrenSiblings(copyFrom);
            }
        }
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
            Document doc = new Document();
            Field field = NewTextField("field", "", Field.Store.NO);
            doc.Add(field);

            field.StringValue = "quick brown fox";
            iw.AddDocument(doc);
            field.StringValue = "jumps over lazy broun dog";
            iw.AddDocument(doc);
            field.StringValue = "jumps over extremely very lazy broxn dog";
            iw.AddDocument(doc);
            Reader = iw.Reader;
            iw.Dispose();
            Searcher = NewSearcher(Reader);
        }
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
     Document doc = new Document();
     Field titleField = NewTextField("title", "some title", Field.Store.NO);
     Field field = NewTextField(FN, "", Field.Store.NO);
     Field footerField = NewTextField("footer", "a footer", Field.Store.NO);
     doc.Add(titleField);
     doc.Add(field);
     doc.Add(footerField);
     field.StringValue = "\uD866\uDF05abcdef";
     writer.AddDocument(doc);
     field.StringValue = "\uD866\uDF06ghijkl";
     writer.AddDocument(doc);
     // this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!!
     field.StringValue = "\uFB94mnopqr";
     writer.AddDocument(doc);
     field.StringValue = "\uFB95stuvwx"; // this one too.
     writer.AddDocument(doc);
     field.StringValue = "a\uFFFCbc";
     writer.AddDocument(doc);
     field.StringValue = "a\uFFFDbc";
     writer.AddDocument(doc);
     field.StringValue = "a\uFFFEbc";
     writer.AddDocument(doc);
     field.StringValue = "a\uFB94bc";
     writer.AddDocument(doc);
     field.StringValue = "bacadaba";
     writer.AddDocument(doc);
     field.StringValue = "\uFFFD";
     writer.AddDocument(doc);
     field.StringValue = "\uFFFD\uD866\uDF05";
     writer.AddDocument(doc);
     field.StringValue = "\uFFFD\uFFFD";
     writer.AddDocument(doc);
     Reader = writer.Reader;
     Searcher = NewSearcher(Reader);
     writer.Dispose();
 }
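 // Standalone sketch (not part of the test) of why the sort-order comment above holds:
 // U+FB94 is a single BMP code unit (0xFB94), while "\uD866\uDF05" is the surrogate
 // pair for the supplementary code point U+29905.
 public static void DemonstrateSortOrder()
 {
     string supplementary = "\uD866\uDF05abcdef"; // code point U+29905
     string bmp = "\uFB94mnopqr";                 // code point U+FB94

     // UTF-16 code-unit order: 0xFB94 > 0xD866, so bmp sorts AFTER supplementary.
     Console.WriteLine(string.CompareOrdinal(bmp, supplementary) > 0); // True

     // Code point order (which UTF-8/UTF-32 byte order follows): U+FB94 < U+29905,
     // so bmp sorts BEFORE supplementary.
     Console.WriteLine(char.ConvertToUtf32(bmp, 0) < char.ConvertToUtf32(supplementary, 0)); // True
 }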
Example #22
 public override Query Rewrite(IndexReader reader)
 {
     Query newQuery = baseQuery;
     while (true)
     {
         Query rewrittenQuery = newQuery.Rewrite(reader);
         if (rewrittenQuery == newQuery)
         {
             break;
         }
         newQuery = rewrittenQuery;
     }
     if (newQuery == baseQuery)
     {
         return this;
     }
     else
     {
         return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce);
     }
 }
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
     Document doc = new Document();
     Field titleField = NewTextField("title", "some title", Field.Store.NO);
     Field field = NewTextField(FN, "this is document one 2345", Field.Store.NO);
     Field footerField = NewTextField("footer", "a footer", Field.Store.NO);
     doc.Add(titleField);
     doc.Add(field);
     doc.Add(footerField);
     writer.AddDocument(doc);
     field.StringValue = "some text from doc two a short piece 5678.91";
     writer.AddDocument(doc);
     field.StringValue = "doc three has some different stuff" + " with numbers 1234 5678.9 and letter b";
     writer.AddDocument(doc);
     Reader = writer.Reader;
     Searcher = NewSearcher(Reader);
     writer.Dispose();
 }
Example #24
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.NO);
            doc.Add(field);

            // we generate awful prefixes: good for testing.
            // but for preflex codec, the test can be very slow, so use fewer iterations.
            string codec = Codec.Default.Name;
            int num = codec.Equals("Lucene3x") ? 200 * RANDOM_MULTIPLIER : AtLeast(1000);
            for (int i = 0; i < num; i++)
            {
                field.StringValue = TestUtil.RandomUnicodeString(Random(), 10);
                writer.AddDocument(doc);
            }
            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Example #25
        public override void SetUp()
        {
            base.SetUp();
            // Create an index writer.
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);

            // oldest doc:
            // Add the first document.  text = "Document 1"  dateTime = Oct 10 03:25:22 EDT 2007
            writer.AddDocument(CreateDocument("Document 1", 1192001122000L));
            // Add the second document.  text = "Document 2"  dateTime = Oct 10 03:25:26 EDT 2007
            writer.AddDocument(CreateDocument("Document 2", 1192001126000L));
            // Add the third document.  text = "Document 3"  dateTime = Oct 11 07:12:13 EDT 2007
            writer.AddDocument(CreateDocument("Document 3", 1192101133000L));
            // Add the fourth document.  text = "Document 4"  dateTime = Oct 11 08:02:09 EDT 2007
            writer.AddDocument(CreateDocument("Document 4", 1192104129000L));
            // latest doc:
            // Add the fifth document.  text = "Document 5"  dateTime = Oct 12 13:25:43 EDT 2007
            writer.AddDocument(CreateDocument("Document 5", 1192209943000L));

            Reader = writer.Reader;
            writer.Dispose();
        }
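        // Standalone check (not part of the test) that the hard-coded millisecond
        // timestamps match the dates in the comments; EDT is UTC-4, so
        // Oct 10 03:25:22 EDT is 07:25:22 UTC.
        public static void VerifyTimestamps()
        {
            DateTimeOffset utc = DateTimeOffset.FromUnixTimeMilliseconds(1192001122000L);
            Console.WriteLine(utc); // 2007-10-10 07:25:22 +00:00
        }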
        public void BeforeClass()
        {
            string[] data = new string[] { "A 1 2 3 4 5 6", "Z       4 5 6", null, "B   2   4 5 6", "Y     3   5 6", null, "C     3     6", "X       4 5 6" };

            Small = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Small, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)).SetMergePolicy(NewLogMergePolicy()));

            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.Tokenized = false;
            for (int i = 0; i < data.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewField("id", Convert.ToString(i), customType)); // Field.Keyword("id",String.valueOf(i)));
                doc.Add(NewField("all", "all", customType)); // Field.Keyword("all","all"));
                if (null != data[i])
                {
                    doc.Add(NewTextField("data", data[i], Field.Store.YES)); // Field.Text("data",data[i]));
                }
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            writer.Dispose();
        }
        public void BeforeClass()
        {
            string[] data = new string[] { "A 1 2 3 4 5 6", "Z       4 5 6", null, "B   2   4 5 6", "Y     3   5 6", null, "C     3     6", "X       4 5 6" };

            Index = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), Index, Similarity, TimeZone);

            for (int i = 0; i < data.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", Convert.ToString(i), Field.Store.YES)); //Field.Keyword("id",String.valueOf(i)));
                doc.Add(NewStringField("all", "all", Field.Store.YES)); //Field.Keyword("all","all"));
                if (null != data[i])
                {
                    doc.Add(NewTextField("data", data[i], Field.Store.YES)); //Field.Text("data",data[i]));
                }
                w.AddDocument(doc);
            }

            r = w.Reader;
            s = NewSearcher(r);
            w.Dispose();
            //System.out.println("Set up " + getName());
        }
Example #28
 public override void SetNextReader(IndexReader reader, int docBase)
 {
     currentReaderValues = FieldCache_Fields.DEFAULT.GetLongs(reader, field);
 }
Example #29
 public ShardIndexSearcher(ShardSearchingTestBase.NodeState nodeState, long[] nodeVersions, IndexReader localReader, int nodeID)
     : base(localReader)
 {
     this.outerInstance = nodeState;
     this.nodeVersions  = nodeVersions;
     MyNodeID           = nodeID;
     Debug.Assert(MyNodeID == nodeState.MyNodeID, "myNodeID=" + nodeID + " NodeState.this.myNodeID=" + nodeState.MyNodeID);
 }
        public void BeforeClass()
        {
            // NOTE: turn off compound file, this test will open some index files directly.
            OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetUseCompoundFile(false);

            TermIndexInterval = config.TermIndexInterval;
            IndexDivisor = TestUtil.NextInt(Random(), 1, 10);
            NUMBER_OF_DOCUMENTS = AtLeast(100);
            NUMBER_OF_FIELDS = AtLeast(Math.Max(10, 3 * TermIndexInterval * IndexDivisor / NUMBER_OF_DOCUMENTS));

            Directory = NewDirectory();

            config.SetCodec(new PreFlexRWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
            LogMergePolicy mp = NewLogMergePolicy();
            // NOTE: turn off compound file, this test will open some index files directly.
            mp.NoCFSRatio = 0.0;
            config.SetMergePolicy(mp);

            Populate(Directory, config);

            DirectoryReader r0 = IndexReader.Open(Directory);
            SegmentReader r = LuceneTestCase.GetOnlySegmentReader(r0);
            string segment = r.SegmentName;
            r.Dispose();

            FieldInfosReader infosReader = (new PreFlexRWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE)).FieldInfosFormat().FieldInfosReader;
            FieldInfos fieldInfos = infosReader.Read(Directory, segment, "", IOContext.READONCE);
            string segmentFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
            long tiiFileLength = Directory.FileLength(segmentFileName);
            IndexInput input = Directory.OpenInput(segmentFileName, NewIOContext(Random()));
            TermEnum = new SegmentTermEnum(Directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), NewIOContext(Random())), fieldInfos, false);
            int totalIndexInterval = TermEnum.IndexInterval * IndexDivisor;

            SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
            Index = new TermInfosReaderIndex(indexEnum, IndexDivisor, tiiFileLength, totalIndexInterval);
            indexEnum.Dispose();
            input.Dispose();

            Reader = IndexReader.Open(Directory);
            SampleTerms = Sample(Random(), Reader, 1000);
        }
Example #31
 public LuceneDictionary(IndexReader reader, System.String field)
 {
     this.reader = reader;
     this.field  = field;
 }
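 // Hedged usage sketch (classic SpellChecker contrib; the SpellChecker type and its
 // IndexDictionary/SuggestSimilar overloads vary across Lucene.NET versions, so treat
 // the exact signatures as assumptions):
 public static string[] SuggestFromField(IndexReader reader, Directory spellIndexDir, string word)
 {
     var spell = new SpellChecker(spellIndexDir);                  // spell-check index in its own directory
     spell.IndexDictionary(new LuceneDictionary(reader, "title")); // one dictionary entry per term of "title"
     return spell.SuggestSimilar(word, 5);                         // up to 5 close matches
 }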
        public void TestWhichMTQMatched()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.Reader;

            iw.Dispose();

            IndexSearcher       searcher    = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            //PostingsHighlighter highlighter = new PostingsHighlighter() {
            //      @Override
            //      protected Analyzer getIndexAnalyzer(String field)
            //{
            //    return analyzer;
            //}
            //    };
            BooleanQuery query = new BooleanQuery();

            query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("body", "one")), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("body", "se*")), Occur.SHOULD);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(1, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);

            // Default formatter just bolds each hit:
            assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

            // Now use our own formatter, that also stuffs the
            // matching term's text into the result:
            highlighter = new PostingsHighlighterAnalyzerAndFormatterHelper(analyzer, new PassageFormatterHelper());

            //highlighter = new PostingsHighlighter()
            //{
            //    @Override
            //      protected Analyzer getIndexAnalyzer(String field)
            //{
            //    return analyzer;
            //}

            //@Override
            //      protected PassageFormatter getFormatter(String field)
            //{
            //    return new PassageFormatter() {

            //          @Override
            //          public Object format(Passage passages[], String content)
            //{
            //    // Copied from DefaultPassageFormatter, but
            //    // tweaked to include the matched term:
            //    StringBuilder sb = new StringBuilder();
            //    int pos = 0;
            //    for (Passage passage : passages)
            //    {
            //        // don't add ellipsis if its the first one, or if its connected.
            //        if (passage.startOffset > pos && pos > 0)
            //        {
            //            sb.append("... ");
            //        }
            //        pos = passage.startOffset;
            //        for (int i = 0; i < passage.numMatches; i++)
            //        {
            //            int start = passage.matchStarts[i];
            //            int end = passage.matchEnds[i];
            //            // its possible to have overlapping terms
            //            if (start > pos)
            //            {
            //                sb.append(content, pos, start);
            //            }
            //            if (end > pos)
            //            {
            //                sb.append("<b>");
            //                sb.append(content, Math.max(pos, start), end);
            //                sb.append('(');
            //                sb.append(passage.getMatchTerms()[i].utf8ToString());
            //                sb.append(')');
            //                sb.append("</b>");
            //                pos = end;
            //            }
            //        }
            //        // its possible a "term" from the analyzer could span a sentence boundary.
            //        sb.append(content, pos, Math.max(pos, passage.endOffset));
            //        pos = passage.endOffset;
            //    }
            //    return sb.toString();
            //}
            //        };
            //      }
            //    };


            assertEquals(1, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);

            // Default formatter bolds each hit:
            assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);

            ir.Dispose();
            dir.Dispose();
        }
Example #33
 public SearcherFactoryAnonymousInnerClassHelper3(TestSearcherManager outerInstance, IndexReader other)
 {
     this.OuterInstance = outerInstance;
     this.Other         = other;
 }
Example #34
		private static IndexSearcher GetIndexSearcher(IndexReader reader)
		{
			return new IndexSearcher(reader);
		}
Example #35
        public override void BeforeClass()
        {
            base.BeforeClass();

            NUM_DOCS  = AtLeast(500);
            NUM_ORDS  = AtLeast(2);
            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            long   theLong           = long.MaxValue;
            double theDouble         = double.MaxValue;
            sbyte  theByte           = sbyte.MaxValue;
            short  theShort          = short.MaxValue;
            int    theInt            = int.MaxValue;
            float  theFloat          = float.MaxValue;

            unicodeStrings = new string[NUM_DOCS];
            //MultiValued = new BytesRef[NUM_DOCS, NUM_ORDS];
            multiValued = RectangularArrays.ReturnRectangularArray<BytesRef>(NUM_DOCS, NUM_ORDS);
            if (Verbose)
            {
                Console.WriteLine("TEST: setUp");
            }
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("theLong", (theLong--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theDouble", (theDouble--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theByte", (theByte--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theShort", (theShort--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theInt", (theInt--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theFloat", (theFloat--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));
                if (i % 2 == 0)
                {
                    doc.Add(NewStringField("sparse", (i).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                }

                if (i % 2 == 0)
                {
                    doc.Add(new Int32Field("numInt", i, Field.Store.NO));
                }

                // sometimes skip the field:
                if (Random.Next(40) != 17)
                {
                    unicodeStrings[i] = GenerateString(i);
                    doc.Add(NewStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
                }

                // sometimes skip the field:
                if (Random.Next(10) != 8)
                {
                    for (int j = 0; j < NUM_ORDS; j++)
                    {
                        string newValue = GenerateString(i);
                        multiValued[i][j] = new BytesRef(newValue);
                        doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
                    }
                    Array.Sort(multiValued[i]);
                }
                writer.AddDocument(doc);
            }
            IndexReader r = writer.GetReader();

            reader = SlowCompositeReaderWrapper.Wrap(r);
            writer.Dispose();
        }
Example #36
        private SuggestionQueryResult QueryOverMultipleWords(SuggestionQuery suggestionQuery, IndexReader indexReader,
                                                             string queryText)
        {
            var individualTerms = queryText.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

            var result         = new HashSet<string>();
            var maxSuggestions = suggestionQuery.MaxSuggestions;

            foreach (var term in individualTerms)
            {
                if (maxSuggestions <= 0)
                {
                    break;
                }

                foreach (var suggestion in spellChecker.SuggestSimilar(term,
                                                                       suggestionQuery.MaxSuggestions,                                                  // we can filter out duplicates, so taking more
                                                                       indexReader,
                                                                       suggestionQuery.Field,
                                                                       suggestionQuery.Popularity))
                {
                    if (result.Add(suggestion) == false)
                    {
                        continue;
                    }

                    maxSuggestions--;
                    if (maxSuggestions <= 0)
                    {
                        break;
                    }
                }
            }

            return new SuggestionQueryResult
            {
                Suggestions = result.ToArray()
            };
        }
        public virtual void TestWickedLongTerm()
        {
            using (RAMDirectory dir = new RAMDirectory())
            {
                char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
                Arrays.Fill(chars, 'x');

                string   bigTerm = new string(chars);
                Document doc     = new Document();

                using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))))
                {
                    // This produces a too-long term:
                    string contents = "abc xyz x" + bigTerm + " another term";
                    doc.Add(new TextField("content", contents, Field.Store.NO));
                    writer.AddDocument(doc);

                    // Make sure we can add another normal document
                    doc = new Document();
                    doc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO));
                    writer.AddDocument(doc);
                }
#pragma warning disable 612, 618
                using (IndexReader reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
                {
                    // Make sure all terms < max size were indexed
                    assertEquals(2, reader.DocFreq(new Term("content", "abc")));
                    assertEquals(1, reader.DocFreq(new Term("content", "bbb")));
                    assertEquals(1, reader.DocFreq(new Term("content", "term")));
                    assertEquals(1, reader.DocFreq(new Term("content", "another")));

                    // Make sure position is still incremented when
                    // massive term is skipped:
                    DocsAndPositionsEnum tps = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "content", new BytesRef("another"));
                    assertTrue(tps.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    assertEquals(1, tps.Freq);
                    assertEquals(3, tps.NextPosition());

                    // Make sure the doc that has the massive term is in
                    // the index:
                    assertEquals("document with wicked long term should is not in the index!", 2, reader.NumDocs);
                }

                // Make sure we can add a document with exactly the
                // maximum length term, and search on that term:
                doc = new Document();
                doc.Add(new TextField("content", bigTerm, Field.Store.NO));
                ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
                sa.MaxTokenLength = 100000;
                using (var writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)))
                {
                    writer.AddDocument(doc);
                }
#pragma warning disable 612, 618
                using (var reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
                {
                    assertEquals(1, reader.DocFreq(new Term("content", bigTerm)));
                }
            }
        }
Example #38
        /// <summary>
        /// Expert: highlights the top-N passages from multiple fields,
        /// for the provided int[] docids, to custom object as
        /// returned by the <see cref="PassageFormatter"/>.  Use
        /// this API to render to something other than <see cref="string"/>.
        /// </summary>
        /// <param name="fieldsIn">field names to highlight. Must have a stored string value and also be indexed with offsets.</param>
        /// <param name="query">query to highlight.</param>
        /// <param name="searcher">searcher that was previously used to execute the query.</param>
        /// <param name="docidsIn">containing the document IDs to highlight.</param>
        /// <param name="maxPassagesIn">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param>
        /// <returns>
        /// <see cref="T:IDictionary{string, object[]}"/> keyed on field name, containing the array of formatted snippets
        /// corresponding to the documents in <paramref name="docidsIn"/>.
        /// If no highlights were found for a document, the
        /// first <paramref name="maxPassagesIn"/> from the field will
        /// be returned.
        /// </returns>
        /// <exception cref="IOException">if an I/O error occurred during processing</exception>
        /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception>
        protected internal virtual IDictionary<string, object[]> HighlightFieldsAsObjects(string[] fieldsIn, Query query, IndexSearcher searcher, int[] docidsIn, int[] maxPassagesIn)
        {
            if (fieldsIn.Length < 1)
            {
                throw new ArgumentException("fieldsIn must not be empty");
            }
            if (fieldsIn.Length != maxPassagesIn.Length)
            {
                throw new ArgumentException("invalid number of maxPassagesIn");
            }
            IndexReader reader    = searcher.IndexReader;
            Query       rewritten = Rewrite(query);

            JCG.SortedSet<Term> queryTerms = new JCG.SortedSet<Term>();
            rewritten.ExtractTerms(queryTerms);

            IndexReaderContext          readerContext = reader.Context;
            IList<AtomicReaderContext> leaves        = readerContext.Leaves;

            // Make our own copies because we sort in-place:
            int[] docids = new int[docidsIn.Length];
            System.Array.Copy(docidsIn, 0, docids, 0, docidsIn.Length);
            string[] fields = new string[fieldsIn.Length];
            System.Array.Copy(fieldsIn, 0, fields, 0, fieldsIn.Length);
            int[] maxPassages = new int[maxPassagesIn.Length];
            System.Array.Copy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.Length);

            // sort for sequential io
            ArrayUtil.TimSort(docids);
            new InPlaceMergeSorterAnonymousHelper(fields, maxPassages).Sort(0, fields.Length);

            // pull stored data:
            IList<string[]> contents = LoadFieldValues(searcher, fields, docids, maxLength);

            IDictionary<string, object[]> highlights = new Dictionary<string, object[]>();

            for (int i = 0; i < fields.Length; i++)
            {
                string field       = fields[i];
                int    numPassages = maxPassages[i];
                Term   floor       = new Term(field, "");
                Term   ceiling     = new Term(field, UnicodeUtil.BIG_TERM);

                // LUCENENET: Call custom GetViewBetween overload to mimic Java's exclusive upper bound behavior.
                var fieldTerms = queryTerms.GetViewBetween(floor, lowerValueInclusive: true, ceiling, upperValueInclusive: false);

                // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

                // Strip off the redundant field:
                BytesRef[] terms    = new BytesRef[fieldTerms.Count];
                int        termUpto = 0;
                foreach (Term term in fieldTerms)
                {
                    terms[termUpto++] = term.Bytes;
                }
                IDictionary<int, object> fieldHighlights = HighlightField(field, contents[i], GetBreakIterator(field), terms, docids, leaves, numPassages, query);

                object[] result = new object[docids.Length];
                for (int j = 0; j < docidsIn.Length; j++)
                {
                    fieldHighlights.TryGetValue(docidsIn[j], out result[j]);
                }
                highlights[field] = result;
            }
            return highlights;
        }
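        // Hypothetical call site (a sketch): the method is protected internal, so a
        // subclass would drive it; the field name and passage count are assumptions.
        protected object[] HighlightBody(Query query, IndexSearcher searcher, TopDocs topDocs)
        {
            int[] docids = topDocs.ScoreDocs.Select(sd => sd.Doc).ToArray(); // requires System.Linq
            IDictionary<string, object[]> highlights = HighlightFieldsAsObjects(
                new[] { "body" }, query, searcher, docids, new[] { 2 });
            return highlights["body"];
        }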
Example #39
 public ExplanationAssertingSearcher(IndexReader r)
     : base(r)
 {
 }
Example #40
        public static TermInfo[] GetHighFreqTerms(Directory dir,
                                                  Hashtable junkWords,
                                                  int numTerms,
                                                  String[] fields)
        {
            if (dir == null || fields == null)
            {
                return new TermInfo[0];
            }

            IndexReader   reader = IndexReader.Open(dir, true);
            TermInfoQueue tiq    = new TermInfoQueue(numTerms);
            TermEnum      terms  = reader.Terms();

            int minFreq = 0;

            while (terms.Next())
            {
                String field = terms.Term().Field();

                if (fields != null && fields.Length > 0)
                {
                    bool skip = true;

                    for (int i = 0; i < fields.Length; i++)
                    {
                        if (field.Equals(fields[i]))
                        {
                            skip = false;
                            break;
                        }
                    }
                    if (skip)
                    {
                        continue;
                    }
                }

                if (junkWords != null && junkWords[terms.Term().Text()] != null)
                {
                    continue;
                }

                if (terms.DocFreq() > minFreq)
                {
                    TermInfo top = (TermInfo)tiq.Add(new TermInfo(terms.Term(), terms.DocFreq()));
                    if (tiq.Size() >= numTerms)                        // if tiq overfull
                    {
                        tiq.Pop();                                     // remove lowest in tiq
                        minFreq = top.DocFreq;                         // reset minFreq
                    }
                }
            }

            TermInfo[] res = new TermInfo[tiq.Size()];

            for (int i = 0; i < res.Length; i++)
            {
                res[res.Length - i - 1] = (TermInfo)tiq.Pop();
            }

            reader.Close();

            return res;
        }
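        // Hypothetical usage (a sketch; the field name, junk words, and the TermInfo
        // members are assumptions based on how the method above builds its results):
        public static void PrintTopTerms(Directory dir)
        {
            Hashtable junk = new Hashtable { { "the", "the" }, { "and", "and" } }; // values only need to be non-null
            TermInfo[] top = GetHighFreqTerms(dir, junk, 25, new[] { "contents" });
            foreach (TermInfo ti in top)
            {
                Console.WriteLine(ti.Term + " : " + ti.DocFreq);
            }
        }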
Example #41
        public virtual void TestPerFieldCodec()
        {
            int NUM_DOCS = AtLeast(173);

            if (Verbose)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            using BaseDirectoryWrapper dir = NewDirectory();
            dir.CheckIndexOnDispose        = false; // we use a custom codec provider
            using IndexWriter w            = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(new CustomPerFieldCodec()).SetMergePolicy(NewLogMergePolicy(3)));
            Documents.Document doc = new Documents.Document();
            // uses default codec:
            doc.Add(NewTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
            // uses pulsing codec:
            Field field2 = NewTextField("field2", "this field uses the pulsing codec as the test", Field.Store.NO);

            doc.Add(field2);

            Field idField = NewStringField("id", "", Field.Store.NO);

            doc.Add(idField);
            for (int i = 0; i < NUM_DOCS; i++)
            {
                idField.SetStringValue("" + i);
                w.AddDocument(doc);
                if ((i + 1) % 10 == 0)
                {
                    w.Commit();
                }
            }
            if (Verbose)
            {
                Console.WriteLine("TEST: now delete id=77");
            }
            w.DeleteDocuments(new Term("id", "77"));

            using (IndexReader r = DirectoryReader.Open(w, true))
            {
                Assert.AreEqual(NUM_DOCS - 1, r.NumDocs);
                IndexSearcher s = NewSearcher(r);
                Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
                Assert.AreEqual(NUM_DOCS - 1, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
            }

            if (Verbose)
            {
                Console.WriteLine("\nTEST: now delete 2nd doc");
            }
            w.DeleteDocuments(new Term("id", "44"));

            if (Verbose)
            {
                Console.WriteLine("\nTEST: now force merge");
            }
            w.ForceMerge(1);
            if (Verbose)
            {
                Console.WriteLine("\nTEST: now open reader");
            }
            using (IndexReader r = DirectoryReader.Open(w, true))
            {
                Assert.AreEqual(NUM_DOCS - 2, r.MaxDoc);
                Assert.AreEqual(NUM_DOCS - 2, r.NumDocs);
                IndexSearcher s = NewSearcher(r);
                Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field1", "standard")), 1).TotalHits);
                Assert.AreEqual(NUM_DOCS - 2, s.Search(new TermQuery(new Term("field2", "pulsing")), 1).TotalHits);
                Assert.AreEqual(1, s.Search(new TermQuery(new Term("id", "76")), 1).TotalHits);
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "77")), 1).TotalHits);
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("id", "44")), 1).TotalHits);

                if (Verbose)
                {
                    Console.WriteLine("\nTEST: now close NRT reader");
                }
            }
        }
 public CustomSearcher(TestCustomSearcherSort outerInstance, IndexReader r, int switcher)
     : base(r)
 {
     this.OuterInstance = outerInstance;
     this.Switcher      = switcher;
 }
Example #43
		private IndexReader GetIndexReader()
		{
			return IndexReader.Open(IndexDirectory, true);
		}
Example #44
        public virtual void TestTransitionAPI()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");

            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);

            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.Terms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Size());
                TermsEnum tvsEnum = tvs.Iterator(null);
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next());
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv"))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next());
                Assert.IsNull(tvsEnum.Next());
            }

            r.Dispose();
            dir.Dispose();
        }
Example #45
 public override IndexSearcher NewSearcher(IndexReader ignored)
 {
     return LuceneTestCase.NewSearcher(Other);
 }
Example #46
 public override Query Rewrite(IndexReader reader, MultiTermQuery query)
 {
     return @delegate.Rewrite(reader, query);
 }
        /// <summary>
        /// Build the suggest index, using up to the specified
        ///  amount of temporary RAM while building.  Note that
        ///  the weights for the suggestions are ignored.
        /// </summary>
        public virtual void Build(IInputIterator iterator, double ramBufferSizeMB)
        {
            if (iterator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();

            // LUCENENET specific - using GetRandomFileName() instead of picking a random int
            DirectoryInfo tempIndexPath = null;

            while (true)
            {
                tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName())));
                tempIndexPath.Create();
                if (System.IO.Directory.Exists(tempIndexPath.FullName))
                {
                    break;
                }
            }

            Directory dir = FSDirectory.Open(tempIndexPath);

            try
            {
#pragma warning disable 612, 618
                IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
                iwc.SetOpenMode(OpenMode.CREATE);
                iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
                IndexWriter writer = new IndexWriter(dir, iwc);

                var ft = new FieldType(TextField.TYPE_NOT_STORED);
                // TODO: if only we had IndexOptions.TERMS_ONLY...
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                ft.OmitNorms    = true;
                ft.Freeze();

                Document doc   = new Document();
                Field    field = new Field("body", "", ft);
                doc.Add(field);

                totTokens = 0;
                IndexReader reader = null;

                bool success = false;
                count = 0;
                try
                {
                    while (true)
                    {
                        BytesRef surfaceForm = iterator.Next();
                        if (surfaceForm == null)
                        {
                            break;
                        }
                        field.SetStringValue(surfaceForm.Utf8ToString());
                        writer.AddDocument(doc);
                        count++;
                    }
                    reader = DirectoryReader.Open(writer, false);

                    Terms terms = MultiFields.GetTerms(reader, "body");
                    if (terms == null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }

                    // Move all ngrams into an FST:
                    TermsEnum termsEnum = terms.GetIterator(null);

                    Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

                    Int32sRef scratchInts = new Int32sRef();
                    while (true)
                    {
                        BytesRef term = termsEnum.Next();
                        if (term == null)
                        {
                            break;
                        }
                        int ngramCount = CountGrams(term);
                        if (ngramCount > grams)
                        {
                            throw new ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                        }
                        if (ngramCount == 1)
                        {
                            totTokens += termsEnum.TotalTermFreq;
                        }

                        builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
                    }

                    fst = builder.Finish();
                    if (fst == null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }
                    //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                    /*
                     * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                     * Util.toDot(fst, pw, true, true);
                     * pw.close();
                     */

                    success = true;
                }
                finally
                {
                    if (success)
                    {
                        IOUtils.Dispose(writer, reader);
                    }
                    else
                    {
                        IOUtils.DisposeWhileHandlingException(writer, reader);
                    }
                }
            }
            finally
            {
                try
                {
                    IOUtils.Dispose(dir);
                }
                finally
                {
                    // LUCENENET specific - since we are removing the entire directory anyway,
                    // it doesn't make sense to first do a loop in order to remove the files.
                    // Let the System.IO.Directory.Delete() method handle that.
                    // We also need to dispose the Directory instance before deleting from disk.
                    try
                    {
                        System.IO.Directory.Delete(tempIndexPath.FullName, true);
                    }
                    catch (Exception e)
                    {
                        throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
                    }
                }
            }
        }
Example #48
 /// <summary>
 /// Constructor requiring an <see cref="IndexReader"/>.
 /// </summary>
 public MoreLikeThis(IndexReader ir)
     : this(ir, new DefaultSimilarity())
 {
 }
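// A brief hedged sketch of how the constructor above is typically used
// (the field name and doc id are illustrative; 'indexReader' and
// 'searcher' are assumed to exist in scope):
var mlt = new MoreLikeThis(indexReader)
{
    FieldNames = new[] { "body" }, // illustrative field
    MinTermFreq = 1,               // loosen defaults for small indexes
    MinDocFreq = 1
};
Query likeQuery = mlt.Like(42);    // query for docs similar to doc id 42
TopDocs similar = searcher.Search(likeQuery, 10);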
Example #49
        private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
                                       int numberOfDocumentsToIndex)
        {
            for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("indexIter=" + indexIter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(Random(), dir,
                                                              NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false))
                                                              .SetMergePolicy(NewLogMergePolicy()));
                bool scoreDocsInOrder         = TestJoinUtil.Random().NextBoolean();
                IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
                                                              scoreDocsInOrder);

                IndexReader topLevelReader = w.Reader;
                w.Dispose();
                for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("searchIter=" + searchIter);
                    }
                    IndexSearcher indexSearcher = NewSearcher(topLevelReader);

                    int         r              = Random().Next(context.RandomUniqueValues.Length);
                    bool        from           = context.RandomFrom[r];
                    string      randomValue    = context.RandomUniqueValues[r];
                    FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
                                                                      context);

                    Query actualQuery = new TermQuery(new Term("value", randomValue));
                    if (VERBOSE)
                    {
                        Console.WriteLine("actualQuery=" + actualQuery);
                    }

                    var       scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
                    ScoreMode scoreMode       = (ScoreMode)Random().Next(scoreModeLength);
                    if (VERBOSE)
                    {
                        Console.WriteLine("scoreMode=" + scoreMode);
                    }

                    Query joinQuery;
                    if (from)
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    else
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("joinQuery=" + joinQuery);
                    }

                    // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
                    FixedBitSet          actualResult         = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
                    indexSearcher.Search(joinQuery,
                                         new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult,
                                                                                 topScoreDocCollector));
                    // Asserting bit set...
                    if (VERBOSE)
                    {
                        Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                        DocIdSetIterator iterator = expectedResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                        Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                        iterator = actualResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                    }
                    assertEquals(expectedResult, actualResult);

                    // Asserting TopDocs...
                    TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
                    TopDocs actualTopDocs   = topScoreDocCollector.GetTopDocs();
                    assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
                    assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
                    if (scoreMode == ScoreMode.None)
                    {
                        continue;
                    }

                    assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
                    for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
                    {
                        if (VERBOSE)
                        {
                            string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                            string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score);
                        }
                        assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                        Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
                    }
                }
                topLevelReader.Dispose();
                dir.Dispose();
            }
        }
Example #50
 public void Init(IndexReader reader)
 {
     spellChecker.IndexDictionary(new LuceneDictionary(reader, field), workContext.CancellationToken);
 }
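// Once Init has indexed the dictionary, a hedged follow-up sketch
// (assuming the standard SpellChecker API; the misspelling is illustrative):
string[] suggestions = spellChecker.SuggestSimilar("serch", 5);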
Example #51
        public void Search(Analyzer analyzer, string keyword)
        {
            if (string.IsNullOrEmpty(keyword))
            {
                throw new ArgumentException("keyword must not be null or empty", nameof(keyword));
            }

            // Timing
            Stopwatch watch = new Stopwatch();

            watch.Start();

            // Settings
            int  numHits           = 10;
            bool docsScoredInOrder = true;
            bool isReadOnly        = true;

            // Create the searcher
            FSDirectory   fsDir         = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            IndexSearcher indexSearcher = new IndexSearcher(IndexReader.Open(fsDir, isReadOnly));

            TopScoreDocCollector collector = TopScoreDocCollector.Create(numHits, docsScoredInOrder);
            QueryParser          parser    = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Config.Field_Content, analyzer);
            Query query = parser.Parse(keyword);

            indexSearcher.Search(query, collector);

            //Console.WriteLine(collector.TotalHits);
            var result = collector.TopDocs().ScoreDocs;

            watch.Stop();
            Console.WriteLine("总共耗时{0}毫秒", watch.ElapsedMilliseconds);
            Console.WriteLine("总共找到{0}个文件", result.Count());

            foreach (var docs in result)
            {
                Document doc = indexSearcher.Doc(docs.Doc);
                Console.WriteLine("得分:{0},文件名:{1}", docs.Score, doc.Get(Config.Field_Name));
            }

            indexSearcher.Dispose();

            //BooleanQuery booleanQuery = new BooleanQuery();

            //QueryParser parser1 = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Config.Field_Path, analyzer);
            //Query query1 = parser1.Parse(path);
            //parser1.DefaultOperator = QueryParser.Operator.AND;
            //booleanQuery.Add(query1, Occur.MUST);

            ////QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, Config.Field_Content, analyzer);
            ////Query query = parser.Parse(content);
            ////parser.DefaultOperator = QueryParser.Operator.AND;
            ////booleanQuery.Add(query, Occur.MUST);



            ////var queryParserFilePath = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { Config.Field_FilePath }, analyzer);
            ////query.Add(queryParserFilePath.Parse(path), Occur.SHOULD);

            ////var queryParserContent = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { Config.Field_Content}, analyzer);
            ////query.Add(queryParserContent.Parse(content), Occur.MUST);

            //FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            //bool isReadOnly = true;
            //IndexSearcher indexSearcher = new IndexSearcher(IndexReader.Open(fsDir, isReadOnly));
            ////TopDocs result = indexSearcher.Search(booleanQuery, 10);
            //Sort sort = new Sort(new SortField(Config.Field_Path, SortField.STRING, true));
            //var result = indexSearcher.Search(booleanQuery, (Filter)null, 10 * 1, sort);
        }
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            FieldName = Random().NextBoolean() ? "field" : ""; // sometimes use an empty string as field name
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc = new Document();
            Field field = NewStringField(FieldName, "", Field.Store.NO);
            doc.Add(field);
            List<string> terms = new List<string>();
            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.SetStringValue(s);
                terms.Add(s);
                writer.AddDocument(doc);
            }

            if (VERBOSE)
            {
                // utf16 order
                terms.Sort();
                Console.WriteLine("UTF16 order:");
                foreach (string s in terms)
                {
                    Console.WriteLine("  " + UnicodeUtil.ToHexString(s));
                }
            }

            Reader = writer.Reader;
            Searcher1 = NewSearcher(Reader);
            Searcher2 = NewSearcher(Reader);
            writer.Dispose();
        }
Example #53
 public override IndexSearcher NewSearcher(IndexReader r)
 {
     return(new IndexSearcher(r));
 }
        public void TestWildcardInBoolean()
        {
            Directory dir = NewDirectory();
            // use simpleanalyzer for more natural tokenization (else "test." is a token)
            Analyzer          analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
            IndexWriterConfig iwc      = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);

            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            Field    body = new Field("body", "", offsetsType);
            Document doc  = new Document();

            doc.Add(body);

            body.SetStringValue("This is a test.");
            iw.AddDocument(doc);
            body.SetStringValue("Test a one sentence document.");
            iw.AddDocument(doc);

            IndexReader ir = iw.Reader;

            iw.Dispose();

            IndexSearcher       searcher    = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighterAnalyzerHelper(analyzer);
            //PostingsHighlighter highlighter = new PostingsHighlighter() {
            //      @Override
            //      protected Analyzer getIndexAnalyzer(String field)
            //{
            //    return analyzer;
            //}
            //    };
            BooleanQuery query = new BooleanQuery();

            query.Add(new WildcardQuery(new Term("body", "te*")), Occur.SHOULD);
            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);

            assertEquals(2, topDocs.TotalHits);
            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a <b>test</b>.", snippets[0]);
            assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

            // must not
            query = new BooleanQuery();
            query.Add(new MatchAllDocsQuery(), Occur.SHOULD);
            query.Add(new WildcardQuery(new Term("bogus", "te*")), Occur.MUST_NOT);
            topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(2, topDocs.TotalHits);
            snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(2, snippets.Length);
            assertEquals("This is a test.", snippets[0]);
            assertEquals("Test a one sentence document.", snippets[1]);

            ir.Dispose();
            dir.Dispose();
        }
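// The commented-out Java block above suggests what
// PostingsHighlighterAnalyzerHelper does; a plausible C# shape, assuming
// it only overrides GetIndexAnalyzer to return the supplied analyzer:
internal class PostingsHighlighterAnalyzerHelper : PostingsHighlighter
{
    private readonly Analyzer analyzer;

    public PostingsHighlighterAnalyzerHelper(Analyzer analyzer)
    {
        this.analyzer = analyzer;
    }

    protected override Analyzer GetIndexAnalyzer(string field)
    {
        // Highlight with the same analyzer that was used at index time.
        return analyzer;
    }
}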
 public static void AfterClass()
 {
     Reader.Dispose();
     MultiReader.Dispose();
     MultiReaderDupls.Dispose();
     Dir.Dispose();
     Sdir1.Dispose();
     Sdir2.Dispose();
     Reader = MultiReader = MultiReaderDupls = null;
     Searcher = MultiSearcher = MultiSearcherDupls = null;
     Dir = Sdir1 = Sdir2 = null;
 }
        public virtual void TestRangeQueryId()
        {
            // NOTE: uses index built in *super* setUp

            IndexReader   reader = SignedIndexReader;
            IndexSearcher search = NewSearcher(reader);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: reader=" + reader);
            }

            int medId = ((MaxId - MinId) / 2);

            string minIP = Pad(MinId);
            string maxIP = Pad(MaxId);
            string medIP = Pad(medId);

            int numDocs = reader.NumDocs;

            AssertEquals("num of docs", numDocs, 1 + MaxId - MinId);

            ScoreDoc[] result;

            // test id, bounded on both ends

            result = search.Search(Csrq("id", minIP, maxIP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("find all", numDocs, result.Length);

            result = search.Search(Csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("find all", numDocs, result.Length);

            result = search.Search(Csrq("id", minIP, maxIP, T, F), null, numDocs).ScoreDocs;
            AssertEquals("all but last", numDocs - 1, result.Length);

            result = search.Search(Csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("all but last", numDocs - 1, result.Length);

            result = search.Search(Csrq("id", minIP, maxIP, F, T), null, numDocs).ScoreDocs;
            AssertEquals("all but first", numDocs - 1, result.Length);

            result = search.Search(Csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("all but first", numDocs - 1, result.Length);

            result = search.Search(Csrq("id", minIP, maxIP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("all but ends", numDocs - 2, result.Length);

            result = search.Search(Csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("all but ends", numDocs - 2, result.Length);

            result = search.Search(Csrq("id", medIP, maxIP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("med and up", 1 + MaxId - medId, result.Length);

            result = search.Search(Csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("med and up", 1 + MaxId - medId, result.Length);

            result = search.Search(Csrq("id", minIP, medIP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("up to med", 1 + medId - MinId, result.Length);

            result = search.Search(Csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("up to med", 1 + medId - MinId, result.Length);

            // unbounded id

            result = search.Search(Csrq("id", minIP, null, T, F), null, numDocs).ScoreDocs;
            AssertEquals("min and up", numDocs, result.Length);

            result = search.Search(Csrq("id", null, maxIP, F, T), null, numDocs).ScoreDocs;
            AssertEquals("max and down", numDocs, result.Length);

            result = search.Search(Csrq("id", minIP, null, F, F), null, numDocs).ScoreDocs;
            AssertEquals("not min, but up", numDocs - 1, result.Length);

            result = search.Search(Csrq("id", null, maxIP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("not max, but down", numDocs - 1, result.Length);

            result = search.Search(Csrq("id", medIP, maxIP, T, F), null, numDocs).ScoreDocs;
            AssertEquals("med and up, not max", MaxId - medId, result.Length);

            result = search.Search(Csrq("id", minIP, medIP, F, T), null, numDocs).ScoreDocs;
            AssertEquals("not min, up to med", medId - MinId, result.Length);

            // very small sets

            result = search.Search(Csrq("id", minIP, minIP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("min,min,F,F", 0, result.Length);

            result = search.Search(Csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("min,min,F,F", 0, result.Length);

            result = search.Search(Csrq("id", medIP, medIP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("med,med,F,F", 0, result.Length);

            result = search.Search(Csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("med,med,F,F", 0, result.Length);

            result = search.Search(Csrq("id", maxIP, maxIP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("max,max,F,F", 0, result.Length);

            result = search.Search(Csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("max,max,F,F", 0, result.Length);

            result = search.Search(Csrq("id", minIP, minIP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("min,min,T,T", 1, result.Length);

            result = search.Search(Csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("min,min,T,T", 1, result.Length);

            result = search.Search(Csrq("id", null, minIP, F, T), null, numDocs).ScoreDocs;
            AssertEquals("nul,min,F,T", 1, result.Length);

            result = search.Search(Csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("nul,min,F,T", 1, result.Length);

            result = search.Search(Csrq("id", maxIP, maxIP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("max,max,T,T", 1, result.Length);

            result = search.Search(Csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("max,max,T,T", 1, result.Length);

            result = search.Search(Csrq("id", maxIP, null, T, F), null, numDocs).ScoreDocs;
            AssertEquals("max,nul,T,T", 1, result.Length);

            result = search.Search(Csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("max,nul,T,T", 1, result.Length);

            result = search.Search(Csrq("id", medIP, medIP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("med,med,T,T", 1, result.Length);

            result = search.Search(Csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
            AssertEquals("med,med,T,T", 1, result.Length);
        }
        public virtual void TestRangeQueryRand()
        {
            // NOTE: uses index built in *super* setUp

            IndexReader   reader = SignedIndexReader;
            IndexSearcher search = NewSearcher(reader);

            string minRP = Pad(SignedIndexDir.MinR);
            string maxRP = Pad(SignedIndexDir.MaxR);

            int numDocs = reader.NumDocs;

            AssertEquals("num of docs", numDocs, 1 + MaxId - MinId);

            ScoreDoc[] result;

            // test extremes, bounded on both ends

            result = search.Search(Csrq("rand", minRP, maxRP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("find all", numDocs, result.Length);

            result = search.Search(Csrq("rand", minRP, maxRP, T, F), null, numDocs).ScoreDocs;
            AssertEquals("all but biggest", numDocs - 1, result.Length);

            result = search.Search(Csrq("rand", minRP, maxRP, F, T), null, numDocs).ScoreDocs;
            AssertEquals("all but smallest", numDocs - 1, result.Length);

            result = search.Search(Csrq("rand", minRP, maxRP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("all but extremes", numDocs - 2, result.Length);

            // unbounded

            result = search.Search(Csrq("rand", minRP, null, T, F), null, numDocs).ScoreDocs;
            AssertEquals("smallest and up", numDocs, result.Length);

            result = search.Search(Csrq("rand", null, maxRP, F, T), null, numDocs).ScoreDocs;
            AssertEquals("biggest and down", numDocs, result.Length);

            result = search.Search(Csrq("rand", minRP, null, F, F), null, numDocs).ScoreDocs;
            AssertEquals("not smallest, but up", numDocs - 1, result.Length);

            result = search.Search(Csrq("rand", null, maxRP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("not biggest, but down", numDocs - 1, result.Length);

            // very small sets

            result = search.Search(Csrq("rand", minRP, minRP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("min,min,F,F", 0, result.Length);
            result = search.Search(Csrq("rand", maxRP, maxRP, F, F), null, numDocs).ScoreDocs;
            AssertEquals("max,max,F,F", 0, result.Length);

            result = search.Search(Csrq("rand", minRP, minRP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("min,min,T,T", 1, result.Length);
            result = search.Search(Csrq("rand", null, minRP, F, T), null, numDocs).ScoreDocs;
            AssertEquals("nul,min,F,T", 1, result.Length);

            result = search.Search(Csrq("rand", maxRP, maxRP, T, T), null, numDocs).ScoreDocs;
            AssertEquals("max,max,T,T", 1, result.Length);
            result = search.Search(Csrq("rand", maxRP, null, T, F), null, numDocs).ScoreDocs;
            AssertEquals("max,nul,T,T", 1, result.Length);
        }
Example #58
 /// <param name="query">
 /// a Lucene query (ideally rewritten using <see cref="Query.Rewrite(IndexReader)"/> before
 /// being passed to this class and the searcher)
 /// </param>
 /// <param name="reader">
 /// used to compute IDF which can be used to a) score selected
 /// fragments better b) use graded highlights eg set font color
 /// intensity
 /// </param>
 /// <param name="fieldName">
 /// the field on which Inverse Document Frequency (IDF)
 /// calculations are based
 /// </param>
 public QueryTermScorer(Query query, IndexReader reader, string fieldName)
     : this(QueryTermExtractor.GetIdfWeightedTerms(query, reader, fieldName))
 {
 }
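// A hedged sketch of wiring QueryTermScorer into Highlighter ('parser',
// 'reader', 'analyzer', and 'storedText' are assumed to exist in scope):
Query query = parser.Parse("fast search");
query = query.Rewrite(reader); // expand wildcards/ranges first, as advised above
var highlighter = new Highlighter(new QueryTermScorer(query, reader, "body"));
string fragment = highlighter.GetBestFragment(analyzer, "body", storedText);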
        private void CreateRandomIndexes()
        {
            dir1 = NewDirectory();
            dir2 = NewDirectory();
            int           numDocs     = AtLeast(150);
            int           numTerms    = TestUtil.NextInt(Random(), 1, numDocs / 5);
            ISet<string> randomTerms = new HashSet<string>();

            while (randomTerms.size() < numTerms)
            {
                randomTerms.add(TestUtil.RandomSimpleString(Random()));
            }
            terms = new List<string>(randomTerms);
            long seed = Random().NextLong();
            IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));

            iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
            RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
            RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);

            for (int i = 0; i < numDocs; ++i)
            {
                if (Random().nextInt(5) == 0 && i != numDocs - 1)
                {
                    string term = RandomInts.RandomFrom(Random(), terms);
                    iw1.DeleteDocuments(new Term("s", term));
                    iw2.DeleteDocuments(new Term("s", term));
                }
                Document doc = randomDocument();
                iw1.AddDocument(doc);
                iw2.AddDocument(doc);
                if (Random().nextInt(8) == 0)
                {
                    iw1.Commit();
                    iw2.Commit();
                }
            }
            // Make sure we have something to merge
            iw1.Commit();
            iw2.Commit();
            Document doc2 = randomDocument();

            // NOTE: don't use RIW.AddDocument directly, since it sometimes commits,
            // which may trigger a merge, in which case ForceMerge may not do anything.
            // With field updates this is a problem, since the updates can go into the
            // single segment in the index, and therefore the index won't be sorted.
            // This hurts the assumption of the test later on, that the index is
            // sorted by SortingMP.
            iw1.w.AddDocument(doc2);
            iw2.w.AddDocument(doc2);

            if (DefaultCodecSupportsFieldUpdates())
            {
                // update NDV of docs belonging to one term (covers many documents)
                long   value = Random().NextLong();
                string term  = RandomInts.RandomFrom(Random(), terms);
                iw1.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
                iw2.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
            }

            iw1.ForceMerge(1);
            iw2.ForceMerge(1);
            iw1.Dispose();
            iw2.Dispose();
            reader       = DirectoryReader.Open(dir1);
            sortedReader = DirectoryReader.Open(dir2);
        }
 public override void SetNextReader(IndexReader reader, int docBase)
 {
     _docBase = docBase;
 }
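// SetNextReader above caches the per-segment doc base; a minimal hedged
// sketch of the companion Collect override that consumes it ('_hits' is
// a hypothetical List<int> field on the same collector):
public override void Collect(int doc)
{
    // Offset the segment-relative id by the doc base to obtain the
    // top-level (IndexSearcher) document id.
    _hits.Add(_docBase + doc);
}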