public override void SetUp()
 {
     base.SetUp();
     dir = NewDirectory();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer
         (Random()));
     iwc.SetMergePolicy(NewLogMergePolicy());
     var iw = new RandomIndexWriter(Random(), dir, iwc);
     var doc = new Document
     {
         NewStringField("id", "1", Field.Store.YES),
         NewTextField("body", "some contents and more contents", Field.Store.NO),
         new NumericDocValuesField("popularity", 5)
     };
     iw.AddDocument(doc);
     doc = new Document
     {
         NewStringField("id", "2", Field.Store.YES),
         NewTextField("body", "another document with different contents", Field.Store
             .NO),
         new NumericDocValuesField("popularity", 20)
     };
     iw.AddDocument(doc);
     doc = new Document
     {
         NewStringField("id", "3", Field.Store.YES),
         NewTextField("body", "crappy contents", Field.Store.NO),
         new NumericDocValuesField("popularity", 2)
     };
     iw.AddDocument(doc);
     iw.ForceMerge(1);
     reader = iw.Reader;
     iw.Dispose();
 }
Ejemplo n.º 2
0
        public void TestFieldNotPresent()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            int num = AtLeast(3);
            int skip = Random().Next(num);
            var terms = new List<Term>();
            for (int i = 0; i < num; i++)
            {
                terms.Add(new Term("field" + i, "content1"));
                Document doc = new Document();
                if (skip == i)
                {
                    continue;
                }
                doc.Add(NewStringField("field" + i, "content1", Field.Store.YES));
                w.AddDocument(doc);
            }

            w.ForceMerge(1);
            IndexReader reader = w.Reader;
            w.Dispose();
            assertEquals(1, reader.Leaves.size());

            AtomicReaderContext context = reader.Leaves.First();
            TermsFilter tf = new TermsFilter(terms);

            FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);
            assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality());
            reader.Dispose();
            dir.Dispose();
        }
 public virtual void Test()
 {
     Directory dir = NewDirectory();
     IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     conf.SetCodec(new Lucene46Codec());
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);
     Document doc = new Document();
     // these fields should sometimes get term vectors, etc
     Field idField = NewStringField("id", "", Field.Store.NO);
     Field bodyField = NewTextField("body", "", Field.Store.NO);
     Field dvField = new NumericDocValuesField("dv", 5);
     doc.Add(idField);
     doc.Add(bodyField);
     doc.Add(dvField);
     for (int i = 0; i < 100; i++)
     {
         idField.StringValue = Convert.ToString(i);
         bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
         riw.AddDocument(doc);
         if (Random().Next(7) == 0)
         {
             riw.Commit();
         }
         // TODO: we should make a new format with a clean header...
         // if (Random().nextInt(20) == 0) {
         //  riw.DeleteDocuments(new Term("id", Integer.toString(i)));
         // }
     }
     riw.Dispose();
     CheckHeaders(dir);
     dir.Dispose();
 }
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true), Similarity, TimeZone);

            for (int i = 900; i < 1112; i++)
            {
                Document doc = new Document();
                string num = Regex.Replace(Regex.Replace(English.IntToEnglish(i), "[-]", " "), "[,]", "");
                doc.Add(NewTextField("numbers", num, Field.Store.NO));
                writer.AddDocument(doc);
            }

            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "hundredeight eightyeight yeight", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "tres y cinco", Field.Store.NO));
                writer.AddDocument(doc);
            }

            writer.Commit();
            writer.Dispose();
        }
        public void BeforeClass()
        {
            Dir = NewDirectory();
            Sdir1 = NewDirectory();
            Sdir2 = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone);

            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO));
                writer.AddDocument(doc);
                ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc);
            }
            writer.ForceMerge(1);
            swriter1.ForceMerge(1);
            swriter2.ForceMerge(1);
            writer.Dispose();
            swriter1.Dispose();
            swriter2.Dispose();

            Reader = DirectoryReader.Open(Dir);
            Searcher = NewSearcher(Reader);

            MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true);
            MultiSearcher = NewSearcher(MultiReader);

            MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true);
            MultiSearcherDupls = NewSearcher(MultiReaderDupls);
        }
Ejemplo n.º 6
0
        public override void SetUp()
        {
            base.SetUp();
            // we generate aweful regexps: good for testing.
            // but for preflex codec, the test can be very slow, so use less iterations.
            NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.YES);
            doc.Add(field);
            Terms = new SortedSet<BytesRef>();

            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.StringValue = s;
                Terms.Add(new BytesRef(s));
                writer.AddDocument(doc);
            }

            TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Ejemplo n.º 7
0
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.OmitNorms = true;
            Field field = NewField("field", "", customType);
            doc.Add(field);

            NumberFormatInfo df = new NumberFormatInfo();
            df.NumberDecimalDigits = 0;

            //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString(df);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
Ejemplo n.º 8
0
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            Field f = NewField("foo", "this is a test test", ft);
            doc.Add(f);
            for (int i = 0; i < 100; i++)
            {
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;
            w.Dispose();

            Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test")));

            DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
            while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(2, de.Freq());
            }

            reader.Dispose();
            dir.Dispose();
        }
        public void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);

            Document doc = new Document();
            Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
            doc.Add(field);

            for (int i = 0; i < 5137; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            field.StringValue = "meaninglessnames";
            for (int i = 5138; i < 11377; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            int numDocs = TestUtil.NextInt(Random(), 2049, 4000);
            for (int i = 0; i < numDocs; i++)
            {
                var document = new Document
				{
				    NewTextField("english", English.IntToEnglish(i), Field.Store.NO),
				    NewTextField("oddeven", (i%2 == 0) ? "even" : "odd", Field.Store.NO
				        ),
				    NewStringField("byte", string.Empty + (unchecked((byte) Random().Next
				        ())), Field.Store.NO),
				    NewStringField("short", string.Empty + ((short) Random().Next()), Field.Store
				        .NO),
				    new IntField("int", Random().Next(), Field.Store.NO),
				    new LongField("long", Random().NextLong(), Field.Store.NO),
				    new FloatField("float", Random().NextFloat(), Field.Store.NO),
				    new DoubleField("double", Random().NextDouble(), Field.Store.NO),
				    new NumericDocValuesField("intdocvalues", Random().Next()),
				    new FloatDocValuesField("floatdocvalues", Random().NextFloat())
				};
                iw.AddDocument(document);
            }
            reader = iw.Reader;
            iw.Dispose();
            searcher = NewSearcher(reader);
        }
		public override void SetUp()
		{
			base.SetUp();
			dir = NewDirectory();
			var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
			var doc = new Document
			{
			    NewStringField("id", "1", Field.Store.YES),
			    NewTextField("body", "some contents and more contents", Field.Store.NO),
			    new NumericDocValuesField("popularity", 5)
			};
		    iw.AddDocument(doc);
			doc = new Document
			{
			    NewStringField("id", "2", Field.Store.YES),
			    NewTextField("body", "another document with different contents", Field.Store
			        .NO),
			    new NumericDocValuesField("popularity", 20)
			};
		    iw.AddDocument(doc);
			doc = new Document
			{
			    NewStringField("id", "3", Field.Store.YES),
			    NewTextField("body", "crappy contents", Field.Store.NO),
			    new NumericDocValuesField("popularity", 2)
			};
		    iw.AddDocument(doc);
			reader = iw.Reader;
			searcher = new IndexSearcher(reader);
			iw.Dispose();
		}
Ejemplo n.º 12
0
        public virtual void TestPrefixQuery_Mem()
        {
            Directory directory = NewDirectory();

            string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" };
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            for (int i = 0; i < categories.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("category", categories[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader reader = writer.Reader;

            PrefixQuery query = new PrefixQuery(new Term("category", "/Computers"));
            IndexSearcher searcher = NewSearcher(reader);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below");

            query = new PrefixQuery(new Term("category", "/Computers/Mac"));
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "One in /Computers/Mac");

            query = new PrefixQuery(new Term("category", ""));
            Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category");
            Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "everything");
            writer.Dispose();
            reader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 13
0
        public void TestReverse()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(NewStringField("value", "foo", Field.Store.NO));
            doc.Add(NewStringField("value", "bar", Field.Store.NO));
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("value", "baz", Field.Store.NO));
            doc.Add(NewStringField("id", "2", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            Sort sort = new Sort(new SortedSetSortField("value", true));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            assertEquals(2, td.TotalHits);
            // 'bar' comes before 'baz'
            assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
            assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));

            ir.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 14
0
        public virtual void TestMethod()
        {
            Directory directory = NewDirectory();

            string[] values = new string[] { "1", "2", "3", "4" };

            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            for (int i = 0; i < values.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(FIELD, values[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader ir = writer.Reader;
            writer.Dispose();

            BooleanQuery booleanQuery1 = new BooleanQuery();
            booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
            booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

            BooleanQuery query = new BooleanQuery();
            query.Add(booleanQuery1, BooleanClause.Occur.MUST);
            query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

            IndexSearcher indexSearcher = NewSearcher(ir);
            ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "Number of matched documents");
            ir.Dispose();
            directory.Dispose();
        }
        public void TestInternalLevenshteinDistance()
        {
            DirectSpellChecker spellchecker = new DirectSpellChecker();
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
                new MockAnalyzer(Random(), MockTokenizer.KEYWORD, true), Similarity, TimeZone);

            string[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoi𐑍" };
            for (int i = 0; i < termsToAdd.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("repentance", termsToAdd[i], Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.Reader;
            string misspelled = "metanoix";
            SuggestWord[] similar = spellchecker.SuggestSimilar(new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length == 4);

            IStringDistance sd = spellchecker.Distance;
            assertTrue(sd is LuceneLevenshteinDistance);
            foreach (SuggestWord word in similar)
            {
                assertTrue(word.Score == sd.GetDistance(word.String, misspelled));
                assertTrue(word.Score == sd.GetDistance(misspelled, word.String)); // LUCNENET TODO: Perhaps change this to word.ToString()?
            }

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 16
0
        public virtual void TestString()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            doc.Add(NewStringField("value", "foo", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("value", "bar", Field.Store.YES));
            writer.AddDocument(doc);
            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            Assert.AreEqual(2, td.TotalHits);
            // 'bar' comes before 'foo'
            Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
            Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));

            ir.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 17
0
        public virtual void TestRollbackIntegrityWithBufferFlush()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter rw = new RandomIndexWriter(Random(), dir);
            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES));
                rw.AddDocument(doc);
            }
            rw.Dispose();

            // If buffer size is small enough to cause a flush, errors ensue...
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND));

            for (int i = 0; i < 3; i++)
            {
                Document doc = new Document();
                string value = Convert.ToString(i);
                doc.Add(NewStringField("pk", value, Field.Store.YES));
                doc.Add(NewStringField("text", "foo", Field.Store.YES));
                w.UpdateDocument(new Term("pk", value), doc);
            }
            w.Rollback();

            IndexReader r = DirectoryReader.Open(dir);
            Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 18
0
 public override void SetUp()
 {
     base.SetUp();
     INDEX_SIZE = AtLeast(2000);
     Index = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Index);
     RandomGen random = new RandomGen(this, Random());
     for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if to low the
     {
         // problem doesn't show up
         Document doc = new Document();
         if ((i % 5) != 0) // some documents must not have an entry in the first
         {
             // sort field
             doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES));
         }
         if ((i % 7) == 0) // some documents to match the query (see below)
         {
             doc.Add(NewTextField("content", "test", Field.Store.YES));
         }
         // every document has a defined 'mandant' field
         doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
     Query = new TermQuery(new Term("content", "test"));
 }
Ejemplo n.º 19
0
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.NO);
            doc.Add(field);

            NumberFormatInfo df = new NumberFormatInfo();
            df.NumberDecimalDigits = 0;

            //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString(df);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp searcher=" + Searcher);
            }
        }
Ejemplo n.º 20
0
        public static void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            for (int i = 0; i < DocFields.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, DocFields[i], Field.Store.NO));
                writer.AddDocument(doc);
            }
            writer.Dispose();
            LittleReader = DirectoryReader.Open(Directory);
            Searcher = NewSearcher(LittleReader);
            // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
            Searcher.Similarity = new DefaultSimilarity();

            // Make big index
            Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT));

            // First multiply small test index:
            MulFactor = 1;
            int docCount = 0;
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now copy index...");
            }
            do
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: cycle...");
                }
                Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT));
                RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2);
                w.AddIndexes(copy);
                docCount = w.MaxDoc();
                w.Dispose();
                MulFactor *= 2;
            } while (docCount < 3000);

            RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc_ = new Document();
            doc_.Add(NewTextField("field2", "xxx", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            doc_ = new Document();
            doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            Reader = riw.Reader;
            BigSearcher = NewSearcher(Reader);
            riw.Dispose();
        }
Ejemplo n.º 21
0
 public virtual void Test()
 {
     Directory dir = NewDirectory();
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir);
     Document doc = new Document();
     doc.Add(new TextField("eng", new BugReproTokenStream()));
     riw.AddDocument(doc);
     riw.Dispose();
     dir.Dispose();
 }
Ejemplo n.º 22
0
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();

            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);
            Document doc = new Document();

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random().NextBoolean())
            {
                ft.StoreTermVectors = true;
                ft.StoreTermVectorPositions = Random().NextBoolean();
                ft.StoreTermVectorOffsets = Random().NextBoolean();
            }
            Token[] tokens = new Token[] { MakeToken("a", 1, 0, 6), MakeToken("b", 1, 8, 9), MakeToken("a", 1, 9, 17), MakeToken("c", 1, 19, 50) };
            doc.Add(new Field("content", new CannedTokenStream(tokens), ft));

            w.AddDocument(doc);
            IndexReader r = w.Reader;
            w.Dispose();

            DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("a"));
            Assert.IsNotNull(dp);
            Assert.AreEqual(0, dp.NextDoc());
            Assert.AreEqual(2, dp.Freq());
            Assert.AreEqual(0, dp.NextPosition());
            Assert.AreEqual(0, dp.StartOffset());
            Assert.AreEqual(6, dp.EndOffset());
            Assert.AreEqual(2, dp.NextPosition());
            Assert.AreEqual(9, dp.StartOffset());
            Assert.AreEqual(17, dp.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());

            dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("b"));
            Assert.IsNotNull(dp);
            Assert.AreEqual(0, dp.NextDoc());
            Assert.AreEqual(1, dp.Freq());
            Assert.AreEqual(1, dp.NextPosition());
            Assert.AreEqual(8, dp.StartOffset());
            Assert.AreEqual(9, dp.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());

            dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("c"));
            Assert.IsNotNull(dp);
            Assert.AreEqual(0, dp.NextDoc());
            Assert.AreEqual(1, dp.Freq());
            Assert.AreEqual(3, dp.NextPosition());
            Assert.AreEqual(19, dp.StartOffset());
            Assert.AreEqual(50, dp.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());

            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 23
0
        public virtual void TestBefore()
        {
            // create an index
            Directory indexStore = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), indexStore);

            long now = DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond;

            Document doc = new Document();
            // add time that is in the past
            doc.Add(NewStringField("datefield", DateTools.TimeToString(now - 1000, DateTools.Resolution.MILLISECOND), Field.Store.YES));
            doc.Add(NewTextField("body", "Today is a very sunny day in New York City", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader reader = writer.Reader;
            writer.Dispose();
            IndexSearcher searcher = NewSearcher(reader);

            // filter that should preserve matches
            // DateFilter df1 = DateFilter.Before("datefield", now);
            TermRangeFilter df1 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now, DateTools.Resolution.MILLISECOND), false, true);
            // filter that should discard matches
            // DateFilter df2 = DateFilter.Before("datefield", now - 999999);
            TermRangeFilter df2 = TermRangeFilter.NewStringRange("datefield", DateTools.TimeToString(0, DateTools.Resolution.MILLISECOND), DateTools.TimeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false);

            // search something that doesn't exist with DateFilter
            Query query1 = new TermQuery(new Term("body", "NoMatchForthis"));

            // search for something that does exists
            Query query2 = new TermQuery(new Term("body", "sunny"));

            ScoreDoc[] result;

            // ensure that queries return expected results without DateFilter first
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(1, result.Length);

            // run queries with DateFilter
            result = searcher.Search(query1, df1, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            result = searcher.Search(query1, df2, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            result = searcher.Search(query2, df1, 1000).ScoreDocs;
            Assert.AreEqual(1, result.Length);

            result = searcher.Search(query2, df2, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            reader.Dispose();
            indexStore.Dispose();
        }
Ejemplo n.º 24
0
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
     Document doc = new Document();
     doc.Add(NewTextField(FN, "the quick brown fox jumps over the lazy ??? dog 493432 49344", Field.Store.NO));
     writer.AddDocument(doc);
     Reader = writer.Reader;
     writer.Dispose();
     Searcher = NewSearcher(Reader);
 }
Ejemplo n.º 25
0
        public override void SetUp()
        {
            base.SetUp();
            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), Similarity, TimeZone);

            AddDoc(writer, @"admin guest", @"010", @"20040101", @"Y");
            AddDoc(writer, @"guest", @"020", @"20040101", @"Y");
            AddDoc(writer, @"guest", @"020", @"20050101", @"Y");
            AddDoc(writer, @"admin", @"020", @"20050101", @"Maybe");
            AddDoc(writer, @"admin guest", @"030", @"20050101", @"N");
            reader = SlowCompositeReaderWrapper.Wrap(writer.Reader);
            writer.Dispose();
        }
Ejemplo n.º 26
0
        public override void SetUp()
        {
            base.SetUp();
            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            // Add series of docs with specific information for MoreLikeThis
            AddDoc(writer, "lucene");
            AddDoc(writer, "lucene release");

            reader = writer.Reader;
            writer.Dispose();
            searcher = NewSearcher(reader);
        }
Ejemplo n.º 27
0
 public override void SetUp()
 {
     base.SetUp();
     // create test index
     MDirectory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), MDirectory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity()));
     AddDocument(writer, "1", "I think it should work.");
     AddDocument(writer, "2", "I think it should work.");
     AddDocument(writer, "3", "I think it should work.");
     AddDocument(writer, "4", "I think it should work.");
     Reader = writer.Reader;
     writer.Dispose();
     Searcher = NewSearcher(Reader);
     Searcher.Similarity = new DefaultSimilarity();
 }
Ejemplo n.º 28
0
 public override void SetUp()
 {
     base.SetUp();
     Dir = NewDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir, Similarity, TimeZone);
     for (int i = 0; i < 100; i++)
     {
         Document doc = new Document();
         doc.Add(NewStringField("field", Convert.ToString(i), Field.Store.NO));
         doc.Add(NewStringField("field2", Convert.ToString(i % 2 == 0), Field.Store.NO));
         iw.AddDocument(doc);
     }
     Reader = iw.Reader;
     iw.Dispose();
 }
Ejemplo n.º 29
0
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
     for (int i = 0; i < DocFields.Length; i++)
     {
         Document doc = new Document();
         doc.Add(NewTextField(field, DocFields[i], Field.Store.YES));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
     Searcher = NewSearcher(Reader);
 }
Ejemplo n.º 30
0
 public override void SetUp()
 {
     base.SetUp();
     dir = NewDirectory();
     IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     iwConfig.SetMergePolicy(NewLogMergePolicy());
     RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwConfig);
     Document document = new Document();
     Field idField = new StringField("id", "", Field.Store.NO);
     document.Add(idField);
     iw.AddDocument(document);
     ir = iw.Reader;
     @is = NewSearcher(ir);
     iw.Dispose();
 }
Ejemplo n.º 31
0
        public override void BeforeClass()
        {
            base.BeforeClass();

            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            for (int i = 0; i < DocFields.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, DocFields[i], Field.Store.NO));
                writer.AddDocument(doc);
            }
            writer.Dispose();
            LittleReader = DirectoryReader.Open(Directory);
            Searcher     = NewSearcher(LittleReader);
            // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
            Searcher.Similarity = new DefaultSimilarity();

            // Make big index
            Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT));

            // First multiply small test index:
            MulFactor = 1;
            int docCount = 0;

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now copy index...");
            }
            do
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: cycle...");
                }
                Directory         copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT));
                RandomIndexWriter w    = new RandomIndexWriter(Random(), Dir2, Similarity, TimeZone);
                w.AddIndexes(copy);
                docCount = w.MaxDoc;
                w.Dispose();
                MulFactor *= 2;
            } while (docCount < 3000);

            RandomIndexWriter riw  = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document          doc_ = new Document();

            doc_.Add(NewTextField("field2", "xxx", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            doc_ = new Document();
            doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            Reader      = riw.Reader;
            BigSearcher = NewSearcher(Reader);
            riw.Dispose();
        }
Ejemplo n.º 32
0
        public void TestSimpleWithScoring()
        {
            const string idField = "id";
            const string toField = "movieId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "A random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for movie via subtitle
            Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField,
                                                       new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
            TopDocs result = indexSearcher.Search(joinQuery, 10);

            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            // Score mode max.
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Max);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            // Score mode total
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Total);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            //Score mode avg
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Avg);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 33
0
        public virtual void TestSparseFacets()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();

            Document doc = new Document();

            doc.Add(new IntField("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new IntField("num", 20, Field.Store.NO));
            doc.Add(new FacetField("a", "foo2"));
            doc.Add(new FacetField("b", "bar1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new IntField("num", 30, Field.Store.NO));
            doc.Add(new FacetField("a", "foo3"));
            doc.Add(new FacetField("b", "bar2"));
            doc.Add(new FacetField("c", "baz1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.AreEqual(3, results.Count);
            Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n  foo3 (30.0)\n  foo2 (20.0)\n  foo1 (10.0)\n", results[0].ToString());
            Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n  bar2 (30.0)\n  bar1 (20.0)\n", results[1].ToString());
            Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n  baz1 (30.0)\n", results[2].ToString());

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Ejemplo n.º 34
0
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();

            // Reused across documents, to add the necessary facet
            // fields:
            Document doc = new Document();

            doc.Add(new IntField("num", 10, Field.Store.NO));
            doc.Add(new FacetField("Author", "Bob"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 20, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 30, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 40, Field.Store.NO));
            doc.Add(new FacetField("Author", "Susan"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new IntField("num", 45, Field.Store.NO));
            doc.Add(new FacetField("Author", "Frank"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query and one of the
            // Facets.search utility methods:
            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new IntFieldSource("num"));

            // Retrieve & verify results:
            Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n  Lisa (50.0)\n  Frank (45.0)\n  Susan (40.0)\n  Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString());

            taxoReader.Dispose();
            searcher.IndexReader.Dispose();
            dir.Dispose();
            taxoDir.Dispose();
        }
Ejemplo n.º 35
0
        public virtual void TestSimple()
        {
            Random random = Random();

            DocValuesType[] dvTypes = new DocValuesType[] {
                DocValuesType.NUMERIC,
                DocValuesType.BINARY,
                DocValuesType.SORTED,
            };
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
            bool          canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

            Document doc = new Document();

            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random text", Field.Store.NO));
            doc.Add(new StringField("id", "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(new StringField("id", "2", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "2", dvType);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(new StringField("id", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddField(doc, groupField, "2", dvType);
            doc.Add(new TextField("content", "some random text", Field.Store.NO));
            doc.Add(new StringField("id", "4", Field.Store.NO));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text", Field.Store.NO));
            doc.Add(new StringField("id", "5", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random blob", Field.Store.NO));
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            AddField(doc, countField, "1", dvType);
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.Reader);

            w.Dispose();

            var cmp = new ComparerAnonymousHelper1(this);

            // === Search for content:random
            IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
            IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector
                = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);

            //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);

            gcs.Sort(cmp);
            assertEquals(4, gcs.Count);

            CompareNull(gcs[0].GroupValue);
            List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues);

            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            Compare("1", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            countValues.Sort(nullComparer);
            assertEquals(2, countValues.size());
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[3].GroupValue);
            countValues = new List <IComparable>(gcs[3].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:some
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(3, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            assertEquals(2, countValues.size());
            countValues.Sort(nullComparer);
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:blob
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(2, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            // B/c the only one document matched with blob inside the author 1 group
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            Compare("3", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 36
0
        public override void SetUp()
        {
            base.SetUp();

            AllSortFields = new List <SortField>(Arrays.AsList(new SortField[] { new SortField("byte", SortField.Type_e.BYTE, false), new SortField("short", SortField.Type_e.SHORT, false), new SortField("int", SortField.Type_e.INT, false), new SortField("long", SortField.Type_e.LONG, false), new SortField("float", SortField.Type_e.FLOAT, false), new SortField("double", SortField.Type_e.DOUBLE, false), new SortField("bytes", SortField.Type_e.STRING, false), new SortField("bytesval", SortField.Type_e.STRING_VAL, false), new SortField("byte", SortField.Type_e.BYTE, true), new SortField("short", SortField.Type_e.SHORT, true), new SortField("int", SortField.Type_e.INT, true), new SortField("long", SortField.Type_e.LONG, true), new SortField("float", SortField.Type_e.FLOAT, true), new SortField("double", SortField.Type_e.DOUBLE, true), new SortField("bytes", SortField.Type_e.STRING, true), new SortField("bytesval", SortField.Type_e.STRING_VAL, true), SortField.FIELD_SCORE, SortField.FIELD_DOC }));

            if (SupportsDocValues)
            {
                AllSortFields.AddRange(Arrays.AsList(new SortField[] { new SortField("intdocvalues", SortField.Type_e.INT, false), new SortField("floatdocvalues", SortField.Type_e.FLOAT, false), new SortField("sortedbytesdocvalues", SortField.Type_e.STRING, false), new SortField("sortedbytesdocvaluesval", SortField.Type_e.STRING_VAL, false), new SortField("straightbytesdocvalues", SortField.Type_e.STRING_VAL, false), new SortField("intdocvalues", SortField.Type_e.INT, true), new SortField("floatdocvalues", SortField.Type_e.FLOAT, true), new SortField("sortedbytesdocvalues", SortField.Type_e.STRING, true), new SortField("sortedbytesdocvaluesval", SortField.Type_e.STRING_VAL, true), new SortField("straightbytesdocvalues", SortField.Type_e.STRING_VAL, true) }));
            }

            // Also test missing first / last for the "string" sorts:
            foreach (string field in new string[] { "bytes", "sortedbytesdocvalues" })
            {
                for (int rev = 0; rev < 2; rev++)
                {
                    bool      reversed = rev == 0;
                    SortField sf       = new SortField(field, SortField.Type_e.STRING, reversed);
                    sf.MissingValue = SortField.STRING_FIRST;
                    AllSortFields.Add(sf);

                    sf = new SortField(field, SortField.Type_e.STRING, reversed);
                    sf.MissingValue = SortField.STRING_LAST;
                    AllSortFields.Add(sf);
                }
            }

            int limit = AllSortFields.Count;

            for (int i = 0; i < limit; i++)
            {
                SortField sf = AllSortFields[i];
                if (sf.Type == SortField.Type_e.INT)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.INT, sf.Reverse);
                    sf2.MissingValue = Random().Next();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortField.Type_e.LONG)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.LONG, sf.Reverse);
                    sf2.MissingValue = Random().NextLong();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortField.Type_e.FLOAT)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.FLOAT, sf.Reverse);
                    sf2.MissingValue = (float)Random().NextDouble();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortField.Type_e.DOUBLE)
                {
                    SortField sf2 = new SortField(sf.Field, SortField.Type_e.DOUBLE, sf.Reverse);
                    sf2.MissingValue = Random().NextDouble();
                    AllSortFields.Add(sf2);
                }
            }

            Dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
            int numDocs          = AtLeast(200);

            for (int i = 0; i < numDocs; i++)
            {
                IList <Field> fields = new List <Field>();
                fields.Add(NewTextField("english", English.IntToEnglish(i), Field.Store.NO));
                fields.Add(NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
                fields.Add(NewStringField("byte", "" + ((sbyte)Random().Next()), Field.Store.NO));
                fields.Add(NewStringField("short", "" + ((short)Random().Next()), Field.Store.NO));
                fields.Add(new IntField("int", Random().Next(), Field.Store.NO));
                fields.Add(new LongField("long", Random().NextLong(), Field.Store.NO));

                fields.Add(new FloatField("float", (float)Random().NextDouble(), Field.Store.NO));
                fields.Add(new DoubleField("double", Random().NextDouble(), Field.Store.NO));
                fields.Add(NewStringField("bytes", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO));
                fields.Add(NewStringField("bytesval", TestUtil.RandomRealisticUnicodeString(Random()), Field.Store.NO));
                fields.Add(new DoubleField("double", Random().NextDouble(), Field.Store.NO));

                if (SupportsDocValues)
                {
                    fields.Add(new NumericDocValuesField("intdocvalues", Random().Next()));
                    fields.Add(new FloatDocValuesField("floatdocvalues", (float)Random().NextDouble()));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()))));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()))));
                    fields.Add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()))));
                }
                Document document = new Document();
                document.Add(new StoredField("id", "" + i));
                if (VERBOSE)
                {
                    Console.WriteLine("  add doc id=" + i);
                }
                foreach (Field field in fields)
                {
                    // So we are sometimes missing that field:
                    if (Random().Next(5) != 4)
                    {
                        document.Add(field);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + field);
                        }
                    }
                }

                iw.AddDocument(document);

                if (Random().Next(50) == 17)
                {
                    iw.Commit();
                }
            }
            Reader = iw.Reader;
            iw.Dispose();
            Searcher = NewSearcher(Reader);
            if (VERBOSE)
            {
                Console.WriteLine("  searcher=" + Searcher);
            }
        }
        public virtual void TestPostings()
        {
            Directory         dir = NewFSDirectory(CreateTempDir("postings"));
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetCodec(Codec.ForName("Lucene40"));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            Document doc = new Document();

            // id field
            FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);

            idType.StoreTermVectors = true;
            Field idField = new Field("id", "", idType);

            doc.Add(idField);

            // title field: short text field
            FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);

            titleType.StoreTermVectors         = true;
            titleType.StoreTermVectorPositions = true;
            titleType.StoreTermVectorOffsets   = true;
            titleType.IndexOptions             = IndexOptions();
            Field titleField = new Field("title", "", titleType);

            doc.Add(titleField);

            // body field: long text field
            FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);

            bodyType.StoreTermVectors         = true;
            bodyType.StoreTermVectorPositions = true;
            bodyType.StoreTermVectorOffsets   = true;
            bodyType.IndexOptions             = IndexOptions();
            Field bodyField = new Field("body", "", bodyType);

            doc.Add(bodyField);

            int numDocs = AtLeast(1000);

            for (int i = 0; i < numDocs; i++)
            {
                idField.SetStringValue(Convert.ToString(i));
                titleField.SetStringValue(FieldValue(1));
                bodyField.SetStringValue(FieldValue(3));
                iw.AddDocument(doc);
                if (Random().Next(20) == 0)
                {
                    iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
                }
            }
            if (Random().NextBoolean())
            {
                // delete 1-100% of docs
                iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)]));
            }
            iw.Dispose();
            dir.Dispose(); // checkindex
        }
Ejemplo n.º 38
0
        public virtual void TestDeMorgan()
        {
            Directory         dir1 = NewDirectory();
            RandomIndexWriter iw1  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir1);
            Document doc1 = new Document();

            doc1.Add(NewTextField("field", "foo bar", Field.Store.NO));
            iw1.AddDocument(doc1);
            IndexReader reader1 = iw1.GetReader();

            iw1.Dispose();

            Directory         dir2 = NewDirectory();
            RandomIndexWriter iw2  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir2);
            Document doc2 = new Document();

            doc2.Add(NewTextField("field", "foo baz", Field.Store.NO));
            iw2.AddDocument(doc2);
            IndexReader reader2 = iw2.GetReader();

            iw2.Dispose();

            BooleanQuery query = new BooleanQuery(); // Query: +foo -ba*

            query.Add(new TermQuery(new Term("field", "foo")), Occur.MUST);
            WildcardQuery wildcardQuery = new WildcardQuery(new Term("field", "ba*"));

            wildcardQuery.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            query.Add(wildcardQuery, Occur.MUST_NOT);

            MultiReader   multireader = new MultiReader(reader1, reader2);
            IndexSearcher searcher    = NewSearcher(multireader);

            Assert.AreEqual(0, searcher.Search(query, 10).TotalHits);


            Task foo = new Task(TestDeMorgan);

            TaskScheduler es = TaskScheduler.Default;

            searcher = new IndexSearcher(multireader, es);
            if (VERBOSE)
            {
                Console.WriteLine("rewritten form: " + searcher.Rewrite(query));
            }
            Assert.AreEqual(0, searcher.Search(query, 10).TotalHits);

            multireader.Dispose();
            reader1.Dispose();
            reader2.Dispose();
            dir1.Dispose();
            dir2.Dispose();
        }
Ejemplo n.º 39
0
        public virtual void TestBS2DisjunctionNextVsAdvance()
        {
            Directory         d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, d);
            int numDocs = AtLeast(300);

            for (int docUpto = 0; docUpto < numDocs; docUpto++)
            {
                string contents = "a";
                if (Random.Next(20) <= 16)
                {
                    contents += " b";
                }
                if (Random.Next(20) <= 8)
                {
                    contents += " c";
                }
                if (Random.Next(20) <= 4)
                {
                    contents += " d";
                }
                if (Random.Next(20) <= 2)
                {
                    contents += " e";
                }
                if (Random.Next(20) <= 1)
                {
                    contents += " f";
                }
                Document doc = new Document();
                doc.Add(new TextField("field", contents, Field.Store.NO));
                w.AddDocument(doc);
            }
            w.ForceMerge(1);
            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            w.Dispose();

            for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("iter=" + iter);
                }
                IList <string> terms    = new List <string>(Arrays.AsList("a", "b", "c", "d", "e", "f"));
                int            numTerms = TestUtil.NextInt32(Random, 1, terms.Count);
                while (terms.Count > numTerms)
                {
                    terms.RemoveAt(Random.Next(terms.Count));
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  terms=" + terms);
                }

                BooleanQuery q = new BooleanQuery();
                foreach (string term in terms)
                {
                    q.Add(new BooleanClause(new TermQuery(new Term("field", term)), Occur.SHOULD));
                }

                Weight weight = s.CreateNormalizedWeight(q);

                Scorer scorer = weight.GetScorer(s.m_leafContexts[0], null);

                // First pass: just use .NextDoc() to gather all hits
                IList <ScoreDoc> hits = new List <ScoreDoc>();
                while (scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    hits.Add(new ScoreDoc(scorer.DocID, scorer.GetScore()));
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  " + hits.Count + " hits");
                }

                // Now, randomly next/advance through the list and
                // verify exact match:
                for (int iter2 = 0; iter2 < 10; iter2++)
                {
                    weight = s.CreateNormalizedWeight(q);
                    scorer = weight.GetScorer(s.m_leafContexts[0], null);

                    if (VERBOSE)
                    {
                        Console.WriteLine("  iter2=" + iter2);
                    }

                    int upto = -1;
                    while (upto < hits.Count)
                    {
                        int nextUpto;
                        int nextDoc;
                        int left = hits.Count - upto;
                        if (left == 1 || Random.nextBoolean())
                        {
                            // next
                            nextUpto = 1 + upto;
                            nextDoc  = scorer.NextDoc();
                        }
                        else
                        {
                            // advance
                            int inc = TestUtil.NextInt32(Random, 1, left - 1);
                            nextUpto = inc + upto;
                            nextDoc  = scorer.Advance(hits[nextUpto].Doc);
                        }

                        if (nextUpto == hits.Count)
                        {
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, nextDoc);
                        }
                        else
                        {
                            ScoreDoc hit = hits[nextUpto];
                            Assert.AreEqual(hit.Doc, nextDoc);
                            // Test for precise float equality:
                            Assert.IsTrue(hit.Score == scorer.GetScore(), "doc " + hit.Doc + " has wrong score: expected=" + hit.Score + " actual=" + scorer.GetScore());
                        }
                        upto = nextUpto;
                    }
                }
            }

            r.Dispose();
            d.Dispose();
        }
Ejemplo n.º 40
0
        public virtual void TestNullOrSubScorer()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document doc = new Document();

            doc.Add(NewTextField("field", "a b c d", Field.Store.NO));
            w.AddDocument(doc);

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            // this test relies upon coord being the default implementation,
            // otherwise scores are different!
            s.Similarity = new DefaultSimilarity();

            BooleanQuery q = new BooleanQuery();

            q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);

            // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
            float score    = s.Search(q, 10).MaxScore;
            Query subQuery = new TermQuery(new Term("field", "not_in_index"));

            subQuery.Boost = 0;
            q.Add(subQuery, Occur.SHOULD);
            float score2 = s.Search(q, 10).MaxScore;

            Assert.AreEqual(score * .5F, score2, 1e-6);

            // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
            BooleanQuery qq     = (BooleanQuery)q.Clone();
            PhraseQuery  phrase = new PhraseQuery();

            phrase.Add(new Term("field", "not_in_index"));
            phrase.Add(new Term("field", "another_not_in_index"));
            phrase.Boost = 0;
            qq.Add(phrase, Occur.SHOULD);
            score2 = s.Search(qq, 10).MaxScore;
            Assert.AreEqual(score * (1 / 3F), score2, 1e-6);

            // now test BooleanScorer2
            subQuery       = new TermQuery(new Term("field", "b"));
            subQuery.Boost = 0;
            q.Add(subQuery, Occur.MUST);
            score2 = s.Search(q, 10).MaxScore;
            Assert.AreEqual(score * (2 / 3F), score2, 1e-6);

            // PhraseQuery w/ no terms added returns a null scorer
            PhraseQuery pq = new PhraseQuery();

            q.Add(pq, Occur.SHOULD);
            Assert.AreEqual(1, s.Search(q, 10).TotalHits);

            // A required clause which returns null scorer should return null scorer to
            // IndexSearcher.
            q  = new BooleanQuery();
            pq = new PhraseQuery();
            q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
            q.Add(pq, Occur.MUST);
            Assert.AreEqual(0, s.Search(q, 10).TotalHits);

            DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);

            dmq.Add(new TermQuery(new Term("field", "a")));
            dmq.Add(pq);
            Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 41
0
        public void TestFuzziness()
        {
            //every test with SlowFuzzyQuery.defaultMinSimilarity
            //is exercising the Automaton, not the brute force linear method

            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(Random, directory, Similarity, TimeZone);

            addDoc("aaaaa", writer);
            addDoc("aaaab", writer);
            addDoc("aaabb", writer);
            addDoc("aabbb", writer);
            addDoc("abbbb", writer);
            addDoc("bbbbb", writer);
            addDoc("ddddd", writer);

            IndexReader   reader   = writer.GetReader();
            IndexSearcher searcher = NewSearcher(reader);

            writer.Dispose();

            SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);

            // same with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(2, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);

            // test scoring
            query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals("3 documents should match", 3, hits.Length);
            List <String> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");

            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].score);
                assertEquals(order[i], term);
            }

            // test pq size by supplying maxExpansions=2
            // This query would normally return 3 documents, because 3 terms match (see above):
            query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals("only 2 documents should match", 2, hits.Length);
            order = Arrays.AsList("bbbbb", "abbbb");
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].score);
                assertEquals(order[i], term);
            }

            // not similar enough:
            query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0);   // edit distance to "aaaaa" = 3
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // query identical to a word in the index:
            query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            // default allows for up to two edits:
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // query similar to a word in the index:
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(3, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(2, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);


            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);


            // different field = no match:
            query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            reader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 42
0
        public void TestFuzzinessLong()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(Random, directory, Similarity, TimeZone);

            addDoc("aaaaaaa", writer);
            addDoc("segment", writer);

            IndexReader   reader   = writer.GetReader();
            IndexSearcher searcher = NewSearcher(reader);

            writer.Dispose();

            SlowFuzzyQuery query;

            // not similar enough:
            query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);
            // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
            // in testDefaultFuzziness so a bigger difference is allowed:
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
            query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // no match, more than half of the characters is wrong:
            query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // "student" and "stellent" are indeed similar to "segment" by default:
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);

            // now with prefix
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(1, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);
            query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            // "student" doesn't match anymore thanks to increased minimum similarity:
            query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            assertEquals(0, hits.Length);

            try
            {
                query = new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
                fail("Expected IllegalArgumentException");
            }
#pragma warning disable 168
            catch (ArgumentException e)
#pragma warning restore 168
            {
                // expecting exception
            }
            try
            {
                query = new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
                fail("Expected IllegalArgumentException");
            }
#pragma warning disable 168
            catch (ArgumentException e)
#pragma warning restore 168
            {
                // expecting exception
            }

            reader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 43
0
        public virtual void Test()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            long startTime = Environment.TickCount;

            // TODO: replace w/ the @nightly test data; make this
            // into an optional @nightly stress test
            Document doc  = new Document();
            Field    body = NewTextField("body", "", Field.Store.NO);

            doc.Add(body);
            StringBuilder sb = new StringBuilder();

            for (int docCount = 0; docCount < NUM_DOCS; docCount++)
            {
                int numTerms = Random.Next(10);
                for (int termCount = 0; termCount < numTerms; termCount++)
                {
                    sb.Append(Random.NextBoolean() ? "aaa" : "bbb");
                    sb.Append(' ');
                }
                body.SetStringValue(sb.ToString());
                w.AddDocument(doc);
                sb.Remove(0, sb.Length);
            }
            IndexReader r = w.GetReader();

            w.Dispose();

            long endTime = Environment.TickCount;

            if (Verbose)
            {
                Console.WriteLine("BUILD took " + (endTime - startTime));
            }

            IndexSearcher s = NewSearcher(r);

            AtomicBoolean failed    = new AtomicBoolean();
            AtomicInt64   netSearch = new AtomicInt64();

            ThreadJob[] threads = new ThreadJob[NUM_SEARCH_THREADS];
            for (int threadID = 0; threadID < NUM_SEARCH_THREADS; threadID++)
            {
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, s, failed, netSearch);
                threads[threadID].IsBackground = (true);
            }

            foreach (ThreadJob t in threads)
            {
                t.Start();
            }

            foreach (ThreadJob t in threads)
            {
                t.Join();
            }

            if (Verbose)
            {
                Console.WriteLine(NUM_SEARCH_THREADS + " threads did " + netSearch + " searches");
            }

            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 44
0
        public virtual void TestTransitionAPI()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
#pragma warning disable 612, 618
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
#pragma warning restore 612, 618
            w.AddDocument(doc);
            IndexReader r = w.GetReader();
            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");
            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.GetTerms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Count);
                TermsEnum tvsEnum = tvs.GetIterator(null);
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next());
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv", StringComparison.Ordinal))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next());
                Assert.IsNull(tvsEnum.Next());
            }

            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 45
0
        private IndexContext CreateIndexContext()
        {
            Random random = Random();

            DocValuesType[] dvTypes = new DocValuesType[] {
                DocValuesType.BINARY,
                DocValuesType.SORTED
            };

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
                );

            bool          canUseDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

            int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;

            string[] groupValues = new string[numDocs / 5];
            string[] countValues = new string[numDocs / 10];
            for (int i = 0; i < groupValues.Length; i++)
            {
                groupValues[i] = GenerateRandomNonEmptyString();
            }
            for (int i = 0; i < countValues.Length; i++)
            {
                countValues[i] = GenerateRandomNonEmptyString();
            }

            List <string> contentStrings = new List <string>();
            IDictionary <string, IDictionary <string, ISet <string> > > searchTermToGroupCounts = new HashMap <string, IDictionary <string, ISet <string> > >();

            for (int i = 1; i <= numDocs; i++)
            {
                string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
                string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
                string content    = "random" + random.nextInt(numDocs / 20);
                IDictionary <string, ISet <string> > groupToCounts;
                if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
                {
                    // Groups sort always DOCID asc...
                    searchTermToGroupCounts.Add(content, groupToCounts = new LinkedHashMap <string, ISet <string> >());
                    contentStrings.Add(content);
                }

                ISet <string> countsVals;
                if (!groupToCounts.TryGetValue(groupValue, out countsVals))
                {
                    groupToCounts.Add(groupValue, countsVals = new HashSet <string>());
                }
                countsVals.Add(countValue);

                Document doc = new Document();
                doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
                if (groupValue != null)
                {
                    AddField(doc, groupField, groupValue, dvType);
                }
                if (countValue != null)
                {
                    AddField(doc, countField, countValue, dvType);
                }
                doc.Add(new TextField("content", content, Field.Store.YES));
                w.AddDocument(doc);
            }

            DirectoryReader reader = w.Reader;

            if (VERBOSE)
            {
                for (int docID = 0; docID < reader.MaxDoc; docID++)
                {
                    Document doc = reader.Document(docID);
                    Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
                }
            }

            w.Dispose();
            return(new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/)));
        }
Ejemplo n.º 46
0
        public void assertFromTestData(int[] codePointTable)
        {
            if (VERBOSE)
            {
                Console.WriteLine("TEST: codePointTable=" + codePointTable);
            }
            Stream     stream = GetType().getResourceAsStream("fuzzyTestData.txt");
            TextReader reader = new StreamReader(stream, Encoding.UTF8);

            int bits  = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
            int terms = (int)Math.Pow(2, bits);

            Store.Directory   dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));

            Document doc   = new Document();
            Field    field = NewTextField("field", "", Field.Store.NO);

            doc.Add(field);

            for (int i = 0; i < terms; i++)
            {
                field.SetStringValue(MapInt(codePointTable, i));
                writer.AddDocument(doc);
            }

            IndexReader   r        = writer.GetReader();
            IndexSearcher searcher = NewSearcher(r);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: searcher=" + searcher);
            }
            // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
            // otherwise scores are different!
            searcher.Similarity = (new DefaultSimilarity());

            writer.Dispose();
            String line;
            int    lineNum = 0;

            while ((line = reader.ReadLine()) != null)
            {
                lineNum++;
                String[] @params  = line.Split(',').TrimEnd();
                String   query    = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
                int      prefix   = int.Parse(@params[1], CultureInfo.InvariantCulture);
                int      pqSize   = int.Parse(@params[2], CultureInfo.InvariantCulture);
                float    minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
#pragma warning disable 612, 618
                SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
#pragma warning restore 612, 618
                q.MultiTermRewriteMethod = new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize);
                int     expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
                TopDocs docs            = searcher.Search(q, expectedResults);
                assertEquals(expectedResults, docs.TotalHits);
                for (int i = 0; i < expectedResults; i++)
                {
                    String[] scoreDoc = reader.ReadLine().Split(',').TrimEnd();
                    assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
                    assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
                }
            }
            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 47
0
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(new IntField("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new IntFieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.True(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
Ejemplo n.º 48
0
        public virtual void Test()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            LineFileDocs docs         = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            int          charsToIndex = AtLeast(100000);
            int          charsIndexed = 0;

            //System.out.println("bytesToIndex=" + charsToIndex);
            while (charsIndexed < charsToIndex)
            {
                Document doc = docs.NextDoc();
                charsIndexed += doc.Get("body").Length;
                w.AddDocument(doc);
                //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
            }
            IndexReader r = w.GetReader();

            //System.out.println("numDocs=" + r.NumDocs);
            w.Dispose();

            IndexSearcher s         = NewSearcher(r);
            Terms         terms     = MultiFields.GetFields(r).GetTerms("body");
            int           termCount = 0;
            TermsEnum     termsEnum = terms.GetIterator(null);

            while (termsEnum.Next() != null)
            {
                termCount++;
            }
            Assert.IsTrue(termCount > 0);

            // Target ~10 terms to search:
            double chance = 10.0 / termCount;

            termsEnum = terms.GetIterator(termsEnum);
            IDictionary <BytesRef, TopDocs> answers = new Dictionary <BytesRef, TopDocs>();

            while (termsEnum.Next() != null)
            {
                if (Random.NextDouble() <= chance)
                {
                    BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term);
                    answers[term] = s.Search(new TermQuery(new Term("body", term)), 100);
                }
            }

            if (answers.Count > 0)
            {
                CountdownEvent startingGun = new CountdownEvent(1);
                int            numThreads  = TestUtil.NextInt32(Random, 2, 5);
                ThreadClass[]  threads     = new ThreadClass[numThreads];
                for (int threadID = 0; threadID < numThreads; threadID++)
                {
                    ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, s, answers, startingGun);
                    threads[threadID] = thread;
                    thread.Start();
                }
                startingGun.Signal();
                foreach (ThreadClass thread in threads)
                {
                    thread.Join();
                }
            }
            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 49
0
        private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
                                       int numberOfDocumentsToIndex)
        {
            for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("indexIter=" + indexIter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                              NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                                                              .SetMergePolicy(NewLogMergePolicy()));
                bool scoreDocsInOrder         = TestJoinUtil.Random.NextBoolean();
                IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
                                                              scoreDocsInOrder);

                IndexReader topLevelReader = w.GetReader();
                w.Dispose();
                for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("searchIter=" + searchIter);
                    }
                    IndexSearcher indexSearcher = NewSearcher(topLevelReader);

                    int         r              = Random.Next(context.RandomUniqueValues.Length);
                    bool        from           = context.RandomFrom[r];
                    string      randomValue    = context.RandomUniqueValues[r];
                    FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
                                                                      context);

                    Query actualQuery = new TermQuery(new Term("value", randomValue));
                    if (Verbose)
                    {
                        Console.WriteLine("actualQuery=" + actualQuery);
                    }

                    var       scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
                    ScoreMode scoreMode       = (ScoreMode)Random.Next(scoreModeLength);
                    if (Verbose)
                    {
                        Console.WriteLine("scoreMode=" + scoreMode);
                    }

                    Query joinQuery;
                    if (from)
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    else
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    if (Verbose)
                    {
                        Console.WriteLine("joinQuery=" + joinQuery);
                    }

                    // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
                    FixedBitSet          actualResult         = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
                    indexSearcher.Search(joinQuery,
                                         new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult,
                                                                                 topScoreDocCollector));
                    // Asserting bit set...
                    if (Verbose)
                    {
                        Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                        DocIdSetIterator iterator = expectedResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                        Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                        iterator = actualResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                    }
                    assertEquals(expectedResult, actualResult);

                    // Asserting TopDocs...
                    TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
                    TopDocs actualTopDocs   = topScoreDocCollector.GetTopDocs();
                    assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
                    assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
                    if (scoreMode == ScoreMode.None)
                    {
                        continue;
                    }

                    assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
                    for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
                    {
                        if (Verbose)
                        {
                            string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                            string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score);
                        }
                        assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                        Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
                    }
                }
                topLevelReader.Dispose();
                dir.Dispose();
            }
        }
Ejemplo n.º 50
0
        public void TestMinShouldMatch()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random());
            RandomIndexWriter w        = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);

            string[] docs = new string[]
            {
                @"this is the end of the world right", @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.5F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 2F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.49F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 1F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
                assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score);
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch  = 1F;
                query.HighFreqMinimumNumberShouldMatch = 4F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0F);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(new HashSet <string>(Arrays.AsList(@"2", @"3")), new HashSet <string>(Arrays.AsList(r.Document(search.ScoreDocs[1].Doc).Get(@"id"), r.Document(search.ScoreDocs[2].Doc).Get(@"id"))));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1F;
                query.HighFreqMinimumNumberShouldMatch = 2F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 4);
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1F;
                query.HighFreqMinimumNumberShouldMatch = 2F;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 2);
                assertEquals(new HashSet <string>(Arrays.AsList(@"0", @"2")), new HashSet <string>(Arrays.AsList(r.Document(search.ScoreDocs[0].Doc).Get(@"id"), r.Document(search.ScoreDocs[1].Doc).Get(@"id"))));
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 51
0
        public void TestSimple()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")),
                                                       indexSearcher, ScoreMode.None);

            TopDocs result = indexSearcher.Search(joinQuery, 10);

            assertEquals(2, result.TotalHits);
            assertEquals(4, result.ScoreDocs[0].Doc);
            assertEquals(5, result.ScoreDocs[1].Doc);

            joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(1, result.ScoreDocs[0].Doc);
            assertEquals(2, result.ScoreDocs[1].Doc);

            // Search for offer
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(1, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 52
0
        public virtual void TestRandomPhrases()
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = new MockAnalyzer(Random());

            RandomIndexWriter       w    = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
            IList <IList <string> > docs = new List <IList <string> >();

            Documents.Document d = new Documents.Document();
            Field f = NewTextField("f", "", Field.Store.NO);

            d.Add(f);

            Random r = Random();

            int NUM_DOCS = AtLeast(10);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                // must be > 4096 so it spans multiple chunks
                int termCount = TestUtil.NextInt(Random(), 4097, 8200);

                IList <string> doc = new List <string>();

                StringBuilder sb = new StringBuilder();
                while (doc.Count < termCount)
                {
                    if (r.Next(5) == 1 || docs.Count == 0)
                    {
                        // make new non-empty-string term
                        string term;
                        while (true)
                        {
                            term = TestUtil.RandomUnicodeString(r);
                            if (term.Length > 0)
                            {
                                break;
                            }
                        }
                        IOException priorException = null;
                        TokenStream ts             = analyzer.GetTokenStream("ignore", new StringReader(term));
                        try
                        {
                            ICharTermAttribute termAttr = ts.AddAttribute <ICharTermAttribute>();
                            ts.Reset();
                            while (ts.IncrementToken())
                            {
                                string text = termAttr.ToString();
                                doc.Add(text);
                                sb.Append(text).Append(' ');
                            }
                            ts.End();
                        }
                        catch (IOException e)
                        {
                            priorException = e;
                        }
                        finally
                        {
                            IOUtils.CloseWhileHandlingException(priorException, ts);
                        }
                    }
                    else
                    {
                        // pick existing sub-phrase
                        IList <string> lastDoc = docs[r.Next(docs.Count)];
                        int            len     = TestUtil.NextInt(r, 1, 10);
                        int            start   = r.Next(lastDoc.Count - len);
                        for (int k = start; k < start + len; k++)
                        {
                            string t = lastDoc[k];
                            doc.Add(t);
                            sb.Append(t).Append(' ');
                        }
                    }
                }
                docs.Add(doc);
                f.SetStringValue(sb.ToString());
                w.AddDocument(d);
            }

            IndexReader   reader = w.Reader;
            IndexSearcher s      = NewSearcher(reader);

            w.Dispose();

            // now search
            int num = AtLeast(10);

            for (int i = 0; i < num; i++)
            {
                int            docID = r.Next(docs.Count);
                IList <string> doc   = docs[docID];

                int           numTerm = TestUtil.NextInt(r, 2, 20);
                int           start   = r.Next(doc.Count - numTerm);
                PhraseQuery   pq      = new PhraseQuery();
                StringBuilder sb      = new StringBuilder();
                for (int t = start; t < start + numTerm; t++)
                {
                    pq.Add(new Term("f", doc[t]));
                    sb.Append(doc[t]).Append(' ');
                }

                TopDocs hits  = s.Search(pq, NUM_DOCS);
                bool    found = false;
                for (int j = 0; j < hits.ScoreDocs.Length; j++)
                {
                    if (hits.ScoreDocs[j].Doc == docID)
                    {
                        found = true;
                        break;
                    }
                }

                Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
            }

            reader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 53
0
        public virtual void TestPhraseQueryInConjunctionScorer()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "foobar", Field.Store.YES));
            doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader reader = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term("source", "marketing"));
            phraseQuery.Add(new Term("source", "info"));
            ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(Random(), phraseQuery, searcher, Similarity);

            TermQuery    termQuery    = new TermQuery(new Term("contents", "foobar"));
            BooleanQuery booleanQuery = new BooleanQuery();

            booleanQuery.Add(termQuery, Occur.MUST);
            booleanQuery.Add(phraseQuery, Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            QueryUtils.Check(Random(), termQuery, searcher, Similarity);

            reader.Dispose();

            writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE));
            doc    = new Documents.Document();
            doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES));
            writer.AddDocument(doc);

            reader = writer.Reader;
            writer.Dispose();

            searcher = NewSearcher(reader);

            termQuery   = new TermQuery(new Term("contents", "woo"));
            phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("contents", "map"));
            phraseQuery.Add(new Term("contents", "entry"));

            hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);

            booleanQuery = new BooleanQuery();
            booleanQuery.Add(termQuery, Occur.MUST);
            booleanQuery.Add(phraseQuery, Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);

            booleanQuery = new BooleanQuery();
            booleanQuery.Add(phraseQuery, Occur.MUST);
            booleanQuery.Add(termQuery, Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(Random(), booleanQuery, searcher, Similarity);

            reader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 54
0
        public virtual void Test10kPulsed()
        {
            // we always run this test with pulsing codec.
            Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));

            DirectoryInfo        f   = CreateTempDir("10kpulsed");
            BaseDirectoryWrapper dir = NewFSDirectory(f);

            dir.CheckIndexOnDispose = false; // we do this ourselves explicitly
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));

            Document  document = new Document();
            FieldType ft       = new FieldType(TextField.TYPE_STORED);

            switch (TestUtil.NextInt32(Random, 0, 2))
            {
            case 0:
                ft.IndexOptions = IndexOptions.DOCS_ONLY;
                break;

            case 1:
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                break;

            default:
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                break;
            }

            Field field = NewField("field", "", ft);

            document.Add(field);

            //NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));

            for (int i = 0; i < 10050; i++)
            {
                //field.StringValue = df.format(i);
                field.SetStringValue(i.ToString("00000", CultureInfo.InvariantCulture));
                iw.AddDocument(document);
            }

            IndexReader ir = iw.GetReader();

            iw.Dispose();

            TermsEnum te = MultiFields.GetTerms(ir, "field").GetIterator(null);
            DocsEnum  de = null;

            for (int i = 0; i < 10050; i++)
            {
                //string expected = df.format(i);
                string expected = i.ToString("00000", CultureInfo.InvariantCulture);
                assertEquals(expected, te.Next().Utf8ToString());
                de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE);
                assertTrue(de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.NextDoc());
            }
            ir.Dispose();

            TestUtil.CheckIndex(dir);
            dir.Dispose();
        }
Ejemplo n.º 55
0
        public void TestInsideBooleanQuery()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "0", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
                                                       new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);

            BooleanQuery bq = new BooleanQuery();

            bq.Add(joinQuery, Occur.SHOULD);
            bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD);

            indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper(this));

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 56
0
        public virtual void TestFuzziness()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory);

            AddDoc("aaaaa", writer);
            AddDoc("aaaab", writer);
            AddDoc("aaabb", writer);
            AddDoc("aabbb", writer);
            AddDoc("abbbb", writer);
            AddDoc("bbbbb", writer);
            AddDoc("ddddd", writer);

            IndexReader   reader   = writer.GetReader();
            IndexSearcher searcher = NewSearcher(reader);

            writer.Dispose();

            FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);

            // same with prefix
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 6);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // test scoring
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "3 documents should match");
            IList <string> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");

            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].Score);
                Assert.AreEqual(order[i], term);
            }

            // test pq size by supplying maxExpansions=2
            // this query would normally return 3 documents, because 3 terms match (see above):
            query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.DefaultMaxEdits, 0, 2, false);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "only 2 documents should match");
            order = Arrays.AsList("bbbbb", "abbbb");
            for (int i = 0; i < hits.Length; i++)
            {
                string term = searcher.Doc(hits[i].Doc).Get("field");
                //System.out.println(hits[i].Score);
                Assert.AreEqual(order[i], term);
            }

            // not similar enough:
            query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
            query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.DefaultMaxEdits, 0); // edit distance to "aaaaa" = 3
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // query identical to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            // default allows for up to two edits:
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // query similar to a word in the index:
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            Assert.AreEqual(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
            Assert.AreEqual(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
            query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.DefaultMaxEdits, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));

            // now with prefix
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 1);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 2);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 3);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 4);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            Assert.AreEqual(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
            query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.DefaultMaxEdits, 5);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // different field = no match:
            query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.DefaultMaxEdits, 0);
            hits  = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 57
0
        public void TestBasics()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random);
            RandomIndexWriter w        = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            var docs = new string[]
            {
                @"this is the end of the world right", @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 2);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST, Random.NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "restaurant"));
                query.Add(new Term("field", "universe"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"3", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 58
0
        private void DoTest(DocValuesType type)
        {
            Directory         d        = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            int   nDocs = AtLeast(50);
            Field id    = new NumericDocValuesField("id", 0);
            Field f;

            switch (type)
            {
            case DocValuesType.BINARY:
                f = new BinaryDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.SORTED:
                f = new SortedDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.NUMERIC:
                f = new NumericDocValuesField("dv", 0);
                break;

            default:
                throw new InvalidOperationException();
            }
            Document document = new Document();

            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);

            for (int i = 0; i < nDocs; ++i)
            {
                id.SetInt64Value(i);
                switch (type)
                {
                case DocValuesType.SORTED:
                case DocValuesType.BINARY:
                    do
                    {
                        vals[i] = TestUtil.RandomSimpleString(Random, 20);
                    } while (((string)vals[i]).Length == 0);
                    f.SetBytesValue(new BytesRef((string)vals[i]));
                    break;

                case DocValuesType.NUMERIC:
                    int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31);     // keep it an int
                    vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                    f.SetInt64Value((long)vals[i]);
                    break;
                }
                iw.AddDocument(document);
                if (Random.NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);

            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);
                ValueSource    vs;
                switch (type)
                {
                case DocValuesType.BINARY:
                case DocValuesType.SORTED:
                    vs = new BytesRefFieldSource("dv");
                    break;

                case DocValuesType.NUMERIC:
                    vs = new Int64FieldSource("dv");
                    break;

                default:
                    throw new InvalidOperationException();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef       bytes  = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is Int64FieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is long?);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    object expected = vals[ids.Int32Val(i)];
                    switch (type)
                    {
                    case DocValuesType.SORTED:
                        values.OrdVal(i);     // no exception
                        assertTrue(values.NumOrd >= 1);
                        goto case DocValuesType.BINARY;

                    case DocValuesType.BINARY:
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertTrue(values.BytesVal(i, bytes));
                        assertEquals(new BytesRef((string)expected), bytes);
                        break;

                    case DocValuesType.NUMERIC:
                        assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                        break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
        public virtual void TestNRTAndCommit()
        {
            Directory           dir       = NewDirectory();
            NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
            MockAnalyzer        analyzer  = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            RandomIndexWriter w    = new RandomIndexWriter(Random, cachedDir, conf);
            LineFileDocs      docs = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            int numDocs            = TestUtil.NextInt32(Random, 100, 400);

            if (Verbose)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs);
            }

            IList <BytesRef> ids = new List <BytesRef>();
            DirectoryReader  r   = null;

            for (int docCount = 0; docCount < numDocs; docCount++)
            {
                Document doc = docs.NextDoc();
                ids.Add(new BytesRef(doc.Get("docid")));
                w.AddDocument(doc);
                if (Random.Next(20) == 17)
                {
                    if (r == null)
                    {
                        r = DirectoryReader.Open(w.IndexWriter, false);
                    }
                    else
                    {
                        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
                        if (r2 != null)
                        {
                            r.Dispose();
                            r = r2;
                        }
                    }
                    Assert.AreEqual(1 + docCount, r.NumDocs);
                    IndexSearcher s = NewSearcher(r);
                    // Just make sure search can run; we can't assert
                    // totHits since it could be 0
                    TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10);
                    // System.out.println("tot hits " + hits.totalHits);
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            // Close should force cache to clear since all files are sync'd
            w.Dispose();

            string[] cachedFiles = cachedDir.ListCachedFiles();
            foreach (string file in cachedFiles)
            {
                Console.WriteLine("FAIL: cached file " + file + " remains after sync");
            }
            Assert.AreEqual(0, cachedFiles.Length);

            r = DirectoryReader.Open(dir);
            foreach (BytesRef id in ids)
            {
                Assert.AreEqual(1, r.DocFreq(new Term("docid", id)));
            }
            r.Dispose();
            cachedDir.Dispose();
            docs.Dispose();
        }
Ejemplo n.º 60
0
        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader       = w.GetReader();
            AtomicReader    wrapper      = SlowCompositeReaderWrapper.Wrap(reader);
            string          field        = @"body";
            Terms           terms        = wrapper.GetTerms(field);
            var             lowFreqQueue = new AnonymousPriorityQueue(this, 5);

            Util.PriorityQueue <TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5);
            try
            {
                TermsEnum iterator = terms.GetEnumerator();
                while (iterator.MoveNext())
                {
                    if (highFreqQueue.Count < 5)
                    {
                        highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List <TermAndFreq> highTerms  = QueueToList(highFreqQueue);
                List <TermAndFreq> lowTerms   = QueueToList(lowFreqQueue);
                IndexSearcher      searcher   = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random);
                BooleanQuery     verifyQuery  = new BooleanQuery();
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean());
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
                }

                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch     = searcher.Search(cq, reader.MaxDoc);
                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                var hits = new JCG.HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }