Пример #1
0
 /// <summary>
 /// Builds the index used by the tests and the query that is run against it.
 /// Every document gets a 'mandant' field, four out of five get a
 /// 'publicationDate_' field, and every seventh gets a 'content' field that
 /// the term query created at the end matches.
 /// </summary>
 public override void SetUp()
 {
     base.SetUp();

     // Don't decrease the size; if it is too low the problem doesn't show up.
     INDEX_SIZE = AtLeast(2000);
     Index = NewDirectory();
     RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), Index);
     RandomGen dateSource = new RandomGen(this, Random());

     for (int docNum = 0; docNum < INDEX_SIZE; docNum++)
     {
         Document document = new Document();

         // Some documents must not have an entry in the first sort field.
         bool hasPublicationDate = (docNum % 5) != 0;
         if (hasPublicationDate)
         {
             document.Add(NewStringField("publicationDate_", dateSource.LuceneDate, Field.Store.YES));
         }

         // Some documents must match the query (see the end of this method).
         bool matchesQuery = (docNum % 7) == 0;
         if (matchesQuery)
         {
             document.Add(NewTextField("content", "test", Field.Store.YES));
         }

         // Every document has a defined 'mandant' field.
         string mandant = Convert.ToString(docNum % 3);
         document.Add(NewStringField("mandant", mandant, Field.Store.YES));

         indexWriter.AddDocument(document);
     }

     Reader = indexWriter.Reader;
     indexWriter.Dispose();

     Query = new TermQuery(new Term("content", "test"));
 }
 /// <summary>
 /// Populates <c>dir</c> with at least 150 random documents through a
 /// <c>RandomIndexWriter</c> that uses a sorting merge policy, committing and
 /// deleting by term at random along the way, then stores the resulting
 /// reader in <c>reader</c>. The writer is left open in <c>iw</c>.
 /// </summary>
 /// <param name="maxSegments">Not referenced by this method body.</param>
 private void CreateRandomIndexes(int maxSegments)
 {
     dir = NewDirectory();
     numDocs = AtLeast(150);

     // Collect numTerms distinct random terms; the set discards duplicates.
     int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
     ISet<string> randomTerms = new HashSet<string>();
     while (randomTerms.Count < numTerms)
     {
         randomTerms.Add(TestUtil.RandomSimpleString(Random()));
     }
     terms = new List<string>(randomTerms);

     // The analyzer and the writer each get their own Random built from the same seed.
     int seed = Random().Next();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
     iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);

     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = RandomDocument();
         iw.AddDocument(doc);

         // Always commit at the midpoint; otherwise commit at random, but never
         // right after the last document.
         if (i == numDocs / 2 || (i != numDocs - 1 && Random().Next(8) == 0))
         {
             iw.Commit();
         }

         // Occasionally delete every document carrying a randomly picked term.
         if (Random().Next(15) == 0)
         {
             string term = RandomInts.RandomFrom(Random(), terms);
             iw.DeleteDocuments(new Term("s", term));
         }
     }

     reader = iw.Reader;
 }
        /// <summary>
        /// Indexes at least 200 documents, each holding one random unicode
        /// string in a single field (whose name is sometimes the empty string),
        /// and opens two searchers over the resulting reader.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            FieldName = Random().NextBoolean() ? "field" : ""; // sometimes use an empty string as field name
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            // A single Document/Field pair is reused; only the field value changes per add.
            Document doc = new Document();
            Field field = NewStringField(FieldName, "", Field.Store.NO);
            doc.Add(field);

            List<string> terms = new List<string>();
            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.StringValue = s;
                terms.Add(s);
                writer.AddDocument(doc);
            }

            if (VERBOSE)
            {
                // UTF-16 order: the default List<string>.Sort() comparer is
                // culture-sensitive, so an ordinal comparer is required to
                // actually sort by UTF-16 code unit.
                terms.Sort(StringComparer.Ordinal);
                Console.WriteLine("UTF16 order:");
                foreach (string term in terms)
                {
                    Console.WriteLine("  " + UnicodeUtil.ToHexString(term));
                }
            }

            Reader = writer.Reader;
            Searcher1 = NewSearcher(Reader);
            Searcher2 = NewSearcher(Reader);
            writer.Dispose();
        }
        /// <summary>
        /// Indexes between 2049 and 4000 documents, each with text, string,
        /// numeric and doc-values fields, and opens a searcher over them.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            int docCount = TestUtil.NextInt(Random(), 2049, 4000);
            for (int docNum = 0; docNum < docCount; docNum++)
            {
                // Fields are created and added one at a time, in a fixed order,
                // so that values are drawn from Random() in the same sequence.
                Document document = new Document();

                document.Add(NewTextField("english", English.IntToEnglish(docNum), Field.Store.NO));

                string parity = (docNum % 2 == 0) ? "even" : "odd";
                document.Add(NewTextField("oddeven", parity, Field.Store.NO));

                // The random int is truncated to a byte / short before being stored as text.
                byte byteValue = unchecked((byte)Random().Next());
                document.Add(NewStringField("byte", string.Empty + byteValue, Field.Store.NO));

                short shortValue = (short)Random().Next();
                document.Add(NewStringField("short", string.Empty + shortValue, Field.Store.NO));

                document.Add(new IntField("int", Random().Next(), Field.Store.NO));
                document.Add(new LongField("long", Random().NextLong(), Field.Store.NO));
                document.Add(new FloatField("float", Random().NextFloat(), Field.Store.NO));
                document.Add(new DoubleField("double", Random().NextDouble(), Field.Store.NO));
                document.Add(new NumericDocValuesField("intdocvalues", Random().Next()));
                document.Add(new FloatDocValuesField("floatdocvalues", Random().NextFloat()));

                indexWriter.AddDocument(document);
            }

            reader = indexWriter.Reader;
            indexWriter.Dispose();
            searcher = NewSearcher(reader);
        }
        /// <summary>
        /// Sorts two documents on a sorted-set "value" field with
        /// <c>Selector.MAX</c> and checks the order of the hits.
        /// </summary>
        public void TestMax()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            // Document 1 carries two values, "foo" and "bar".
            Document first = new Document();
            first.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
            first.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
            first.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(first);

            // Document 2 carries the single value "baz".
            Document second = new Document();
            second.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
            second.Add(NewStringField("id", "2", Field.Store.YES));
            writer.AddDocument(second);

            IndexReader ir = writer.Reader;
            writer.Dispose();

            // slow wrapper does not support random access ordinals (there is no need for that!)
            IndexSearcher searcher = NewSearcher(ir, false);

            SortedSetSortField byMaxValue = new SortedSetSortField("value", false, Selector.MAX);
            Sort sort = new Sort(byMaxValue);
            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);

            assertEquals(2, td.TotalHits);
            // 'baz' comes before 'foo'
            ScoreDoc[] hits = td.ScoreDocs;
            assertEquals("2", searcher.Doc(hits[0].Doc).Get("id"));
            assertEquals("1", searcher.Doc(hits[1].Doc).Get("id"));
            assertNoFieldCaches();

            ir.Dispose();
            dir.Dispose();
        }
 /// <summary>
 /// Writes 100 documents with the Lucene46 codec, committing at random
 /// points, and then runs <c>CheckHeaders</c> on the directory.
 /// </summary>
 public virtual void Test()
 {
     Directory dir = NewDirectory();
     IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     conf.SetCodec(new Lucene46Codec());
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);

     // these fields should sometimes get term vectors, etc
     Field idField = NewStringField("id", "", Field.Store.NO);
     Field bodyField = NewTextField("body", "", Field.Store.NO);
     Field dvField = new NumericDocValuesField("dv", 5);

     // One document instance is reused; only the id and body values change per add.
     Document doc = new Document();
     doc.Add(idField);
     doc.Add(bodyField);
     doc.Add(dvField);

     int docNum = 0;
     while (docNum < 100)
     {
         idField.StringValue = Convert.ToString(docNum);
         bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
         riw.AddDocument(doc);

         bool commitNow = Random().Next(7) == 0;
         if (commitNow)
         {
             riw.Commit();
         }

         // TODO: we should make a new format with a clean header...
         // (random deletes by "id" are disabled until then)
         docNum++;
     }

     riw.Dispose();
     CheckHeaders(dir);
     dir.Dispose();
 }
Пример #7
0
        /// <summary>
        /// Runs indexing threads against a writer configured with
        /// <c>NoDeletionPolicy</c> and, until the stop time, repeatedly asserts
        /// that every file referenced by any commit seen so far still exists.
        /// </summary>
        public virtual void TestIndexing()
        {
            DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete");
            BaseDirectoryWrapper directory = NewFSDirectory(tmpDir);

            // We want to "see" files removed if Lucene removed
            // them.  this is still worth running on Windows since
            // some files the IR opens and closes.
            MockDirectoryWrapper mockDirectory = directory as MockDirectoryWrapper;
            if (mockDirectory != null)
            {
                mockDirectory.NoDeleteOpenFile = false;
            }

            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
            writer.w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 5, 30));
            writer.Commit();

            // Between zero and three indexing threads, all sharing one stop time.
            ThreadClass[] indexThreads = new ThreadClass[Random().Next(4)];
            long stopTime = Environment.TickCount + AtLeast(1000);
            for (int threadNum = 0; threadNum < indexThreads.Length; threadNum++)
            {
                ThreadClass indexThread = new ThreadAnonymousInnerClassHelper(writer, stopTime);
                indexThreads[threadNum] = indexThread;
                indexThread.Name = "Thread " + threadNum;
                indexThread.Start();
            }

            // Union of the file names of every commit observed so far.
            HashSet<string> allFiles = new HashSet<string>();

            DirectoryReader currentReader = DirectoryReader.Open(directory);
            while (Environment.TickCount < stopTime)
            {
                IndexCommit commit = currentReader.IndexCommit;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: check files: " + commit.FileNames);
                }
                allFiles.AddAll(commit.FileNames);

                // Make sure no old files were removed
                foreach (string fileName in allFiles)
                {
                    Assert.IsTrue(SlowFileExists(directory, fileName), "file " + fileName + " does not exist");
                }

                // Switch to the newer reader when OpenIfChanged returns one.
                DirectoryReader newerReader = DirectoryReader.OpenIfChanged(currentReader);
                if (newerReader != null)
                {
                    currentReader.Dispose();
                    currentReader = newerReader;
                }
                Thread.Sleep(1);
            }
            currentReader.Dispose();

            for (int threadNum = 0; threadNum < indexThreads.Length; threadNum++)
            {
                indexThreads[threadNum].Join();
            }
            writer.Dispose();
            directory.Dispose();

            System.IO.Directory.Delete(tmpDir.FullName, true);
        }
Пример #8
0
        /// <summary>
        /// Indexes at least 200 random unicode strings into "field", keeps
        /// them in <c>Terms</c>, builds <c>TermsAutomaton</c> as the union of
        /// those terms, and opens a searcher over the index.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();

            // We generate awful regexps: good for testing.
            // But for the preflex codec the test can be very slow, so use fewer iterations.
            if (Codec.Default.Name.Equals("Lucene3x"))
            {
                NumIterations = 10 * RANDOM_MULTIPLIER;
            }
            else
            {
                NumIterations = AtLeast(50);
            }

            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            // One document/field pair is reused; only the field value changes per add.
            Field field = NewStringField("field", "", Field.Store.YES);
            Document doc = new Document();
            doc.Add(field);

            Terms = new SortedSet<BytesRef>();
            int termCount = AtLeast(200);
            for (int n = 0; n < termCount; n++)
            {
                string value = TestUtil.RandomUnicodeString(Random());
                field.StringValue = value;
                Terms.Add(new BytesRef(value));
                writer.AddDocument(doc);
            }

            TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Пример #9
0
        /// <summary>
        /// Indexes three category paths and checks the hit counts of prefix
        /// queries for "/Computers", "/Computers/Mac" and the empty prefix.
        /// </summary>
        public virtual void TestPrefixQuery_Mem()
        {
            Directory directory = NewDirectory();

            string[] categories = { "/Computers", "/Computers/Mac", "/Computers/Windows" };
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            foreach (string category in categories)
            {
                Document doc = new Document();
                doc.Add(NewStringField("category", category, Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader reader = writer.Reader;
            IndexSearcher searcher;
            ScoreDoc[] hits;

            PrefixQuery query = new PrefixQuery(new Term("category", "/Computers"));
            searcher = NewSearcher(reader);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below");

            query = new PrefixQuery(new Term("category", "/Computers/Mac"));
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "One in /Computers/Mac");

            // For the empty prefix the query must not hand back a PrefixTermsEnum.
            query = new PrefixQuery(new Term("category", ""));
            Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category");
            bool usesPrefixEnum = query.GetTermsEnum(terms) is PrefixTermsEnum;
            Assert.IsFalse(usesPrefixEnum);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "everything");

            writer.Dispose();
            reader.Dispose();
            directory.Dispose();
        }
        /// <summary>
        /// Builds one index holding ten documents and two sub-indexes holding
        /// the even-numbered and odd-numbered documents respectively, then
        /// opens a plain searcher, a multi-reader searcher over the two
        /// sub-indexes, and a multi-reader searcher over the first sub-index
        /// plus the full index.
        /// </summary>
        public void BeforeClass()
        {
            Dir = NewDirectory();
            Sdir1 = NewDirectory();
            Sdir2 = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone);

            for (int docNum = 0; docNum < 10; docNum++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("data", Convert.ToString(docNum), Field.Store.NO));
                writer.AddDocument(doc);

                // Even-numbered documents go to the first sub-index, odd ones to the second.
                if (docNum % 2 == 0)
                {
                    swriter1.AddDocument(doc);
                }
                else
                {
                    swriter2.AddDocument(doc);
                }
            }

            writer.ForceMerge(1);
            swriter1.ForceMerge(1);
            swriter2.ForceMerge(1);
            writer.Dispose();
            swriter1.Dispose();
            swriter2.Dispose();

            Reader = DirectoryReader.Open(Dir);
            Searcher = NewSearcher(Reader);

            IndexReader[] subReaders = new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) };
            MultiReader = new MultiReader(subReaders, true);
            MultiSearcher = NewSearcher(MultiReader);

            // This reader overlaps: the documents in Sdir1 are also in Dir.
            IndexReader[] overlappingReaders = new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) };
            MultiReaderDupls = new MultiReader(overlappingReaders, true);
            MultiSearcherDupls = NewSearcher(MultiReaderDupls);
        }
		/// <summary>
		/// Indexes three documents (ids "1", "2", "3"), each with a "body" text
		/// field and a "popularity" numeric doc value, and opens a searcher.
		/// </summary>
		public override void SetUp()
		{
			base.SetUp();
			dir = NewDirectory();
			RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

			Document first = new Document();
			first.Add(NewStringField("id", "1", Field.Store.YES));
			first.Add(NewTextField("body", "some contents and more contents", Field.Store.NO));
			first.Add(new NumericDocValuesField("popularity", 5));
			indexWriter.AddDocument(first);

			Document second = new Document();
			second.Add(NewStringField("id", "2", Field.Store.YES));
			second.Add(NewTextField("body", "another document with different contents", Field.Store.NO));
			second.Add(new NumericDocValuesField("popularity", 20));
			indexWriter.AddDocument(second);

			Document third = new Document();
			third.Add(NewStringField("id", "3", Field.Store.YES));
			third.Add(NewTextField("body", "crappy contents", Field.Store.NO));
			third.Add(new NumericDocValuesField("popularity", 2));
			indexWriter.AddDocument(third);

			// The reader and searcher are taken before the writer is disposed.
			reader = indexWriter.Reader;
			searcher = new IndexSearcher(reader);
			indexWriter.Dispose();
		}
Пример #12
0
        /// <summary>
        /// Checks that a boolean query with a required (value "1" OR "2")
        /// sub-query and a prohibited term "9" matches exactly two of the
        /// four indexed documents.
        /// </summary>
        public virtual void TestMethod()
        {
            Directory directory = NewDirectory();

            string[] values = { "1", "2", "3", "4" };

            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            foreach (string value in values)
            {
                Document doc = new Document();
                doc.Add(NewStringField(FIELD, value, Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader ir = writer.Reader;
            writer.Dispose();

            // Inner query: value "1" OR value "2".
            BooleanQuery oneOrTwo = new BooleanQuery();
            oneOrTwo.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
            oneOrTwo.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

            // Outer query: the inner query is required, value "9" is prohibited.
            BooleanQuery query = new BooleanQuery();
            query.Add(oneOrTwo, BooleanClause.Occur.MUST);
            query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

            IndexSearcher indexSearcher = NewSearcher(ir);
            TopDocs topDocs = indexSearcher.Search(query, null, 1000);
            ScoreDoc[] hits = topDocs.ScoreDocs;
            Assert.AreEqual(2, hits.Length, "Number of matched documents");

            ir.Dispose();
            directory.Dispose();
        }
Пример #13
0
        /// <summary>
        /// For every segment, requests a <c>DocsEnum</c> per term of "body"
        /// without passing a reuse candidate, and asserts that the number of
        /// distinct enum instances equals the number of terms.
        /// </summary>
        public virtual void TestReuseDocsEnumNoReuse()
        {
            Directory dir = NewDirectory();
            Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);
            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext leaf in open.Leaves())
            {
                AtomicReader segmentReader = (AtomicReader)leaf.Reader();
                Terms terms = segmentReader.Terms("body");
                TermsEnum termsEnum = terms.Iterator(null);

                // Keyed by reference identity, so each distinct instance is counted once.
                IdentityHashMap<DocsEnum, bool?> seenEnums = new IdentityHashMap<DocsEnum, bool?>();
                MatchNoBits sharedBits = new MatchNoBits(segmentReader.MaxDoc());

                while (termsEnum.Next() != null)
                {
                    // Randomly pick the shared bits or a fresh instance, then the flags.
                    MatchNoBits liveDocs = Random().NextBoolean() ? sharedBits : new MatchNoBits(segmentReader.MaxDoc());
                    int flags = Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE;

                    // No reuse candidate (null) is passed.
                    DocsEnum docs = termsEnum.Docs(liveDocs, null, flags);
                    seenEnums[docs] = true;
                }

                Assert.AreEqual(terms.Size(), seenEnums.Count);
            }

            IOUtils.Close(writer, open, dir);
        }
Пример #14
0
        /// <summary>
        /// Indexes 100 documents with id, text and class fields (all stored,
        /// with term vectors, offsets and positions), commits, and wraps the
        /// writer's reader as <c>_originalIndex</c>.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();
            _dir = NewDirectory();
            _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
            ft.StoreTermVectorPositions = true;

            Analyzer analyzer = new MockAnalyzer(Random());

            // NOTE(review): every field value is the string form of the Random
            // instance, not randomly generated text - confirm this is intended.
            Document doc;
            for (int i = 0; i < 100; i++)
            {
                doc = new Document();
                doc.Add(new Field(_idFieldName, Random().ToString(), ft));
                doc.Add(new Field(_textFieldName, Random().ToString() + Random().ToString() + Random().ToString(), ft));
                doc.Add(new Field(_classFieldName, Random().ToString(), ft));
                _indexWriter.AddDocument(doc, analyzer);
            }

            _indexWriter.Commit();

            _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader);
        }
Пример #15
0
        /// <summary>
        /// Indexes five documents, then updates three of them through a second
        /// writer with a small document buffer and rolls that writer back; the
        /// index must still contain five documents afterwards.
        /// </summary>
        public virtual void TestRollbackIntegrityWithBufferFlush()
        {
            Directory dir = NewDirectory();

            RandomIndexWriter initialWriter = new RandomIndexWriter(Random(), dir);
            for (int pk = 0; pk < 5; pk++)
            {
                Document original = new Document();
                original.Add(NewStringField("pk", Convert.ToString(pk), Field.Store.YES));
                initialWriter.AddDocument(original);
            }
            initialWriter.Dispose();

            // If buffer size is small enough to cause a flush, errors ensue...
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND));

            for (int pk = 0; pk < 3; pk++)
            {
                string key = Convert.ToString(pk);
                Document replacement = new Document();
                replacement.Add(NewStringField("pk", key, Field.Store.YES));
                replacement.Add(NewStringField("text", "foo", Field.Store.YES));
                w.UpdateDocument(new Term("pk", key), replacement);
            }

            // The updates above are rolled back rather than committed.
            w.Rollback();

            IndexReader r = DirectoryReader.Open(dir);
            Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
            r.Dispose();
            dir.Dispose();
        }
Пример #16
0
        /// <summary>
        /// Indexes 1000 documents whose "field" values are the zero-padded,
        /// three-digit numbers "000" through "999", and opens a searcher.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            // One document/field pair is reused; only the field value changes per add.
            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.OmitNorms = true;
            Field field = NewField("field", "", customType);
            doc.Add(field);

            for (int i = 0; i < 1000; i++)
            {
                // Equivalent of Java's DecimalFormat("000", Locale.ROOT): always
                // three digits, zero-padded. A NumberFormatInfo with
                // NumberDecimalDigits = 0 does not pad, so a custom format
                // string is used instead.
                field.StringValue = i.ToString("000", CultureInfo.InvariantCulture);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
Пример #17
0
        /// <summary>
        /// Indexes a field with DOCS_AND_FREQS only and checks that no
        /// positions enum is available for it while term frequencies still are.
        /// </summary>
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            Field f = NewField("foo", "this is a test test", ft);

            // The same document is added 100 times.
            Document doc = new Document();
            doc.Add(f);
            int added = 0;
            while (added < 100)
            {
                w.AddDocument(doc);
                added++;
            }

            IndexReader reader = w.Reader;
            w.Dispose();

            BytesRef term = new BytesRef("test");
            Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", term));

            // "test" occurs twice in every document's field value.
            DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
            while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(2, de.Freq());
            }

            reader.Dispose();
            dir.Dispose();
        }
Пример #18
0
        /// <summary>
        /// Sorts two documents by a string "value" field and checks that
        /// "bar" is returned before "foo".
        /// </summary>
        public virtual void TestString()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            Document fooDoc = new Document();
            fooDoc.Add(NewStringField("value", "foo", Field.Store.YES));
            writer.AddDocument(fooDoc);

            Document barDoc = new Document();
            barDoc.Add(NewStringField("value", "bar", Field.Store.YES));
            writer.AddDocument(barDoc);

            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            SortField byValue = new SortField("value", SortField.Type_e.STRING);
            Sort sort = new Sort(byValue);

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            Assert.AreEqual(2, td.TotalHits);

            // 'bar' comes before 'foo'
            ScoreDoc[] hits = td.ScoreDocs;
            Assert.AreEqual("bar", searcher.Doc(hits[0].Doc).Get("value"));
            Assert.AreEqual("foo", searcher.Doc(hits[1].Doc).Get("value"));

            ir.Dispose();
            dir.Dispose();
        }
Пример #19
0
        /// <summary>
        /// Populates a writer with random stuff. This must be fully reproducible with the seed!
        /// </summary>
        /// <param name="numdocs">Number of documents to add.</param>
        /// <param name="writer">Writer that receives the documents.</param>
        /// <param name="seed">Seed for the random source; it is truncated to an <c>int</c>.</param>
        public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, long seed)
        {
            Random random = new Random((int)seed);
            // primary source for our data is from linefiledocs, its realistic.
            LineFileDocs lineFileDocs = new LineFileDocs(random);
            try
            {
                // TODO: we should add other fields that use things like docs&freqs but omit positions,
                // because linefiledocs doesn't cover all the possibilities.
                for (int i = 0; i < numdocs; i++)
                {
                    Document document = lineFileDocs.NextDoc();
                    // grab the title and add some SortedSet instances for fun
                    string title = document.Get("titleTokenized");
                    // Split on runs of whitespace. A null separator array means
                    // "any whitespace"; empty entries are dropped so consecutive
                    // whitespace acts as a single delimiter.
                    string[] split = title.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                    foreach (string trash in split)
                    {
                        document.Add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
                    }
                    // add a numeric dv field sometimes
                    document.RemoveFields("sparsenumeric");
                    if (random.Next(4) == 2)
                    {
                        document.Add(new NumericDocValuesField("sparsenumeric", random.Next()));
                    }
                    writer.AddDocument(document);
                }
            }
            finally
            {
                // Release the line file source even if adding a document throws.
                lineFileDocs.Dispose();
            }
        }
        /// <summary>
        /// Builds an index made almost entirely of "meaninglessnames"
        /// documents, with a single "tangfulin" document after the first 5137
        /// and another one at the very end, then opens a searcher over it.
        /// </summary>
        public void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);

            // One document/field pair is reused; only the field value changes.
            Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
            Document doc = new Document();
            doc.Add(field);

            int docNum = 0;
            while (docNum < 5137)
            {
                writer.AddDocument(doc);
                docNum++;
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            field.StringValue = "meaninglessnames";
            docNum = 5138;
            while (docNum < 11377)
            {
                writer.AddDocument(doc);
                docNum++;
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
        /// <summary>
        /// Seeds an index and a taxonomy, runs a search that collects facets,
        /// and checks the taxonomy facet counts and that ordinals exist in the
        /// "$facets" field.
        /// </summary>
        public virtual void TestDefault()
        {
            Directory indexDir = NewDirectory();
            Directory taxoDir = NewDirectory();

            // Create and open an index writer.
            var indexWriter = new RandomIndexWriter(Random(), indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));
            // Create and open a taxonomy writer.
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
            var config = Config;

            seedIndex(taxoWriter, indexWriter, config);

            IndexReader indexReader = indexWriter.Reader;
            taxoWriter.Commit();

            // Prepare the taxonomy reader and the searcher to search against.
            var taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);

            FacetsCollector collector = PerformSearch(taxoReader, indexReader, searcher);

            // Obtain facets results and hand-test them.
            AssertCorrectResults(GetTaxonomyFacetCounts(taxoReader, config, collector));

            assertOrdinalsExist("$facets", indexReader);

            IOUtils.Close(taxoReader, indexReader, indexWriter, taxoWriter, indexDir, taxoDir);
        }
 /// <summary>
 /// Indexes three documents (ids "1", "2", "3"), each with a "body" text
 /// field and a "popularity" numeric doc value, using a log merge policy,
 /// force-merges to one segment and keeps the resulting reader.
 /// </summary>
 public override void SetUp()
 {
     base.SetUp();
     dir = NewDirectory();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     iwc.SetMergePolicy(NewLogMergePolicy());
     RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), dir, iwc);

     Document first = new Document();
     first.Add(NewStringField("id", "1", Field.Store.YES));
     first.Add(NewTextField("body", "some contents and more contents", Field.Store.NO));
     first.Add(new NumericDocValuesField("popularity", 5));
     indexWriter.AddDocument(first);

     Document second = new Document();
     second.Add(NewStringField("id", "2", Field.Store.YES));
     second.Add(NewTextField("body", "another document with different contents", Field.Store.NO));
     second.Add(new NumericDocValuesField("popularity", 20));
     indexWriter.AddDocument(second);

     Document third = new Document();
     third.Add(NewStringField("id", "3", Field.Store.YES));
     third.Add(NewTextField("body", "crappy contents", Field.Store.NO));
     third.Add(new NumericDocValuesField("popularity", 2));
     indexWriter.AddDocument(third);

     indexWriter.ForceMerge(1);
     reader = indexWriter.Reader;
     indexWriter.Dispose();
 }
Пример #23
0
        /// <summary>
        /// Builds a <c>TermsFilter</c> over one term per field "field0".."fieldN-1"
        /// while indexing a document for all but one randomly skipped field, and
        /// checks that the filter matches exactly the indexed documents.
        /// </summary>
        public void TestFieldNotPresent()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            int num = AtLeast(3);
            int skip = Random().Next(num);
            var terms = new List<Term>();
            for (int i = 0; i < num; i++)
            {
                // The term is always part of the filter, even for the skipped field.
                terms.Add(new Term("field" + i, "content1"));
                if (skip == i)
                {
                    // No document is indexed for this field.
                    continue;
                }
                Document doc = new Document();
                doc.Add(NewStringField("field" + i, "content1", Field.Store.YES));
                w.AddDocument(doc);
            }

            w.ForceMerge(1);
            IndexReader reader = w.Reader;
            w.Dispose();
            assertEquals(1, reader.Leaves.Count);

            AtomicReaderContext context = reader.Leaves.First();
            TermsFilter tf = new TermsFilter(terms);

            FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);
            assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality());
            reader.Dispose();
            dir.Dispose();
        }
Пример #24
0
        /// <summary>
        /// Scores a <c>BooleanScorer</c> built over a single custom bulk scorer
        /// and asserts that exactly one hit, docID 3000, is collected.
        /// </summary>
        public virtual void TestEmptyBucketWithMoreDocs()
        {
            // This test checks the logic of nextDoc() when all sub scorers have docs
            // beyond the first bucket (for example). Currently, the code relies on the
            // 'more' variable to work properly, and this test ensures that if the logic
            // changes, we have a test to back it up.

            Directory directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            writer.Commit();
            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            BooleanQuery emptyQuery = new BooleanQuery();
            BooleanWeight weight = (BooleanWeight)emptyQuery.CreateWeight(searcher);

            BulkScorer[] scorers = new BulkScorer[] { new BulkScorerAnonymousInnerClassHelper() };

            BooleanScorer bs = new BooleanScorer(weight, false, 1, Arrays.AsList(scorers), new List<BulkScorer>(), scorers.Length);

            // The collector receives this list and is expected to record hits in it.
            IList<int> hits = new List<int>();
            bs.Score(new CollectorAnonymousInnerClassHelper(this, hits));

            Assert.AreEqual(1, hits.Count, "should have only 1 hit");
            Assert.AreEqual(3000, (int)hits[0], "hit should have been docID=3000");

            ir.Dispose();
            directory.Dispose();
        }
Пример #25
0
        /// <summary>
        /// Checks that a <c>DocsAndPositionsEnum</c> reports docID -1 before the
        /// first <c>NextDoc()</c> call, both when freshly obtained and when
        /// obtained again with the previous enum passed in for reuse.
        /// </summary>
        public virtual void TestDocsAndPositionsEnumStart()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);

            Document doc = new Document();
            doc.Add(NewTextField("foo", "bar", Field.Store.NO));
            writer.AddDocument(doc);

            DirectoryReader reader = writer.Reader;
            AtomicReader r = GetOnlySegmentReader(reader);

            DocsAndPositionsEnum disi = r.TermPositionsEnum(new Term("foo", "bar"));
            Assert.AreEqual(-1, disi.DocID());
            bool hasDoc = disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS;
            Assert.IsTrue(hasDoc);

            // now reuse and check again
            TermsEnum te = r.Terms("foo").Iterator(null);
            bool found = te.SeekExact(new BytesRef("bar"));
            Assert.IsTrue(found);
            disi = te.DocsAndPositions(null, disi);
            Assert.AreEqual(-1, disi.DocID());
            hasDoc = disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS;
            Assert.IsTrue(hasDoc);

            writer.Dispose();
            r.Dispose();
            dir.Dispose();
        }
Пример #26
0
        public void TestReverse()
        {
            Directory indexDir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);

            // Document "1" carries two values ("foo" and "bar") in the sorted field.
            Document first = new Document();
            first.Add(NewStringField("value", "foo", Field.Store.NO));
            first.Add(NewStringField("value", "bar", Field.Store.NO));
            first.Add(NewStringField("id", "1", Field.Store.YES));
            indexWriter.AddDocument(first);

            // Document "2" carries the single value "baz".
            Document second = new Document();
            second.Add(NewStringField("value", "baz", Field.Store.NO));
            second.Add(NewStringField("id", "2", Field.Store.YES));
            indexWriter.AddDocument(second);

            IndexReader indexReader = indexWriter.Reader;
            indexWriter.Dispose();

            IndexSearcher indexSearcher = NewSearcher(indexReader);
            SortedSetSortField reversedByValue = new SortedSetSortField("value", true);
            Sort sort = new Sort(reversedByValue);

            TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), 10, sort);
            assertEquals(2, topDocs.TotalHits);

            // 'bar' sorts before 'baz'; the sort field is created with its second
            // argument set to true, and the expected order is document "2" then "1".
            assertEquals("2", indexSearcher.Doc(topDocs.ScoreDocs[0].Doc).Get("id"));
            assertEquals("1", indexSearcher.Doc(topDocs.ScoreDocs[1].Doc).Get("id"));

            indexReader.Dispose();
            indexDir.Dispose();
        }
Пример #27
0
        public static void BeforeClass()
        {
            // Class-level fixture: builds a three-document index with the helper
            // analyzer and publishes the directory, reader and searcher for the tests.
            Directory = NewDirectory();
            Analyzer fixtureAnalyzer = new AnalyzerAnonymousInnerClassHelper();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), Directory, fixtureAnalyzer);

            // Document 1: a plain field, a field added twice ("repeated"), and a palindrome.
            Documents.Document document = new Documents.Document();
            document.Add(NewTextField("field", "one two three four five", Field.Store.YES));
            document.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES));
            IndexableField secondRepeatedPart = NewTextField("repeated", "second part of a repeated field", Field.Store.YES);
            document.Add(secondRepeatedPart);
            document.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES));
            indexWriter.AddDocument(document);

            // Documents 2 and 3: two separate documents with identical "nonexist" content.
            document = new Documents.Document();
            document.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
            indexWriter.AddDocument(document);

            document = new Documents.Document();
            document.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
            indexWriter.AddDocument(document);

            // Grab the reader from the writer before disposing the writer.
            Reader = indexWriter.Reader;
            indexWriter.Dispose();

            Searcher = NewSearcher(Reader);
        }
Пример #28
0
        public static void BeforeClass()
        {
            // Class-level fixture: indexes at least 300 random documents, merges them
            // into one segment and exposes a searcher over that single segment.
            Dir = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), Dir);
            int documentCount = AtLeast(300);

            for (int docIndex = 0; docIndex < documentCount; docIndex++)
            {
                Document document = new Document();

                // Always-terms go into every document; the other groups are added
                // when a draw from [0, 100) falls below 90, 50 and 10 respectively.
                AddSome(document, AlwaysTerms);

                if (Random().Next(100) < 90)
                {
                    AddSome(document, CommonTerms);
                }

                if (Random().Next(100) < 50)
                {
                    AddSome(document, MediumTerms);
                }

                if (Random().Next(100) < 10)
                {
                    AddSome(document, RareTerms);
                }

                indexWriter.AddDocument(document);
            }

            // Force a single segment so GetOnlySegmentReader can be used below.
            indexWriter.ForceMerge(1);
            indexWriter.Dispose();

            r = DirectoryReader.Open(Dir);
            atomicReader = GetOnlySegmentReader(r);
            Searcher = new IndexSearcher(atomicReader);
            Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper();
        }
Пример #29
0
 // TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
 public virtual void BuildIndex(Directory dir)
 {
     // Fills 'dir' with at least 100 line-file documents. Each document temporarily
     // gets an extra ByteTestField whose text and boost are the same random value.
     Random random = Random();
     MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());
     mockAnalyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

     IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
     Similarity similarityProvider = new MySimProvider(this);
     writerConfig.SetSimilarity(similarityProvider);

     RandomIndexWriter indexWriter = new RandomIndexWriter(random, dir, writerConfig);
     LineFileDocs lineDocs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

     int documentCount = AtLeast(100);
     for (int docIndex = 0; docIndex < documentCount; docIndex++)
     {
         Document document = lineDocs.NextDoc();

         int boost = Random().Next(255);
         Field boostField = new TextField(ByteTestField, "" + boost, Field.Store.YES)
         {
             Boost = boost
         };

         document.Add(boostField);
         indexWriter.AddDocument(document);
         // Take the field out again once the document has been indexed.
         document.RemoveField(ByteTestField);

         // Occasionally commit part-way through.
         if (Rarely())
         {
             indexWriter.Commit();
         }
     }

     indexWriter.Commit();
     indexWriter.Dispose();
     lineDocs.Dispose();
 }
Пример #30
0
        public override void SetUp()
        {
            // Per-test fixture: indexes the numbers 0..999 as zero-padded, three-digit
            // string terms ("000".."999") in a single "field" and opens a searcher on them.
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            // One Document/Field pair is reused; only the field's value changes per iteration.
            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.NO);
            doc.Add(field);

            // Equivalent of Java's
            //   new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT))
            // The previous NumberFormatInfo-based formatting produced "0", "1", ... with
            // no zero padding, so the indexed terms were not all three characters long.
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString("000", CultureInfo.InvariantCulture);
                writer.AddDocument(doc);
            }

            // The reader is taken from the writer before the writer is disposed.
            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp searcher=" + Searcher);
            }
        }
        public virtual void TestRandom()
        {
            // Randomized check: indexes random documents with a float "value" and up to
            // seven facet dimensions, then, for random search tokens, compares the facet
            // sums reported by TaxonomyFacetSumValueSource with sums computed by hand.
            string[]        tokens   = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            RandomIndexWriter w      = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt(Random(), 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                // Remember the random value on the TestDoc so the expected sums can be rebuilt later.
                testDoc.value = Random().NextFloat();
                doc.Add(new SingleDocValuesField("value", testDoc.value));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.Reader);

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            ValueSource values = new SingleFieldSource("value");

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random().Next(tokens.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values);

                // Slow, yet hopefully bug-free, faceting:
                var expectedValues = new List <Dictionary <string, float?> >(numDims);
                for (int i = 0; i < numDims; i++)
                {
                    expectedValues.Add(new Dictionary <string, float?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken, StringComparison.Ordinal))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                // Single lookup: 'v' stays null when the label has not been seen yet.
                                float? v;
                                if (expectedValues[j].TryGetValue(doc.dims[j], out v) && v != null)
                                {
                                    expectedValues[j][doc.dims[j]] = v.Value + doc.value;
                                }
                                else
                                {
                                    expectedValues[j][doc.dims[j]] = doc.value;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    float totValue = 0;
                    foreach (KeyValuePair <string, float?> ent in expectedValues[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totValue += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    // Only dimensions whose total is positive are added to the expected results.
                    if (totValue > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totValue, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                if (VERBOSE)
                {
                    // Join the elements explicitly: List<T>.ToString() only yields the type name.
                    Console.WriteLine("expected=\n" + string.Join("\n", expected));
                    Console.WriteLine("actual=\n" + string.Join("\n", actual));
                }

                AssertFloatValuesEquals(expected, actual);
            }

            IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
        public virtual void TestSparseFacets()
        {
            // Three documents with "num" values 10/20/30 and an increasing number of
            // facet dimensions (a; a+b; a+b+c). The summed "num" per dimension is then
            // checked against the expected result strings.
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxonomyDir = NewDirectory();

            // Facet ords are written to a directory separate from the main index.
            var taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            FacetsConfig facetsConfig = new FacetsConfig();

            // Document 1: dimension "a" only.
            Document document = new Document();
            document.Add(new Int32Field("num", 10, Field.Store.NO));
            document.Add(new FacetField("a", "foo1"));
            indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

            // Randomly commit between documents.
            if (Random().NextBoolean())
            {
                indexWriter.Commit();
            }

            // Document 2: dimensions "a" and "b".
            document = new Document();
            document.Add(new Int32Field("num", 20, Field.Store.NO));
            document.Add(new FacetField("a", "foo2"));
            document.Add(new FacetField("b", "bar1"));
            indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

            if (Random().NextBoolean())
            {
                indexWriter.Commit();
            }

            // Document 3: dimensions "a", "b" and "c".
            document = new Document();
            document.Add(new Int32Field("num", 30, Field.Store.NO));
            document.Add(new FacetField("a", "foo3"));
            document.Add(new FacetField("b", "bar2"));
            document.Add(new FacetField("c", "baz1"));
            indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

            // NRT open of the index, then the writer is no longer needed.
            IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
            indexWriter.Dispose();

            // NRT open of the taxonomy, then its writer is no longer needed either.
            var taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);
            taxonomyWriter.Dispose();

            FacetsCollector collector = new FacetsCollector();
            indexSearcher.Search(new MatchAllDocsQuery(), collector);

            Int32FieldSource numSource = new Int32FieldSource("num");
            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxonomyReader, new FacetsConfig(), collector, numSource);

            // Ask for top 10 labels for any dims that have counts:
            IList<FacetResult> allDims = facets.GetAllDims(10);

            Assert.AreEqual(3, allDims.Count);
            Assert.AreEqual("dim=a path=[] value=60.0 childCount=3\n  foo3 (30.0)\n  foo2 (20.0)\n  foo1 (10.0)\n", allDims[0].ToString());
            Assert.AreEqual("dim=b path=[] value=50.0 childCount=2\n  bar2 (30.0)\n  bar1 (20.0)\n", allDims[1].ToString());
            Assert.AreEqual("dim=c path=[] value=30.0 childCount=1\n  baz1 (30.0)\n", allDims[2].ToString());

            IOUtils.Dispose(indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
        }
        public virtual void TestBasic()
        {
            // Indexes five documents, each with a "num" value and an "Author" facet,
            // and verifies the per-author sums of "num" reported for the "Author" dimension.
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxonomyDir = NewDirectory();

            // Facet ords are written to a directory separate from the main index.
            DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            FacetsConfig facetsConfig = new FacetsConfig();

            // Parallel arrays: nums[i] is indexed together with authors[i].
            // A new Document is created for every entry.
            int[] nums = { 10, 20, 30, 40, 45 };
            string[] authors = { "Bob", "Lisa", "Lisa", "Susan", "Frank" };
            for (int i = 0; i < nums.Length; i++)
            {
                Document document = new Document();
                document.Add(new Int32Field("num", nums[i], Field.Store.NO));
                document.Add(new FacetField("Author", authors[i]));
                indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));
            }

            // NRT open of the index, then the writer is no longer needed.
            IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
            indexWriter.Dispose();

            // NRT open of the taxonomy, then its writer is no longer needed either.
            var taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);
            taxonomyWriter.Dispose();

            // Aggregate over every document: MatchAllDocsQuery is the "browsing" case
            // (all non-deleted docs); normally you'd use a "normal" query and one of
            // the Facets.search utility methods.
            FacetsCollector collector = new FacetsCollector();
            indexSearcher.Search(new MatchAllDocsQuery(), collector);

            Int32FieldSource numSource = new Int32FieldSource("num");
            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxonomyReader, new FacetsConfig(), collector, numSource);

            // Retrieve & verify results:
            string topAuthors = facets.GetTopChildren(10, "Author").ToString();
            Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n  Lisa (50.0)\n  Frank (45.0)\n  Susan (40.0)\n  Bob (10.0)\n", topAuthors);

            taxonomyReader.Dispose();
            indexSearcher.IndexReader.Dispose();
            indexDir.Dispose();
            taxonomyDir.Dispose();
        }
Пример #34
0
        public void TestSimpleExamples()
        {
            // Indexes the English names of 0..19, checks that several misspellings of
            // "five" are corrected to "five", then adds 1000..1099 and checks that
            // "tousand" is corrected to "thousand" using a fresh reader from the same writer.
            DirectSpellChecker spellChecker = new DirectSpellChecker();

            spellChecker.MinQueryLength = (0);
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
                                                             new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone);

            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.Reader;

            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("numbers",
                                                                         "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "five"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            if (similar.Length > 0)
            {
                assertFalse(similar[0].String.equals("five")); // don't suggest a word for itself
            }

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fvie"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fiv"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fives"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            // The length check must follow the lookup it guards; it previously ran
            // before this call and therefore re-checked the "fives" result instead.
            similar = spellChecker.SuggestSimilar(new Term("numbers", "fie"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            // add some more documents
            for (int i = 1000; i < 1100; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            // Swap the old reader for a new one so the added documents are visible.
            ir.Dispose();
            ir = writer.Reader;

            // look ma, no spellcheck index rebuild
            similar = spellChecker.SuggestSimilar(new Term("numbers", "tousand"), 10,
                                                  ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("thousand", similar[0].String);

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
        public virtual void TestWrongIndexFieldName()
        {
            // Dimension "a" is configured to use the index field "$facets2". The test
            // expects no dimensions from GetAllDims and an ArgumentException from both
            // GetSpecificValue("a") and GetTopChildren(10, "a").
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxonomyDir = NewDirectory();

            // Facet ords are written to a directory separate from the main index.
            var taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

            FacetsConfig facetsConfig = new FacetsConfig();
            facetsConfig.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);

            Document document = new Document();
            document.Add(new Int32Field("num", 10, Field.Store.NO));
            document.Add(new FacetField("a", "foo1"));
            indexWriter.AddDocument(facetsConfig.Build(taxonomyWriter, document));

            // NRT open of the index, then the writer is no longer needed.
            IndexSearcher indexSearcher = NewSearcher(indexWriter.Reader);
            indexWriter.Dispose();

            // NRT open of the taxonomy, then its writer is no longer needed either.
            var taxonomyReader = new DirectoryTaxonomyReader(taxonomyWriter);
            taxonomyWriter.Dispose();

            FacetsCollector collector = new FacetsCollector();
            indexSearcher.Search(new MatchAllDocsQuery(), collector);

            Int32FieldSource numSource = new Int32FieldSource("num");
            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxonomyReader, facetsConfig, collector, numSource);

            // Ask for top 10 labels for any dims that have counts:
            IList<FacetResult> allDims = facets.GetAllDims(10);
            Assert.True(allDims.Count == 0);

            // Looking up a specific value for "a" must be rejected.
            try
            {
                facets.GetSpecificValue("a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            // Asking for the top children of "a" must be rejected as well.
            try
            {
                facets.GetTopChildren(10, "a");
                Fail("should have hit exc");
            }
            catch (System.ArgumentException)
            {
                // expected
            }

            IOUtils.Dispose(indexSearcher.IndexReader, taxonomyReader, indexDir, taxonomyDir);
        }
Пример #36
0
        public void TestBasics()
        {
            // Indexes four short sentences and runs four CommonTermsQuery variants
            // against them, checking the hit count and the ids of the hits in rank order.
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random());
            RandomIndexWriter w        = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
            var docs = new string[]
            {
                @"this is the end of the world right", @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            // The stored "id" of each document is its index in the array above.
            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            // NOTE: assertEquals takes (expected, actual); the hit-count checks below
            // previously passed the arguments the other way round.
            {
                CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                TopDocs search = s.Search(query, 10);
                assertEquals(3, search.TotalHits);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                TopDocs search = s.Search(query, 10);
                assertEquals(2, search.TotalHits);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                TopDocs search = s.Search(query, 10);
                assertEquals(1, search.TotalHits);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST, Random().NextBoolean() ? 2F : 0.5F);
                query.Add(new Term("field", "restaurant"));
                query.Add(new Term("field", "universe"));
                TopDocs search = s.Search(query, 10);
                assertEquals(1, search.TotalHits);
                assertEquals(@"3", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Пример #37
0
        /// <summary>
        /// Indexes five single-field documents, gathers every "body" term that starts with "pi",
        /// and verifies that a <see cref="MultiPhraseQuery"/> made of "blueberry" followed by any
        /// of those terms returns two hits, whereas "strawberry" followed by the same terms
        /// returns none.
        /// </summary>
        public virtual void TestPhrasePrefix()
        {
            Directory store = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, store);

            string[] bodies =
            {
                "blueberry pie",
                "blueberry strudel",
                "blueberry pizza",
                "blueberry chewing gum",
                "piccadilly circus"
            };

            // All fields are created first and the documents are indexed afterwards,
            // in the same order as the bodies listed above.
            Document[] documents = new Document[bodies.Length];
            for (int i = 0; i < bodies.Length; i++)
            {
                documents[i] = new Document();
                documents[i].Add(NewTextField("body", bodies[i], Field.Store.YES));
            }
            foreach (Document document in documents)
            {
                indexWriter.AddDocument(document);
            }

            IndexReader indexReader = indexWriter.GetReader();
            indexWriter.Dispose();

            IndexSearcher indexSearcher = NewSearcher(indexReader);

            MultiPhraseQuery blueberryQuery = new MultiPhraseQuery();
            MultiPhraseQuery strawberryQuery = new MultiPhraseQuery();
            blueberryQuery.Add(new Term("body", "blueberry"));
            strawberryQuery.Add(new Term("body", "strawberry"));

            LinkedList<Term> prefixedTerms = new LinkedList<Term>();

            // Starting at the first term >= "pi", the enumeration yields
            // "piccadilly", "pie" and "pizza".
            string prefix = "pi";
            TermsEnum termsEnum = MultiFields.GetFields(indexReader).GetTerms("body").GetIterator(null);
            termsEnum.SeekCeil(new BytesRef(prefix));

            while (true)
            {
                string text = termsEnum.Term.Utf8ToString();
                if (!text.StartsWith(prefix, StringComparison.Ordinal))
                {
                    // First term past the prefix range: nothing further can match.
                    break;
                }

                prefixedTerms.AddLast(new Term("body", text));

                if (termsEnum.Next() == null)
                {
                    // Enumeration exhausted.
                    break;
                }
            }

            // Second phrase position: any one of the collected prefix terms.
            blueberryQuery.Add(prefixedTerms.ToArray());
            strawberryQuery.Add(prefixedTerms.ToArray());

            ScoreDoc[] hits = indexSearcher.Search(blueberryQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);

            hits = indexSearcher.Search(strawberryQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            indexReader.Dispose();
            store.Dispose();
        }
        /// <summary>
        /// Indexes the generated documents, deletes a random subset of them by term, and verifies
        /// that a <see cref="DocumentValueSourceDictionary"/> built over the resulting index
        /// returns exactly the surviving documents, each with a weight equal to the sum of its two
        /// weight fields and with its stored payload.
        /// </summary>
        public void TestWithDeletions()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            iwc.SetMergePolicy(NewLogMergePolicy());

            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
            IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
            Random rand = Random;
            List<string> termsToDel = new List<string>();

            foreach (Document doc in docs.Values)
            {
                // Randomly pick documents to delete, capped so at least one document survives.
                if (rand.nextBoolean() && termsToDel.Count < docs.Count - 1)
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            foreach (string termToDel in termsToDel)
            {
                writer.DeleteDocuments(new Term(FIELD_NAME, termToDel));
            }
            writer.Commit();
            writer.Dispose();

            // Drop the deleted documents from the expectation map. Remove reports whether the
            // key was present, so a missing entry fails the assertion instead of throwing
            // KeyNotFoundException from the indexer.
            foreach (string termToDel in termsToDel)
            {
                assertTrue("no generated document for deleted term " + termToDel, docs.Remove(termToDel));
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
            assertEquals(docs.Count, ir.NumDocs);
            ValueSource[] toAdd = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) };

            IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef f;

            while ((f = inputIterator.Next()) != null)
            {
                string field = f.Utf8ToString();

                // Each entry must correspond to a surviving document that has not been seen yet.
                Document doc;
                assertTrue("iterator returned an unexpected or repeated entry: " + field, docs.TryGetValue(field, out doc));
                docs.Remove(field);

                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(w1 + w2, inputIterator.Weight);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }

            // Every surviving document must have been returned by the iterator.
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
Пример #39
0
        /// <summary>
        /// Exercises individual <see cref="DirectSpellChecker"/> settings (MaxQueryFrequency,
        /// MinQueryLength, MaxEdits, Accuracy, MinPrefix) one at a time against a small index and
        /// asserts the number of suggestions returned for each configuration.
        /// </summary>
        public void TestOptions()
        {
            Directory directory = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true);
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, analyzer, Similarity, TimeZone);

            // The same Document instance is re-submitted after every Add, so each
            // submission also carries the "text" values added before it.
            Document doc = new Document();
            foreach (string word in new[] { "foobar", "foobar", "foobaz", "fobar" })
            {
                doc.Add(NewTextField("text", word, Field.Store.NO));
                indexWriter.AddDocument(doc);
            }

            IndexReader indexReader = indexWriter.Reader;

            // Each case below starts from a fresh checker so only one setting differs from the defaults.
            DirectSpellChecker checker = new DirectSpellChecker { MaxQueryFrequency = 0F };
            SuggestWord[] suggestions = checker.SuggestSimilar(
                new Term("text", "fobar"), 1, indexReader, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, suggestions.Length);

            checker = new DirectSpellChecker { MinQueryLength = 5 };
            suggestions = checker.SuggestSimilar(
                new Term("text", "foba"), 1, indexReader, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, suggestions.Length);

            checker = new DirectSpellChecker { MaxEdits = 1 };
            suggestions = checker.SuggestSimilar(
                new Term("text", "foobazzz"), 1, indexReader, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, suggestions.Length);

            checker = new DirectSpellChecker { Accuracy = 0.9F };
            suggestions = checker.SuggestSimilar(
                new Term("text", "foobazzz"), 1, indexReader, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, suggestions.Length);

            checker = new DirectSpellChecker { MinPrefix = 0 };
            suggestions = checker.SuggestSimilar(
                new Term("text", "roobaz"), 1, indexReader, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, suggestions.Length);
            // The identical lookup is issued a second time; its result is not asserted.
            suggestions = checker.SuggestSimilar(
                new Term("text", "roobaz"), 1, indexReader, SuggestMode.SUGGEST_MORE_POPULAR);

            checker = new DirectSpellChecker { MinPrefix = 1 };
            suggestions = checker.SuggestSimilar(
                new Term("text", "roobaz"), 1, indexReader, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, suggestions.Length);

            checker = new DirectSpellChecker { MaxEdits = 2 };
            suggestions = checker.SuggestSimilar(
                new Term("text", "fobar"), 2, indexReader, SuggestMode.SUGGEST_ALWAYS);
            assertEquals(2, suggestions.Length);

            indexReader.Dispose();
            indexWriter.Dispose();
            directory.Dispose();
        }
Пример #40
0
        /// <summary>
        /// Builds an index of NUM_DOCS documents whose "body" holds a random mix of "aaa" and
        /// "bbb" tokens, then starts NUM_SEARCH_THREADS search threads against one shared
        /// <see cref="IndexSearcher"/> and waits for all of them to finish.
        /// </summary>
        public virtual void Test()
        {
            Directory directory = NewDirectory();
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            long buildStart = Environment.TickCount;

            // TODO: replace w/ the @nightly test data; make this
            // into an optional @nightly stress test
            // One Document/Field pair is reused; only the field's text changes per iteration.
            Document doc = new Document();
            Field bodyField = NewTextField("body", "", Field.Store.NO);
            doc.Add(bodyField);

            StringBuilder text = new StringBuilder();
            for (int docIndex = 0; docIndex < NUM_DOCS; docIndex++)
            {
                int termsInDoc = Random().Next(10);
                for (int termIndex = 0; termIndex < termsInDoc; termIndex++)
                {
                    text.Append(Random().NextBoolean() ? "aaa" : "bbb").Append(' ');
                }

                bodyField.StringValue = text.ToString();
                indexWriter.AddDocument(doc);
                text.Clear();
            }

            IndexReader indexReader = indexWriter.Reader;
            indexWriter.Dispose();

            long buildEnd = Environment.TickCount;
            if (VERBOSE)
            {
                Console.WriteLine("BUILD took " + (buildEnd - buildStart));
            }

            IndexSearcher searcher = NewSearcher(indexReader);

            // State shared with every search thread.
            AtomicBoolean failed = new AtomicBoolean();
            AtomicLong netSearch = new AtomicLong();

            ThreadClass[] searchThreads = new ThreadClass[NUM_SEARCH_THREADS];
            for (int i = 0; i < NUM_SEARCH_THREADS; i++)
            {
                ThreadClass worker = new ThreadAnonymousInnerClassHelper(this, searcher, failed, netSearch);
                worker.SetDaemon(true);
                searchThreads[i] = worker;
            }

            // Start every thread before joining any of them.
            for (int i = 0; i < searchThreads.Length; i++)
            {
                searchThreads[i].Start();
            }

            for (int i = 0; i < searchThreads.Length; i++)
            {
                searchThreads[i].Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine(NUM_SEARCH_THREADS + " threads did " + netSearch.Get() + " searches");
            }

            indexReader.Dispose();
            directory.Dispose();
        }
Пример #41
0
        /// <summary>
        /// Builds the fixture used by the drill-down tests: populates the <c>dir</c>,
        /// <c>taxoDir</c>, <c>config</c>, <c>reader</c> and <c>taxo</c> members with an index of
        /// 100 documents carrying "content" text and facet dimensions "a" and "b", plus the
        /// matching taxonomy.
        /// </summary>
        public void BeforeClassDrillDownQueryTest()
        {
            dir = NewDirectory();
            Random r = Random();
            MockAnalyzer analyzer = new MockAnalyzer(r, MockTokenizer.KEYWORD, false);
            IndexWriterConfig writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            RandomIndexWriter writer = new RandomIndexWriter(r, dir, writerConfig);

            taxoDir = NewDirectory();
            TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            config = new FacetsConfig();

            // Randomize the per-dim config; both dimensions always require a dim count.
            string[] dims = { "a", "b" };
            string[] indexFieldNames = { "$a", "$b" };
            for (int d = 0; d < dims.Length; d++)
            {
                config.SetHierarchical(dims[d], Random().NextBoolean());
                config.SetMultiValued(dims[d], Random().NextBoolean());
                if (Random().NextBoolean())
                {
                    config.SetIndexFieldName(dims[d], indexFieldNames[d]);
                }
                config.SetRequireDimCount(dims[d], true);
            }

            for (int i = 0; i < 100; i++)
            {
                Document doc = new Document();

                if (i % 2 == 0) // 50 of the 100 documents
                {
                    doc.Add(new TextField("content", "foo", Field.Store.NO));
                }

                if (i % 3 == 0) // 34 of the 100 documents
                {
                    doc.Add(new TextField("content", "bar", Field.Store.NO));
                }

                if (i % 4 == 0) // 25 of the 100 documents
                {
                    // Dimension "a" gets a randomly chosen value of "1" or "2".
                    doc.Add(new FacetField("a", r.NextBoolean() ? "1" : "2"));
                }

                if (i % 5 == 0) // 20 of the 100 documents
                {
                    doc.Add(new FacetField("b", "1"));
                }

                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.Reader;
            writer.Dispose();

            taxo = new DirectoryTaxonomyReader(taxoDir);
        }