public override void SetUp()
{
    base.SetUp();
    dir = NewDirectory();
    var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    var doc = new Document
    {
        NewStringField("id", "1", Field.Store.YES),
        NewTextField("body", "some contents and more contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 5)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "2", Field.Store.YES),
        NewTextField("body", "another document with different contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 20)
    };
    iw.AddDocument(doc);
    doc = new Document
    {
        NewStringField("id", "3", Field.Store.YES),
        NewTextField("body", "crappy contents", Field.Store.NO),
        new NumericDocValuesField("popularity", 2)
    };
    iw.AddDocument(doc);
    reader = iw.Reader;
    searcher = new IndexSearcher(reader);
    iw.Dispose();
}
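        // Descending sort on a SortedSetSortField: doc 2 ("baz") ranks ahead of doc 1, whose lowest term is "bar".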
        public void TestReverse()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(NewStringField("value", "foo", Field.Store.NO));
            doc.Add(NewStringField("value", "bar", Field.Store.NO));
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("value", "baz", Field.Store.NO));
            doc.Add(NewStringField("id", "2", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            Sort sort = new Sort(new SortedSetSortField("value", true));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            assertEquals(2, td.TotalHits);
            // 'bar' comes before 'baz'
            assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
            assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));

            ir.Dispose();
            dir.Dispose();
        }
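        // With Selector.MAX each doc sorts by its largest term, so "baz" (doc 2) comes before "foo" (doc 1).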
        public void TestMax()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
            doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
            doc.Add(NewStringField("id", "2", Field.Store.YES));
            writer.AddDocument(doc);
            IndexReader ir = writer.Reader;
            writer.Dispose();

            // slow wrapper does not support random access ordinals (there is no need for that!)
            IndexSearcher searcher = NewSearcher(ir, false);

            Sort sort = new Sort(new SortedSetSortField("value", false, Selector.MAX));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            assertEquals(2, td.TotalHits);
            // 'baz' comes before 'foo'
            assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
            assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
            assertNoFieldCaches();

            ir.Dispose();
            dir.Dispose();
        }
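        // Ascending sort on a plain STRING SortField: "bar" ranks before "foo".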
        public virtual void TestString()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            doc.Add(NewStringField("value", "foo", Field.Store.YES));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("value", "bar", Field.Store.YES));
            writer.AddDocument(doc);
            IndexReader ir = writer.Reader;
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(ir);
            Sort sort = new Sort(new SortField("value", SortField.Type_e.STRING));

            TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
            Assert.AreEqual(2, td.TotalHits);
            // 'bar' comes before 'foo'
            Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
            Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));

            ir.Dispose();
            dir.Dispose();
        }
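        // Builds a large index in which only two scattered documents contain "tangfulin"; all others share a filler term.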
        public void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);

            Document doc = new Document();
            Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
            doc.Add(field);

            for (int i = 0; i < 5137; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            field.StringValue = "meaninglessnames";
            for (int i = 5138; i < 11377; ++i)
            {
                writer.AddDocument(doc);
            }

            field.StringValue = "tangfulin";
            writer.AddDocument(doc);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
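 // Same three-document setup as above, but with a log merge policy and ForceMerge(1) so everything lands in one segment.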
 public override void SetUp()
 {
     base.SetUp();
     dir = NewDirectory();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     iwc.SetMergePolicy(NewLogMergePolicy());
     var iw = new RandomIndexWriter(Random(), dir, iwc);
     var doc = new Document
     {
         NewStringField("id", "1", Field.Store.YES),
         NewTextField("body", "some contents and more contents", Field.Store.NO),
         new NumericDocValuesField("popularity", 5)
     };
     iw.AddDocument(doc);
     doc = new Document
     {
         NewStringField("id", "2", Field.Store.YES),
         NewTextField("body", "another document with different contents", Field.Store
             .NO),
         new NumericDocValuesField("popularity", 20)
     };
     iw.AddDocument(doc);
     doc = new Document
     {
         NewStringField("id", "3", Field.Store.YES),
         NewTextField("body", "crappy contents", Field.Store.NO),
         new NumericDocValuesField("popularity", 2)
     };
     iw.AddDocument(doc);
     iw.ForceMerge(1);
     reader = iw.Reader;
     iw.Dispose();
 }
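        // Indexes the numbers 900 through 1111 spelled out in English, plus three hand-written phrase documents.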
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true), Similarity, TimeZone);

            for (int i = 900; i < 1112; i++)
            {
                Document doc = new Document();
                string num = Regex.Replace(Regex.Replace(English.IntToEnglish(i), "[-]", " "), "[,]", "");
                doc.Add(NewTextField("numbers", num, Field.Store.NO));
                writer.AddDocument(doc);
            }

            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "thou hast sand betwixt thy toes", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "hundredeight eightyeight yeight", Field.Store.NO));
                writer.AddDocument(doc);
            }
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", "tres y cinco", Field.Store.NO));
                writer.AddDocument(doc);
            }

            writer.Commit();
            writer.Dispose();
        }
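        // SortedSetDocValues faceting over the multi-valued dim "a": verifies top-children counts and a two-dim drill-down.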
        public virtual void TestBasic()
        {

            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            FacetsConfig config = new FacetsConfig();
            config.SetMultiValued("a", true);
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            doc.Add(new SortedSetDocValuesFacetField("a", "bar"));
            doc.Add(new SortedSetDocValuesFacetField("a", "zoo"));
            doc.Add(new SortedSetDocValuesFacetField("b", "baz"));
            writer.AddDocument(config.Build(doc));
            if (Random().NextBoolean())
            {
                writer.Commit();
            }

            doc = new Document();
            doc.Add(new SortedSetDocValuesFacetField("a", "foo"));
            writer.AddDocument(config.Build(doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.Reader);

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            SortedSetDocValuesFacetCounts facets = new SortedSetDocValuesFacetCounts(state, c);

            Assert.AreEqual("dim=a path=[] value=4 childCount=3\n  foo (2)\n  bar (1)\n  zoo (1)\n", facets.GetTopChildren(10, "a").ToString());
            Assert.AreEqual("dim=b path=[] value=1 childCount=1\n  baz (1)\n", facets.GetTopChildren(10, "b").ToString());

            // DrillDown:
            DrillDownQuery q = new DrillDownQuery(config);
            q.Add("a", "foo");
            q.Add("b", "baz");
            TopDocs hits = searcher.Search(q, 1);
            Assert.AreEqual(1, hits.TotalHits);

            IOUtils.Close(writer, searcher.IndexReader, dir);
        }
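        // A field indexed with DOCS_AND_FREQS exposes frequencies ("test" occurs twice per doc) but no positions enum.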
        public virtual void TestBasic()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            Field f = NewField("foo", "this is a test test", ft);
            doc.Add(f);
            for (int i = 0; i < 100; i++)
            {
                w.AddDocument(doc);
            }

            IndexReader reader = w.Reader;
            w.Dispose();

            Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test")));

            DocsEnum de = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
            while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(2, de.Freq());
            }

            reader.Dispose();
            dir.Dispose();
        }
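        // One randomly chosen field is never indexed; the TermsFilter must match the remaining num - 1 docs.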
        public void TestFieldNotPresent()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            int num = AtLeast(3);
            int skip = Random().Next(num);
            var terms = new List<Term>();
            for (int i = 0; i < num; i++)
            {
                terms.Add(new Term("field" + i, "content1"));
                Document doc = new Document();
                if (skip == i)
                {
                    continue;
                }
                doc.Add(NewStringField("field" + i, "content1", Field.Store.YES));
                w.AddDocument(doc);
            }

            w.ForceMerge(1);
            IndexReader reader = w.Reader;
            w.Dispose();
            assertEquals(1, reader.Leaves.size());

            AtomicReaderContext context = reader.Leaves.First();
            TermsFilter tf = new TermsFilter(terms);

            FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(context, context.AtomicReader.LiveDocs);
            assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.Cardinality());
            reader.Dispose();
            dir.Dispose();
        }
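 // Builds a multi-segment index under a sorting merge policy, with random commits and random deletes on the "s" field.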
 private void CreateRandomIndexes(int maxSegments)
 {
     dir = NewDirectory();
     numDocs = AtLeast(150);
     int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
     ISet<string> randomTerms = new HashSet<string>();
     while (randomTerms.size() < numTerms)
     {
         randomTerms.add(TestUtil.RandomSimpleString(Random()));
     }
     terms = new List<string>(randomTerms);
     int seed = Random().Next();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
     iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);
     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = RandomDocument();
         iw.AddDocument(doc);
         if (i == numDocs / 2 || (i != numDocs - 1 && Random().nextInt(8) == 0))
         {
             iw.Commit();
         }
         if (Random().nextInt(15) == 0)
         {
             string term = RandomInts.RandomFrom(Random(), terms);
             iw.DeleteDocuments(new Term("s", term));
         }
     }
     reader = iw.Reader;
 }
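        // PrefixQuery over path-like categories; the empty prefix matches all docs and rewrites without a PrefixTermsEnum.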
        public virtual void TestPrefixQuery_Mem()
        {
            Directory directory = NewDirectory();

            string[] categories = new string[] { "/Computers", "/Computers/Mac", "/Computers/Windows" };
            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            for (int i = 0; i < categories.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("category", categories[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader reader = writer.Reader;

            PrefixQuery query = new PrefixQuery(new Term("category", "/Computers"));
            IndexSearcher searcher = NewSearcher(reader);
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "All documents in /Computers category and below");

            query = new PrefixQuery(new Term("category", "/Computers/Mac"));
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "One in /Computers/Mac");

            query = new PrefixQuery(new Term("category", ""));
            Terms terms = MultiFields.GetTerms(searcher.IndexReader, "category");
            Assert.IsFalse(query.GetTermsEnum(terms) is PrefixTermsEnum);
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length, "everything");
            writer.Dispose();
            reader.Dispose();
            directory.Dispose();
        }
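        // Indexes random unicode terms, sometimes under an empty field name, and opens two searchers over the same reader.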
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            FieldName = Random().NextBoolean() ? "field" : ""; // sometimes use an empty string as field name
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc = new Document();
            Field field = NewStringField(FieldName, "", Field.Store.NO);
            doc.Add(field);
            List<string> terms = new List<string>();
            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.StringValue = s;
                terms.Add(s);
                writer.AddDocument(doc);
            }

            if (VERBOSE)
            {
                // utf16 order
                terms.Sort();
                Console.WriteLine("UTF16 order:");
                foreach (string s in terms)
                {
                    Console.WriteLine("  " + UnicodeUtil.ToHexString(s));
                }
            }

            Reader = writer.Reader;
            Searcher1 = NewSearcher(Reader);
            Searcher2 = NewSearcher(Reader);
            writer.Dispose();
        }
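 // Writes 100 docs under the Lucene46 codec with random commits, then verifies the headers of every index file.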
 public virtual void Test()
 {
     Directory dir = NewDirectory();
     IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     conf.SetCodec(new Lucene46Codec());
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);
     Document doc = new Document();
     // these fields should sometimes get term vectors, etc
     Field idField = NewStringField("id", "", Field.Store.NO);
     Field bodyField = NewTextField("body", "", Field.Store.NO);
     Field dvField = new NumericDocValuesField("dv", 5);
     doc.Add(idField);
     doc.Add(bodyField);
     doc.Add(dvField);
     for (int i = 0; i < 100; i++)
     {
         idField.StringValue = Convert.ToString(i);
         bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
         riw.AddDocument(doc);
         if (Random().Next(7) == 0)
         {
             riw.Commit();
         }
         // TODO: we should make a new format with a clean header...
         // if (Random().nextInt(20) == 0) {
         //  riw.DeleteDocuments(new Term("id", Integer.toString(i)));
         // }
     }
     riw.Dispose();
     CheckHeaders(dir);
     dir.Dispose();
 }
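        // Splits ten docs across two side directories so MultiReader variants (including one with duplicates) can be compared to a single reader.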
        public void BeforeClass()
        {
            Dir = NewDirectory();
            Sdir1 = NewDirectory();
            Sdir2 = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()), Similarity, TimeZone);
            RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()), Similarity, TimeZone);

            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO));
                writer.AddDocument(doc);
                ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc);
            }
            writer.ForceMerge(1);
            swriter1.ForceMerge(1);
            swriter2.ForceMerge(1);
            writer.Dispose();
            swriter1.Dispose();
            swriter2.Dispose();

            Reader = DirectoryReader.Open(Dir);
            Searcher = NewSearcher(Reader);

            MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true);
            MultiSearcher = NewSearcher(MultiReader);

            MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true);
            MultiSearcherDupls = NewSearcher(MultiReaderDupls);
        }
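        // Indexes the integers 0-999 as stored, norm-free text (the Java original zero-padded them with DecimalFormat "000").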
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.OmitNorms = true;
            Field field = NewField("field", "", customType);
            doc.Add(field);

            NumberFormatInfo df = new NumberFormatInfo();
            df.NumberDecimalDigits = 0;

            //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString(df);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
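        // Fills 2049+ docs with text, numeric, and doc-values fields to exercise sorting and range queries.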
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            var iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            int numDocs = TestUtil.NextInt(Random(), 2049, 4000);
            for (int i = 0; i < numDocs; i++)
            {
                var document = new Document
                {
                    NewTextField("english", English.IntToEnglish(i), Field.Store.NO),
                    NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO),
                    NewStringField("byte", string.Empty + (unchecked((byte) Random().Next())), Field.Store.NO),
                    NewStringField("short", string.Empty + ((short) Random().Next()), Field.Store.NO),
                    new IntField("int", Random().Next(), Field.Store.NO),
                    new LongField("long", Random().NextLong(), Field.Store.NO),
                    new FloatField("float", Random().NextFloat(), Field.Store.NO),
                    new DoubleField("double", Random().NextDouble(), Field.Store.NO),
                    new NumericDocValuesField("intdocvalues", Random().Next()),
                    new FloatDocValuesField("floatdocvalues", Random().NextFloat())
                };
                iw.AddDocument(document);
            }
            reader = iw.Reader;
            iw.Dispose();
            searcher = NewSearcher(reader);
        }
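 // Every fifth doc lacks the first sort field, every seventh matches the query, and all carry a "mandant" value.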
 public override void SetUp()
 {
     base.SetUp();
     INDEX_SIZE = AtLeast(2000);
     Index = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Index);
     RandomGen random = new RandomGen(this, Random());
     for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if too low, the problem doesn't show up
     {
         Document doc = new Document();
         if ((i % 5) != 0) // some documents must not have an entry in the first sort field
         {
             doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES));
         }
         if ((i % 7) == 0) // some documents to match the query (see below)
         {
             doc.Add(NewTextField("content", "test", Field.Store.YES));
         }
         // every document has a defined 'mandant' field
         doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
     Query = new TermQuery(new Term("content", "test"));
 }
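        // (1 OR 2) AND NOT 9: the nested BooleanQuery should match exactly two of the four indexed values.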
        public virtual void TestMethod()
        {
            Directory directory = NewDirectory();

            string[] values = new string[] { "1", "2", "3", "4" };

            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
            for (int i = 0; i < values.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(FIELD, values[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader ir = writer.Reader;
            writer.Dispose();

            BooleanQuery booleanQuery1 = new BooleanQuery();
            booleanQuery1.Add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
            booleanQuery1.Add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

            BooleanQuery query = new BooleanQuery();
            query.Add(booleanQuery1, BooleanClause.Occur.MUST);
            query.Add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

            IndexSearcher indexSearcher = NewSearcher(ir);
            ScoreDoc[] hits = indexSearcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "Number of matched documents");
            ir.Dispose();
            directory.Dispose();
        }
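        // Indexes 100 random docs with full term vectors (positions and offsets) and wraps the reader with SlowCompositeReaderWrapper.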
        public override void SetUp()
        {
            base.SetUp();
            _dir = NewDirectory();
            _indexWriter = new RandomIndexWriter(Random(), _dir, new MockAnalyzer(Random()), Similarity, TimeZone);

            FieldType ft = new FieldType(TextField.TYPE_STORED);
            ft.StoreTermVectors = true;
            ft.StoreTermVectorOffsets = true;
            ft.StoreTermVectorPositions = true;

            Analyzer analyzer = new MockAnalyzer(Random());

            Document doc;
            for (int i = 0; i < 100; i++)
            {
                doc = new Document();
                doc.Add(new Field(_idFieldName, Random().toString(), ft));
                doc.Add(new Field(_textFieldName, new StringBuilder(Random().toString()).append(Random().toString()).append(Random().toString()).toString(), ft));
                doc.Add(new Field(_classFieldName, Random().toString(), ft));
                _indexWriter.AddDocument(doc, analyzer);
            }

            _indexWriter.Commit();

            _originalIndex = SlowCompositeReaderWrapper.Wrap(_indexWriter.Reader);
        }
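        // Rollback() of buffered updates must leave the original five documents intact.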
        public virtual void TestRollbackIntegrityWithBufferFlush()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter rw = new RandomIndexWriter(Random(), dir);
            for (int i = 0; i < 5; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES));
                rw.AddDocument(doc);
            }
            rw.Dispose();

            // If buffer size is small enough to cause a flush, errors ensue...
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND));

            for (int i = 0; i < 3; i++)
            {
                Document doc = new Document();
                string value = Convert.ToString(i);
                doc.Add(NewStringField("pk", value, Field.Store.YES));
                doc.Add(NewStringField("text", "foo", Field.Store.YES));
                w.UpdateDocument(new Term("pk", value), doc);
            }
            w.Rollback();

            IndexReader r = DirectoryReader.Open(dir);
            Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
            r.Dispose();
            dir.Dispose();
        }
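        // Adds terms with fixed per-doc probabilities: always, ~90% (common), ~50% (medium), and ~10% (rare).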
        public static void BeforeClass()
        {
            Dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();

                AddSome(doc, AlwaysTerms);

                if (Random().Next(100) < 90)
                {
                    AddSome(doc, CommonTerms);
                }
                if (Random().Next(100) < 50)
                {
                    AddSome(doc, MediumTerms);
                }
                if (Random().Next(100) < 10)
                {
                    AddSome(doc, RareTerms);
                }
                iw.AddDocument(doc);
            }
            iw.ForceMerge(1);
            iw.Dispose();
            r = DirectoryReader.Open(Dir);
            atomicReader = GetOnlySegmentReader(r);
            Searcher = new IndexSearcher(atomicReader);
            Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper();
        }
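        // Same 0-999 integer index as the earlier setup, but with an unstored field; prints the searcher when VERBOSE.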
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));

            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.NO);
            doc.Add(field);

            NumberFormatInfo df = new NumberFormatInfo();
            df.NumberDecimalDigits = 0;

            //NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
            for (int i = 0; i < 1000; i++)
            {
                field.StringValue = i.ToString(df);
                writer.AddDocument(doc);
            }

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp searcher=" + Searcher);
            }
        }
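        // Each DirectSpellChecker suggestion score must equal the LuceneLevenshteinDistance in both argument orders.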
        public void TestInternalLevenshteinDistance()
        {
            DirectSpellChecker spellchecker = new DirectSpellChecker();
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
                new MockAnalyzer(Random(), MockTokenizer.KEYWORD, true), Similarity, TimeZone);

            string[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoi𐑍" };
            for (int i = 0; i < termsToAdd.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("repentance", termsToAdd[i], Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.Reader;
            string misspelled = "metanoix";
            SuggestWord[] similar = spellchecker.SuggestSimilar(new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length == 4);

            IStringDistance sd = spellchecker.Distance;
            assertTrue(sd is LuceneLevenshteinDistance);
            foreach (SuggestWord word in similar)
            {
                assertTrue(word.Score == sd.GetDistance(word.String, misspelled));
                assertTrue(word.Score == sd.GetDistance(misspelled, word.String)); // LUCENENET TODO: Perhaps change this to word.ToString()?
            }

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
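        // Indexes random unicode terms and builds a string-union automaton over them for random-regex comparison.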
        public override void SetUp()
        {
            base.SetUp();
            // we generate awful regexps: good for testing.
            // but for the preflex codec the test can be very slow, so use fewer iterations.
            NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
            Dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc = new Document();
            Field field = NewStringField("field", "", Field.Store.YES);
            doc.Add(field);
            Terms = new SortedSet<BytesRef>();

            int num = AtLeast(200);
            for (int i = 0; i < num; i++)
            {
                string s = TestUtil.RandomUnicodeString(Random());
                field.StringValue = s;
                Terms.Add(new BytesRef(s));
                writer.AddDocument(doc);
            }

            TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
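        // Each document repeats the gender/first/last field group once per person it describes.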
        public void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            writer.AddDocument(Doc(new Field[] { GetField("id", "0"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "1"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "2"), GetField("gender", "female"), GetField("first", "greta"), GetField("last", "jones"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "male"), GetField("first", "james"), GetField("last", "jones") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "3"), GetField("gender", "female"), GetField("first", "lisa"), GetField("last", "jones"), GetField("gender", "male"), GetField("first", "bob"), GetField("last", "costas") }));

            writer.AddDocument(Doc(new Field[] { GetField("id", "4"), GetField("gender", "female"), GetField("first", "sally"), GetField("last", "smith"), GetField("gender", "female"), GetField("first", "linda"), GetField("last", "dixit"), GetField("gender", "male"), GetField("first", "bubba"), GetField("last", "jones") }));
            Reader = writer.Reader;
            writer.Dispose();
            Searcher = NewSearcher(Reader);
        }
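        // Builds a small baseline index, repeatedly doubles a copy of it until it exceeds 3000 docs, then appends extra "field2" docs.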
        public static void BeforeClass()
        {
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            for (int i = 0; i < DocFields.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, DocFields[i], Field.Store.NO));
                writer.AddDocument(doc);
            }
            writer.Dispose();
            LittleReader = DirectoryReader.Open(Directory);
            Searcher = NewSearcher(LittleReader);
            // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
            Searcher.Similarity = new DefaultSimilarity();

            // Make big index
            Dir2 = new MockDirectoryWrapper(Random(), new RAMDirectory(Directory, IOContext.DEFAULT));

            // First multiply small test index:
            MulFactor = 1;
            int docCount = 0;
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now copy index...");
            }
            do
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: cycle...");
                }
                Directory copy = new MockDirectoryWrapper(Random(), new RAMDirectory(Dir2, IOContext.DEFAULT));
                RandomIndexWriter w = new RandomIndexWriter(Random(), Dir2);
                w.AddIndexes(copy);
                docCount = w.MaxDoc();
                w.Dispose();
                MulFactor *= 2;
            } while (docCount < 3000);

            RandomIndexWriter riw = new RandomIndexWriter(Random(), Dir2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
            Document doc_ = new Document();
            doc_.Add(NewTextField("field2", "xxx", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            doc_ = new Document();
            doc_.Add(NewTextField("field2", "big bad bug", Field.Store.NO));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                riw.AddDocument(doc_);
            }
            Reader = riw.Reader;
            BigSearcher = NewSearcher(Reader);
            riw.Dispose();
        }
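        // Reuses one Field across token streams with and without payloads; the stored payload must survive in the term vector.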
        public virtual void TestMixupDocs()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads = true;
            customType.StoreTermVectorOffsets = Random().NextBoolean();
            Field field = new Field("field", "", customType);
            TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.TokenStream = ts;
            doc.Add(field);
            writer.AddDocument(doc);

            Token withPayload = new Token("withPayload", 0, 11);
            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
            field.TokenStream = ts;
            writer.AddDocument(doc);

            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
            field.TokenStream = ts;
            writer.AddDocument(doc);

            DirectoryReader reader = writer.Reader;
            Terms terms = reader.GetTermVector(1, "field");
            Debug.Assert(terms != null);
            TermsEnum termsEnum = terms.Iterator(null);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(0, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.Payload);
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
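        // Three short docs with misspellings of "brown" ("broun", "broxn"), suitable for fuzzy-matching checks.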
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
            Document doc = new Document();
            Field field = NewTextField("field", "", Field.Store.NO);
            doc.Add(field);

            field.StringValue = "quick brown fox";
            iw.AddDocument(doc);
            field.StringValue = "jumps over lazy broun dog";
            iw.AddDocument(doc);
            field.StringValue = "jumps over extremely very lazy broxn dog";
            iw.AddDocument(doc);
            Reader = iw.Reader;
            iw.Dispose();
            Searcher = NewSearcher(Reader);
        }
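 // Minimal repro: indexing a single document through BugReproTokenStream must complete without error.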
 public virtual void Test()
 {
     Directory dir = NewDirectory();
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir);
     Document doc = new Document();
     doc.Add(new TextField("eng", new BugReproTokenStream()));
     riw.AddDocument(doc);
     riw.Dispose();
     dir.Dispose();
 }
        /// <summary>
        /// LUCENENET specific
        /// Passed in because NewStringField and NewIndexWriterConfig are no
        /// longer static.
        /// </summary>
        private IndexReader Build(Random random, TestIndex index)
        {
            /* build an index */

            Document doc       = new Document();
            Field    idField   = NewStringField(random, "id", "", Field.Store.YES);
            Field    randField = NewStringField(random, "rand", "", Field.Store.YES);
            Field    bodyField = NewStringField(random, "body", "", Field.Store.NO);

            doc.Add(idField);
            doc.Add(randField);
            doc.Add(bodyField);

            RandomIndexWriter writer = new RandomIndexWriter(random, index.Index, NewIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(TestUtil.NextInt32(random, 50, 1000)).SetMergePolicy(NewLogMergePolicy()));

            TestUtil.ReduceOpenFiles(writer.IndexWriter);

            while (true)
            {
                int minCount = 0;
                int maxCount = 0;

                for (int d = MinId; d <= MaxId; d++)
                {
                    idField.SetStringValue(Pad(d));
                    int r = index.AllowNegativeRandomInts ? random.Next() : random.Next(int.MaxValue);
                    if (index.MaxR < r)
                    {
                        index.MaxR = r;
                        maxCount   = 1;
                    }
                    else if (index.MaxR == r)
                    {
                        maxCount++;
                    }

                    if (r < index.MinR)
                    {
                        index.MinR = r;
                        minCount   = 1;
                    }
                    else if (r == index.MinR)
                    {
                        minCount++;
                    }
                    randField.SetStringValue(Pad(r));
                    bodyField.SetStringValue("body");
                    writer.AddDocument(doc);
                }

                if (minCount == 1 && maxCount == 1)
                {
                    // our subclasses rely on only 1 doc having the min or
                    // max, so we loop until we satisfy that.  it should be
                    // exceedingly rare (Yonik calculates 1 in ~429,000 times)
                    // that this loop requires more than one try:
                    IndexReader ir = writer.GetReader();
                    writer.Dispose();
                    return(ir);
                }

                // try again
                writer.DeleteAll();
            }
        }
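        // FieldCache lookups on stored-only (non-indexed) fields must return default values and cache nothing.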
        public virtual void TestNonIndexedFields()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document doc = new Document();

            doc.Add(new StoredField("bogusbytes", "bogus"));
            doc.Add(new StoredField("bogusshorts", "bogus"));
            doc.Add(new StoredField("bogusints", "bogus"));
            doc.Add(new StoredField("boguslongs", "bogus"));
            doc.Add(new StoredField("bogusfloats", "bogus"));
            doc.Add(new StoredField("bogusdoubles", "bogus"));
            doc.Add(new StoredField("bogusterms", "bogus"));
            doc.Add(new StoredField("bogustermsindex", "bogus"));
            doc.Add(new StoredField("bogusmultivalued", "bogus"));
            doc.Add(new StoredField("bogusbits", "bogus"));
            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();

            AtomicReader ar = GetOnlySegmentReader(ir);

            IFieldCache cache = FieldCache.DEFAULT;

            cache.PurgeAllCaches();
            Assert.AreEqual(0, cache.GetCacheEntries().Length);

#pragma warning disable 612, 618
            Bytes bytes = cache.GetBytes(ar, "bogusbytes", true);
            Assert.AreEqual((byte)0, bytes.Get(0));

            Int16s shorts = cache.GetInt16s(ar, "bogusshorts", true);
            Assert.AreEqual(0, shorts.Get(0));
#pragma warning restore 612, 618

            Int32s ints = cache.GetInt32s(ar, "bogusints", true);
            Assert.AreEqual(0, ints.Get(0));

            Int64s longs = cache.GetInt64s(ar, "boguslongs", true);
            Assert.AreEqual(0, longs.Get(0));

            Singles floats = cache.GetSingles(ar, "bogusfloats", true);
            Assert.AreEqual(0, floats.Get(0), 0.0f);

            Doubles doubles = cache.GetDoubles(ar, "bogusdoubles", true);
            Assert.AreEqual(0, doubles.Get(0), 0.0D);

            BytesRef        scratch  = new BytesRef();
            BinaryDocValues binaries = cache.GetTerms(ar, "bogusterms", true);
            binaries.Get(0, scratch);
            Assert.AreEqual(0, scratch.Length);

            SortedDocValues sorted = cache.GetTermsIndex(ar, "bogustermsindex");
            Assert.AreEqual(-1, sorted.GetOrd(0));
            sorted.Get(0, scratch);
            Assert.AreEqual(0, scratch.Length);

            SortedSetDocValues sortedSet = cache.GetDocTermOrds(ar, "bogusmultivalued");
            sortedSet.SetDocument(0);
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());

            IBits bits = cache.GetDocsWithField(ar, "bogusbits");
            Assert.IsFalse(bits.Get(0));

            // check that we cached nothing
            Assert.AreEqual(0, cache.GetCacheEntries().Length);
            ir.Dispose();
            dir.Dispose();
        }
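        // Seeds the classification index with politics and technology snippets plus one unlabeled document.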
        private void PopulateSampleIndex(Analyzer analyzer)
        {
            indexWriter.DeleteAll();
            indexWriter.Commit();

            String text;

            Document doc = new Document();

            text = "The traveling press secretary for Mitt Romney lost his cool and cursed at reporters " +
                   "who attempted to ask questions of the Republican presidential candidate in a public plaza near the Tomb of " +
                   "the Unknown Soldier in Warsaw Tuesday.";
            doc.Add(new Field(textFieldName, text, ft));
            doc.Add(new Field(categoryFieldName, "politics", ft));
            doc.Add(new Field(booleanFieldName, "true", ft));

            indexWriter.AddDocument(doc, analyzer);

            doc  = new Document();
            text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
                   " States, that he will be tougher against Iran's nuclear ambitions than President Barack Obama.";
            doc.Add(new Field(textFieldName, text, ft));
            doc.Add(new Field(categoryFieldName, "politics", ft));
            doc.Add(new Field(booleanFieldName, "true", ft));
            indexWriter.AddDocument(doc, analyzer);

            doc  = new Document();
            text = "And there's a threshold question that he has to answer for the American people and " +
                   "that's whether he is prepared to be commander-in-chief,\" she continued. \"As we look to the past events, we " +
                   "know that this raises some questions about his preparedness and we'll see how the rest of his trip goes.\"";
            doc.Add(new Field(textFieldName, text, ft));
            doc.Add(new Field(categoryFieldName, "politics", ft));
            doc.Add(new Field(booleanFieldName, "true", ft));
            indexWriter.AddDocument(doc, analyzer);

            doc  = new Document();
            text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
                   "keep quiet and not go there,\" said Alan Lizotte, dean and professor at the State University of New York at " +
                   "Albany's School of Criminal Justice.";
            doc.Add(new Field(textFieldName, text, ft));
            doc.Add(new Field(categoryFieldName, "politics", ft));
            doc.Add(new Field(booleanFieldName, "true", ft));
            indexWriter.AddDocument(doc, analyzer);

            doc  = new Document();
            text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
                   "technology at the University of Wisconsin-La Crosse, documented the historic moment and shared it with the " +
                   "world through the Internet.";
            doc.Add(new Field(textFieldName, text, ft));
            doc.Add(new Field(categoryFieldName, "technology", ft));
            doc.Add(new Field(booleanFieldName, "false", ft));
            indexWriter.AddDocument(doc, analyzer);

            doc  = new Document();
            text = "So, about all those experts and analysts who've spent the past year or so saying " +
                   "Facebook was going to make a phone. A new expert has stepped forward to say it's not going to happen.";
            doc.Add(new Field(textFieldName, text, ft));
            doc.Add(new Field(categoryFieldName, "technology", ft));
            doc.Add(new Field(booleanFieldName, "false", ft));
            indexWriter.AddDocument(doc, analyzer);

            doc  = new Document();
            text = "More than 400 million people trust Google with their e-mail, and 50 million store files" +
                   " in the cloud using the Dropbox service. People manage their bank accounts, pay bills, trade stocks and " +
                   "generally transfer or store huge volumes of personal data online.";
            doc.Add(new Field(textFieldName, text, ft));
            doc.Add(new Field(categoryFieldName, "technology", ft));
            doc.Add(new Field(booleanFieldName, "false", ft));
            indexWriter.AddDocument(doc, analyzer);

            doc  = new Document();
            text = "unlabeled doc";
            doc.Add(new Field(textFieldName, text, ft));
            indexWriter.AddDocument(doc, analyzer);

            indexWriter.Commit();
        }
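        // Populates one field per FieldCache-supported type (long, double, byte, short, int, float) plus sparse and multi-valued fields.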
        public override void BeforeClass()
        {
            base.BeforeClass();

            NUM_DOCS  = AtLeast(500);
            NUM_ORDS  = AtLeast(2);
            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            long   theLong           = long.MaxValue;
            double theDouble         = double.MaxValue;
            sbyte  theByte           = sbyte.MaxValue;
            short  theShort          = short.MaxValue;
            int    theInt            = int.MaxValue;
            float  theFloat          = float.MaxValue;

            unicodeStrings = new string[NUM_DOCS];
            //MultiValued = new BytesRef[NUM_DOCS, NUM_ORDS];
            multiValued = RectangularArrays.ReturnRectangularArray <BytesRef>(NUM_DOCS, NUM_ORDS);
            if (Verbose)
            {
                Console.WriteLine("TEST: setUp");
            }
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("theLong", (theLong--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theDouble", (theDouble--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theByte", (theByte--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theShort", (theShort--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theInt", (theInt--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theFloat", (theFloat--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));
                if (i % 2 == 0)
                {
                    doc.Add(NewStringField("sparse", (i).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                }

                if (i % 2 == 0)
                {
                    doc.Add(new Int32Field("numInt", i, Field.Store.NO));
                }

                // sometimes skip the field:
                if (Random.Next(40) != 17)
                {
                    unicodeStrings[i] = GenerateString(i);
                    doc.Add(NewStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
                }

                // sometimes skip the field:
                if (Random.Next(10) != 8)
                {
                    for (int j = 0; j < NUM_ORDS; j++)
                    {
                        string newValue = GenerateString(i);
                        multiValued[i][j] = new BytesRef(newValue);
                        doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
                    }
                    Array.Sort(multiValued[i]);
                }
                writer.AddDocument(doc);
            }
            IndexReader r = writer.GetReader();

            reader = SlowCompositeReaderWrapper.Wrap(r);
            writer.Dispose();
        }
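        // Grouped faceting on hotel/airport/duration, with and without doc values, across a forced commit boundary.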
        public void TestSimple()
        {
            string    groupField = "hotel";
            FieldType customType = new FieldType();

            customType.IsStored = true;

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            bool useDv    = canUseDV && Random.nextBoolean();

            // 0
            Document doc = new Document();

            AddField(doc, groupField, "a", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            AddField(doc, "airport", "dus", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.GetReader());

            IList <TermGroupFacetCollector.FacetEntry> entries       = null;
            AbstractGroupFacetCollector groupedAirportFacetCollector = null;

            TermGroupFacetCollector.GroupedFacetResult airportResult = null;

            foreach (int limit in new int[] { 2, 10, 100, int.MaxValue })
            {
                // any of these limits is plenty for the data we have

                groupedAirportFacetCollector = CreateRandomCollector(
                    useDv ? "hotel_dv" : "hotel",
                    useDv ? "airport_dv" : "airport", null, false);
                indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
                int maxOffset = 5;
                airportResult = groupedAirportFacetCollector.MergeSegmentResults(
                    int.MaxValue == limit ? limit : maxOffset + limit, 0, false);

                assertEquals(3, airportResult.TotalCount);
                assertEquals(0, airportResult.TotalMissingCount);

                entries = airportResult.GetFacetEntries(maxOffset, limit);
                assertEquals(0, entries.size());

                entries = airportResult.GetFacetEntries(0, limit);
                assertEquals(2, entries.size());
                assertEquals("ams", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("dus", entries[1].Value.Utf8ToString());
                assertEquals(1, entries[1].Count);

                entries = airportResult.GetFacetEntries(1, limit);
                assertEquals(1, entries.size());
                assertEquals("dus", entries[0].Value.Utf8ToString());
                assertEquals(1, entries[0].Count);
            }

            AbstractGroupFacetCollector groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);

            indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
            TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 0, false);
            assertEquals(4, durationResult.TotalCount);
            assertEquals(0, durationResult.TotalMissingCount);

            entries = durationResult.GetFacetEntries(0, 10);
            assertEquals(2, entries.size());
            assertEquals("10", entries[0].Value.Utf8ToString());
            assertEquals(2, entries[0].Count);
            assertEquals("5", entries[1].Value.Utf8ToString());
            assertEquals(2, entries[1].Count);

            // 5
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            // missing airport
            if (useDv)
            {
                AddField(doc, "airport", "", useDv);
            }
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            // 6
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            // 7
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "15", useDv);
            w.AddDocument(doc);

            // 8
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            indexSearcher.IndexReader.Dispose();
            indexSearcher = NewSearcher(w.GetReader());
            groupedAirportFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
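            // orderByCount = true: merged entries are sorted by hit count instead of by term.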
            airportResult = groupedAirportFacetCollector.MergeSegmentResults(3, 0, true);
            entries       = airportResult.GetFacetEntries(1, 2);
            assertEquals(2, entries.size());
            if (useDv)
            {
                assertEquals(6, airportResult.TotalCount);
                assertEquals(0, airportResult.TotalMissingCount);
                assertEquals("bru", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("", entries[1].Value.Utf8ToString());
                assertEquals(1, entries[1].Count);
            }
            else
            {
                assertEquals(5, airportResult.TotalCount);
                assertEquals(1, airportResult.TotalMissingCount);
                assertEquals("bru", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("dus", entries[1].Value.Utf8ToString());
                assertEquals(1, entries[1].Count);
            }

            groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
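            // minCount = 2: only facet values with a count of at least two survive the merge.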
            durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 2, true);
            assertEquals(5, durationResult.TotalCount);
            assertEquals(0, durationResult.TotalMissingCount);

            entries = durationResult.GetFacetEntries(1, 1);
            assertEquals(1, entries.size());
            assertEquals("5", entries[0].Value.Utf8ToString());
            assertEquals(2, entries[0].Count);

            // 9
            doc = new Document();
            AddField(doc, groupField, "c", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "15", useDv);
            w.AddDocument(doc);

            // 10
            doc = new Document();
            AddField(doc, groupField, "c", useDv);
            AddField(doc, "airport", "dus", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            indexSearcher.IndexReader.Dispose();
            indexSearcher = NewSearcher(w.GetReader());
            groupedAirportFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
            airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false);
            entries       = airportResult.GetFacetEntries(0, 10);
            if (useDv)
            {
                assertEquals(8, airportResult.TotalCount);
                assertEquals(0, airportResult.TotalMissingCount);
                assertEquals(4, entries.size());
                assertEquals("", entries[0].Value.Utf8ToString());
                assertEquals(1, entries[0].Count);
                assertEquals("ams", entries[1].Value.Utf8ToString());
                assertEquals(2, entries[1].Count);
                assertEquals("bru", entries[2].Value.Utf8ToString());
                assertEquals(3, entries[2].Count);
                assertEquals("dus", entries[3].Value.Utf8ToString());
                assertEquals(2, entries[3].Count);
            }
            else
            {
                assertEquals(7, airportResult.TotalCount);
                assertEquals(1, airportResult.TotalMissingCount);
                assertEquals(3, entries.size());
                assertEquals("ams", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("bru", entries[1].Value.Utf8ToString());
                assertEquals(3, entries[1].Count);
                assertEquals("dus", entries[2].Value.Utf8ToString());
                assertEquals(2, entries[2].Count);
            }

            groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
            durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 0, true);
            assertEquals(5, durationResult.TotalCount);
            assertEquals(0, durationResult.TotalMissingCount);

            entries = durationResult.GetFacetEntries(0, 10);
            assertEquals(2, entries.size());
            assertEquals("10", entries[0].Value.Utf8ToString());
            assertEquals(3, entries[0].Count);
            assertEquals("15", entries[1].Value.Utf8ToString());
            assertEquals(2, entries[1].Count);

            w.Dispose();
            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        public virtual void TestRandom()
        {
            AssumeTrue("Test requires SortedSetDV support", DefaultCodecSupportsSortedSet());
            string[]  tokens   = GetRandomTokens(10);
            Directory indexDir = NewDirectory();
            Directory taxoDir  = NewDirectory();

            RandomIndexWriter w      = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone);
            FacetsConfig      config = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt(Random(), 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new SortedSetDocValuesFacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.Reader);

            // Per-top-reader state:
            SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.IndexReader);
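            // The reader state is built once per IndexReader and reused for every query below.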

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random().Next(tokens.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = new SortedSetDocValuesFacetCounts(state, fc);

                // Slow, yet hopefully bug-free, faceting:
                var expectedCounts = new List <Dictionary <string, int?> >();
                for (int i = 0; i < numDims; i++)
                {
                    expectedCounts.Add(new Dictionary <string, int?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                int? v;
                                if (!expectedCounts[j].TryGetValue(doc.dims[j], out v))
                                {
                                    expectedCounts[j][doc.dims[j]] = 1;
                                }
                                else
                                {
                                    expectedCounts[j][doc.dims[j]] = (int)v + 1;
                                }
                            }
                        }
                    }
                }

                List <FacetResult> expected = new List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    List <LabelAndValue> labelValues = new List <LabelAndValue>();
                    int totCount = 0;
                    foreach (KeyValuePair <string, int?> ent in expectedCounts[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totCount += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totCount > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                //sortTies(actual);

                CollectionAssert.AreEqual(expected, actual);
            }

            IOUtils.Dispose(w, searcher.IndexReader, indexDir, taxoDir);
        }
        public virtual void TestSimple()
        {
            Random random = Random;

            DocValuesType[] dvTypes = new DocValuesType[] {
                DocValuesType.NUMERIC,
                DocValuesType.BINARY,
                DocValuesType.SORTED,
            };
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
            bool          canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;
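            // The legacy Lucene3x codec cannot write doc values, so fall back to
            // DocValuesType.NONE when that codec is in use.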

            Document doc = new Document();

            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random text", Field.Store.NO));
            doc.Add(new StringField("id", "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(new StringField("id", "2", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "2", dvType);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(new StringField("id", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddField(doc, groupField, "2", dvType);
            doc.Add(new TextField("content", "some random text", Field.Store.NO));
            doc.Add(new StringField("id", "4", Field.Store.NO));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text", Field.Store.NO));
            doc.Add(new StringField("id", "5", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random blob", Field.Store.NO));
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            AddField(doc, countField, "1", dvType);
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.GetReader());

            w.Dispose();

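            // Orders group values ascending, sorting the null group (docs without the
            // group field) before all others.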
            var cmp = Comparer <AbstractDistinctValuesCollector.IGroupCount <IComparable> > .Create((groupCount1, groupCount2) => {
                if (groupCount1.GroupValue == null)
                {
                    if (groupCount2.GroupValue == null)
                    {
                        return(0);
                    }
                    return(-1);
                }
                else if (groupCount2.GroupValue == null)
                {
                    return(1);
                }
                else
                {
                    return(groupCount1.GroupValue.CompareTo(groupCount2.GroupValue));
                }
            });

            // === Search for content:random
            IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
            IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector
                = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);

            //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);

            gcs.Sort(cmp);
            assertEquals(4, gcs.Count);

            CompareNull(gcs[0].GroupValue);
            List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues);

            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            Compare("1", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            countValues.Sort(nullComparer);
            assertEquals(2, countValues.size());
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[3].GroupValue);
            countValues = new List <IComparable>(gcs[3].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:some
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(3, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            assertEquals(2, countValues.size());
            countValues.Sort(nullComparer);
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:blob
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(2, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            // Because only one document in the author "1" group matched "blob"
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            Compare("3", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        public void TestMinShouldMatch()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random);
            RandomIndexWriter w        = new RandomIndexWriter(Random, dir, analyzer);

            string[] docs = new string[]
            {
                @"this is the end of the world right",
                @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            {
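                // Ctor args: occur for high-freq terms, occur for low-freq terms, and the
                // max term frequency cutoff that decides which terms count as "high frequency".
                // Fractional minimum-should-match values act as a ratio of the optional clauses.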
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.5f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.49f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 1.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
                assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score);
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 4.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0.0f);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                // doc 2 and 3 only get a score from low freq terms
                assertEquals(
                    new JCG.HashSet <string> {
                    @"2", @"3"
                },
                    new JCG.HashSet <string> {
                    r.Document(search.ScoreDocs[1].Doc).Get(@"id"),
                    r.Document(search.ScoreDocs[2].Doc).Get(@"id")
                },
                    aggressive: false);
            }

            {
                // only high freq terms around - check that min should match is applied
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 4);
            }

            {
                // only high freq terms around - check that min should match is applied
                CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 2);
                assertEquals(
                    new JCG.HashSet <string> {
                    @"0", @"2"
                },
                    new JCG.HashSet <string> {
                    r.Document(search.ScoreDocs[0].Doc).Get(@"id"),
                    r.Document(search.ScoreDocs[1].Doc).Get(@"id")
                },
                    aggressive: false);
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
        public void GRandom()
        {
            int numDocs = TestUtil.NextInt(Random(), (10 * RANDOM_MULTIPLIER),
                                           (100 * RANDOM_MULTIPLIER));
            Directory         dir    = null;
            RandomIndexWriter writer = null;
            IndexReader       ir     = null;

            try
            {
                dir    = NewDirectory();
                writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(),
                                                                               MockTokenizer.WHITESPACE, false), Similarity, TimeZone);
                int             maxLength = TestUtil.NextInt(Random(), 5, 50);
                List <string>   originals = new List <string>(numDocs);
                List <string[]> breaks    = new List <string[]>(numDocs);
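                // Strategy: index each random string twice - split at a random code point
                // ("left right") and as the original - then check that the spell checker
                // suggests the break for the original and the combination for the pair.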
                for (int i = 0; i < numDocs; i++)
                {
                    string orig = "";
                    if (Random().nextBoolean())
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomSimpleString(Random(), maxLength);
                        }
                    }
                    else
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomUnicodeString(Random(), maxLength);
                        }
                    }
                    originals.Add(orig);
                    int totalLength = orig.CodePointCount(0, orig.Length);
                    int breakAt     = orig.OffsetByCodePoints(0,
                                                              TestUtil.NextInt(Random(), 1, totalLength - 1));
                    string[] broken = new string[2];
                    broken[0] = orig.Substring(0, breakAt);
                    broken[1] = orig.Substring(breakAt);
                    breaks.Add(broken);
                    Document doc = new Document();
                    doc.Add(NewTextField("random_break", broken[0] + " " + broken[1],
                                         Field.Store.NO));
                    doc.Add(NewTextField("random_combine", orig, Field.Store.NO));
                    writer.AddDocument(doc);
                }
                writer.Commit();
                writer.Dispose();

                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
                wbsp.MaxChanges = 1;
                wbsp.MinBreakWordLength = 1;
                wbsp.MinSuggestionFrequency = 1;
                wbsp.MaxCombineWordLength = maxLength;
                for (int i = 0; i < originals.size(); i++)
                {
                    string orig  = originals[i];
                    string left  = breaks[i][0];
                    string right = breaks[i][1];
                    {
                        Term term = new Term("random_break", orig);

                        SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, originals.size(),
                                                                    ir, SuggestMode.SUGGEST_ALWAYS,
                                                                    WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                        bool failed = true;
                        foreach (SuggestWord[] sw1 in sw)
                        {
                            assertTrue(sw1.Length == 2);
                            if (sw1[0].String.equals(left) && sw1[1].String.equals(right))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting break suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                    {
                        Term[] terms = { new Term("random_combine", left),
                                         new Term("random_combine", right) };
                        CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms,
                                                                              originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
                        bool failed = true;
                        foreach (CombineSuggestion cs1 in cs)
                        {
                            assertTrue(cs1.OriginalTermIndexes.Length == 2);
                            if (cs1.Suggestion.String.equals(left + right))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting combine suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                }
            }
            catch (Exception)
            {
                throw; // rethrow without resetting the stack trace
            }
            finally
            {
                try
                {
                    ir.Dispose();
                }
                catch (Exception /*e1*/) { }
                try
                {
                    writer.Dispose();
                }
                catch (Exception /*e1*/) { }
                try
                {
                    dir.Dispose();
                }
                catch (Exception /*e1*/) { }
            }
        }
        public virtual void TestParsingAndSearching()
        {
            string field = "content";

            string[] docs = new string[] { "\\ abcdefg1", "\\x00079 hijklmn1", "\\\\ opqrstu1" };

            // queries that should find all docs
            Query[] matchAll = new Query[]
            {
                new WildcardQuery(new Term(field, "*")),
                new WildcardQuery(new Term(field, "*1")),
                new WildcardQuery(new Term(field, "**1")),
                new WildcardQuery(new Term(field, "*?")),
                new WildcardQuery(new Term(field, "*?1")),
                new WildcardQuery(new Term(field, "?*1")),
                new WildcardQuery(new Term(field, "**")),
                new WildcardQuery(new Term(field, "***")),
                new WildcardQuery(new Term(field, "\\\\*"))
            };

            // queries that should find no docs
            Query[] matchNone = new Query[]
            {
                new WildcardQuery(new Term(field, "a*h")),
                new WildcardQuery(new Term(field, "a?h")),
                new WildcardQuery(new Term(field, "*a*h")),
                new WildcardQuery(new Term(field, "?a")),
                new WildcardQuery(new Term(field, "a?"))
            };

            // prefix queries that should each match exactly one doc (inner array i matches docs[i])
            PrefixQuery[][] matchOneDocPrefix = new PrefixQuery[][]
            {
                new PrefixQuery[] { new PrefixQuery(new Term(field, "a")), new PrefixQuery(new Term(field, "ab")), new PrefixQuery(new Term(field, "abc")) },
                new PrefixQuery[] { new PrefixQuery(new Term(field, "h")), new PrefixQuery(new Term(field, "hi")), new PrefixQuery(new Term(field, "hij")), new PrefixQuery(new Term(field, "\\x0007")) },
                new PrefixQuery[] { new PrefixQuery(new Term(field, "o")), new PrefixQuery(new Term(field, "op")), new PrefixQuery(new Term(field, "opq")), new PrefixQuery(new Term(field, "\\\\")) }
            };

            // wildcard queries that should each match exactly one doc (inner array i matches docs[i])
            WildcardQuery[][] matchOneDocWild = new WildcardQuery[][]
            {
                new WildcardQuery[] { new WildcardQuery(new Term(field, "*a*")), new WildcardQuery(new Term(field, "*ab*")), new WildcardQuery(new Term(field, "*abc**")), new WildcardQuery(new Term(field, "ab*e*")), new WildcardQuery(new Term(field, "*g?")), new WildcardQuery(new Term(field, "*f?1")) },
                new WildcardQuery[] { new WildcardQuery(new Term(field, "*h*")), new WildcardQuery(new Term(field, "*hi*")), new WildcardQuery(new Term(field, "*hij**")), new WildcardQuery(new Term(field, "hi*k*")), new WildcardQuery(new Term(field, "*n?")), new WildcardQuery(new Term(field, "*m?1")), new WildcardQuery(new Term(field, "hij**")) },
                new WildcardQuery[] { new WildcardQuery(new Term(field, "*o*")), new WildcardQuery(new Term(field, "*op*")), new WildcardQuery(new Term(field, "*opq**")), new WildcardQuery(new Term(field, "op*q*")), new WildcardQuery(new Term(field, "*u?")), new WildcardQuery(new Term(field, "*t?1")), new WildcardQuery(new Term(field, "opq**")) }
            };

            // prepare the index
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(field, docs[i], Field.Store.NO));
                iw.AddDocument(doc);
            }
            iw.Dispose();

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            // test queries that must find all
            foreach (Query q in matchAll)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchAll: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(docs.Length, hits.Length);
            }

            // test queries that must find none
            foreach (Query q in matchNone)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("matchNone: q=" + q + " " + q.GetType().Name);
                }
                ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                Assert.AreEqual(0, hits.Length);
            }

            // test the prefix queries find only one doc
            for (int i = 0; i < matchOneDocPrefix.Length; i++)
            {
                for (int j = 0; j < matchOneDocPrefix[i].Length; j++)
                {
                    Query q = matchOneDocPrefix[i][j];
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 prefix: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, hits.Length);
                    Assert.AreEqual(i, hits[0].Doc);
                }
            }

            // test the wildcard queries find only one doc
            for (int i = 0; i < matchOneDocWild.Length; i++)
            {
                for (int j = 0; j < matchOneDocWild[i].Length; j++)
                {
                    Query q = matchOneDocWild[i][j];
                    if (VERBOSE)
                    {
                        Console.WriteLine("match 1 wild: doc=" + docs[i] + " q=" + q + " " + q.GetType().Name);
                    }
                    ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
                    Assert.AreEqual(1, hits.Length);
                    Assert.AreEqual(i, hits[0].Doc);
                }
            }

            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestSetPosition()
        {
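            // The anonymous analyzer (defined elsewhere in this class) emits synthetic tokens
            // with custom position increments; the assertions below imply positions
            // 0, 2, 3, 3 and 4 for tokens "1".."5", i.e. "3" and "4" share a position.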
            Analyzer          analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            Directory         store    = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(Random(), store, analyzer);
            Document          d        = new Document();

            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));

            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;

            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitly.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for a non-existing searched term
            // even if another searched term exists in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succeed for a non-existing searched term
            // because another searched term exists in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();

            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            store.Dispose();
        }
        public void TestExtend()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random);
            RandomIndexWriter w        = new RandomIndexWriter(Random, dir, analyzer);
            var docs = new string[]
            {
                @"this is the end of the world right",
                @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                // this one boosts the TermQuery("field", "universe") by 10x
                CommonTermsQuery query = new ExtendedCommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                                      Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"2", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"0", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
        public virtual void TestBasic()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(NewStringField("id", "0", Field.Store.YES));
            doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
            w.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            // 1 extra token, but wizard and oz are close;
            doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();

            // Do ordinary BooleanQuery:
            BooleanQuery bq = new BooleanQuery();

            bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
            bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
            IndexSearcher searcher = GetSearcher(r);

            searcher.Similarity = new DefaultSimilarity();

            TopDocs hits = searcher.Search(bq, 10);

            Assert.AreEqual(2, hits.TotalHits);
            Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

            // Now, resort using PhraseQuery:
            PhraseQuery pq = new PhraseQuery();

            pq.Slop = 5;
            pq.Add(new Term("field", "wizard"));
            pq.Add(new Term("field", "oz"));

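            // QueryRescorer.Rescore combines scores linearly:
            // firstPassScore + weight * rescoreQueryScore (weight = 2.0 here).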
            TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10);

            // Resorting changed the order:
            Assert.AreEqual(2, hits2.TotalHits);
            Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

            // Resort using SpanNearQuery:
            SpanTermQuery t1  = new SpanTermQuery(new Term("field", "wizard"));
            SpanTermQuery t2  = new SpanTermQuery(new Term("field", "oz"));
            SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);

            TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10);

            // Resorting changed the order:
            Assert.AreEqual(2, hits3.TotalHits);
            Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id"));

            r.Dispose();
            dir.Dispose();
        }
        public override void BeforeClass() // LUCENENET specific - renamed from BeforeClassDrillDownQueryTest() to ensure calling order
        {
            base.BeforeClass();

            dir = NewDirectory();
            Random            r      = Random;
            RandomIndexWriter writer = new RandomIndexWriter(r, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(r, MockTokenizer.KEYWORD, false)));

            taxoDir = NewDirectory();
            ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

            config = new FacetsConfig();

            // Randomize the per-dim config:
            config.SetHierarchical("a", Random.NextBoolean());
            config.SetMultiValued("a", Random.NextBoolean());
            if (Random.NextBoolean())
            {
                config.SetIndexFieldName("a", "$a");
            }
            config.SetRequireDimCount("a", true);

            config.SetHierarchical("b", Random.NextBoolean());
            config.SetMultiValued("b", Random.NextBoolean());
            if (Random.NextBoolean())
            {
                config.SetIndexFieldName("b", "$b");
            }
            config.SetRequireDimCount("b", true);
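            // With RequireDimCount set, a count for the dimension itself is always
            // aggregated, independent of the randomized per-dim settings above.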

            for (int i = 0; i < 100; i++)
            {
                Document doc = new Document();
                if (i % 2 == 0) // 50
                {
                    doc.Add(new TextField("content", "foo", Field.Store.NO));
                }
                if (i % 3 == 0) // 33
                {
                    doc.Add(new TextField("content", "bar", Field.Store.NO));
                }
                if (i % 4 == 0) // 25
                {
                    if (r.NextBoolean())
                    {
                        doc.Add(new FacetField("a", "1"));
                    }
                    else
                    {
                        doc.Add(new FacetField("a", "2"));
                    }
                }
                if (i % 5 == 0) // 20
                {
                    doc.Add(new FacetField("b", "1"));
                }
                writer.AddDocument(config.Build(taxoWriter, doc));
            }

            taxoWriter.Dispose();
            reader = writer.GetReader();
            writer.Dispose();

            taxo = new DirectoryTaxonomyReader(taxoDir);
        }
        public virtual void TestDocValuesIntegration()
        {
            AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues);
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
            doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
            doc.Add(new NumericDocValuesField("numeric", 42));
            if (DefaultCodecSupportsSortedSet)
            {
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
            }
            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();
            AtomicReader ar = GetOnlySegmentReader(ir);
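            // Each doc values type supports only its matching FieldCache accessors;
            // every mismatched accessor below is expected to throw InvalidOperationException.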

            BytesRef scratch = new BytesRef();

            // Binary type: can be retrieved via getTerms()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "binary", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
            binary.Get(0, scratch);
            Assert.AreEqual("binary value", scratch.Utf8ToString());

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
            Assert.IsTrue(bits.Get(0));

            // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "sorted", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "sorted");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
            binary.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
            Assert.AreEqual(0, sorted.GetOrd(0));
            Assert.AreEqual(1, sorted.ValueCount);
            sorted.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
            sortedSet.SetDocument(0);
            Assert.AreEqual(0, sortedSet.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
            Assert.AreEqual(1, sortedSet.ValueCount);

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
            Assert.IsTrue(bits.Get(0));

            // Numeric type: can be retrieved via getInts() and so on
            Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
            Assert.AreEqual(42, numeric.Get(0));

            try
            {
                FieldCache.DEFAULT.GetTerms(ar, "numeric", true);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
            Assert.IsTrue(bits.Get(0));

            // SortedSet type: can be retrieved via getDocTermOrds()
            if (DefaultCodecSupportsSortedSet)
            {
                try
                {
                    FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTerms(ar, "sortedset", true);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    new DocTermOrds(ar, null, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
                sortedSet.SetDocument(0);
                Assert.AreEqual(0, sortedSet.NextOrd());
                Assert.AreEqual(1, sortedSet.NextOrd());
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
                Assert.AreEqual(2, sortedSet.ValueCount);

                bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
                Assert.IsTrue(bits.Get(0));
            }

            ir.Dispose();
            dir.Dispose();
        }
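        // A minimal helper sketch, not from the original test: the repeated
        // try/fail/catch blocks above all assert one invariant — each FieldCache
        // accessor only accepts a field whose DocValues type matches, and a
        // mismatch surfaces as an InvalidOperationException. "ar" is assumed to
        // be the AtomicReader used above; the helper name is hypothetical.
        private static bool ThrowsTypeMismatch(Action access)
        {
            try
            {
                access();
                return false; // accessor accepted the field: types matched
            }
            catch (InvalidOperationException)
            {
                return true; // wrong DocValues type for this accessor
            }
        }

        // Hypothetical usage:
        //   Assert.IsTrue(ThrowsTypeMismatch(() => FieldCache.DEFAULT.GetInt32s(ar, "sorted", false)));
        //   Assert.IsTrue(ThrowsTypeMismatch(() => new DocTermOrds(ar, null, "numeric")));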
        public virtual void TestTransitionAPI()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.w.Analyzer.TokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");

            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);

            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.Terms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Size());
                TermsEnum tvsEnum = tvs.Iterator(null);
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next());
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv"))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next());
                Assert.IsNull(tvsEnum.Next());
            }

            r.Dispose();
            dir.Dispose();
        }
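        // For context, a hedged sketch of the 4.x field API that the deprecated
        // constructors above transition to (same field semantics; this block is
        // illustrative and not part of the original test):
        private static Document BuildWith4xFieldApi()
        {
            Document doc = new Document();
            doc.Add(new StoredField("stored", "abc"));                              // Store.YES, Index.NO
            doc.Add(new StringField("stored_indexed", "abc xyz", Field.Store.YES)); // Store.YES, Index.NOT_ANALYZED
            doc.Add(new TextField("stored_tokenized", "abc xyz", Field.Store.YES)); // Store.YES, Index.ANALYZED
            doc.Add(new StoredField("binary", new byte[10]));                       // binary stored field
            FieldType tvType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors = true,
                StoreTermVectorPositions = true,
                StoreTermVectorOffsets = true
            };
            tvType.Freeze();
            doc.Add(new Field("tv_pos_off", "abc xyz", tvType));                    // TermVector.WITH_POSITIONS_OFFSETS
            return doc;
        }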
        private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
        {
            Random random    = Random;
            int    numDocs   = TestUtil.NextInt32(random, 138, 1145) * RANDOM_MULTIPLIER;
            int    numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
            int    numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
            }

            List <string> groups = new List <string>();

            for (int i = 0; i < numGroups; i++)
            {
                groups.Add(GenerateRandomNonEmptyString());
            }
            List <string> facetValues = new List <string>();

            for (int i = 0; i < numFacets; i++)
            {
                facetValues.Add(GenerateRandomNonEmptyString());
            }
            string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
            if (VERBOSE)
            {
                Console.WriteLine("TEST: create fake content");
            }
            for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
            {
                contentBrs[contentIDX] = GenerateRandomNonEmptyString();
                if (VERBOSE)
                {
                    Console.WriteLine("  content=" + contentBrs[contentIDX]);
                }
            }

            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(
                    TEST_VERSION_CURRENT,
                    new MockAnalyzer(random)
                    )
                );
            bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            bool useDv    = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();

            Document doc               = new Document();
            Document docNoGroup        = new Document();
            Document docNoFacet        = new Document();
            Document docNoGroupNoFacet = new Document();
            Field    group             = NewStringField("group", "", Field.Store.NO);
            Field    groupDc           = new SortedDocValuesField("group_dv", new BytesRef());

            if (useDv)
            {
                doc.Add(groupDc);
                docNoFacet.Add(groupDc);
            }
            doc.Add(group);
            docNoFacet.Add(group);
            Field[] facetFields;
            if (useDv)
            {
                Debug.Assert(!multipleFacetValuesPerDocument);
                facetFields    = new Field[2];
                facetFields[0] = NewStringField("facet", "", Field.Store.NO);
                doc.Add(facetFields[0]);
                docNoGroup.Add(facetFields[0]);
                facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
                doc.Add(facetFields[1]);
                docNoGroup.Add(facetFields[1]);
            }
            else
            {
                facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
                for (int i = 0; i < facetFields.Length; i++)
                {
                    facetFields[i] = NewStringField("facet", "", Field.Store.NO);
                    doc.Add(facetFields[i]);
                    docNoGroup.Add(facetFields[i]);
                }
            }
            Field content = NewStringField("content", "", Field.Store.NO);

            doc.Add(content);
            docNoGroup.Add(content);
            docNoFacet.Add(content);
            docNoGroupNoFacet.Add(content);

            ISet <string> uniqueFacetValues = new JCG.SortedSet <string>(Comparer <string> .Create((a, b) => {
                if (a == b)
                {
                    return(0);
                }
                else if (a == null)
                {
                    return(-1);
                }
                else if (b == null)
                {
                    return(1);
                }
                else
                {
                    return(a.CompareToOrdinal(b));
                }
            }));

            // LUCENENET NOTE: Need JCG.Dictionary here because of null keys
            IDictionary <string, JCG.Dictionary <string, ISet <string> > > searchTermToFacetToGroups = new Dictionary <string, JCG.Dictionary <string, ISet <string> > >();
            int facetWithMostGroups = 0;

            for (int i = 0; i < numDocs; i++)
            {
                string groupValue;
                if (random.nextInt(24) == 17)
                {
                    // So we test the "doc doesn't have the group'd
                    // field" case:
                    if (useDv)
                    {
                        groupValue = "";
                    }
                    else
                    {
                        groupValue = null;
                    }
                }
                else
                {
                    groupValue = groups[random.nextInt(groups.size())];
                }

                string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
                if (!searchTermToFacetToGroups.TryGetValue(contentStr, out JCG.Dictionary <string, ISet <string> > facetToGroups))
                {
                    searchTermToFacetToGroups[contentStr] = facetToGroups = new JCG.Dictionary <string, ISet <string> >();
                }

                List <string> facetVals = new List <string>();
                if (useDv || random.nextInt(24) != 18)
                {
                    if (useDv)
                    {
                        string facetValue = facetValues[random.nextInt(facetValues.size())];
                        uniqueFacetValues.Add(facetValue);
                        if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                        {
                            facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                        }
                        groupsInFacet.add(groupValue);
                        if (groupsInFacet.size() > facetWithMostGroups)
                        {
                            facetWithMostGroups = groupsInFacet.size();
                        }
                        facetFields[0].SetStringValue(facetValue);
                        facetFields[1].SetBytesValue(new BytesRef(facetValue));
                        facetVals.Add(facetValue);
                    }
                    else
                    {
                        foreach (Field facetField in facetFields)
                        {
                            string facetValue = facetValues[random.nextInt(facetValues.size())];
                            uniqueFacetValues.Add(facetValue);
                            if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                            {
                                facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                            }
                            groupsInFacet.add(groupValue);
                            if (groupsInFacet.size() > facetWithMostGroups)
                            {
                                facetWithMostGroups = groupsInFacet.size();
                            }
                            facetField.SetStringValue(facetValue);
                            facetVals.Add(facetValue);
                        }
                    }
                }
                else
                {
                    uniqueFacetValues.Add(null);
                    if (!facetToGroups.TryGetValue(null, out ISet <string> groupsInFacet))
                    {
                        facetToGroups[null] = groupsInFacet = new JCG.HashSet <string>();
                    }
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + Collections.ToString(facetVals));
                }

                if (groupValue != null)
                {
                    if (useDv)
                    {
                        groupDc.SetBytesValue(new BytesRef(groupValue));
                    }
                    group.SetStringValue(groupValue);
                }
                else if (useDv)
                {
                    // DV cannot have missing values:
                    groupDc.SetBytesValue(new BytesRef());
                }
                content.SetStringValue(contentStr);
                if (groupValue == null && !facetVals.Any())
                {
                    writer.AddDocument(docNoGroupNoFacet);
                }
                else if (!facetVals.Any())
                {
                    writer.AddDocument(docNoFacet);
                }
                else if (groupValue == null)
                {
                    writer.AddDocument(docNoGroup);
                }
                else
                {
                    writer.AddDocument(doc);
                }
            }

            DirectoryReader reader = writer.GetReader();

            writer.Dispose();

            return(new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv));
        }
        public virtual void TestRandomSampling()
        {
            Directory dir     = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter       writer     = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            FacetsConfig config = new FacetsConfig();

            int numDocs = AtLeast(10000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random();

            // NRT open
            IndexSearcher searcher   = NewSearcher(writer.Reader);
            var           taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            IOUtils.Close(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong());

            // There should be no divisions by zero
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.NotNull(collectRandomZeroResults.GetMatchingDocs);

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs)
            {
                Assert.AreEqual(0, doc.TotalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // There will be 5 facet values (0, 2, 4, 6 and 8), since only the even
            // values of (i % 10) are hits. There is a vanishingly small chance that
            // one of the 5 values is missed entirely when sampling: roughly
            // 5 * 0.8^2000 ~ 10^-193 (five candidate values; each sampled hit
            // misses a given value with probability 0.8, across ~2000 samples),
            // so that is effectively never going to happen.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextLong());      // no sampling
            RandomSamplingFacetsCollector random10Percent  = new RandomSamplingFacetsCollector(numDocs / 10, random.NextLong()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts  = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts     = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            FacetResult random10Result  = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult     = exactFacetCounts.GetTopChildren(10, "iMod10");

            Assert.AreEqual(random100Result, exactResult);

            // we should have five children, but there is a small chance we have less.
            // (see above).
            Assert.True(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.True(random10Result.ChildCount >= 1);

            // now calculate some statistics to determine if the sampled result is 'ok'.
            // because random sampling is used, the results will vary each time.
            int sum = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.Value;
            }
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.Value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // we query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2
            float targetMu = numDocs / (5.0f * 2.0f);

            // the average should be in the range and the standard deviation should not
            // be too great
            Assert.True(sigma < 200);
            Assert.True(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Close(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
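        // A rough sketch of the amortization step used above (illustrative, not
        // the library implementation): AmortizeFacetCounts is assumed to scale
        // each sampled count by the inverse of the effective sampling ratio so
        // that the sampled totals approximate the exact totals.
        private static double AmortizeCount(int sampledCount, int sampleSize, int totalHits)
        {
            // e.g. ~0.2 when sampling 10% of all docs while half of them are hits
            double samplingRatio = (double)sampleSize / totalHits;
            return sampledCount / samplingRatio;
        }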
        public void TestMVGroupedFacetingWithDeletes()
        {
            string    groupField = "hotel";
            FieldType customType = new FieldType();

            customType.IsStored = (true);

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            bool useDv = false;

            // Cannot assert this since we use NoMergePolicy:
            w.DoRandomForceMergeAssert = (false);

            // 0
            Document doc = new Document();

            doc.Add(new StringField("x", "x", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            w.Commit();
            w.DeleteDocuments(new TermQuery(new Term("airport", "ams")));

            // 2
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "dus", Field.Store.NO));

            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 6
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 7
            doc = new Document();
            doc.Add(new StringField("x", "x", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            w.Dispose();
            IndexSearcher indexSearcher = NewSearcher(DirectoryReader.Open(dir));
            AbstractGroupFacetCollector groupedAirportFacetCollector = CreateRandomCollector(groupField, "airport", null, true);

            indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
            TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false);
            assertEquals(3, airportResult.TotalCount);
            assertEquals(1, airportResult.TotalMissingCount);

            IList <TermGroupFacetCollector.FacetEntry> entries = airportResult.GetFacetEntries(0, 10);

            assertEquals(2, entries.size());
            assertEquals("ams", entries[0].Value.Utf8ToString());
            assertEquals(2, entries[0].Count);
            assertEquals("dus", entries[1].Value.Utf8ToString());
            assertEquals(1, entries[1].Count);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
        public virtual void TestRandomStringSort()
        {
            Random random = new Random(Random().Next());

            int               NUM_DOCS  = AtLeast(100);
            Directory         dir       = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(random, dir, Similarity, TimeZone);
            bool              allowDups = random.NextBoolean();
            HashSet <string>  seen      = new HashSet <string>();
            int               maxLength = TestUtil.NextInt(random, 5, 100);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
            }

            int numDocs = 0;
            IList <BytesRef> docValues = new List <BytesRef>();

            // TODO: deletions
            while (numDocs < NUM_DOCS)
            {
                Document doc = new Document();

                // 10% of the time, the document is missing the value:
                BytesRef br;
                if (Random().Next(10) != 7)
                {
                    string s;
                    if (random.NextBoolean())
                    {
                        s = TestUtil.RandomSimpleString(random, maxLength);
                    }
                    else
                    {
                        s = TestUtil.RandomUnicodeString(random, maxLength);
                    }

                    if (!allowDups)
                    {
                        if (seen.Contains(s))
                        {
                            continue;
                        }
                        seen.Add(s);
                    }

                    if (VERBOSE)
                    {
                        Console.WriteLine("  " + numDocs + ": s=" + s);
                    }

                    br = new BytesRef(s);
                    if (DefaultCodecSupportsDocValues())
                    {
                        doc.Add(new SortedDocValuesField("stringdv", br));
                        doc.Add(new NumericDocValuesField("id", numDocs));
                    }
                    else
                    {
                        doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO));
                    }
                    doc.Add(NewStringField("string", s, Field.Store.NO));
                    docValues.Add(br);
                }
                else
                {
                    br = null;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  " + numDocs + ": <missing>");
                    }
                    docValues.Add(null);
                    if (DefaultCodecSupportsDocValues())
                    {
                        doc.Add(new NumericDocValuesField("id", numDocs));
                    }
                    else
                    {
                        doc.Add(NewStringField("id", Convert.ToString(numDocs), Field.Store.NO));
                    }
                }

                doc.Add(new StoredField("id", numDocs));
                writer.AddDocument(doc);
                numDocs++;

                if (random.Next(40) == 17)
                {
                    // force flush
                    writer.Reader.Dispose();
                }
            }

            IndexReader r = writer.Reader;

            writer.Dispose();
            if (VERBOSE)
            {
                Console.WriteLine("  reader=" + r);
            }

            IndexSearcher idxS  = NewSearcher(r, false, Similarity);
            int           ITERS = AtLeast(100);

            for (int iter = 0; iter < ITERS; iter++)
            {
                bool reverse = random.NextBoolean();

                TopFieldDocs hits;
                SortField    sf;
                bool         sortMissingLast;
                bool         missingIsNull;
                if (DefaultCodecSupportsDocValues() && random.NextBoolean())
                {
                    sf = new SortField("stringdv", SortFieldType.STRING, reverse);
                    // Can only use sort missing if the DVFormat
                    // supports docsWithField:
                    sortMissingLast = DefaultCodecSupportsDocsWithField() && Random().NextBoolean();
                    missingIsNull   = DefaultCodecSupportsDocsWithField();
                }
                else
                {
                    sf = new SortField("string", SortFieldType.STRING, reverse);
                    sortMissingLast = Random().NextBoolean();
                    missingIsNull   = true;
                }
                if (sortMissingLast)
                {
                    sf.MissingValue = SortField.STRING_LAST;
                }

                Sort sort;
                if (random.NextBoolean())
                {
                    sort = new Sort(sf);
                }
                else
                {
                    sort = new Sort(sf, SortField.FIELD_DOC);
                }
                int          hitCount  = TestUtil.NextInt(random, 1, r.MaxDoc + 20);
                RandomFilter f         = new RandomFilter(random, (float)random.NextDouble(), docValues);
                int          queryType = random.Next(3);
                if (queryType == 0)
                {
                    // force out of order
                    BooleanQuery bq = new BooleanQuery();
                    // Add a Query with SHOULD, since bw.Scorer() returns BooleanScorer2
                    // which delegates to BS if there are no mandatory clauses.
                    bq.Add(new MatchAllDocsQuery(), Occur.SHOULD);
                    // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
                    // the clause instead of BQ.
                    bq.MinimumNumberShouldMatch = 1;
                    hits = idxS.Search(bq, f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
                }
                else if (queryType == 1)
                {
                    hits = idxS.Search(new ConstantScoreQuery(f), null, hitCount, sort, random.NextBoolean(), random.NextBoolean());
                }
                else
                {
                    hits = idxS.Search(new MatchAllDocsQuery(), f, hitCount, sort, random.NextBoolean(), random.NextBoolean());
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " " + hits.TotalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
                }

                // Compute expected results:
                var expected = f.MatchValues.ToList();
                expected.Sort(new ComparerAnonymousInnerClassHelper(this, sortMissingLast));
                if (reverse)
                {
                    expected.Reverse();
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  expected:");
                    for (int idx = 0; idx < expected.Count; idx++)
                    {
                        BytesRef br = expected[idx];
                        if (br == null && missingIsNull == false)
                        {
                            br = new BytesRef();
                        }
                        Console.WriteLine("    " + idx + ": " + (br == null ? "<missing>" : br.Utf8ToString()));
                        if (idx == hitCount - 1)
                        {
                            break;
                        }
                    }
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  actual:");
                    for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
                    {
                        FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX];
                        BytesRef br = (BytesRef)fd.Fields[0];

                        Console.WriteLine("    " + hitIDX + ": " + (br == null ? "<missing>" : br.Utf8ToString()) + " id=" + idxS.Doc(fd.Doc).Get("id"));
                    }
                }
                for (int hitIDX = 0; hitIDX < hits.ScoreDocs.Length; hitIDX++)
                {
                    FieldDoc fd = (FieldDoc)hits.ScoreDocs[hitIDX];
                    BytesRef br = expected[hitIDX];
                    if (br == null && missingIsNull == false)
                    {
                        br = new BytesRef();
                    }

                    // Normally, the old codecs (that don't support
                    // docsWithField via doc values) will always return
                    // an empty BytesRef for the missing case; however,
                    // if all docs in a given segment were missing, in
                    // that case it will return null!  So we must map
                    // null here, too:
                    BytesRef br2 = (BytesRef)fd.Fields[0];
                    if (br2 == null && missingIsNull == false)
                    {
                        br2 = new BytesRef();
                    }

                    Assert.AreEqual(br, br2, "hit=" + hitIDX + " has wrong sort value");
                }
            }

            r.Dispose();
            dir.Dispose();
        }
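        // A minimal usage sketch of the missing-value handling exercised above
        // (field name reused from the test for clarity): with
        // SortField.STRING_LAST, documents without a value for the sort field
        // sort after all documents that have one; STRING_FIRST is the opposite.
        private static Sort MissingLastStringSort()
        {
            SortField sf = new SortField("stringdv", SortFieldType.STRING, false);
            sf.MissingValue = SortField.STRING_LAST; // docs missing the field sort last
            return new Sort(sf, SortField.FIELD_DOC); // doc id as a stable tie-break
        }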
        public void TestBasic()
        {
            string            groupField = "author";
            Directory         dir        = NewDirectory();
            RandomIndexWriter w          = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            bool          canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType valueType = vts[Random.nextInt(vts.Length)];

            // 0
            Document doc = new Document();

            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "1", Field.Store.NO));
            doc.Add(NewStringField("id_2", "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(NewStringField("id_1", "2", Field.Store.NO));
            doc.Add(NewStringField("id_2", "2", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(NewStringField("id_1", "3", Field.Store.NO));
            doc.Add(NewStringField("id_2", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddGroupField(doc, groupField, "author2", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "4", Field.Store.NO));
            doc.Add(NewStringField("id_2", "4", Field.Store.NO));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
            doc.Add(NewTextField("content", "some more random text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "5", Field.Store.NO));
            doc.Add(NewStringField("id_2", "5", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV, valueType);
            doc.Add(NewTextField("content", "random blob", Field.Store.NO));
            doc.Add(NewStringField("id_1", "6", Field.Store.NO));
            doc.Add(NewStringField("id_2", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "6", Field.Store.NO));
            doc.Add(NewStringField("id_2", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 7 -- no author field
            doc = new Document();
            doc.Add(NewTextField("content", "random word stuck in alot of other text", Field.Store.NO));
            doc.Add(NewStringField("id_1", "7", Field.Store.NO));
            doc.Add(NewStringField("id_2", "7", Field.Store.NO));
            w.AddDocument(doc);

            IndexReader   reader        = w.GetReader();
            IndexSearcher indexSearcher = NewSearcher(reader);

            w.Dispose();
            int maxDoc = reader.MaxDoc;

            Sort sortWithinGroup        = new Sort(new SortField("id_1", SortFieldType.INT32, true));
            var  allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            // STRING sort type triggers different implementation
            Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortFieldType.STRING, true));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup2, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            assertTrue(ArrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortFieldType.STRING, false));

            allGroupHeadsCollector = CreateRandomCollector(groupField, sortWithinGroup3, canUseIDV, valueType);
            indexSearcher.Search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
            // 7 b/c higher doc id wins, even if order of the field is not in reverse.
            assertTrue(ArrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads()));
            assertTrue(OpenBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.RetrieveGroupHeads(maxDoc), maxDoc));

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
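        // An illustrative sketch (not the collector's real implementation) of
        // the "group head" idea asserted above: per group key, retain only the
        // single doc that wins under the within-group sort.
        private static IDictionary<string, int> SelectGroupHeads(
            IEnumerable<(string Group, int DocId)> hits,
            Func<int, int, bool> beats) // beats(candidate, current) under sortWithinGroup
        {
            var heads = new Dictionary<string, int>();
            foreach (var (group, docId) in hits)
            {
                if (!heads.TryGetValue(group, out int current) || beats(docId, current))
                {
                    heads[group] = docId; // new head for this group
                }
            }
            return heads;
        }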
        private void CreateRandomIndexes()
        {
            dir1 = NewDirectory();
            dir2 = NewDirectory();
            int           numDocs     = AtLeast(150);
            int           numTerms    = TestUtil.NextInt32(Random, 1, numDocs / 5);
            ISet <string> randomTerms = new JCG.HashSet <string>();

            while (randomTerms.size() < numTerms)
            {
                randomTerms.add(TestUtil.RandomSimpleString(Random));
            }
            terms = new JCG.List <string>(randomTerms);
            long seed = Random.NextInt64();
            IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new J2N.Randomizer(seed)));
            IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new J2N.Randomizer(seed)));

            iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
            RandomIndexWriter iw1 = new RandomIndexWriter(new J2N.Randomizer(seed), dir1, iwc1);
            RandomIndexWriter iw2 = new RandomIndexWriter(new J2N.Randomizer(seed), dir2, iwc2);

            for (int i = 0; i < numDocs; ++i)
            {
                if (Random.nextInt(5) == 0 && i != numDocs - 1)
                {
                    string term = RandomPicks.RandomFrom(Random, terms);
                    iw1.DeleteDocuments(new Term("s", term));
                    iw2.DeleteDocuments(new Term("s", term));
                }
                Document doc = randomDocument();
                iw1.AddDocument(doc);
                iw2.AddDocument(doc);
                if (Random.nextInt(8) == 0)
                {
                    iw1.Commit();
                    iw2.Commit();
                }
            }
            // Make sure we have something to merge
            iw1.Commit();
            iw2.Commit();
            Document doc2 = randomDocument();

            // NOTE: don't use RIW.AddDocument directly, since it sometimes commits,
            // which may trigger a merge, in which case ForceMerge may not do anything.
            // With field updates this is a problem, since the updates can go into the
            // single segment in the index, and therefore the index won't be sorted.
            // This hurts the assumption of the test later on, that the index is sorted
            // by SortingMP.
            iw1.IndexWriter.AddDocument(doc2);
            iw2.IndexWriter.AddDocument(doc2);

            if (DefaultCodecSupportsFieldUpdates)
            {
                // update NDV of docs belonging to one term (covers many documents)
                long   value = Random.NextInt64();
                string term  = RandomPicks.RandomFrom(Random, terms);
                iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
                iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
            }

            iw1.ForceMerge(1);
            iw2.ForceMerge(1);
            iw1.Dispose();
            iw2.Dispose();
            reader       = DirectoryReader.Open(dir1);
            sortedReader = DirectoryReader.Open(dir2);
        }
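        // A hedged sketch of what the NewSortingMergePolicy(sort) helper above
        // is assumed to produce: Lucene.Net.Index.Sorter.SortingMergePolicy
        // wraps another merge policy so that every merged segment comes out
        // sorted by the given Sort.
        private IndexWriterConfig SortedMergeConfig(Sort sort)
        {
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            iwc.SetMergePolicy(new SortingMergePolicy(NewLogMergePolicy(), sort));
            return iwc;
        }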
        public virtual void TestRandom()
        {
            Directory         dir     = NewDirectory();
            int               numDocs = AtLeast(1000);
            RandomIndexWriter w       = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            int[] idToNum  = new int[numDocs];
            int   maxValue = TestUtil.NextInt(Random(), 10, 1000000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", "" + i, Field.Store.YES));
                int           numTokens = TestUtil.NextInt(Random(), 1, 10);
                StringBuilder b         = new StringBuilder();
                for (int j = 0; j < numTokens; j++)
                {
                    b.Append("a ");
                }
                doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
                idToNum[i] = Random().Next(maxValue);
                doc.Add(new NumericDocValuesField("num", idToNum[i]));
                w.AddDocument(doc);
            }
            IndexReader r = w.Reader;

            w.Dispose();

            IndexSearcher s       = NewSearcher(r);
            int           numHits = TestUtil.NextInt(Random(), 1, numDocs);
            bool          reverse = Random().NextBoolean();

            //System.out.println("numHits=" + numHits + " reverse=" + reverse);
            TopDocs hits = s.Search(new TermQuery(new Term("field", "a")), numHits);

            TopDocs hits2 = new QueryRescorerAnonymousInnerClassHelper3(this, new FixedScoreQuery(idToNum, reverse))
                            .Rescore(s, hits, numHits);

            int[] expected = new int[numHits];
            for (int i = 0; i < numHits; i++)
            {
                expected[i] = hits.ScoreDocs[i].Doc;
            }

            int reverseInt = reverse ? -1 : 1;

            Array.Sort(expected, new ComparatorAnonymousInnerClassHelper(this, idToNum, r, reverseInt));

            bool fail = false;

            for (int i = 0; i < numHits; i++)
            {
                //System.out.println("expected=" + expected[i] + " vs " + hits2.ScoreDocs[i].Doc + " v=" + idToNum[Integer.parseInt(r.Document(expected[i]).Get("id"))]);
                if ((int)expected[i] != hits2.ScoreDocs[i].Doc)
                {
                    //System.out.println("  diff!");
                    fail = true;
                }
            }
            Assert.IsFalse(fail);

            r.Dispose();
            dir.Dispose();
        }
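        // For reference, a hedged sketch of the simpler built-in rescoring
        // entry point (assumed equivalent in spirit to the anonymous rescorer
        // above): re-run the current top hits against a second query and blend
        // the second-pass score in with the given weight.
        private static TopDocs RescoreTopHits(IndexSearcher s, TopDocs firstPass, Query secondPass, int topN)
        {
            // Combines firstPassScore + 2.0 * secondPassScore for docs matching secondPass.
            return QueryRescorer.Rescore(s, firstPass, secondPass, 2.0, topN);
        }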
        public override void BeforeClass()
        {
            base.BeforeClass();

            ANALYZER = new MockAnalyzer(Random);

            qp = new StandardQueryParser(ANALYZER);

            IDictionary <string, /*Number*/ object> randomNumberMap = new JCG.Dictionary <string, object>();

            /*SimpleDateFormat*/
            string dateFormat;
            long   randomDate;
            bool   dateFormatSanityCheckPass;
            int    count = 0;

            do
            {
                if (count > 100)
                {
                    fail("This test has problems to find a sane random DateFormat/NumberFormat. Stopped trying after 100 iterations.");
                }

                dateFormatSanityCheckPass = true;
                LOCALE     = RandomCulture(Random);
                TIMEZONE   = RandomTimeZone(Random);
                DATE_STYLE = randomDateStyle(Random);
                TIME_STYLE = randomDateStyle(Random);

                //// assumes localized date pattern will have at least year, month, day,
                //// hour, minute
                //dateFormat = (SimpleDateFormat)DateFormat.getDateTimeInstance(
                //    DATE_STYLE, TIME_STYLE, LOCALE);

                //// not all date patterns includes era, full year, timezone and second,
                //// so we add them here
                //dateFormat.applyPattern(dateFormat.toPattern() + " G s Z yyyy");
                //dateFormat.setTimeZone(TIMEZONE);

                // assumes localized date pattern will have at least year, month, day,
                // hour, minute
                DATE_FORMAT = new NumberDateFormat(DATE_STYLE, TIME_STYLE, LOCALE)
                {
                    TimeZone = TIMEZONE
                };

                // not all date patterns includes era, full year, timezone and second,
                // so we add them here
                DATE_FORMAT.SetDateFormat(DATE_FORMAT.GetDateFormat() + " g s z yyyy");


                dateFormat = DATE_FORMAT.GetDateFormat();

                do
                {
                    randomDate = Random.nextLong();

                    // prune date value so it doesn't pass in insane values to some
                    // calendars.
                    randomDate = randomDate % 3400000000000L;

                    // truncate to second
                    randomDate = (randomDate / 1000L) * 1000L;

                    // only positive values
                    randomDate = Math.Abs(randomDate);
                } while (randomDate == 0L);

                dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, randomDate);

                dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, 0);

                dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat,
                                                                   -randomDate);

                count++;
            } while (!dateFormatSanityCheckPass);

            //NUMBER_FORMAT = NumberFormat.getNumberInstance(LOCALE);
            //NUMBER_FORMAT.setMaximumFractionDigits((Random().nextInt() & 20) + 1);
            //NUMBER_FORMAT.setMinimumFractionDigits((Random().nextInt() & 20) + 1);
            //NUMBER_FORMAT.setMaximumIntegerDigits((Random().nextInt() & 20) + 1);
            //NUMBER_FORMAT.setMinimumIntegerDigits((Random().nextInt() & 20) + 1);

            NUMBER_FORMAT = new NumberFormat(LOCALE);

            double randomDouble;
            long   randomLong;
            int    randomInt;
            float  randomFloat;

            while ((randomLong = Convert.ToInt64(NormalizeNumber(Math.Abs(Random.nextLong())))) == 0L)
            {
                // retry until non-zero
            }
            while ((randomDouble = Convert.ToDouble(NormalizeNumber(Math.Abs(Random.NextDouble())))) == 0.0)
            {
                // retry until non-zero
            }
            while ((randomFloat = Convert.ToSingle(NormalizeNumber(Math.Abs(Random.nextFloat())))) == 0.0f)
            {
                // retry until non-zero
            }
            while ((randomInt = Convert.ToInt32(NormalizeNumber(Math.Abs(Random.nextInt())))) == 0)
            {
                // retry until non-zero
            }

            randomNumberMap.Put(NumericType.INT64.ToString(), randomLong);
            randomNumberMap.Put(NumericType.INT32.ToString(), randomInt);
            randomNumberMap.Put(NumericType.SINGLE.ToString(), randomFloat);
            randomNumberMap.Put(NumericType.DOUBLE.ToString(), randomDouble);
            randomNumberMap.Put(DATE_FIELD_NAME, randomDate);

            RANDOM_NUMBER_MAP = randomNumberMap.AsReadOnly();

            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory,
                                                             NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                             .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000))
                                                             .SetMergePolicy(NewLogMergePolicy()));

            Document doc = new Document();
            IDictionary <String, NumericConfig> numericConfigMap = new JCG.Dictionary <String, NumericConfig>();
            IDictionary <String, Field>         numericFieldMap  = new JCG.Dictionary <String, Field>();

            qp.NumericConfigMap = (numericConfigMap);

            foreach (NumericType type in Enum.GetValues(typeof(NumericType)))
            {
                if (type == NumericType.NONE)
                {
                    continue;
                }

                numericConfigMap.Put(type.ToString(), new NumericConfig(PRECISION_STEP,
                                                                        NUMBER_FORMAT, type));

                FieldType ft2 = new FieldType(Int32Field.TYPE_NOT_STORED);
                ft2.NumericType          = (type);
                ft2.IsStored             = (true);
                ft2.NumericPrecisionStep = (PRECISION_STEP);
                ft2.Freeze();
                Field field;

                switch (type)
                {
                case NumericType.INT32:
                    field = new Int32Field(type.ToString(), 0, ft2);
                    break;

                case NumericType.SINGLE:
                    field = new SingleField(type.ToString(), 0.0f, ft2);
                    break;

                case NumericType.INT64:
                    field = new Int64Field(type.ToString(), 0L, ft2);
                    break;

                case NumericType.DOUBLE:
                    field = new DoubleField(type.ToString(), 0.0, ft2);
                    break;

                default:
                    fail();
                    field = null;
                    break;
                }
                numericFieldMap.Put(type.ToString(), field);
                doc.Add(field);
            }

            numericConfigMap.Put(DATE_FIELD_NAME, new NumericConfig(PRECISION_STEP,
                                                                    DATE_FORMAT, NumericType.INT64));
            FieldType ft = new FieldType(Int64Field.TYPE_NOT_STORED);

            ft.IsStored             = (true);
            ft.NumericPrecisionStep = (PRECISION_STEP);
            Int64Field dateField = new Int64Field(DATE_FIELD_NAME, 0L, ft);

            numericFieldMap.Put(DATE_FIELD_NAME, dateField);
            doc.Add(dateField);

            foreach (NumberType numberType in Enum.GetValues(typeof(NumberType)))
            {
                setFieldValues(numberType, numericFieldMap);
                if (VERBOSE)
                {
                    Console.WriteLine("Indexing document: " + doc);
                }
                writer.AddDocument(doc);
            }

            reader   = writer.GetReader();
            searcher = NewSearcher(reader);
            writer.Dispose();
        }
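        // A short usage sketch of the configuration above (the query string is
        // hypothetical): once the StandardQueryParser has a NumericConfig per
        // field, ordinary range syntax on that field parses into a numeric range
        // query using the field's NumericType and precision step.
        private Query ParseNumericRange()
        {
            string field = NumericType.INT32.ToString();
            return qp.Parse(field + ":[10 TO 20]", field);
        }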
        public virtual void TestCustomDoublesValueSource()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            writer.AddDocument(doc);
            writer.AddDocument(doc);
            writer.AddDocument(doc);

            // Test wants 3 docs in one segment:
            writer.ForceMerge(1);

            var vs = new ValueSourceAnonymousInnerClassHelper(this, doc);

            FacetsConfig config = new FacetsConfig();

            FacetsCollector fc = new FacetsCollector();

            IndexReader   r = writer.GetReader();
            IndexSearcher s = NewSearcher(r);

            s.Search(new MatchAllDocsQuery(), fc);

            DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), new DoubleRange("< 50", 0.0, true, 50.0, false) };

            Filter        fastMatchFilter;
            AtomicBoolean filterWasUsed = new AtomicBoolean();

            if (Random.NextBoolean())
            {
                // Sort of silly:
                fastMatchFilter = new CachingWrapperFilterAnonymousInnerClassHelper(this, new QueryWrapperFilter(new MatchAllDocsQuery()), filterWasUsed);
            }
            else
            {
                fastMatchFilter = null;
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: fastMatchFilter=" + fastMatchFilter);
            }

            Facets facets = new DoubleRangeFacetCounts("field", vs, fc, fastMatchFilter, ranges);

            Assert.AreEqual("dim=field path=[] value=3 childCount=6\n  < 1 (0)\n  < 2 (1)\n  < 5 (3)\n  < 10 (3)\n  < 20 (3)\n  < 50 (3)\n", facets.GetTopChildren(10, "field").ToString());
            Assert.True(fastMatchFilter == null || filterWasUsed);

            DrillDownQuery ddq = new DrillDownQuery(config);

            ddq.Add("field", ranges[1].GetFilter(fastMatchFilter, vs));

            // Test simple drill-down:
            Assert.AreEqual(1, s.Search(ddq, 10).TotalHits);

            // Test drill-sideways after drill-down
            DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper2(this, s, config, (TaxonomyReader)null, vs, ranges, fastMatchFilter);


            DrillSidewaysResult dsr = ds.Search(ddq, 10);

            Assert.AreEqual(1, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=field path=[] value=3 childCount=6\n  < 1 (0)\n  < 2 (1)\n  < 5 (3)\n  < 10 (3)\n  < 20 (3)\n  < 50 (3)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            IOUtils.Dispose(r, writer, dir);
        }
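        // The test's ValueSourceAnonymousInnerClassHelper is not shown; below is a hedged
        // sketch of a per-document double source consistent with the counts asserted above
        // (doc N yields N + 1.0, so the three docs produce 1.0, 2.0, 3.0), assuming
        // Lucene.NET 4.8's Lucene.Net.Queries.Function API (ValueSource, FunctionValues,
        // DoubleDocValues). The class name is illustrative, not from the original test.
        internal sealed class DocOrdinalDoubleSource : ValueSource
        {
            public override FunctionValues GetValues(IDictionary context, AtomicReaderContext readerContext)
            {
                return new DocOrdinalValues(this);
            }

            public override bool Equals(object o) => o is DocOrdinalDoubleSource;

            public override int GetHashCode() => typeof(DocOrdinalDoubleSource).GetHashCode();

            public override string GetDescription() => "docOrdinal";

            private sealed class DocOrdinalValues : DoubleDocValues
            {
                public DocOrdinalValues(ValueSource vs) : base(vs) { }

                // Per-segment doc id + 1; safe here because ForceMerge(1) above
                // guarantees a single segment.
                public override double DoubleVal(int doc) => doc + 1.0;
            }
        }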
        public void TestRandom()
        {
            int numberOfRuns = TestUtil.NextInt32(Random, 3, 6);

            for (int iter = 0; iter < numberOfRuns; iter++)
            {
                if (Verbose)
                {
                    Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
                }

                int numDocs   = TestUtil.NextInt32(Random, 100, 1000) * RandomMultiplier;
                int numGroups = TestUtil.NextInt32(Random, 1, numDocs);

                if (Verbose)
                {
                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
                }

                JCG.List<BytesRef> groups = new JCG.List<BytesRef>();
                for (int i = 0; i < numGroups; i++)
                {
                    string randomValue;
                    do
                    {
                        // Because of the DocValues-based impl we can't tell the difference between
                        // an empty string and a null value, so we don't generate empty-string groups.
                        randomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    } while ("".Equals(randomValue, StringComparison.Ordinal));
                    groups.Add(new BytesRef(randomValue));
                }
                string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)];
                if (Verbose)
                {
                    Console.WriteLine("TEST: create fake content");
                }
                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.append("real").append(Random.nextInt(3)).append(' ');
                    int fakeCount = Random.nextInt(10);
                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
                    {
                        sb.append("fake ");
                    }
                    contentStrings[contentIDX] = sb.toString();
                    if (Verbose)
                    {
                        Console.WriteLine("  content=" + sb.toString());
                    }
                }

                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
                    Random,
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                         new MockAnalyzer(Random)));
                bool          preFlex   = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
                bool          canUseIDV = !preFlex;
                DocValuesType valueType = vts[Random.nextInt(vts.Length)];

                Document doc        = new Document();
                Document docNoGroup = new Document();
                Field    group      = NewStringField("group", "", Field.Store.NO);
                doc.Add(group);
                Field valuesField = null;
                if (canUseIDV)
                {
                    switch (valueType)
                    {
                    case DocValuesType.BINARY:
                        valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                        break;

                    case DocValuesType.SORTED:
                        valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                        break;

                    default:
                        fail("unhandled type");
                        break;
                    }
                    doc.Add(valuesField);
                }
                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
                doc.Add(sort1);
                docNoGroup.Add(sort1);
                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
                doc.Add(sort2);
                docNoGroup.Add(sort2);
                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
                doc.Add(sort3);
                docNoGroup.Add(sort3);
                Field content = NewTextField("content", "", Field.Store.NO);
                doc.Add(content);
                docNoGroup.Add(content);
                Int32Field id = new Int32Field("id", 0, Field.Store.NO);
                doc.Add(id);
                docNoGroup.Add(id);
                GroupDoc[] groupDocs = new GroupDoc[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    BytesRef groupValue;
                    if (Random.nextInt(24) == 17)
                    {
                        // So we test the "doc doesn't have the grouped
                        // field" case:
                        groupValue = null;
                    }
                    else
                    {
                        groupValue = groups[Random.nextInt(groups.size())];
                    }

                    GroupDoc groupDoc = new GroupDoc(
                        i,
                        groupValue,
                        groups[Random.nextInt(groups.size())],
                        groups[Random.nextInt(groups.size())],
                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                        contentStrings[Random.nextInt(contentStrings.Length)]
                        );

                    if (Verbose)
                    {
                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group is null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
                    }

                    groupDocs[i] = groupDoc;
                    if (groupDoc.group != null)
                    {
                        group.SetStringValue(groupDoc.group.Utf8ToString());
                        if (canUseIDV)
                        {
                            valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString()));
                        }
                    }
                    sort1.SetStringValue(groupDoc.sort1.Utf8ToString());
                    sort2.SetStringValue(groupDoc.sort2.Utf8ToString());
                    sort3.SetStringValue(groupDoc.sort3.Utf8ToString());
                    content.SetStringValue(groupDoc.content);
                    id.SetInt32Value(groupDoc.id);
                    if (groupDoc.group is null)
                    {
                        w.AddDocument(docNoGroup);
                    }
                    else
                    {
                        w.AddDocument(doc);
                    }
                }

                DirectoryReader r = w.GetReader();
                w.Dispose();

                // NOTE: intentional but temporary field cache insanity!
                FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);
                int[]             fieldIdToDocID = new int[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    int fieldId = docIdToFieldId.Get(i);
                    fieldIdToDocID[fieldId] = i;
                }

                try
                {
                    IndexSearcher s = NewSearcher(r);
                    if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(s.IndexReader.GetType()))
                    {
                        canUseIDV = false;
                    }
                    else
                    {
                        canUseIDV = !preFlex;
                    }

                    for (int contentID = 0; contentID < 3; contentID++)
                    {
                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                        foreach (ScoreDoc hit in hits)
                        {
                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                            assertTrue(gd.score == 0.0);
                            gd.score = hit.Score;
                            int docId = gd.id;
                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                        }
                    }

                    foreach (GroupDoc gd in groupDocs)
                    {
                        assertTrue(gd.score != 0.0);
                    }

                    for (int searchIter = 0; searchIter < 100; searchIter++)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: searchIter=" + searchIter);
                        }

                        string searchTerm      = "real" + Random.nextInt(3);
                        bool   sortByScoreOnly = Random.nextBoolean();
                        Sort   sortWithinGroup = GetRandomSort(sortByScoreOnly);
                        AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                        int[] actualGroupHeads   = allGroupHeadsCollector.RetrieveGroupHeads();
                        // The actual group heads contain Lucene doc ids; map them back to our id values.
                        for (int i = 0; i < actualGroupHeads.Length; i++)
                        {
                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                        }
                        // Sort both arrays so we can easily iterate over and compare the actual and expected results.
                        Array.Sort(expectedGroupHeads);
                        Array.Sort(actualGroupHeads);

                        if (Verbose)
                        {
                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                            Console.WriteLine("Sort within group: " + sortWithinGroup);
                            Console.WriteLine("Num group: " + numGroups);
                            Console.WriteLine("Num doc: " + numDocs);
                            Console.WriteLine("\n=== Expected: \n");
                            foreach (int expectedDocId in expectedGroupHeads)
                            {
                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                                string   expectedGroup    = expectedGroupDoc.group is null ? null : expectedGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                                  expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                                    );
                            }
                            Console.WriteLine("\n=== Actual: \n");
                            foreach (int actualDocId in actualGroupHeads)
                            {
                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
                                string   actualGroup    = actualGroupDoc.group is null ? null : actualGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                                  actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                                    );
                            }
                            Console.WriteLine("\n===================================================================================");
                        }

                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
                    }
                }
                finally
                {
                    QueryUtils.PurgeFieldCache(r);
                }

                r.Dispose();
                dir.Dispose();
            }
        }
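        // "Group heads" are, per distinct group value, the single document that wins the
        // within-group sort. Below is a hedged brute-force sketch of the idea that
        // CreateExpectedGroupHeads (not shown here) presumably implements; the real test
        // also treats a missing (null) group as a group of its own, which this sketch
        // elides for brevity. The helper name is illustrative.
        private static Dictionary<BytesRef, GroupDoc> NaiveGroupHeads(
            IEnumerable<GroupDoc> docs, IComparer<GroupDoc> withinGroupOrder)
        {
            var heads = new Dictionary<BytesRef, GroupDoc>();
            foreach (GroupDoc d in docs)
            {
                if (d.group is null)
                {
                    continue; // null groups elided in this sketch (see note above)
                }
                if (!heads.TryGetValue(d.group, out GroupDoc current)
                    || withinGroupOrder.Compare(d, current) < 0)
                {
                    heads[d.group] = d; // d beats the current head under the sort
                }
            }
            return heads;
        }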
        public virtual void TestMixedRangeAndNonRangeTaxonomy()
        {
            Directory         d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, d);
            Directory td = NewDirectory();
            DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            for (long l = 0; l < 100; l++)
            {
                Document doc = new Document();
                // For computing range facet counts:
                doc.Add(new NumericDocValuesField("field", l));
                // For drill down by numeric range:
                doc.Add(new Int64Field("field", l, Field.Store.NO));

                if ((l & 3) == 0)
                {
                    doc.Add(new FacetField("dim", "a"));
                }
                else
                {
                    doc.Add(new FacetField("dim", "b"));
                }
                w.AddDocument(config.Build(tw, doc));
            }

            IndexReader r = w.GetReader();

            var tr = new DirectoryTaxonomyReader(tw);

            IndexSearcher s = NewSearcher(r);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: searcher=" + s);
            }

            DrillSideways ds = new DrillSidewaysAnonymousInnerClassHelper(this, s, config, tr);
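            // (A hedged sketch of the numeric range set this helper presumably counts
            // follows this test method.)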

            // First search, no drill downs:
            DrillDownQuery      ddq = new DrillDownQuery(config);
            DrillSidewaysResult dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(100, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            // Second search, drill down on dim=b:
            ddq = new DrillDownQuery(config);
            ddq.Add("dim", "b");
            dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(75, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=16 childCount=5\n  less than 10 (7)\n  less than or equal to 10 (8)\n  over 90 (7)\n  90 or above (8)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());

            // Third search, drill down on "less than or equal to 10":
            ddq = new DrillDownQuery(config);
            ddq.Add("field", NumericRangeQuery.NewInt64Range("field", 0L, 10L, true, true));
            dsr = ds.Search(null, ddq, 10);

            Assert.AreEqual(11, dsr.Hits.TotalHits);
            Assert.AreEqual("dim=dim path=[] value=11 childCount=2\n  b (8)\n  a (3)\n", dsr.Facets.GetTopChildren(10, "dim").ToString());
            Assert.AreEqual("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n", dsr.Facets.GetTopChildren(10, "field").ToString());
            IOUtils.Dispose(tw, tr, td, w, r, d);
        }
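        // The DrillSidewaysAnonymousInnerClassHelper above is not shown; the labels and
        // counts asserted in TestMixedRangeAndNonRangeTaxonomy imply that it presumably
        // counts the "field" values with Int64RangeFacetCounts over a range set like this
        // hedged sketch (the field name ImpliedFieldRanges is illustrative):
        internal static readonly Int64Range[] ImpliedFieldRanges = new Int64Range[]
        {
            new Int64Range("less than 10", 0L, true, 10L, false),
            new Int64Range("less than or equal to 10", 0L, true, 10L, true),
            new Int64Range("over 90", 90L, false, 100L, false),
            new Int64Range("90 or above", 90L, true, 100L, false),
            new Int64Range("over 1000", 1000L, false, long.MaxValue, true),
        };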
        public virtual void TestRandomDoubles()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            int numDocs = AtLeast(1000);

            double[] values   = new double[numDocs];
            double   minValue = double.PositiveInfinity;
            double   maxValue = double.NegativeInfinity;

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                double   v   = Random.NextDouble();
                values[i] = v;
                doc.Add(new DoubleDocValuesField("field", v));
                doc.Add(new DoubleField("field", v, Field.Store.NO));
                w.AddDocument(doc);
                minValue = Math.Min(minValue, v);
                maxValue = Math.Max(maxValue, v);
            }
            IndexReader r = w.GetReader();

            IndexSearcher s      = NewSearcher(r);
            FacetsConfig  config = new FacetsConfig();

            int numIters = AtLeast(10);

            for (int iter = 0; iter < numIters; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                int           numRange         = TestUtil.NextInt32(Random, 1, 5);
                DoubleRange[] ranges           = new DoubleRange[numRange];
                int[]         expectedCounts   = new int[numRange];
                double        minAcceptedValue = double.PositiveInfinity;
                double        maxAcceptedValue = double.NegativeInfinity;
                for (int rangeID = 0; rangeID < numRange; rangeID++)
                {
                    double min;
                    if (rangeID > 0 && Random.Next(10) == 7)
                    {
                        // Use an existing boundary:
                        DoubleRange prevRange = ranges[Random.Next(rangeID)];
                        if (Random.NextBoolean())
                        {
                            min = prevRange.Min;
                        }
                        else
                        {
                            min = prevRange.Max;
                        }
                    }
                    else
                    {
                        min = Random.NextDouble();
                    }
                    double max;
                    if (rangeID > 0 && Random.Next(10) == 7)
                    {
                        // Use an existing boundary:
                        DoubleRange prevRange = ranges[Random.Next(rangeID)];
                        if (Random.NextBoolean())
                        {
                            max = prevRange.Min;
                        }
                        else
                        {
                            max = prevRange.Max;
                        }
                    }
                    else
                    {
                        max = Random.NextDouble();
                    }

                    if (min > max)
                    {
                        double x = min;
                        min = max;
                        max = x;
                    }

                    bool minIncl;
                    bool maxIncl;
                    if (min == max)
                    {
                        minIncl = true;
                        maxIncl = true;
                    }
                    else
                    {
                        minIncl = Random.NextBoolean();
                        maxIncl = Random.NextBoolean();
                    }
                    ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl);

                    // Do "slow but hopefully correct" computation of
                    // expected count:
                    for (int i = 0; i < numDocs; i++)
                    {
                        bool accept = true;
                        if (minIncl)
                        {
                            accept &= values[i] >= min;
                        }
                        else
                        {
                            accept &= values[i] > min;
                        }
                        if (maxIncl)
                        {
                            accept &= values[i] <= max;
                        }
                        else
                        {
                            accept &= values[i] < max;
                        }
                        if (accept)
                        {
                            expectedCounts[rangeID]++;
                            minAcceptedValue = Math.Min(minAcceptedValue, values[i]);
                            maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]);
                        }
                    }
                }

                FacetsCollector sfc = new FacetsCollector();
                s.Search(new MatchAllDocsQuery(), sfc);
                Filter fastMatchFilter;
                if (Random.NextBoolean())
                {
                    if (Random.NextBoolean())
                    {
                        fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minValue, maxValue, true, true);
                    }
                    else
                    {
                        fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true);
                    }
                }
                else
                {
                    fastMatchFilter = null;
                }
                ValueSource vs     = new DoubleFieldSource("field");
                Facets      facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
                FacetResult result = facets.GetTopChildren(10, "field");
                Assert.AreEqual(numRange, result.LabelValues.Length);
                for (int rangeID = 0; rangeID < numRange; rangeID++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("  range " + rangeID + " expectedCount=" + expectedCounts[rangeID]);
                    }
                    LabelAndValue subNode = result.LabelValues[rangeID];
                    Assert.AreEqual("r" + rangeID, subNode.Label);
                    Assert.AreEqual(expectedCounts[rangeID], (int)subNode.Value);

                    DoubleRange range = ranges[rangeID];

                    // Test drill-down:
                    DrillDownQuery ddq = new DrillDownQuery(config);
                    if (Random.NextBoolean())
                    {
                        if (Random.NextBoolean())
                        {
                            ddq.Add("field", NumericRangeFilter.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive));
                        }
                        else
                        {
                            ddq.Add("field", NumericRangeQuery.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive));
                        }
                    }
                    else
                    {
                        ddq.Add("field", range.GetFilter(fastMatchFilter, vs));
                    }

                    Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits);
                }
            }

            IOUtils.Dispose(w, r, dir);
        }
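        // The brute-force expected-count loop in TestRandomDoubles above implements the
        // usual inclusive/exclusive range predicate; factored out here as a hedged helper
        // for clarity (not part of the original test):
        private static bool Accepts(double v, double min, bool minIncl, double max, bool maxIncl)
        {
            bool aboveMin = minIncl ? v >= min : v > min;
            bool belowMax = maxIncl ? v <= max : v < max;
            return aboveMin && belowMax;
        }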
        public virtual void TestPayloadsPos0()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockPayloadAnalyzer());
            Document          doc    = new Document();

            doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            writer.AddDocument(doc);

            IndexReader  readerFromWriter = writer.Reader;
            AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter);

            DocsAndPositionsEnum tp = r.TermPositionsEnum(new Term("content", "a"));

            int count = 0;

            Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq());
            Assert.AreEqual(0, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc());

            IndexSearcher @is = NewSearcher(readerFromWriter);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

            count = 0;
            bool sawZero = false;

            if (VERBOSE)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            while (pspans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("doc " + pspans.Doc() + ": span " + pspans.Start() + " to " + pspans.End());
                }
                var payloads = pspans.Payload;
                sawZero |= pspans.Start() == 0;
                foreach (var bytes in payloads)
                {
                    count++;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString((byte[])(Array)bytes));
                    }
                }
            }
            Assert.IsTrue(sawZero);
            Assert.AreEqual(5, count);

            // System.out.println("\ngetSpans test");
            Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            count   = 0;
            sawZero = false;
            while (spans.Next())
            {
                count++;
                sawZero |= spans.Start() == 0;
                // System.out.println(spans.Doc() + " - " + spans.Start() + " - " +
                // spans.End());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            // System.out.println("\nPayloadSpanUtil test");

            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext);
            var             pls = psu.GetPayloadsForQuery(snq);

            count = pls.Count;
            foreach (var bytes in pls)
            {
                string s = Encoding.UTF8.GetString(bytes);
                //System.out.println(s);
                sawZero |= s.Equals("pos: 0");
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Dispose();
            @is.IndexReader.Dispose();
            dir.Dispose();
        }
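        // MockPayloadAnalyzer (from the Lucene.NET test framework) attaches a payload of
        // the form "pos: <position>" to every token, which is why TestPayloadsPos0 decodes
        // payload bytes as UTF-8 and probes for "pos: 0". A minimal hedged helper showing
        // that decoding step in isolation (the name is illustrative, not from the test):
        private static bool IsPositionZeroPayload(byte[] payloadBytes)
        {
            string decoded = Encoding.UTF8.GetString(payloadBytes);
            return decoded.Equals("pos: 0", StringComparison.Ordinal);
        }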
        internal virtual void TestSort(bool useFrom, bool VERBOSE)
        {
            IndexReader reader = null;
            Directory   dir    = null;

            if (!VERBOSE)
            {
                Console.WriteLine("Verbosity disabled. Enable manually if needed.");
            }

            int numDocs = VERBOSE ? AtLeast(50) : AtLeast(1000);

            // int numDocs = AtLeast(50);

            string[] tokens = new string[] { "a", "b", "c", "d", "e" };

            if (VERBOSE)
            {
                Console.WriteLine("TEST: make index");
            }

            {
                dir = NewDirectory();
                RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir);
                // w.setDoRandomForceMerge(false);

                // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100));

                string[] content = new string[AtLeast(20)];

                for (int contentIDX = 0; contentIDX < content.Length; contentIDX++)
                {
                    StringBuilder sb        = new StringBuilder();
                    int           numTokens = TestUtil.NextInt32(Random, 1, 10);
                    for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++)
                    {
                        sb.Append(tokens[Random.Next(tokens.Length)]).Append(' ');
                    }
                    content[contentIDX] = sb.ToString();
                }

                for (int docIDX = 0; docIDX < numDocs; docIDX++)
                {
                    Document doc = new Document();
                    doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
                    doc.Add(NewTextField("text", content[Random.Next(content.Length)], Field.Store.NO));
                    doc.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO));
                    int intValue;
                    if (Random.Next(100) == 17)
                    {
                        intValue = int.MinValue;
                    }
                    else if (Random.Next(100) == 17)
                    {
                        intValue = int.MaxValue;
                    }
                    else
                    {
                        intValue = Random.Next();
                    }
                    doc.Add(new Int32Field("int", intValue, Field.Store.NO));
                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc=" + doc);
                    }
                    w.AddDocument(doc);
                }

                reader = w.GetReader();
                w.Dispose();
            }

            // NOTE: sometimes reader has just one segment, which is
            // important to test
            IndexSearcher      searcher = NewSearcher(reader);
            IndexReaderContext ctx      = searcher.TopReaderContext;

            ShardSearcher[] subSearchers;
            int[]           docStarts;

            if (ctx is AtomicReaderContext)
            {
                subSearchers    = new ShardSearcher[1];
                docStarts       = new int[1];
                subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx);
                docStarts[0]    = 0;
            }
            else
            {
                CompositeReaderContext compCTX = (CompositeReaderContext)ctx;
                int size = compCTX.Leaves.Count;
                subSearchers = new ShardSearcher[size];
                docStarts    = new int[size];
                int docBase = 0;
                for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++)
                {
                    AtomicReaderContext leave = compCTX.Leaves[searcherIDX];
                    subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
                    docStarts[searcherIDX]    = docBase;
                    docBase += leave.Reader.MaxDoc;
                }
            }

            IList<SortField> sortFields = new List<SortField>();

            sortFields.Add(new SortField("string", SortFieldType.STRING, true));
            sortFields.Add(new SortField("string", SortFieldType.STRING, false));
            sortFields.Add(new SortField("int", SortFieldType.INT32, true));
            sortFields.Add(new SortField("int", SortFieldType.INT32, false));
            sortFields.Add(new SortField("float", SortFieldType.SINGLE, true));
            sortFields.Add(new SortField("float", SortFieldType.SINGLE, false));
            sortFields.Add(new SortField(null, SortFieldType.SCORE, true));
            sortFields.Add(new SortField(null, SortFieldType.SCORE, false));
            sortFields.Add(new SortField(null, SortFieldType.DOC, true));
            sortFields.Add(new SortField(null, SortFieldType.DOC, false));

            for (int iter = 0; iter < 1000 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: custom FieldComp...
                Query query = new TermQuery(new Term("text", tokens[Random.Next(tokens.Length)]));

                Sort sort;
                if (Random.Next(10) == 4)
                {
                    // Sort by score
                    sort = null;
                }
                else
                {
                    SortField[] randomSortFields = new SortField[TestUtil.NextInt32(Random, 1, 3)];
                    for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++)
                    {
                        randomSortFields[sortIDX] = sortFields[Random.Next(sortFields.Count)];
                    }
                    sort = new Sort(randomSortFields);
                }

                int numHits = TestUtil.NextInt32(Random, 1, numDocs + 5);
                //final int numHits = 5;

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
                }

                int from = -1;
                int size = -1;
                // First search on whole index:
                TopDocs topHits;
                if (sort == null)
                {
                    if (useFrom)
                    {
                        TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random.NextBoolean());
                        searcher.Search(query, c);
                        from = TestUtil.NextInt32(Random, 0, numHits - 1);
                        size = numHits - from;
                        TopDocs tempTopHits = c.GetTopDocs();
                        if (from < tempTopHits.ScoreDocs.Length)
                        {
                            // Can't use TopDocs#topDocs(start, howMany), since it behaves
                            // differently when start >= hitCount than TopDocs#merge currently does
                            ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                            Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                            tempTopHits.ScoreDocs = newScoreDocs;
                            topHits = tempTopHits;
                        }
                        else
                        {
                            topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                        }
                    }
                    else
                    {
                        topHits = searcher.Search(query, numHits);
                    }
                }
                else
                {
                    TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean());
                    searcher.Search(query, c);
                    if (useFrom)
                    {
                        from = TestUtil.NextInt32(Random, 0, numHits - 1);
                        size = numHits - from;
                        TopDocs tempTopHits = c.GetTopDocs();
                        if (from < tempTopHits.ScoreDocs.Length)
                        {
                            // Can't use TopDocs#topDocs(start, howMany), since it behaves
                            // differently when start >= hitCount than TopDocs#merge currently does
                            ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                            Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                            tempTopHits.ScoreDocs = newScoreDocs;
                            topHits = tempTopHits;
                        }
                        else
                        {
                            topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                        }
                    }
                    else
                    {
                        topHits = c.GetTopDocs(0, numHits);
                    }
                }

                if (VERBOSE)
                {
                    if (useFrom)
                    {
                        Console.WriteLine("from=" + from + " size=" + size);
                    }
                    Console.WriteLine("  top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore));
                    if (topHits.ScoreDocs != null)
                    {
                        for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++)
                        {
                            ScoreDoc sd = topHits.ScoreDocs[hitIDX];
                            Console.WriteLine("    doc=" + sd.Doc + " score=" + sd.Score);
                        }
                    }
                }

                // ... then all shards:
                Weight w = searcher.CreateNormalizedWeight(query);

                TopDocs[] shardHits = new TopDocs[subSearchers.Length];
                for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++)
                {
                    TopDocs       subHits;
                    ShardSearcher subSearcher = subSearchers[shardIDX];
                    if (sort == null)
                    {
                        subHits = subSearcher.Search(w, numHits);
                    }
                    else
                    {
                        TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean());
                        subSearcher.Search(w, c);
                        subHits = c.GetTopDocs(0, numHits);
                    }

                    shardHits[shardIDX] = subHits;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? "null" : subHits.ScoreDocs.Length.ToString()));
                        if (subHits.ScoreDocs != null)
                        {
                            foreach (ScoreDoc sd in subHits.ScoreDocs)
                            {
                                Console.WriteLine("    doc=" + sd.Doc + " score=" + sd.Score);
                            }
                        }
                    }
                }

                // Merge:
                TopDocs mergedHits;
                if (useFrom)
                {
                    mergedHits = TopDocs.Merge(sort, from, size, shardHits);
                }
                else
                {
                    mergedHits = TopDocs.Merge(sort, numHits, shardHits);
                }

                if (mergedHits.ScoreDocs != null)
                {
                    // Make sure the returned shards are correct:
                    for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++)
                    {
                        ScoreDoc sd = mergedHits.ScoreDocs[hitIDX];
                        Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard");
                    }
                }

                TestUtil.AssertEquals(topHits, mergedHits);
            }
            reader.Dispose();
            dir.Dispose();
        }
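        // The two identical from/size slicing blocks in TestSort above could be factored
        // into one helper; a hedged sketch grounded directly in that code (the name
        // SliceTopDocs is illustrative, not from the original test):
        private static TopDocs SliceTopDocs(TopDocs hits, int from, int size)
        {
            if (from >= hits.ScoreDocs.Length)
            {
                // Past the end: keep the totals but return no hits, mirroring the
                // else-branches in TestSort.
                return new TopDocs(hits.TotalHits, new ScoreDoc[0], hits.MaxScore);
            }
            ScoreDoc[] sliced = new ScoreDoc[Math.Min(size, hits.ScoreDocs.Length - from)];
            Array.Copy(hits.ScoreDocs, from, sliced, 0, sliced.Length);
            hits.ScoreDocs = sliced;
            return hits;
        }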