Example #1
        public virtual void TestRandom()
        {
            string[]        tokens   = GetRandomTokens(10);
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();

            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, indexDir);
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig    config   = new FacetsConfig();
            int             numDocs  = AtLeast(1000);
            int             numDims  = TestUtil.NextInt32(Random, 1, 7);
            IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims);

            foreach (TestDoc testDoc in testDocs)
            {
                Document doc = new Document();
                doc.Add(NewStringField("content", testDoc.content, Field.Store.NO));
                testDoc.value = Random.NextSingle();
                doc.Add(new SingleDocValuesField("value", testDoc.value));
                for (int j = 0; j < numDims; j++)
                {
                    if (testDoc.dims[j] != null)
                    {
                        doc.Add(new FacetField("dim" + j, testDoc.dims[j]));
                    }
                }
                w.AddDocument(config.Build(tw, doc));
            }

            // NRT open
            IndexSearcher searcher = NewSearcher(w.GetReader());

            // NRT open
            var tr = new DirectoryTaxonomyReader(tw);

            ValueSource values = new SingleFieldSource("value");
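            // TaxonomyFacetSumValueSource aggregates this per-document "value" field,
            // so each facet label's weight below is a sum of float values, not a hit count.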

            int iters = AtLeast(100);

            for (int iter = 0; iter < iters; iter++)
            {
                string searchToken = tokens[Random.Next(tokens.Length)];
                if (Verbose)
                {
                    Console.WriteLine("\nTEST: iter content=" + searchToken);
                }
                FacetsCollector fc = new FacetsCollector();
                FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
                Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values);

                // Slow, yet hopefully bug-free, faceting:
                var expectedValues = new JCG.List <Dictionary <string, float?> >(numDims);
                for (int i = 0; i < numDims; i++)
                {
                    expectedValues.Add(new Dictionary <string, float?>());
                }

                foreach (TestDoc doc in testDocs)
                {
                    if (doc.content.Equals(searchToken, StringComparison.Ordinal))
                    {
                        for (int j = 0; j < numDims; j++)
                        {
                            if (doc.dims[j] != null)
                            {
                                if (!expectedValues[j].TryGetValue(doc.dims[j], out float? v) || v == null)
                                {
                                    expectedValues[j][doc.dims[j]] = doc.value;
                                }
                                else
                                {
                                    expectedValues[j][doc.dims[j]] = (float)v + doc.value;
                                }
                            }
                        }
                    }
                }

                JCG.List <FacetResult> expected = new JCG.List <FacetResult>();
                for (int i = 0; i < numDims; i++)
                {
                    JCG.List <LabelAndValue> labelValues = new JCG.List <LabelAndValue>();
                    float totValue = 0;
                    foreach (KeyValuePair <string, float?> ent in expectedValues[i])
                    {
                        labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value));
                        totValue += ent.Value.Value;
                    }
                    SortLabelValues(labelValues);
                    if (totValue > 0)
                    {
                        expected.Add(new FacetResult("dim" + i, new string[0], totValue, labelValues.ToArray(), labelValues.Count));
                    }
                }

                // Sort by highest value, tie break by value:
                SortFacetResults(expected);

                IList <FacetResult> actual = facets.GetAllDims(10);

                // Messy: fixup ties
                SortTies(actual);

                if (Verbose)
                {
                    Console.WriteLine("expected=\n" + expected.ToString());
                    Console.WriteLine("actual=\n" + actual.ToString());
                }

                AssertFloatValuesEquals(expected, actual);
            }

            IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir);
        }
Example #2
        public override void BeforeClass()
        {
            base.BeforeClass();

            noDocs    = AtLeast(4096);
            distance  = (1L << 60) / noDocs;
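            // distance spaces the indexed values evenly across a wide slice of the long range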
            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory,
                                                             NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                             .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 100, 1000))
                                                             .SetMergePolicy(NewLogMergePolicy()));

            FieldType storedLong = new FieldType(Int64Field.TYPE_NOT_STORED);

            storedLong.IsStored = true;
            storedLong.Freeze();

            FieldType storedLong8 = new FieldType(storedLong);

            storedLong8.NumericPrecisionStep = 8;

            FieldType storedLong4 = new FieldType(storedLong);

            storedLong4.NumericPrecisionStep = 4;

            FieldType storedLong6 = new FieldType(storedLong);

            storedLong6.NumericPrecisionStep = 6;

            FieldType storedLong2 = new FieldType(storedLong);

            storedLong2.NumericPrecisionStep = 2;

            FieldType storedLongNone = new FieldType(storedLong);

            storedLongNone.NumericPrecisionStep = int.MaxValue;
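            // A precision step of int.MaxValue disables trie indexing: each value is
            // indexed as a single term at full precision.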

            FieldType unstoredLong = Int64Field.TYPE_NOT_STORED;

            FieldType unstoredLong8 = new FieldType(unstoredLong);

            unstoredLong8.NumericPrecisionStep = 8;

            FieldType unstoredLong6 = new FieldType(unstoredLong);

            unstoredLong6.NumericPrecisionStep = 6;

            FieldType unstoredLong4 = new FieldType(unstoredLong);

            unstoredLong4.NumericPrecisionStep = 4;

            FieldType unstoredLong2 = new FieldType(unstoredLong);

            unstoredLong2.NumericPrecisionStep = 2;

            Int64Field field8      = new Int64Field("field8", 0L, storedLong8),
                       field6      = new Int64Field("field6", 0L, storedLong6),
                       field4      = new Int64Field("field4", 0L, storedLong4),
                       field2      = new Int64Field("field2", 0L, storedLong2),
                       fieldNoTrie = new Int64Field("field" + int.MaxValue, 0L, storedLongNone),
                       ascfield8   = new Int64Field("ascfield8", 0L, unstoredLong8),
                       ascfield6   = new Int64Field("ascfield6", 0L, unstoredLong6),
                       ascfield4   = new Int64Field("ascfield4", 0L, unstoredLong4),
                       ascfield2   = new Int64Field("ascfield2", 0L, unstoredLong2);
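            // The Field instances above are created once and reused; the indexing
            // loop below only updates their values before each AddDocument call.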

            Document doc = new Document();

            // add fields that have a distance, to test general functionality
            doc.Add(field8);
            doc.Add(field6);
            doc.Add(field4);
            doc.Add(field2);
            doc.Add(fieldNoTrie);
            // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
            doc.Add(ascfield8);
            doc.Add(ascfield6);
            doc.Add(ascfield4);
            doc.Add(ascfield2);

            // Add a series of noDocs docs with increasing long values, by updating the fields
            for (int l = 0; l < noDocs; l++)
            {
                long val = distance * l + startOffset;
                field8.SetInt64Value(val);
                field6.SetInt64Value(val);
                field4.SetInt64Value(val);
                field2.SetInt64Value(val);
                fieldNoTrie.SetInt64Value(val);

                val = l - (noDocs / 2);
                ascfield8.SetInt64Value(val);
                ascfield6.SetInt64Value(val);
                ascfield4.SetInt64Value(val);
                ascfield2.SetInt64Value(val);
                writer.AddDocument(doc);
            }
            reader   = writer.GetReader();
            searcher = NewSearcher(reader);
            writer.Dispose();
        }
Example #3
        public virtual void TestDocValuesIntegration()
        {
            AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues);
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
            doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
            doc.Add(new NumericDocValuesField("numeric", 42));
            if (DefaultCodecSupportsSortedSet)
            {
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
            }
            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();
            AtomicReader ar = GetOnlySegmentReader(ir);
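            // Each block below checks that FieldCache rejects accessors that don't
            // match the field's DocValues type and accepts the matching ones.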

            BytesRef scratch = new BytesRef();

            // Binary type: can be retrieved via getTerms()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "binary", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
            binary.Get(0, scratch);
            Assert.AreEqual("binary value", scratch.Utf8ToString());

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
            Assert.IsTrue(bits.Get(0));

            // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "sorted", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "sorted");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
            binary.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
            Assert.AreEqual(0, sorted.GetOrd(0));
            Assert.AreEqual(1, sorted.ValueCount);
            sorted.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
            sortedSet.SetDocument(0);
            Assert.AreEqual(0, sortedSet.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
            Assert.AreEqual(1, sortedSet.ValueCount);

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
            Assert.IsTrue(bits.Get(0));

            // Numeric type: can be retrieved via getInts() and so on
            Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
            Assert.AreEqual(42, numeric.Get(0));

            try
            {
                FieldCache.DEFAULT.GetTerms(ar, "numeric", true);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
            Assert.IsTrue(bits.Get(0));

            // SortedSet type: can be retrieved via getDocTermOrds()
            if (DefaultCodecSupportsSortedSet)
            {
                try
                {
                    FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTerms(ar, "sortedset", true);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    new DocTermOrds(ar, null, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
                sortedSet.SetDocument(0);
                Assert.AreEqual(0, sortedSet.NextOrd());
                Assert.AreEqual(1, sortedSet.NextOrd());
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
                Assert.AreEqual(2, sortedSet.ValueCount);

                bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
                Assert.IsTrue(bits.Get(0));
            }

            ir.Dispose();
            dir.Dispose();
        }
Example #4
        private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument)
        {
            Random random    = Random;
            int    numDocs   = TestUtil.NextInt32(random, 138, 1145) * RandomMultiplier;
            int    numGroups = TestUtil.NextInt32(random, 1, numDocs / 4);
            int    numFacets = TestUtil.NextInt32(random, 1, numDocs / 6);

            if (Verbose)
            {
                Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
            }

            List <string> groups = new List <string>();

            for (int i = 0; i < numGroups; i++)
            {
                groups.Add(GenerateRandomNonEmptyString());
            }
            List <string> facetValues = new List <string>();

            for (int i = 0; i < numFacets; i++)
            {
                facetValues.Add(GenerateRandomNonEmptyString());
            }
            string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)];
            if (Verbose)
            {
                Console.WriteLine("TEST: create fake content");
            }
            for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++)
            {
                contentBrs[contentIDX] = GenerateRandomNonEmptyString();
                if (Verbose)
                {
                    Console.WriteLine("  content=" + contentBrs[contentIDX]);
                }
            }

            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(
                    TEST_VERSION_CURRENT,
                    new MockAnalyzer(random)
                    )
                );
            bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            bool useDv    = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();
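            // DocValues need a post-3.x codec, and SortedDocValuesField is single-valued,
            // so DV is only used when each document has at most one facet value.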

            Document doc               = new Document();
            Document docNoGroup        = new Document();
            Document docNoFacet        = new Document();
            Document docNoGroupNoFacet = new Document();
            Field    group             = NewStringField("group", "", Field.Store.NO);
            Field    groupDc           = new SortedDocValuesField("group_dv", new BytesRef());

            if (useDv)
            {
                doc.Add(groupDc);
                docNoFacet.Add(groupDc);
            }
            doc.Add(group);
            docNoFacet.Add(group);
            Field[] facetFields;
            if (useDv)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!multipleFacetValuesPerDocument);
                }
                facetFields    = new Field[2];
                facetFields[0] = NewStringField("facet", "", Field.Store.NO);
                doc.Add(facetFields[0]);
                docNoGroup.Add(facetFields[0]);
                facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
                doc.Add(facetFields[1]);
                docNoGroup.Add(facetFields[1]);
            }
            else
            {
                facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
                for (int i = 0; i < facetFields.Length; i++)
                {
                    facetFields[i] = NewStringField("facet", "", Field.Store.NO);
                    doc.Add(facetFields[i]);
                    docNoGroup.Add(facetFields[i]);
                }
            }
            Field content = NewStringField("content", "", Field.Store.NO);

            doc.Add(content);
            docNoGroup.Add(content);
            docNoFacet.Add(content);
            docNoGroupNoFacet.Add(content);

            ISet <string> uniqueFacetValues = new JCG.SortedSet <string>(Comparer <string> .Create((a, b) => {
                if (a == b)
                {
                    return(0);
                }
                else if (a == null)
                {
                    return(-1);
                }
                else if (b == null)
                {
                    return(1);
                }
                else
                {
                    return(a.CompareToOrdinal(b));
                }
            }));

            // LUCENENET NOTE: Need JCG.Dictionary here because of null keys
            IDictionary <string, JCG.Dictionary <string, ISet <string> > > searchTermToFacetToGroups = new Dictionary <string, JCG.Dictionary <string, ISet <string> > >();
            int facetWithMostGroups = 0;
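            // For each content term, track which groups contain each facet value;
            // the IndexContext returned below carries this map so grouped facet results can be verified.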

            for (int i = 0; i < numDocs; i++)
            {
                string groupValue;
                if (random.nextInt(24) == 17)
                {
                    // So we test the "doc doesn't have the group'd
                    // field" case:
                    if (useDv)
                    {
                        groupValue = "";
                    }
                    else
                    {
                        groupValue = null;
                    }
                }
                else
                {
                    groupValue = groups[random.nextInt(groups.size())];
                }

                string contentStr = contentBrs[random.nextInt(contentBrs.Length)];
                if (!searchTermToFacetToGroups.TryGetValue(contentStr, out JCG.Dictionary <string, ISet <string> > facetToGroups))
                {
                    searchTermToFacetToGroups[contentStr] = facetToGroups = new JCG.Dictionary <string, ISet <string> >();
                }

                List <string> facetVals = new List <string>();
                if (useDv || random.nextInt(24) != 18)
                {
                    if (useDv)
                    {
                        string facetValue = facetValues[random.nextInt(facetValues.size())];
                        uniqueFacetValues.Add(facetValue);
                        if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                        {
                            facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                        }
                        groupsInFacet.add(groupValue);
                        if (groupsInFacet.size() > facetWithMostGroups)
                        {
                            facetWithMostGroups = groupsInFacet.size();
                        }
                        facetFields[0].SetStringValue(facetValue);
                        facetFields[1].SetBytesValue(new BytesRef(facetValue));
                        facetVals.Add(facetValue);
                    }
                    else
                    {
                        foreach (Field facetField in facetFields)
                        {
                            string facetValue = facetValues[random.nextInt(facetValues.size())];
                            uniqueFacetValues.Add(facetValue);
                            if (!facetToGroups.TryGetValue(facetValue, out ISet <string> groupsInFacet))
                            {
                                facetToGroups[facetValue] = groupsInFacet = new JCG.HashSet <string>();
                            }
                            groupsInFacet.add(groupValue);
                            if (groupsInFacet.size() > facetWithMostGroups)
                            {
                                facetWithMostGroups = groupsInFacet.size();
                            }
                            facetField.SetStringValue(facetValue);
                            facetVals.Add(facetValue);
                        }
                    }
                }
                else
                {
                    uniqueFacetValues.Add(null);
                    if (!facetToGroups.TryGetValue(null, out ISet <string> groupsInFacet))
                    {
                        facetToGroups[null] = groupsInFacet = new JCG.HashSet <string>();
                    }
                    groupsInFacet.add(groupValue);
                    if (groupsInFacet.size() > facetWithMostGroups)
                    {
                        facetWithMostGroups = groupsInFacet.size();
                    }
                }

                if (Verbose)
                {
                    Console.WriteLine("  doc content=" + contentStr + " group=" + (groupValue ?? "null") + " facetVals=" + Collections.ToString(facetVals));
                }

                if (groupValue != null)
                {
                    if (useDv)
                    {
                        groupDc.SetBytesValue(new BytesRef(groupValue));
                    }
                    group.SetStringValue(groupValue);
                }
                else if (useDv)
                {
                    // DV cannot have missing values:
                    groupDc.SetBytesValue(new BytesRef());
                }
                content.SetStringValue(contentStr);
                if (groupValue == null && facetVals.Count == 0)
                {
                    writer.AddDocument(docNoGroupNoFacet);
                }
                else if (facetVals.Count == 0)
                {
                    writer.AddDocument(docNoFacet);
                }
                else if (groupValue == null)
                {
                    writer.AddDocument(docNoGroup);
                }
                else
                {
                    writer.AddDocument(doc);
                }
            }

            DirectoryReader reader = writer.GetReader();

            writer.Dispose();

            return(new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv));
        }
Example #5
        public virtual void TestPayloadsPos0()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, new MockPayloadAnalyzer());
            Document doc = new Document();

            doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            writer.AddDocument(doc);

            IndexReader  readerFromWriter = writer.GetReader();
            AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter);

            DocsAndPositionsEnum tp = r.GetTermPositionsEnum(new Term("content", "a"));

            int count = 0;

            Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq);
            Assert.AreEqual(0, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc());

            IndexSearcher @is = NewSearcher(readerFromWriter);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
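            // Unordered span query: "a" and "k" within 30 positions of each other.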

            count = 0;
            bool sawZero = false;

            if (Verbose)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            while (pspans.Next())
            {
                if (Verbose)
                {
                    Console.WriteLine("doc " + pspans.Doc + ": span " + pspans.Start + " to " + pspans.End);
                }
                var payloads = pspans.GetPayload();
                sawZero |= pspans.Start == 0;
                foreach (var bytes in payloads)
                {
                    count++;
                    if (Verbose)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString(bytes));
                    }
                }
            }
            Assert.IsTrue(sawZero);
            Assert.AreEqual(5, count);

            // System.out.println("\ngetSpans test");
            Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            count   = 0;
            sawZero = false;
            while (spans.Next())
            {
                count++;
                sawZero |= spans.Start == 0;
                // System.out.println(spans.Doc() + " - " + spans.Start() + " - " +
                // spans.End());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            // System.out.println("\nPayloadSpanUtil test");

            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext);
            var             pls = psu.GetPayloadsForQuery(snq);

            count = pls.Count;
            foreach (var bytes in pls)
            {
                string s = Encoding.UTF8.GetString(bytes);
                //System.out.println(s);
                sawZero |= s.Equals("pos: 0", StringComparison.Ordinal);
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Dispose();
            @is.IndexReader.Dispose();
            dir.Dispose();
        }
Example #6
        public virtual void TestSurrogatesOrder()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetCodec(new PreFlexRWCodec()));

            int numField = TestUtil.NextInt32(Random, 2, 5);

            int uniqueTermCount = 0;

            int tc = 0;

            var fieldTerms = new List <Term>();

            for (int f = 0; f < numField; f++)
            {
                string field    = "f" + f;
                int    numTerms = AtLeast(200);

                ISet <string> uniqueTerms = new HashSet <string>();

                for (int i = 0; i < numTerms; i++)
                {
                    string term = GetRandomString(Random) + "_ " + (tc++);
                    uniqueTerms.Add(term);
                    fieldTerms.Add(new Term(field, term));
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewStringField(field, term, Field.Store.NO));
                    w.AddDocument(doc);
                }
                uniqueTermCount += uniqueTerms.Count;
            }

            IndexReader reader = w.GetReader();

            if (VERBOSE)
            {
                fieldTerms.Sort(TermAsUTF16Comparer);

                Console.WriteLine("\nTEST: UTF16 order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            // sorts in code point order:
            fieldTerms.Sort();

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: codepoint order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            Term[] fieldTermsArray = fieldTerms.ToArray();

            //SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);

            //FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1));
            //Assert.IsNotNull(fields);

            DoTestStraightEnum(fieldTerms, reader, uniqueTermCount);
            DoTestSeekExists(Random, fieldTerms, reader);
            DoTestSeekDoesNotExist(Random, numField, fieldTerms, fieldTermsArray, reader);

            reader.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Example #7
        public override void BeforeClass()
        {
            base.BeforeClass();

            NoDocs    = AtLeast(4096);
            Distance  = (1 << 30) / NoDocs;
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, Directory,
                                                             NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                             .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 100, 1000))
                                                             .SetMergePolicy(NewLogMergePolicy()));

            FieldType storedInt = new FieldType(Int32Field.TYPE_NOT_STORED);

            storedInt.IsStored = true;
            storedInt.Freeze();

            FieldType storedInt8 = new FieldType(storedInt);

            storedInt8.NumericPrecisionStep = 8;

            FieldType storedInt4 = new FieldType(storedInt);

            storedInt4.NumericPrecisionStep = 4;

            FieldType storedInt2 = new FieldType(storedInt);

            storedInt2.NumericPrecisionStep = 2;

            FieldType storedIntNone = new FieldType(storedInt);

            storedIntNone.NumericPrecisionStep = int.MaxValue;

            FieldType unstoredInt = Int32Field.TYPE_NOT_STORED;

            FieldType unstoredInt8 = new FieldType(unstoredInt);

            unstoredInt8.NumericPrecisionStep = 8;

            FieldType unstoredInt4 = new FieldType(unstoredInt);

            unstoredInt4.NumericPrecisionStep = 4;

            FieldType unstoredInt2 = new FieldType(unstoredInt);

            unstoredInt2.NumericPrecisionStep = 2;

            Int32Field field8      = new Int32Field("field8", 0, storedInt8),
                       field4      = new Int32Field("field4", 0, storedInt4),
                       field2      = new Int32Field("field2", 0, storedInt2),
                       fieldNoTrie = new Int32Field("field" + int.MaxValue, 0, storedIntNone),
                       ascfield8   = new Int32Field("ascfield8", 0, unstoredInt8),
                       ascfield4   = new Int32Field("ascfield4", 0, unstoredInt4),
                       ascfield2   = new Int32Field("ascfield2", 0, unstoredInt2);

            Document doc = new Document();

            // add fields that have a distance, to test general functionality
            doc.Add(field8);
            doc.Add(field4);
            doc.Add(field2);
            doc.Add(fieldNoTrie);
            // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
            doc.Add(ascfield8);
            doc.Add(ascfield4);
            doc.Add(ascfield2);

            // Add a series of noDocs docs with increasing int values
            for (int l = 0; l < NoDocs; l++)
            {
                int val = Distance * l + StartOffset;
                field8.SetInt32Value(val);
                field4.SetInt32Value(val);
                field2.SetInt32Value(val);
                fieldNoTrie.SetInt32Value(val);

                val = l - (NoDocs / 2);
                ascfield8.SetInt32Value(val);
                ascfield4.SetInt32Value(val);
                ascfield2.SetInt32Value(val);
                writer.AddDocument(doc);
            }

            Reader   = writer.GetReader();
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
Example #8
        private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
                                       int numberOfDocumentsToIndex)
        {
            for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
            {
                if (Verbose)
                {
                    Console.WriteLine("indexIter=" + indexIter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                              NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                                                              .SetMergePolicy(NewLogMergePolicy()));
                bool scoreDocsInOrder         = TestJoinUtil.Random.NextBoolean();
                IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
                                                              scoreDocsInOrder);

                IndexReader topLevelReader = w.GetReader();
                w.Dispose();
                for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("searchIter=" + searchIter);
                    }
                    IndexSearcher indexSearcher = NewSearcher(topLevelReader);

                    int         r              = Random.Next(context.RandomUniqueValues.Length);
                    bool        from           = context.RandomFrom[r];
                    string      randomValue    = context.RandomUniqueValues[r];
                    FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
                                                                      context);

                    Query actualQuery = new TermQuery(new Term("value", randomValue));
                    if (Verbose)
                    {
                        Console.WriteLine("actualQuery=" + actualQuery);
                    }

                    var       scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
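                    // Pick one of the join score modes (None, Avg, Max, Total) at random.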
                    ScoreMode scoreMode       = (ScoreMode)Random.Next(scoreModeLength);
                    if (Verbose)
                    {
                        Console.WriteLine("scoreMode=" + scoreMode);
                    }

                    Query joinQuery;
                    if (from)
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    else
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    if (Verbose)
                    {
                        Console.WriteLine("joinQuery=" + joinQuery);
                    }

                    // Need to know all documents that have matches; TopDocs doesn't provide that, and relying on it would also mean testing TopDocsCollector...
                    FixedBitSet          actualResult         = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
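                    // The custom collector records every matching doc in actualResult while
                    // delegating to topScoreDocCollector for the TopDocs assertions below.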
                    indexSearcher.Search(joinQuery,
                                         new CollectorAnonymousInnerClassHelper2(scoreDocsInOrder, actualResult,
                                                                                 topScoreDocCollector));
                    // Asserting bit set...
                    if (Verbose)
                    {
                        Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                        DocIdSetIterator iterator = expectedResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                        Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                        iterator = actualResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                    }
                    assertEquals(expectedResult, actualResult);

                    // Asserting TopDocs...
                    TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
                    TopDocs actualTopDocs   = topScoreDocCollector.GetTopDocs();
                    assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
                    assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
                    if (scoreMode == ScoreMode.None)
                    {
                        continue;
                    }

                    assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
                    for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
                    {
                        if (Verbose)
                        {
                            Console.Write(string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
                            Console.Write(string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
                        }
                        assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                        Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
                    }
                }
                topLevelReader.Dispose();
                dir.Dispose();
            }
        }
Example #9
        private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
                                                    bool multipleValuesPerDocument, bool scoreDocsInOrder)
        {
            IndexIterationContext context = new IndexIterationContext();
            int numRandomValues           = nDocs / 2;

            context.RandomUniqueValues = new string[numRandomValues];
            ISet <string> trackSet = new JCG.HashSet <string>();

            context.RandomFrom = new bool[numRandomValues];
            for (int i = 0; i < numRandomValues; i++)
            {
                string uniqueRandomValue;
                do
                {
                    uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    //        uniqueRandomValue = TestUtil.randomSimpleString(random);
                } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));
                // Generate unique values; empty strings aren't allowed.
                trackSet.Add(uniqueRandomValue);
                context.RandomFrom[i]         = Random.NextBoolean();
                context.RandomUniqueValues[i] = uniqueRandomValue;
            }

            RandomDoc[] docs = new RandomDoc[nDocs];
            for (int i = 0; i < nDocs; i++)
            {
                string   id       = Convert.ToString(i);
                int      randomI  = Random.Next(context.RandomUniqueValues.Length);
                string   value    = context.RandomUniqueValues[randomI];
                Document document = new Document();
                document.Add(NewTextField(Random, "id", id, Field.Store.NO));
                document.Add(NewTextField(Random, "value", value, Field.Store.NO));

                bool from = context.RandomFrom[randomI];
                int  numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1;
                docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
                for (int j = 0; j < numberOfLinkValues; j++)
                {
                    string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)];
                    docs[i].linkValues.Add(linkValue);
                    if (from)
                    {
                        if (!context.FromDocuments.TryGetValue(linkValue, out IList <RandomDoc> fromDocs))
                        {
                            context.FromDocuments[linkValue] = fromDocs = new List <RandomDoc>();
                        }
                        if (!context.RandomValueFromDocs.TryGetValue(value, out IList <RandomDoc> randomValueFromDocs))
                        {
                            context.RandomValueFromDocs[value] = randomValueFromDocs = new List <RandomDoc>();
                        }

                        fromDocs.Add(docs[i]);
                        randomValueFromDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO));
                    }
                    else
                    {
                        if (!context.ToDocuments.TryGetValue(linkValue, out IList <RandomDoc> toDocuments))
                        {
                            context.ToDocuments[linkValue] = toDocuments = new List <RandomDoc>();
                        }
                        if (!context.RandomValueToDocs.TryGetValue(value, out IList <RandomDoc> randomValueToDocs))
                        {
                            context.RandomValueToDocs[value] = randomValueToDocs = new List <RandomDoc>();
                        }

                        toDocuments.Add(docs[i]);
                        randomValueToDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO));
                    }
                }

                RandomIndexWriter w;
                if (from)
                {
                    w = fromWriter;
                }
                else
                {
                    w = toWriter;
                }

                w.AddDocument(document);
                if (Random.Next(10) == 4)
                {
                    w.Commit();
                }
                if (Verbose)
                {
                    Console.WriteLine("Added document[" + docs[i].id + "]: " + document);
                }
            }

            // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
            // any ScoreMode.
            IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader());
            IndexSearcher toSearcher   = NewSearcher(toWriter.GetReader());

            for (int i = 0; i < context.RandomUniqueValues.Length; i++)
            {
                string uniqueRandomValue = context.RandomUniqueValues[i];
                string fromField;
                string toField;
                IDictionary <string, IDictionary <int, JoinScore> > queryVals;
                if (context.RandomFrom[i])
                {
                    fromField = "from";
                    toField   = "to";
                    queryVals = context.FromHitsToJoinScore;
                }
                else
                {
                    fromField = "to";
                    toField   = "from";
                    queryVals = context.ToHitsToJoinScore;
                }
                IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper3(fromField, joinValueToJoinScores));
                }
                else
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper4(fromField, joinValueToJoinScores));
                }

                IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    if (scoreDocsInOrder)
                    {
                        AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                        Terms        terms = slowCompositeReader.GetTerms(toField);
                        if (terms != null)
                        {
                            DocsEnum  docsEnum  = null;
                            TermsEnum termsEnum = null;
                            JCG.SortedSet <BytesRef> joinValues =
                                new JCG.SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                            joinValues.UnionWith(joinValueToJoinScores.Keys);
                            foreach (BytesRef joinValue in joinValues)
                            {
                                termsEnum = terms.GetEnumerator(termsEnum);
                                if (termsEnum.SeekExact(joinValue))
                                {
                                    docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                                    JoinScore joinScore = joinValueToJoinScores[joinValue];

                                    for (int doc = docsEnum.NextDoc();
                                         doc != DocIdSetIterator.NO_MORE_DOCS;
                                         doc = docsEnum.NextDoc())
                                    {
                                        // First encountered join value determines the score.
                                        // Something to keep in mind for many-to-many relations.
                                        if (!docToJoinScore.ContainsKey(doc))
                                        {
                                            docToJoinScore[doc] = joinScore;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        toSearcher.Search(new MatchAllDocsQuery(),
                                          new CollectorAnonymousInnerClassHelper5(toField, joinValueToJoinScores,
                                                                                  docToJoinScore));
                    }
                }
                else
                {
                    toSearcher.Search(new MatchAllDocsQuery(),
                                      new CollectorAnonymousInnerClassHelper6(toField, joinValueToJoinScores,
                                                                              docToJoinScore));
                }
                queryVals[uniqueRandomValue] = docToJoinScore;
            }

            fromSearcher.IndexReader.Dispose();
            toSearcher.IndexReader.Dispose();

            return(context);
        }
Example #10
        public void TestSimpleWithScoring()
        {
            const string idField = "id";
            const string toField = "movieId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "A random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "A second random movie", Field.Store.NO));
            doc.Add(new TextField("name", "Movie 2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for movie via subtitle
            Query joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField,
                                                       new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
            TopDocs result = indexSearcher.Search(joinQuery, 10);
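            // "random" appears in subtitle docs 2, 4 and 5; the join follows their
            // movieId back to the movie documents themselves (docs 0 and 3).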

            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            // Score mode max.
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Max);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            // Score mode total
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Total);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(0, result.ScoreDocs[0].Doc);
            assertEquals(3, result.ScoreDocs[1].Doc);

            //Score mode avg
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")),
                                                 indexSearcher, ScoreMode.Avg);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);
            assertEquals(0, result.ScoreDocs[1].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Example #11
        public void TestSimple()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "1", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "4", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "4", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")),
                                                       indexSearcher, ScoreMode.None);

            TopDocs result = indexSearcher.Search(joinQuery, 10);

            assertEquals(2, result.TotalHits);
            assertEquals(4, result.ScoreDocs[0].Doc);
            assertEquals(5, result.ScoreDocs[1].Doc);

            joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(2, result.TotalHits);
            assertEquals(1, result.ScoreDocs[0].Doc);
            assertEquals(2, result.ScoreDocs[1].Doc);

            // Search for offer
            joinQuery = JoinUtil.CreateJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")),
                                                 indexSearcher, ScoreMode.None);
            result = indexSearcher.Search(joinQuery, 10);
            assertEquals(1, result.TotalHits);
            assertEquals(3, result.ScoreDocs[0].Doc);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
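Note that the same API runs the join in either direction: the first two queries above join products to their offers (idField as the "from" field), while the last joins an offer back to its product (toField as the "from" field). Distilled from the test:

        // Products -> offers: match products by name, return their offers.
        Query offersOfProduct = JoinUtil.CreateJoinQuery(
            idField, false, toField,
            new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None);

        // Offers -> product: match one offer by id, return its product.
        Query productOfOffer = JoinUtil.CreateJoinQuery(
            toField, false, idField,
            new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None);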
Example #12
        public void TestInsideBooleanQuery()
        {
            const string idField = "id";
            const string toField = "productId";

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                          NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                          .SetMergePolicy(NewLogMergePolicy()));

            // 0
            Document doc = new Document();

            doc.Add(new TextField("description", "random text", Field.Store.NO));
            doc.Add(new TextField("name", "name1", Field.Store.NO));
            doc.Add(new TextField(idField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "2", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "3", Field.Store.NO));
            doc.Add(new TextField(toField, "7", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            doc.Add(new TextField("description", "more random text", Field.Store.NO));
            doc.Add(new TextField("name", "name2", Field.Store.NO));
            doc.Add(new TextField(idField, "0", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 4
            doc = new Document();
            doc.Add(new TextField("price", "10.0", Field.Store.NO));
            doc.Add(new TextField(idField, "5", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            doc.Add(new TextField("price", "20.0", Field.Store.NO));
            doc.Add(new TextField(idField, "6", Field.Store.NO));
            doc.Add(new TextField(toField, "0", Field.Store.NO));
            w.AddDocument(doc);

            w.ForceMerge(1);

            IndexSearcher indexSearcher = new IndexSearcher(w.GetReader());

            w.Dispose();

            // Search for product
            Query joinQuery = JoinUtil.CreateJoinQuery(idField, false, toField,
                                                       new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);

            BooleanQuery bq = new BooleanQuery();

            bq.Add(joinQuery, Occur.SHOULD);
            bq.Add(new TermQuery(new Term("id", "3")), Occur.SHOULD);

            indexSearcher.Search(bq, new CollectorAnonymousInnerClassHelper());

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
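The CollectorAnonymousInnerClassHelper passed above is defined elsewhere in the test class and is not shown in this listing. A minimal stand-in, assuming Lucene.NET 4.8's ICollector contract, could look like the sketch below; the class name and the counting behavior are illustrative, not the test's actual collector.

        // Hypothetical minimal collector: counts hits across all segments.
        internal sealed class CountingCollector : ICollector
        {
            private int docBase;
            public int TotalHits { get; private set; }

            public void SetScorer(Scorer scorer)
            {
                // Scores are not needed for counting.
            }

            public void Collect(int doc)
            {
                TotalHits++; // the absolute docID would be docBase + doc
            }

            public void SetNextReader(AtomicReaderContext context)
            {
                docBase = context.DocBase;
            }

            public bool AcceptsDocsOutOfOrder
            {
                get { return true; }
            }
        }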
Example #13
        public virtual void TestRandomSampling()
        {
            Directory dir     = NewDirectory();
            Directory taxoDir = NewDirectory();

            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            RandomIndexWriter       writer     = new RandomIndexWriter(Random, dir);

            FacetsConfig config = new FacetsConfig();

            int numDocs = AtLeast(10000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
                doc.Add(new FacetField("iMod10", Convert.ToString(i % 10, CultureInfo.InvariantCulture)));
                writer.AddDocument(config.Build(taxoWriter, doc));
            }
            Random random = Random;

            // NRT open
            IndexSearcher searcher   = NewSearcher(writer.GetReader());
            var           taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            IOUtils.Dispose(writer, taxoWriter);

            // Test empty results
            RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64());

            // There should be no divisions by zero
            searcher.Search(new TermQuery(new Term("EvenOdd", "NeverMatches")), collectRandomZeroResults);

            // There should be no divisions by zero and no null result
            Assert.IsNotNull(collectRandomZeroResults.GetMatchingDocs());

            // There should be no results at all
            foreach (MatchingDocs doc in collectRandomZeroResults.GetMatchingDocs())
            {
                Assert.AreEqual(0, doc.TotalHits);
            }

            // Now start searching and retrieve results.

            // Use a query to select half of the documents.
            TermQuery query = new TermQuery(new Term("EvenOdd", "even"));

            // There will be 5 facet values (0, 2, 4, 6 and 8), since only the
            // even (i % 10) values are hits. There is a very small chance that
            // the sampling misses one of the 5 values: each sampled hit has a
            // 0.8 chance of not carrying a given value, so by a union bound the
            // chance is about 5 * 0.8^2000 ~ 10^-193, which is effectively zero.
            int maxNumChildren = 5;

            RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.NextInt64());      // no sampling
            RandomSamplingFacetsCollector random10Percent  = new RandomSamplingFacetsCollector(numDocs / 10, random.NextInt64()); // 10 % of total docs, 20% of the hits

            FacetsCollector fc = new FacetsCollector();

            searcher.Search(query, MultiCollector.Wrap(fc, random100Percent, random10Percent));

            FastTaxonomyFacetCounts random10FacetCounts  = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
            FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
            FastTaxonomyFacetCounts exactFacetCounts     = new FastTaxonomyFacetCounts(taxoReader, config, fc);

            FacetResult random10Result  = random10Percent.AmortizeFacetCounts(random10FacetCounts.GetTopChildren(10, "iMod10"), config, searcher);
            FacetResult random100Result = random100FacetCounts.GetTopChildren(10, "iMod10");
            FacetResult exactResult     = exactFacetCounts.GetTopChildren(10, "iMod10");

            Assert.AreEqual(random100Result, exactResult);

            // we should have five children, but there is a small chance we have less.
            // (see above).
            Assert.IsTrue(random10Result.ChildCount <= maxNumChildren);
            // there should be one child at least.
            Assert.IsTrue(random10Result.ChildCount >= 1);

            // now calculate some statistics to determine if the sampled result is 'ok'.
            // because random sampling is used, the results will vary each time.
            int sum = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                sum += (int)lav.Value;
            }
            float mu = (float)sum / (float)maxNumChildren;

            float variance = 0;

            foreach (LabelAndValue lav in random10Result.LabelValues)
            {
                variance += (float)Math.Pow((mu - (int)lav.Value), 2);
            }
            variance = variance / maxNumChildren;
            float sigma = (float)Math.Sqrt(variance);

            // we query only half the documents and have 5 categories. The average
            // number of docs in a category will thus be the total divided by 5*2
            float targetMu = numDocs / (5.0f * 2.0f);

            // the average should be in the range and the standard deviation should not
            // be too great
            Assert.IsTrue(sigma < 200);
            Assert.IsTrue(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }
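Condensed, the sampling flow above is: collect a sample of the hits, count facets over the sample, then scale the counts back up with AmortizeFacetCounts. All calls are as used in the test; sampleSize and seed stand in for the test's numDocs / 10 and random.NextInt64().

        // Sketch: sampled facet counting with amortized (scaled-up) results.
        var sampler = new RandomSamplingFacetsCollector(sampleSize, seed);
        searcher.Search(query, sampler);
        var sampledCounts = new FastTaxonomyFacetCounts(taxoReader, config, sampler);
        FacetResult amortized = sampler.AmortizeFacetCounts(
            sampledCounts.GetTopChildren(10, "iMod10"), config, searcher);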
Example #14
        public virtual void TestBasic()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            FacetsConfig config = new FacetsConfig();

            // Reused across documents, to add the necessary facet
            // fields:
            Document doc = new Document();

            doc.Add(new Int32Field("num", 10, Field.Store.NO));
            doc.Add(new FacetField("Author", "Bob"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 20, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 30, Field.Store.NO));
            doc.Add(new FacetField("Author", "Lisa"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 40, Field.Store.NO));
            doc.Add(new FacetField("Author", "Susan"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            doc = new Document();
            doc.Add(new Int32Field("num", 45, Field.Store.NO));
            doc.Add(new FacetField("Author", "Frank"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            // Aggregate the facet counts:
            FacetsCollector c = new FacetsCollector();

            // MatchAllDocsQuery is for "browsing" (counts facets
            // for all non-deleted docs in the index); normally
            // you'd use a "normal" query and one of the
            // Facets.search utility methods:
            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, new FacetsConfig(), c, new Int32FieldSource("num"));

            // Retrieve & verify results:
            Assert.AreEqual("dim=Author path=[] value=145.0 childCount=4\n  Lisa (50.0)\n  Frank (45.0)\n  Susan (40.0)\n  Bob (10.0)\n", facets.GetTopChildren(10, "Author").ToString());

            taxoReader.Dispose();
            searcher.IndexReader.Dispose();
            dir.Dispose();
            taxoDir.Dispose();
        }
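As the in-test comment notes, MatchAllDocsQuery is only for browsing; with a real query you would route it through the FacetsCollector.Search helper instead. A sketch with the same types, where the NumericRangeQuery on "num" is a hypothetical choice of query for this index:

        // Facet sums restricted to the hits of a real query.
        FacetsCollector fc = new FacetsCollector();
        TopDocs hits = FacetsCollector.Search(
            searcher,
            NumericRangeQuery.NewInt32Range("num", 10, 30, true, true),
            10, fc);
        Facets byNum = new TaxonomyFacetSumValueSource(
            taxoReader, config, fc, new Int32FieldSource("num"));
        FacetResult top = byNum.GetTopChildren(10, "Author");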
Example #15
        public virtual void TestSimple()
        {
            Random random = Random;

            DocValuesType[] dvTypes = new DocValuesType[] {
                DocValuesType.NUMERIC,
                DocValuesType.BINARY,
                DocValuesType.SORTED,
            };
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
            bool          canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

            Document doc = new Document();

            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random text", Field.Store.NO));
            doc.Add(new StringField("id", "1", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
            doc.Add(new StringField("id", "2", Field.Store.NO));
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddField(doc, groupField, "1", dvType);
            AddField(doc, countField, "2", dvType);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
            doc.Add(new StringField("id", "3", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddField(doc, groupField, "2", dvType);
            doc.Add(new TextField("content", "some random text", Field.Store.NO));
            doc.Add(new StringField("id", "4", Field.Store.NO));
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "some more random text", Field.Store.NO));
            doc.Add(new StringField("id", "5", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddField(doc, groupField, "3", dvType);
            AddField(doc, countField, "1", dvType);
            doc.Add(new TextField("content", "random blob", Field.Store.NO));
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            // 6 -- no author field
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            AddField(doc, countField, "1", dvType);
            doc.Add(new StringField("id", "6", Field.Store.NO));
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.GetReader());

            w.Dispose();

            var cmp = Comparer <AbstractDistinctValuesCollector.IGroupCount <IComparable> > .Create((groupCount1, groupCount2) => {
                if (groupCount1.GroupValue == null)
                {
                    if (groupCount2.GroupValue == null)
                    {
                        return(0);
                    }
                    return(-1);
                }
                else if (groupCount2.GroupValue == null)
                {
                    return(1);
                }
                else
                {
                    return(groupCount1.GroupValue.CompareTo(groupCount2.GroupValue));
                }
            });

            // === Search for content:random
            IAbstractFirstPassGroupingCollector <IComparable> firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
            IAbstractDistinctValuesCollector <AbstractDistinctValuesCollector.IGroupCount <IComparable> > distinctValuesCollector
                = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);

            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);

            //var gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            var gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);

            gcs.Sort(cmp);
            assertEquals(4, gcs.Count);

            CompareNull(gcs[0].GroupValue);
            List <IComparable> countValues = new List <IComparable>(gcs[0].UniqueValues);

            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            Compare("1", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            countValues.Sort(nullComparer);
            assertEquals(2, countValues.size());
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[3].GroupValue);
            countValues = new List <IComparable>(gcs[3].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:some
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(3, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            assertEquals(2, countValues.size());
            countValues.Sort(nullComparer);
            Compare("1", countValues[0]);
            Compare("2", countValues[1]);

            Compare("2", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.size());
            CompareNull(countValues[0]);

            Compare("3", gcs[2].GroupValue);
            countValues = new List <IComparable>(gcs[2].UniqueValues);
            assertEquals(1, countValues.size());
            Compare("1", countValues[0]);

            // === Search for content:blob
            firstCollector = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector);
            distinctValuesCollector = CreateDistinctCountCollector(firstCollector, groupField, countField, dvType);
            indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);

            // LUCENENET TODO: Try to work out how to do this without an O(n) operation
            //gcs = distinctValuesCollector.Groups as List<IGroupCount<IComparable>>;
            gcs = new List <AbstractDistinctValuesCollector.IGroupCount <IComparable> >(distinctValuesCollector.Groups);
            gcs.Sort(cmp);
            assertEquals(2, gcs.Count);

            Compare("1", gcs[0].GroupValue);
            countValues = new List <IComparable>(gcs[0].UniqueValues);
            // Because only one document matched "blob" within the author "1" group
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            Compare("3", gcs[1].GroupValue);
            countValues = new List <IComparable>(gcs[1].UniqueValues);
            assertEquals(1, countValues.Count);
            Compare("1", countValues[0]);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
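The three searches above repeat one two-pass pattern, condensed in the sketch below. CreateRandomFirstPassCollector and CreateDistinctCountCollector are the test's own helpers, defined elsewhere in the class.

        // Two passes over the same query: find the top groups, then collect
        // the distinct countField values inside each group.
        var firstPass = CreateRandomFirstPassCollector(dvType, new Sort(), groupField, 10);
        indexSearcher.Search(query, firstPass);
        var distinct = CreateDistinctCountCollector(firstPass, groupField, countField, dvType);
        indexSearcher.Search(query, distinct);
        foreach (var group in distinct.Groups)
        {
            // group.GroupValue is null for docs missing the group field;
            // group.UniqueValues holds the distinct count-field values.
        }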
Example #16
        public void TestOptions()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir,
                                                             new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));

            Document doc = new Document();

            doc.Add(NewTextField("text", "foobar", Field.Store.NO));
            writer.AddDocument(doc);
            doc.Add(NewTextField("text", "foobar", Field.Store.NO));
            writer.AddDocument(doc);
            doc.Add(NewTextField("text", "foobaz", Field.Store.NO));
            writer.AddDocument(doc);
            doc.Add(NewTextField("text", "fobar", Field.Store.NO));
            writer.AddDocument(doc);

            IndexReader ir = writer.GetReader();

            DirectSpellChecker spellChecker = new DirectSpellChecker();

            spellChecker.MaxQueryFrequency = (0F);
            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("text",
                                                                         "fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinQueryLength = (5);
            similar = spellChecker.SuggestSimilar(new Term("text", "foba"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker          = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = (1);
            similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker          = new DirectSpellChecker(); // reset defaults
            spellChecker.Accuracy = (0.9F);
            similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker           = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = (0);
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, similar.Length);
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);

            spellChecker           = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = (1);
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker          = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = (2);
            similar = spellChecker.SuggestSimilar(new Term("text", "fobar"), 2, ir,
                                                  SuggestMode.SUGGEST_ALWAYS);
            assertEquals(2, similar.Length);

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
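For reference, the knobs the test resets one at a time, gathered into a single configuration. Property names are those used above; the default values noted in the comments follow upstream Lucene and should be treated as an assumption.

        var spellChecker = new DirectSpellChecker
        {
            MaxEdits = 2,              // max Levenshtein edits per suggestion (default 2)
            MinPrefix = 1,             // leading chars that must match exactly (default 1)
            MinQueryLength = 4,        // ignore shorter query terms (default 4)
            MaxQueryFrequency = 0.01F, // only correct sufficiently rare terms
            Accuracy = 0.5F            // minimum similarity to the query term
        };
        SuggestWord[] suggestions = spellChecker.SuggestSimilar(
            new Term("text", "fobar"), 2, ir, SuggestMode.SUGGEST_ALWAYS);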
Example #17
        private IndexContext CreateIndexContext()
        {
            Random random = Random;

            DocValuesType[] dvTypes = new DocValuesType[] {
                DocValuesType.BINARY,
                DocValuesType.SORTED
            };

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
                );

            bool          canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            DocValuesType dvType   = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : DocValuesType.NONE;

            int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;

            string[] groupValues = new string[numDocs / 5];
            string[] countValues = new string[numDocs / 10];
            for (int i = 0; i < groupValues.Length; i++)
            {
                groupValues[i] = GenerateRandomNonEmptyString();
            }
            for (int i = 0; i < countValues.Length; i++)
            {
                countValues[i] = GenerateRandomNonEmptyString();
            }

            List <string> contentStrings = new List <string>();
            IDictionary <string, IDictionary <string, ISet <string> > > searchTermToGroupCounts = new JCG.Dictionary <string, IDictionary <string, ISet <string> > >();

            for (int i = 1; i <= numDocs; i++)
            {
                string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
                string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
                string content    = "random" + random.nextInt(numDocs / 20);
                IDictionary <string, ISet <string> > groupToCounts;
                if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
                {
                    // Groups sort always DOCID asc...
                    searchTermToGroupCounts.Add(content, groupToCounts = new JCG.LinkedDictionary <string, ISet <string> >());
                    contentStrings.Add(content);
                }

                ISet <string> countsVals;
                if (!groupToCounts.TryGetValue(groupValue, out countsVals))
                {
                    groupToCounts.Add(groupValue, countsVals = new JCG.HashSet <string>());
                }
                countsVals.Add(countValue);

                Document doc = new Document();
                doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
                if (groupValue != null)
                {
                    AddField(doc, groupField, groupValue, dvType);
                }
                if (countValue != null)
                {
                    AddField(doc, countField, countValue, dvType);
                }
                doc.Add(new TextField("content", content, Field.Store.YES));
                w.AddDocument(doc);
            }

            DirectoryReader reader = w.GetReader();

            if (VERBOSE)
            {
                for (int docID = 0; docID < reader.MaxDoc; docID++)
                {
                    Document doc = reader.Document(docID);
                    Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
                }
            }

            w.Dispose();
            return(new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/)));
        }
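One detail worth calling out from the helper above: the id field is written with the {0:D9} format. A small sketch of why the zero-padding matters (this rationale connects to the "Groups sort always DOCID asc" comment and is an inference, not stated in the test):

        // Zero-padding makes the lexicographic order of the stored id strings
        // match numeric insertion order, so string sorts on "id" agree with
        // DOCID order.
        string a = string.Format(CultureInfo.InvariantCulture, "{0:D9}", 7);  // "000000007"
        string b = string.Format(CultureInfo.InvariantCulture, "{0:D9}", 42); // "000000042"
        bool ascending = string.CompareOrdinal(a, b) < 0;                     // true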
Example #18
        public void TestSimpleExamples()
        {
            DirectSpellChecker spellChecker = new DirectSpellChecker();

            spellChecker.MinQueryLength = (0);
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir,
                                                             new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));

            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.GetReader();

            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("numbers",
                                                                         "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "five"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            if (similar.Length > 0)
            {
                assertFalse(similar[0].String.Equals("five", StringComparison.Ordinal)); // don't suggest a word for itself
            }

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fvie"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fiv"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fives"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fie"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            // add some more documents
            for (int i = 1000; i < 1100; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            ir.Dispose();
            ir = writer.GetReader();

            // look ma, no spellcheck index rebuild
            similar = spellChecker.SuggestSimilar(new Term("numbers", "tousand"), 10,
                                                  ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("thousand", similar[0].String);

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
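Each SuggestWord returned above carries more than the corrected text. A small sketch of inspecting a suggestion; the Score and Freq members are assumed from upstream Lucene's SuggestWord and should be verified against the port.

        foreach (SuggestWord s in spellChecker.SuggestSimilar(
            new Term("numbers", "fvie"), 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX))
        {
            // String: suggested term; Score: similarity; Freq: docFreq in the index.
            Console.WriteLine(s.String + " score=" + s.Score + " freq=" + s.Freq);
        }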
Example #19
        public override void SetUp()
        {
            base.SetUp();

            // LUCENENET specific: Moved this logic here to ensure that it is executed
            // after the class is set up - a field initializer is way too early to execute this.
            bool supportsDocValues = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal) == false;

            AllSortFields = new List <SortField>(Arrays.AsList(new SortField[] {
#pragma warning disable 612,618
                new SortField("byte", SortFieldType.BYTE, false),
                new SortField("short", SortFieldType.INT16, false),
#pragma warning restore 612,618
                new SortField("int", SortFieldType.INT32, false),
                new SortField("long", SortFieldType.INT64, false),
                new SortField("float", SortFieldType.SINGLE, false),
                new SortField("double", SortFieldType.DOUBLE, false),
                new SortField("bytes", SortFieldType.STRING, false),
                new SortField("bytesval", SortFieldType.STRING_VAL, false),
#pragma warning disable 612,618
                new SortField("byte", SortFieldType.BYTE, true),
                new SortField("short", SortFieldType.INT16, true),
#pragma warning restore 612,618
                new SortField("int", SortFieldType.INT32, true),
                new SortField("long", SortFieldType.INT64, true),
                new SortField("float", SortFieldType.SINGLE, true),
                new SortField("double", SortFieldType.DOUBLE, true),
                new SortField("bytes", SortFieldType.STRING, true),
                new SortField("bytesval", SortFieldType.STRING_VAL, true),
                SortField.FIELD_SCORE,
                SortField.FIELD_DOC
            }));

            if (supportsDocValues)
            {
                AllSortFields.AddRange(Arrays.AsList(new SortField[] {
                    new SortField("intdocvalues", SortFieldType.INT32, false),
                    new SortField("floatdocvalues", SortFieldType.SINGLE, false),
                    new SortField("sortedbytesdocvalues", SortFieldType.STRING, false),
                    new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, false),
                    new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, false),
                    new SortField("intdocvalues", SortFieldType.INT32, true),
                    new SortField("floatdocvalues", SortFieldType.SINGLE, true),
                    new SortField("sortedbytesdocvalues", SortFieldType.STRING, true),
                    new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, true),
                    new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, true)
                }));
            }

            // Also test missing first / last for the "string" sorts:
            foreach (string field in new string[] { "bytes", "sortedbytesdocvalues" })
            {
                for (int rev = 0; rev < 2; rev++)
                {
                    bool      reversed = rev == 0;
                    SortField sf       = new SortField(field, SortFieldType.STRING, reversed);
                    sf.MissingValue = SortField.STRING_FIRST;
                    AllSortFields.Add(sf);

                    sf = new SortField(field, SortFieldType.STRING, reversed);
                    sf.MissingValue = SortField.STRING_LAST;
                    AllSortFields.Add(sf);
                }
            }

            int limit = AllSortFields.Count;
            for (int i = 0; i < limit; i++)
            {
                SortField sf = AllSortFields[i];
                if (sf.Type == SortFieldType.INT32)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.INT32, sf.IsReverse);
                    sf2.MissingValue = Random.Next();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortFieldType.INT64)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.INT64, sf.IsReverse);
                    sf2.MissingValue = Random.NextInt64();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortFieldType.SINGLE)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.SINGLE, sf.IsReverse);
                    sf2.MissingValue = (float)Random.NextDouble();
                    AllSortFields.Add(sf2);
                }
                else if (sf.Type == SortFieldType.DOUBLE)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.DOUBLE, sf.IsReverse);
                    sf2.MissingValue = Random.NextDouble();
                    AllSortFields.Add(sf2);
                }
            }

            Dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random, Dir, Similarity, TimeZone);
            int numDocs          = AtLeast(200);
            for (int i = 0; i < numDocs; i++)
            {
                IList <Field> fields = new List <Field>();
                fields.Add(NewTextField("english", English.Int32ToEnglish(i), Field.Store.NO));
                fields.Add(NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
                fields.Add(NewStringField("byte", "" + ((sbyte)Random.Next()), Field.Store.NO));
                fields.Add(NewStringField("short", "" + ((short)Random.Next()), Field.Store.NO));
                fields.Add(new Int32Field("int", Random.Next(), Field.Store.NO));
                fields.Add(new Int64Field("long", Random.NextInt64(), Field.Store.NO));

                fields.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO));
                fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO));
                fields.Add(NewStringField("bytes", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
                fields.Add(NewStringField("bytesval", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
                fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO));

                if (supportsDocValues)
                {
                    fields.Add(new NumericDocValuesField("intdocvalues", Random.Next()));
                    fields.Add(new SingleDocValuesField("floatdocvalues", (float)Random.NextDouble()));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
                    fields.Add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
                }
                Document document = new Document();
                document.Add(new StoredField("id", "" + i));
                if (isVerbose)
                {
                    Console.WriteLine("  add doc id=" + i);
                }
                foreach (Field field in fields)
                {
                    // So we are sometimes missing that field:
                    if (Random.Next(5) != 4)
                    {
                        document.Add(field);
                        if (isVerbose)
                        {
                            Console.WriteLine("    " + field);
                        }
                    }
                }

                iw.AddDocument(document);

                if (Random.Next(50) == 17)
                {
                    iw.Commit();
                }
            }
            Reader = iw.GetReader();
            iw.Dispose();
            Searcher = NewSearcher(Reader);
            if (isVerbose)
            {
                Console.WriteLine("  searcher=" + Searcher);
            }
        }
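The MissingValue handling exercised above reduces to one small sketch: a numeric sort where documents lacking the field rank as if they held a chosen value. Field and member names follow the SetUp above.

        SortField byInt = new SortField("int", SortFieldType.INT32, false);
        byInt.MissingValue = int.MaxValue; // docs without "int" sort last (ascending)
        Sort sort = new Sort(byInt, SortField.FIELD_DOC); // stable docID tie-break
        TopDocs hits = Searcher.Search(new MatchAllDocsQuery(), 10, sort);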
Beispiel #20
0
        internal virtual void TestSort(bool useFrom, bool VERBOSE)
        {
            IndexReader reader = null;
            Directory   dir    = null;

            if (!VERBOSE)
            {
                Console.WriteLine("Verbosity disabled. Enable manually if needed.");
            }

            int numDocs = VERBOSE ? AtLeast(50) : AtLeast(1000);

            //final int numDocs = AtLeast(50);

            string[] tokens = new string[] { "a", "b", "c", "d", "e" };

            if (VERBOSE)
            {
                Console.WriteLine("TEST: make index");
            }

            {
                dir = NewDirectory();
                RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir);
                // w.setDoRandomForceMerge(false);

                // w.w.getConfig().SetMaxBufferedDocs(AtLeast(100));

                string[] content = new string[AtLeast(20)];

                for (int contentIDX = 0; contentIDX < content.Length; contentIDX++)
                {
                    StringBuilder sb        = new StringBuilder();
                    int           numTokens = TestUtil.NextInt32(Random, 1, 10);
                    for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++)
                    {
                        sb.Append(tokens[Random.Next(tokens.Length)]).Append(' ');
                    }
                    content[contentIDX] = sb.ToString();
                }

                for (int docIDX = 0; docIDX < numDocs; docIDX++)
                {
                    Document doc = new Document();
                    doc.Add(NewStringField("string", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
                    doc.Add(NewTextField("text", content[Random.Next(content.Length)], Field.Store.NO));
                    doc.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO));
                    int intValue;
                    if (Random.Next(100) == 17)
                    {
                        intValue = int.MinValue;
                    }
                    else if (Random.Next(100) == 17)
                    {
                        intValue = int.MaxValue;
                    }
                    else
                    {
                        intValue = Random.Next();
                    }
                    doc.Add(new Int32Field("int", intValue, Field.Store.NO));
                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc=" + doc);
                    }
                    w.AddDocument(doc);
                }

                reader = w.GetReader();
                w.Dispose();
            }

            // NOTE: sometimes reader has just one segment, which is
            // important to test
            IndexSearcher      searcher = NewSearcher(reader);
            IndexReaderContext ctx      = searcher.TopReaderContext;

            ShardSearcher[] subSearchers;
            int[]           docStarts;

            if (ctx is AtomicReaderContext)
            {
                subSearchers    = new ShardSearcher[1];
                docStarts       = new int[1];
                subSearchers[0] = new ShardSearcher((AtomicReaderContext)ctx, ctx);
                docStarts[0]    = 0;
            }
            else
            {
                CompositeReaderContext compCTX = (CompositeReaderContext)ctx;
                int size = compCTX.Leaves.Count;
                subSearchers = new ShardSearcher[size];
                docStarts    = new int[size];
                int docBase = 0;
                for (int searcherIDX = 0; searcherIDX < subSearchers.Length; searcherIDX++)
                {
                    AtomicReaderContext leave = compCTX.Leaves[searcherIDX];
                    subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
                    docStarts[searcherIDX]    = docBase;
                    docBase += leave.Reader.MaxDoc;
                }
            }

            IList <SortField> sortFields = new List <SortField>();

            sortFields.Add(new SortField("string", SortFieldType.STRING, true));
            sortFields.Add(new SortField("string", SortFieldType.STRING, false));
            sortFields.Add(new SortField("int", SortFieldType.INT32, true));
            sortFields.Add(new SortField("int", SortFieldType.INT32, false));
            sortFields.Add(new SortField("float", SortFieldType.SINGLE, true));
            sortFields.Add(new SortField("float", SortFieldType.SINGLE, false));
            sortFields.Add(new SortField(null, SortFieldType.SCORE, true));
            sortFields.Add(new SortField(null, SortFieldType.SCORE, false));
            sortFields.Add(new SortField(null, SortFieldType.DOC, true));
            sortFields.Add(new SortField(null, SortFieldType.DOC, false));

            for (int iter = 0; iter < 1000 * RANDOM_MULTIPLIER; iter++)
            {
                // TODO: custom FieldComp...
                Query query = new TermQuery(new Term("text", tokens[Random.Next(tokens.Length)]));

                Sort sort;
                if (Random.Next(10) == 4)
                {
                    // Sort by score
                    sort = null;
                }
                else
                {
                    SortField[] randomSortFields = new SortField[TestUtil.NextInt32(Random, 1, 3)];
                    for (int sortIDX = 0; sortIDX < randomSortFields.Length; sortIDX++)
                    {
                        randomSortFields[sortIDX] = sortFields[Random.Next(sortFields.Count)];
                    }
                    sort = new Sort(randomSortFields);
                }

                int numHits = TestUtil.NextInt32(Random, 1, numDocs + 5);
                //final int numHits = 5;

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
                }

                int from = -1;
                int size = -1;
                // First search on whole index:
                TopDocs topHits;
                if (sort == null)
                {
                    if (useFrom)
                    {
                        TopScoreDocCollector c = TopScoreDocCollector.Create(numHits, Random.NextBoolean());
                        searcher.Search(query, c);
                        from = TestUtil.NextInt32(Random, 0, numHits - 1);
                        size = numHits - from;
                        TopDocs tempTopHits = c.GetTopDocs();
                        if (from < tempTopHits.ScoreDocs.Length)
                        {
                            // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                            // than TopDocs#merge currently has
                            ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                            Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                            tempTopHits.ScoreDocs = newScoreDocs;
                            topHits = tempTopHits;
                        }
                        else
                        {
                            topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                        }
                    }
                    else
                    {
                        topHits = searcher.Search(query, numHits);
                    }
                }
                else
                {
                    TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean());
                    searcher.Search(query, c);
                    if (useFrom)
                    {
                        from = TestUtil.NextInt32(Random, 0, numHits - 1);
                        size = numHits - from;
                        TopDocs tempTopHits = c.GetTopDocs();
                        if (from < tempTopHits.ScoreDocs.Length)
                        {
                            // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                            // than TopDocs#merge currently has
                            ScoreDoc[] newScoreDocs = new ScoreDoc[Math.Min(size, tempTopHits.ScoreDocs.Length - from)];
                            Array.Copy(tempTopHits.ScoreDocs, from, newScoreDocs, 0, newScoreDocs.Length);
                            tempTopHits.ScoreDocs = newScoreDocs;
                            topHits = tempTopHits;
                        }
                        else
                        {
                            topHits = new TopDocs(tempTopHits.TotalHits, new ScoreDoc[0], tempTopHits.MaxScore);
                        }
                    }
                    else
                    {
                        topHits = c.GetTopDocs(0, numHits);
                    }
                }

                if (VERBOSE)
                {
                    if (useFrom)
                    {
                        Console.WriteLine("from=" + from + " size=" + size);
                    }
                    Console.WriteLine("  top search: " + topHits.TotalHits + " totalHits; hits=" + (topHits.ScoreDocs == null ? "null" : topHits.ScoreDocs.Length + " maxScore=" + topHits.MaxScore));
                    if (topHits.ScoreDocs != null)
                    {
                        for (int hitIDX = 0; hitIDX < topHits.ScoreDocs.Length; hitIDX++)
                        {
                            ScoreDoc sd = topHits.ScoreDocs[hitIDX];
                            Console.WriteLine("    doc=" + sd.Doc + " score=" + sd.Score);
                        }
                    }
                }

                // ... then all shards:
                Weight w = searcher.CreateNormalizedWeight(query);

                TopDocs[] shardHits = new TopDocs[subSearchers.Length];
                for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++)
                {
                    TopDocs       subHits;
                    ShardSearcher subSearcher = subSearchers[shardIDX];
                    if (sort == null)
                    {
                        subHits = subSearcher.Search(w, numHits);
                    }
                    else
                    {
                        TopFieldCollector c = TopFieldCollector.Create(sort, numHits, true, true, true, Random.NextBoolean());
                        subSearcher.Search(w, c);
                        subHits = c.GetTopDocs(0, numHits);
                    }

                    shardHits[shardIDX] = subHits;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  shard=" + shardIDX + " " + subHits.TotalHits + " totalHits hits=" + (subHits.ScoreDocs == null ? "null" : subHits.ScoreDocs.Length.ToString()));
                        if (subHits.ScoreDocs != null)
                        {
                            foreach (ScoreDoc sd in subHits.ScoreDocs)
                            {
                                Console.WriteLine("    doc=" + sd.Doc + " score=" + sd.Score);
                            }
                        }
                    }
                }

                // Merge:
                TopDocs mergedHits;
                if (useFrom)
                {
                    mergedHits = TopDocs.Merge(sort, from, size, shardHits);
                }
                else
                {
                    mergedHits = TopDocs.Merge(sort, numHits, shardHits);
                }

                if (mergedHits.ScoreDocs != null)
                {
                    // Make sure the returned shards are correct:
                    for (int hitIDX = 0; hitIDX < mergedHits.ScoreDocs.Length; hitIDX++)
                    {
                        ScoreDoc sd = mergedHits.ScoreDocs[hitIDX];
                        Assert.AreEqual(ReaderUtil.SubIndex(sd.Doc, docStarts), sd.ShardIndex, "doc=" + sd.Doc + " wrong shard");
                    }
                }

                TestUtil.AssertEquals(topHits, mergedHits);
            }
            reader.Dispose();
            dir.Dispose();
        }
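The shard round-trip above reduces to: search every shard with the same query and numHits, then let TopDocs.Merge interleave the per-shard results. Condensed for the score-ordered case, using the test's own variables:

        // Search each shard with the shared Weight, then merge by score
        // (a null sort means "order by score" for TopDocs.Merge).
        TopDocs[] shardHits = new TopDocs[subSearchers.Length];
        for (int shardIDX = 0; shardIDX < subSearchers.Length; shardIDX++)
        {
            shardHits[shardIDX] = subSearchers[shardIDX].Search(w, numHits);
        }
        TopDocs merged = TopDocs.Merge(null, numHits, shardHits);
        // Field-sorted shards instead use a TopFieldCollector per shard and
        // TopDocs.Merge(sort, numHits, shardHits); paging adds (from, size).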
Example #21
        public void TestSimple()
        {
            string    groupField = "hotel";
            FieldType customType = new FieldType();

            customType.IsStored = true;

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            bool canUseDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
            bool useDv    = canUseDV && Random.nextBoolean();

            // 0
            Document doc = new Document();

            AddField(doc, groupField, "a", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            AddField(doc, "airport", "dus", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            // 2
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);
            w.Commit(); // To ensure a second segment

            // 3
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "ams", useDv);
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            IndexSearcher indexSearcher = NewSearcher(w.GetReader());

            IList <TermGroupFacetCollector.FacetEntry> entries       = null;
            AbstractGroupFacetCollector groupedAirportFacetCollector = null;

            TermGroupFacetCollector.GroupedFacetResult airportResult = null;

            foreach (int limit in new int[] { 2, 10, 100, int.MaxValue })
            {
                // any of these limits is plenty for the data we have

                groupedAirportFacetCollector = CreateRandomCollector
                                                   (useDv ? "hotel_dv" : "hotel",
                                                   useDv ? "airport_dv" : "airport", null, false);
                indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
                int maxOffset = 5;
                airportResult = groupedAirportFacetCollector.MergeSegmentResults(
                    int.MaxValue == limit ? limit : maxOffset + limit, 0, false);

                assertEquals(3, airportResult.TotalCount);
                assertEquals(0, airportResult.TotalMissingCount);

                entries = airportResult.GetFacetEntries(maxOffset, limit);
                assertEquals(0, entries.size());

                entries = airportResult.GetFacetEntries(0, limit);
                assertEquals(2, entries.size());
                assertEquals("ams", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("dus", entries[1].Value.Utf8ToString());
                assertEquals(1, entries[1].Count);

                entries = airportResult.GetFacetEntries(1, limit);
                assertEquals(1, entries.size());
                assertEquals("dus", entries[0].Value.Utf8ToString());
                assertEquals(1, entries[0].Count);
            }

            AbstractGroupFacetCollector groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);

            indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
            TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 0, false);
            assertEquals(4, durationResult.TotalCount);
            assertEquals(0, durationResult.TotalMissingCount);

            entries = durationResult.GetFacetEntries(0, 10);
            assertEquals(2, entries.size());
            assertEquals("10", entries[0].Value.Utf8ToString());
            assertEquals(2, entries[0].Count);
            assertEquals("5", entries[1].Value.Utf8ToString());
            assertEquals(2, entries[1].Count);

            // 5
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            // missing airport
            if (useDv)
            {
                AddField(doc, "airport", "", useDv);
            }
            AddField(doc, "duration", "5", useDv);
            w.AddDocument(doc);

            // 6
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            // 7
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "15", useDv);
            w.AddDocument(doc);

            // 8
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            indexSearcher.IndexReader.Dispose();
            indexSearcher = NewSearcher(w.GetReader());
            groupedAirportFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
            airportResult = groupedAirportFacetCollector.MergeSegmentResults(3, 0, true);
            entries       = airportResult.GetFacetEntries(1, 2);
            assertEquals(2, entries.size());
            if (useDv)
            {
                assertEquals(6, airportResult.TotalCount);
                assertEquals(0, airportResult.TotalMissingCount);
                assertEquals("bru", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("", entries[1].Value.Utf8ToString());
                assertEquals(1, entries[1].Count);
            }
            else
            {
                assertEquals(5, airportResult.TotalCount);
                assertEquals(1, airportResult.TotalMissingCount);
                assertEquals("bru", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("dus", entries[1].Value.Utf8ToString());
                assertEquals(1, entries[1].Count);
            }

            groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
            durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 2, true);
            assertEquals(5, durationResult.TotalCount);
            assertEquals(0, durationResult.TotalMissingCount);

            entries = durationResult.GetFacetEntries(1, 1);
            assertEquals(1, entries.size());
            assertEquals("5", entries[0].Value.Utf8ToString());
            assertEquals(2, entries[0].Count);

            // 9
            doc = new Document();
            AddField(doc, groupField, "c", useDv);
            AddField(doc, "airport", "bru", useDv);
            AddField(doc, "duration", "15", useDv);
            w.AddDocument(doc);

            // 10
            doc = new Document();
            AddField(doc, groupField, "c", useDv);
            AddField(doc, "airport", "dus", useDv);
            AddField(doc, "duration", "10", useDv);
            w.AddDocument(doc);

            indexSearcher.IndexReader.Dispose();
            indexSearcher = NewSearcher(w.GetReader());
            groupedAirportFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
            airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false);
            entries       = airportResult.GetFacetEntries(0, 10);
            if (useDv)
            {
                assertEquals(8, airportResult.TotalCount);
                assertEquals(0, airportResult.TotalMissingCount);
                assertEquals(4, entries.size());
                assertEquals("", entries[0].Value.Utf8ToString());
                assertEquals(1, entries[0].Count);
                assertEquals("ams", entries[1].Value.Utf8ToString());
                assertEquals(2, entries[1].Count);
                assertEquals("bru", entries[2].Value.Utf8ToString());
                assertEquals(3, entries[2].Count);
                assertEquals("dus", entries[3].Value.Utf8ToString());
                assertEquals(2, entries[3].Count);
            }
            else
            {
                assertEquals(7, airportResult.TotalCount);
                assertEquals(1, airportResult.TotalMissingCount);
                assertEquals(3, entries.size());
                assertEquals("ams", entries[0].Value.Utf8ToString());
                assertEquals(2, entries[0].Count);
                assertEquals("bru", entries[1].Value.Utf8ToString());
                assertEquals(3, entries[1].Count);
                assertEquals("dus", entries[2].Value.Utf8ToString());
                assertEquals(2, entries[2].Count);
            }

            groupedDurationFacetCollector = CreateRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false);
            indexSearcher.Search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
            durationResult = groupedDurationFacetCollector.MergeSegmentResults(10, 0, true);
            assertEquals(5, durationResult.TotalCount);
            assertEquals(0, durationResult.TotalMissingCount);

            entries = durationResult.GetFacetEntries(0, 10);
            assertEquals(2, entries.size());
            assertEquals("10", entries[0].Value.Utf8ToString());
            assertEquals(3, entries[0].Count);
            assertEquals("15", entries[1].Value.Utf8ToString());
            assertEquals(2, entries[1].Count);

            w.Dispose();
            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
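The offset/limit pairs above illustrate the paging contract: MergeSegmentResults bounds how many entries are materialized, and GetFacetEntries(offset, limit) pages within that bound. A hedged sketch of standalone usage, assuming the CreateTermGroupFacetCollector factory from the grouping module (verify the exact overload in your Lucene.NET version):

            var collector = TermGroupFacetCollector.CreateTermGroupFacetCollector(
                "hotel",   // group field
                "airport", // facet field
                false,     // facetFieldMultivalued
                null,      // facetPrefix (BytesRef)
                128);      // initialSize
            indexSearcher.Search(new MatchAllDocsQuery(), collector);
            var result = collector.MergeSegmentResults(10, 0, false);
            foreach (var entry in result.GetFacetEntries(0, 10))
            {
                Console.WriteLine($"{entry.Value.Utf8ToString()}: {entry.Count}");
            }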
Example #22
        public void TestMinShouldMatch()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random);
            RandomIndexWriter w        = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            string[] docs = new string[]
            {
                @"this is the end of the world right",
                @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.5f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 1);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 0.49f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch = 1.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
                assertTrue(search.ScoreDocs[1].Score > search.ScoreDocs[2].Score);
            }

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 4.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(search.ScoreDocs[1].Score, search.ScoreDocs[2].Score, 0.0f);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                // docs 2 and 3 only get a score from the low-freq terms
                assertEquals(
                    new JCG.HashSet <string> {
                    @"2", @"3"
                },
                    new JCG.HashSet <string> {
                    r.Document(search.ScoreDocs[1].Doc).Get(@"id"),
                    r.Document(search.ScoreDocs[2].Doc).Get(@"id")
                },
                    aggressive: false);
            }

            {
                // only high freq terms around - check that min should match is applied
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 4);
            }

            {
                // only high freq terms around - check that min should match is applied
                CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "the"));
                query.LowFreqMinimumNumberShouldMatch  = 1.0f;
                query.HighFreqMinimumNumberShouldMatch = 2.0f;
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 2);
                assertEquals(
                    new JCG.HashSet <string> {
                    @"0", @"2"
                },
                    new JCG.HashSet <string> {
                    r.Document(search.ScoreDocs[0].Doc).Get(@"id"),
                    r.Document(search.ScoreDocs[1].Doc).Get(@"id")
                },
                    aggressive: false);
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
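Note the pivot between 0.5f and 0.49f above: CommonTermsQuery treats a minimum-should-match below 1.0f as a fraction of the optional low-frequency terms (rounded), and a value of 1.0f or more as an absolute count. In this listing the low-frequency group is {world, universe, right}, so 0.5f rounds to 2 required terms while 0.49f rounds to 1, which is exactly the 1-hit vs. 3-hit difference. A small sketch of the two interpretations (reusing the test's field name; assumes all three terms are low-frequency in the corpus):

            var query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, 0.5f);
            query.Add(new Term("field", "world"));
            query.Add(new Term("field", "universe"));
            query.Add(new Term("field", "right"));
            query.LowFreqMinimumNumberShouldMatch = 0.5f;  // 50% of 3 low-freq terms -> 2
            // query.LowFreqMinimumNumberShouldMatch = 2.0f; // same effect, absolute count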
Example #23
        /// <summary>
        /// LUCENENET specific
        /// Passed in because NewStringField and NewIndexWriterConfig are no
        /// longer static.
        /// </summary>
        private IndexReader Build(Random random, TestIndex index)
        {
            /* build an index */

            Document doc       = new Document();
            Field    idField   = NewStringField(random, "id", "", Field.Store.YES);
            Field    randField = NewStringField(random, "rand", "", Field.Store.YES);
            Field    bodyField = NewStringField(random, "body", "", Field.Store.NO);

            doc.Add(idField);
            doc.Add(randField);
            doc.Add(bodyField);

            RandomIndexWriter writer = new RandomIndexWriter(random, index.index, NewIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(TestUtil.NextInt32(random, 50, 1000)).SetMergePolicy(NewLogMergePolicy()));

            TestUtil.ReduceOpenFiles(writer.IndexWriter);

            while (true)
            {
                int minCount = 0;
                int maxCount = 0;

                for (int d = minId; d <= maxId; d++)
                {
                    idField.SetStringValue(Pad(d));
                    int r = index.allowNegativeRandomInts ? random.Next() : random.Next(int.MaxValue);
                    if (index.maxR < r)
                    {
                        index.maxR = r;
                        maxCount   = 1;
                    }
                    else if (index.maxR == r)
                    {
                        maxCount++;
                    }

                    if (r < index.minR)
                    {
                        index.minR = r;
                        minCount   = 1;
                    }
                    else if (r == index.minR)
                    {
                        minCount++;
                    }
                    randField.SetStringValue(Pad(r));
                    bodyField.SetStringValue("body");
                    writer.AddDocument(doc);
                }

                if (minCount == 1 && maxCount == 1)
                {
                    // our subclasses rely on only 1 doc having the min or
                    // max, so we loop until we satisfy that. it should be
                    // exceedingly rare (Yonik calculates 1 in ~429,000 times)
                    // that this loop requires more than one try:
                    IndexReader ir = writer.GetReader();
                    writer.Dispose();
                    return ir;
                }

                // try again
                writer.DeleteAll();
            }
        }
Example #24
        public void TestExtend()
        {
            Directory         dir      = NewDirectory();
            MockAnalyzer      analyzer = new MockAnalyzer(Random);
            RandomIndexWriter w        = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            var docs = new string[]
            {
                @"this is the end of the world right",
                @"is this it or maybe not",
                @"this is the end of the universe as we know it",
                @"there is the famous restaurant at the end of the universe"
            };

            for (int i = 0; i < docs.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField(@"id", @"" + i, Field.Store.YES));
                doc.Add(NewTextField(@"field", docs[i], Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            {
                CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                              Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"0", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"2", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            {
                // this one boosts the termQuery("field", "universe") by 10x
                CommonTermsQuery query = new ExtendedCommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
                                                                      Random.NextBoolean() ? 2.0f : 0.5f);
                query.Add(new Term("field", "is"));
                query.Add(new Term("field", "this"));
                query.Add(new Term("field", "end"));
                query.Add(new Term("field", "world"));
                query.Add(new Term("field", "universe"));
                query.Add(new Term("field", "right"));
                TopDocs search = s.Search(query, 10);
                assertEquals(search.TotalHits, 3);
                assertEquals(@"2", r.Document(search.ScoreDocs[0].Doc).Get(@"id"));
                assertEquals(@"3", r.Document(search.ScoreDocs[1].Doc).Get(@"id"));
                assertEquals(@"0", r.Document(search.ScoreDocs[2].Doc).Get(@"id"));
            }

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
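The second block relies on an ExtendedCommonTermsQuery that boosts the "universe" term; its definition is not part of this listing. A plausible reconstruction, assuming CommonTermsQuery exposes the protected NewTermQuery factory as in Lucene 4.8 (the exact signature may differ by version):

        // Hypothetical sketch of the helper used above: boost a single term.
        private sealed class ExtendedCommonTermsQuery : CommonTermsQuery
        {
            public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency)
                : base(highFreqOccur, lowFreqOccur, maxTermFrequency)
            {
            }

            protected override Query NewTermQuery(Term term, TermContext context)
            {
                Query query = base.NewTermQuery(term, context);
                if (term.Bytes.Utf8ToString() == "universe") // avoids Text vs. Text() naming differences
                {
                    query.Boost = 10f; // the comment above says 10x
                }
                return query;
            }
        }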
Example #25
        public virtual void TestSetPosition()
        {
            Analyzer          analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            Directory         store    = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, store, analyzer);
            Document d = new Document();

            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));

            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;

            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitly.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for a non-existing searched term
            // even if other searched terms exist in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succeed for a non-existing searched term
            // because another searched term exists in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();

            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            store.Dispose();
        }
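The AnalyzerAnonymousInnerClassHelper is not shown in this listing; judging from the assertions, it emits tokens "1".."5" with position increments {1, 2, 1, 0, 1}, so "2" lands at position 2 and "3"/"4" share position 3. A minimal sketch of a token stream producing such gaps, using the standard attribute API (the class name is an assumption):

        public sealed class GapTokenStream : TokenStream
        {
            private static readonly string[] Tokens = { "1", "2", "3", "4", "5" };
            private static readonly int[] Increments = { 1, 2, 1, 0, 1 };
            private int index;
            private readonly ICharTermAttribute termAtt;
            private readonly IPositionIncrementAttribute posIncrAtt;

            public GapTokenStream()
            {
                termAtt = AddAttribute<ICharTermAttribute>();
                posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            }

            public override bool IncrementToken()
            {
                if (index >= Tokens.Length) return false;
                ClearAttributes();
                termAtt.SetEmpty().Append(Tokens[index]);
                // An increment of 2 leaves a positional hole; 0 stacks the token
                // on the previous position.
                posIncrAtt.PositionIncrement = Increments[index];
                index++;
                return true;
            }
        }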
Example #26
        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader        = w.GetReader();
            AtomicReader    wrapper       = SlowCompositeReaderWrapper.Wrap(reader);
            string          field         = @"body";
            Terms           terms         = wrapper.GetTerms(field);
            var             lowFreqQueue  = new PriorityQueueAnonymousClass(5);
            var             highFreqQueue = new PriorityQueueAnonymousClass1(5);

            try
            {
                TermsEnum iterator = terms.GetEnumerator();
                while (iterator.MoveNext())
                {
                    if (highFreqQueue.Count < 5)
                    {
                        highFreqQueue.Add(new TermAndFreq(
                                              BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(
                                             BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
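                // 'highFreq - 1' is later passed as the query's maxTermFrequency (an
                // absolute docFreq cutoff when >= 1), so this assumption guarantees
                // the sampled high-freq terms sit above the cutoff and the low-freq
                // terms below it.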
                IList <TermAndFreq> highTerms = QueueToList(highFreqQueue);
                IList <TermAndFreq> lowTerms  = QueueToList(lowFreqQueue);

                IndexSearcher    searcher     = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random);
                BooleanQuery     verifyQuery  = new BooleanQuery();
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random),
                                                                     lowFreqOccur, highFreq - 1, Random.NextBoolean());
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field,
                                                                             termAndFreq.term)), lowFreqOccur));
                }
                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);

                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                var hits = new JCG.HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);

                /*
                 *  need to force merge here since QueryUtils adds checks based
                 *  on leaf readers, which have different statistics than the
                 *  top-level reader if we have more than one segment. This could
                 *  result in different query results.
                 */
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }
Example #27
        public override void BeforeClass()
        {
            base.BeforeClass();

            NUM_DOCS  = AtLeast(500);
            NUM_ORDS  = AtLeast(2);
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            long   theLong           = long.MaxValue;
            double theDouble         = double.MaxValue;
            sbyte  theByte           = sbyte.MaxValue;
            short  theShort          = short.MaxValue;
            int    theInt            = int.MaxValue;
            float  theFloat          = float.MaxValue;

            UnicodeStrings = new string[NUM_DOCS];
            //MultiValued = new BytesRef[NUM_DOCS, NUM_ORDS];
            MultiValued = RectangularArrays.ReturnRectangularArray <BytesRef>(NUM_DOCS, NUM_ORDS);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: setUp");
            }
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("theLong", (theLong--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theDouble", (theDouble--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theByte", (theByte--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theShort", (theShort--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theInt", (theInt--).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                doc.Add(NewStringField("theFloat", (theFloat--).ToString("R", CultureInfo.InvariantCulture), Field.Store.NO));
                if (i % 2 == 0)
                {
                    doc.Add(NewStringField("sparse", (i).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                }

                if (i % 2 == 0)
                {
                    doc.Add(new Int32Field("numInt", i, Field.Store.NO));
                }

                // sometimes skip the field:
                if (Random.Next(40) != 17)
                {
                    UnicodeStrings[i] = GenerateString(i);
                    doc.Add(NewStringField("theRandomUnicodeString", UnicodeStrings[i], Field.Store.YES));
                }

                // sometimes skip the field:
                if (Random.Next(10) != 8)
                {
                    for (int j = 0; j < NUM_ORDS; j++)
                    {
                        string newValue = GenerateString(i);
                        MultiValued[i][j] = new BytesRef(newValue);
                        doc.Add(NewStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
                    }
                    Array.Sort(MultiValued[i]);
                }
                writer.AddDocument(doc);
            }
            IndexReader r = writer.GetReader();

            Reader = SlowCompositeReaderWrapper.Wrap(r);
            writer.Dispose();
        }
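A side note on the "R" format used for theDouble and theFloat above: the round-trip specifier guarantees the decremented values can be parsed back to the exact same bits on modern runtimes, which matters because the strings are later re-parsed for comparison. A quick standalone illustration (requires System.Globalization):

            double d = 1.0 / 3.0;
            string s = d.ToString("R", CultureInfo.InvariantCulture);
            bool roundTrips = double.Parse(s, CultureInfo.InvariantCulture) == d; // true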
Example #28
        public void Test()
        {
            RandomIndexWriter writer;
            DirectoryReader   indexReader;
            int numParents        = AtLeast(200);
            IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            cfg.SetMergePolicy(NewLogMergePolicy());
            using (writer = new RandomIndexWriter(Random, NewDirectory(), cfg))
            {
                Document parentDoc = new Document();
                NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
                parentDoc.Add(parentVal);
                StringField parent = new StringField("parent", "true", Field.Store.YES);
                parentDoc.Add(parent);
                for (int i = 0; i < numParents; ++i)
                {
                    IList <Document> documents = new JCG.List <Document>();
                    int numChildren            = Random.Next(10);
                    for (int j = 0; j < numChildren; ++j)
                    {
                        Document childDoc = new Document();
                        childDoc.Add(new NumericDocValuesField("child_val", Random.Next(5)));
                        documents.Add(childDoc);
                    }
                    parentVal.SetInt64Value(Random.Next(50));
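                    // The parent must be the last document in each AddDocuments block;
                    // block-join filters identify a block by its trailing parent doc.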
                    documents.Add(parentDoc);
                    writer.AddDocuments(documents);
                }
                writer.ForceMerge(1);
                indexReader = writer.GetReader();
            }

            AtomicReader     reader        = GetOnlySegmentReader(indexReader);
            Filter           parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
            FixedBitSet      parentBits    = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
            NumericDocValues parentValues  = reader.GetNumericDocValues("parent_val");

            NumericDocValues childValues = reader.GetNumericDocValues("child_val");

            Sort parentSort = new Sort(new SortField("parent_val", SortFieldType.INT64));
            Sort childSort  = new Sort(new SortField("child_val", SortFieldType.INT64));

            Sort   sort   = new Sort(new SortField("custom", new BlockJoinComparerSource(parentsFilter, parentSort, childSort)));
            Sorter sorter = new Sorter(sort);

            Sorter.DocMap docMap = sorter.Sort(reader);
            assertEquals(reader.MaxDoc, docMap.Count);

            int[] children       = new int[1];
            int   numChildren2   = 0;
            int   previousParent = -1;

            for (int i = 0; i < docMap.Count; ++i)
            {
                int oldID = docMap.NewToOld(i);
                if (parentBits.Get(oldID))
                {
                    // check that we have the right children
                    for (int j = 0; j < numChildren2; ++j)
                    {
                        assertEquals(oldID, parentBits.NextSetBit(children[j]));
                    }
                    // check that children are sorted
                    for (int j = 1; j < numChildren2; ++j)
                    {
                        int doc1 = children[j - 1];
                        int doc2 = children[j];
                        if (childValues.Get(doc1) == childValues.Get(doc2))
                        {
                            assertTrue(doc1 < doc2); // sort is stable
                        }
                        else
                        {
                            assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                        }
                    }
                    // check that parents are sorted
                    if (previousParent != -1)
                    {
                        if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                        {
                            assertTrue(previousParent < oldID);
                        }
                        else
                        {
                            assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                        }
                    }
                    // reset
                    previousParent = oldID;
                    numChildren2   = 0;
                }
                else
                {
                    children = ArrayUtil.Grow(children, numChildren2 + 1);
                    children[numChildren2++] = oldID;
                }
            }
            indexReader.Dispose();
            writer.IndexWriter.Directory.Dispose();
        }
Example #29
        public virtual void TestNonexistantFields()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document doc = new Document();

            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();

            AtomicReader ar = GetOnlySegmentReader(ir);

            IFieldCache cache = FieldCache.DEFAULT;

            cache.PurgeAllCaches();
            Assert.AreEqual(0, cache.GetCacheEntries().Length);

#pragma warning disable 612, 618
            Bytes bytes = cache.GetBytes(ar, "bogusbytes", true);
            Assert.AreEqual(0, bytes.Get(0));

            Int16s shorts = cache.GetInt16s(ar, "bogusshorts", true);
            Assert.AreEqual(0, shorts.Get(0));
#pragma warning restore 612, 618

            Int32s ints = cache.GetInt32s(ar, "bogusints", true);
            Assert.AreEqual(0, ints.Get(0));

            Int64s longs = cache.GetInt64s(ar, "boguslongs", true);
            Assert.AreEqual(0, longs.Get(0));

            Singles floats = cache.GetSingles(ar, "bogusfloats", true);
            Assert.AreEqual(0, floats.Get(0), 0.0f);

            Doubles doubles = cache.GetDoubles(ar, "bogusdoubles", true);
            Assert.AreEqual(0, doubles.Get(0), 0.0D);

            BytesRef        scratch  = new BytesRef();
            BinaryDocValues binaries = cache.GetTerms(ar, "bogusterms", true);
            binaries.Get(0, scratch);
            Assert.AreEqual(0, scratch.Length);

            SortedDocValues sorted = cache.GetTermsIndex(ar, "bogustermsindex");
            Assert.AreEqual(-1, sorted.GetOrd(0));
            sorted.Get(0, scratch);
            Assert.AreEqual(0, scratch.Length);

            SortedSetDocValues sortedSet = cache.GetDocTermOrds(ar, "bogusmultivalued");
            sortedSet.SetDocument(0);
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());

            IBits bits = cache.GetDocsWithField(ar, "bogusbits");
            Assert.IsFalse(bits.Get(0));

            // check that we cached nothing
            Assert.AreEqual(0, cache.GetCacheEntries().Length);
            ir.Dispose();
            dir.Dispose();
        }
Example #30
        public virtual void TestWrongIndexFieldName()
        {
            Store.Directory dir     = NewDirectory();
            Store.Directory taxoDir = NewDirectory();

            // Writes facet ords to a separate directory from the
            // main index:
            var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

            FacetsConfig config = new FacetsConfig();

            config.SetIndexFieldName("a", "$facets2");

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            doc.Add(new Int32Field("num", 10, Field.Store.NO));
            doc.Add(new FacetField("a", "foo1"));
            writer.AddDocument(config.Build(taxoWriter, doc));

            // NRT open
            IndexSearcher searcher = NewSearcher(writer.GetReader());

            writer.Dispose();

            // NRT open
            var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

            taxoWriter.Dispose();

            FacetsCollector c = new FacetsCollector();

            searcher.Search(new MatchAllDocsQuery(), c);

            TaxonomyFacetSumValueSource facets = new TaxonomyFacetSumValueSource(taxoReader, config, c, new Int32FieldSource("num"));

            // Ask for top 10 labels for any dims that have counts:
            IList <FacetResult> results = facets.GetAllDims(10);

            Assert.IsTrue(results.Count == 0);

            try
            {
                facets.GetSpecificValue("a");
                fail("should have hit exc");
            }
            catch (Exception iae) when(iae.IsIllegalArgumentException())
            {
                // expected
            }

            try
            {
                facets.GetTopChildren(10, "a");
                fail("should have hit exc");
            }
            catch (Exception iae) when(iae.IsIllegalArgumentException())
            {
                // expected
            }

            IOUtils.Dispose(searcher.IndexReader, taxoReader, dir, taxoDir);
        }