Ejemplos de TermsEnum.MoveNext en C# (CSharp)

Lenguaje de programación: C# (CSharp)

Clase / Tipo: TermsEnum

Método / Función: MoveNext

Ejemplos en hotexamples.com: 19

C# (CSharp) TermsEnum.MoveNext - 19 ejemplos encontrados. Estos son los ejemplos en C# (CSharp) del mundo real mejor valorados de TermsEnum.MoveNext extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

Docs(30)

Next(30)

MoveNext(19)

DocFreq(14)

DocsAndPositions(14)

TotalTermFreq(13)

next(11)

Term(11)

SeekCeil(11)

SeekExact(7)

TermState(6)

GetTermState(6)

docFreq(5)

docs(3)

docsAndPositions(3)

Ord(2)

totalTermFreq(2)

seekExact(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: TestSameScoresWithThreads.cs Proyecto: murugangs/lucenenet

        public virtual void Test()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            LineFileDocs docs         = new LineFileDocs(Random, DefaultCodecSupportsDocValues);
            int          charsToIndex = AtLeast(100000);
            int          charsIndexed = 0;

            //System.out.println("bytesToIndex=" + charsToIndex);
            while (charsIndexed < charsToIndex)
            {
                Document doc = docs.NextDoc();
                charsIndexed += doc.Get("body").Length;
                w.AddDocument(doc);
                //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
            }
            IndexReader r = w.GetReader();

            //System.out.println("numDocs=" + r.NumDocs);
            w.Dispose();

            IndexSearcher s         = NewSearcher(r);
            Terms         terms     = MultiFields.GetFields(r).GetTerms("body");
            int           termCount = 0;
            TermsEnum     termsEnum = terms.GetEnumerator();

            while (termsEnum.MoveNext())
            {
                termCount++;
            }
            Assert.IsTrue(termCount > 0);

            // Target ~10 terms to search:
            double chance = 10.0 / termCount;

            termsEnum = terms.GetEnumerator(termsEnum);
            IDictionary <BytesRef, TopDocs> answers = new Dictionary <BytesRef, TopDocs>();

            while (termsEnum.MoveNext())
            {
                if (Random.NextDouble() <= chance)
                {
                    BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term);
                    answers[term] = s.Search(new TermQuery(new Term("body", term)), 100);
                }
            }

            if (answers.Count > 0)
            {
                CountdownEvent startingGun = new CountdownEvent(1);
                int            numThreads  = TestUtil.NextInt32(Random, 2, 5);
                ThreadJob[]    threads     = new ThreadJob[numThreads];
                for (int threadID = 0; threadID < numThreads; threadID++)
                {
                    ThreadJob thread = new ThreadAnonymousClass(this, s, answers, startingGun);
                    threads[threadID] = thread;
                    thread.Start();
                }
                startingGun.Signal();
                foreach (ThreadJob thread in threads)
                {
                    thread.Join();
                }
            }
            r.Dispose();
            dir.Dispose();
        }

Ejemplo n.º 2

Mostrar archivo

        /// <summary>
        /// Build the suggest index, using up to the specified
        /// amount of temporary RAM while building.  Note that
        /// the weights for the suggestions are ignored.
        /// </summary>
        public virtual void Build(IInputEnumerator enumerator, double ramBufferSizeMB)
        {
            if (enumerator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();

            // LUCENENET specific - using GetRandomFileName() instead of picking a random int
            DirectoryInfo tempIndexPath; // LUCENENET: IDE0059: Remove unnecessary value assignment

            while (true)
            {
                tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName())));
                tempIndexPath.Create();
                if (System.IO.Directory.Exists(tempIndexPath.FullName))
                {
                    break;
                }
            }

            Directory dir = FSDirectory.Open(tempIndexPath);

            try
            {
#pragma warning disable 612, 618
                IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
                iwc.SetOpenMode(OpenMode.CREATE);
                iwc.SetRAMBufferSizeMB(ramBufferSizeMB);
                IndexWriter writer = new IndexWriter(dir, iwc);

                var ft = new FieldType(TextField.TYPE_NOT_STORED);
                // TODO: if only we had IndexOptions.TERMS_ONLY...
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                ft.OmitNorms    = true;
                ft.Freeze();

                Document doc   = new Document();
                Field    field = new Field("body", "", ft);
                doc.Add(field);

                totTokens = 0;
                IndexReader reader = null;

                bool success = false;
                count = 0;
                try
                {
                    while (enumerator.MoveNext())
                    {
                        BytesRef surfaceForm = enumerator.Current;
                        field.SetStringValue(surfaceForm.Utf8ToString());
                        writer.AddDocument(doc);
                        count++;
                    }

                    reader = DirectoryReader.Open(writer, false);

                    Terms terms = MultiFields.GetTerms(reader, "body");
                    if (terms == null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }

                    // Move all ngrams into an FST:
                    TermsEnum termsEnum = terms.GetEnumerator(null);

                    Outputs <long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder <long?> builder = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

                    Int32sRef scratchInts = new Int32sRef();
                    while (termsEnum.MoveNext())
                    {
                        BytesRef term       = termsEnum.Term;
                        int      ngramCount = CountGrams(term);
                        if (ngramCount > grams)
                        {
                            throw new ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                        }
                        if (ngramCount == 1)
                        {
                            totTokens += termsEnum.TotalTermFreq;
                        }

                        builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
                    }

                    fst = builder.Finish();
                    if (fst == null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }
                    //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                    /*
                     * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                     * Util.toDot(fst, pw, true, true);
                     * pw.close();
                     */

                    success = true;
                }
                finally
                {
                    if (success)
                    {
                        IOUtils.Dispose(writer, reader);
                    }
                    else
                    {
                        IOUtils.DisposeWhileHandlingException(writer, reader);
                    }
                }
            }
            finally
            {
                try
                {
                    IOUtils.Dispose(dir);
                }
                finally
                {
                    // LUCENENET specific - since we are removing the entire directory anyway,
                    // it doesn't make sense to first do a loop in order remove the files.
                    // Let the System.IO.Directory.Delete() method handle that.
                    // We also need to dispose the Directory instance first before deleting from disk.
                    try
                    {
                        System.IO.Directory.Delete(tempIndexPath.FullName, true);
                    }
                    catch (Exception e)
                    {
                        throw new InvalidOperationException("failed to remove " + tempIndexPath, e);
                    }
                }
            }
        }

Ejemplo n.º 3

Mostrar archivo

        public virtual void Test()
        {
#pragma warning disable 612, 618
            IFieldCache        cache   = FieldCache.DEFAULT;
            FieldCache.Doubles doubles = cache.GetDoubles(reader, "theDouble", Random.NextBoolean());
            Assert.AreSame(doubles, cache.GetDoubles(reader, "theDouble", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(doubles, cache.GetDoubles(reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
            }

            FieldCache.Int64s longs = cache.GetInt64s(reader, "theLong", Random.NextBoolean());
            Assert.AreSame(longs, cache.GetInt64s(reader, "theLong", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(longs, cache.GetInt64s(reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
            }

            FieldCache.Bytes bytes = cache.GetBytes(reader, "theByte", Random.NextBoolean());
            Assert.AreSame(bytes, cache.GetBytes(reader, "theByte", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(bytes, cache.GetBytes(reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
            }

            FieldCache.Int16s shorts = cache.GetInt16s(reader, "theShort", Random.NextBoolean());
            Assert.AreSame(shorts, cache.GetInt16s(reader, "theShort", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(shorts, cache.GetInt16s(reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
            }

            FieldCache.Int32s ints = cache.GetInt32s(reader, "theInt", Random.NextBoolean());
            Assert.AreSame(ints, cache.GetInt32s(reader, "theInt", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(ints, cache.GetInt32s(reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
            }

            FieldCache.Singles floats = cache.GetSingles(reader, "theFloat", Random.NextBoolean());
            Assert.AreSame(floats, cache.GetSingles(reader, "theFloat", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(floats, cache.GetSingles(reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
            }
#pragma warning restore 612, 618

            IBits docsWithField = cache.GetDocsWithField(reader, "theLong");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(reader, "theLong"), "Second request to cache return same array");
            Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.IsTrue(docsWithField.Get(i));
            }

            docsWithField = cache.GetDocsWithField(reader, "sparse");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(reader, "sparse"), "Second request to cache return same array");
            Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
            }

            // getTermsIndex
            SortedDocValues termsIndex = cache.GetTermsIndex(reader, "theRandomUnicodeString");
            Assert.AreSame(termsIndex, cache.GetTermsIndex(reader, "theRandomUnicodeString"), "Second request to cache return same array");
            BytesRef br = new BytesRef();
            for (int i = 0; i < NUM_DOCS; i++)
            {
                BytesRef term;
                int      ord = termsIndex.GetOrd(i);
                if (ord == -1)
                {
                    term = null;
                }
                else
                {
                    termsIndex.LookupOrd(ord, br);
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(unicodeStrings[i] == null || unicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i]);
            }

            int nTerms = termsIndex.ValueCount;

            TermsEnum tenum = termsIndex.GetTermsEnum();
            BytesRef  val   = new BytesRef();
            for (int i = 0; i < nTerms; i++)
            {
                tenum.MoveNext();
                BytesRef val1 = tenum.Term;
                termsIndex.LookupOrd(i, val);
                // System.out.println("i="+i);
                Assert.AreEqual(val, val1);
            }

            // seek the enum around (note this isn't a great test here)
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                int k = Random.Next(nTerms);
                termsIndex.LookupOrd(k, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            for (int i = 0; i < nTerms; i++)
            {
                termsIndex.LookupOrd(i, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            // test bad field
            termsIndex = cache.GetTermsIndex(reader, "bogusfield");

            // getTerms
            BinaryDocValues terms = cache.GetTerms(reader, "theRandomUnicodeString", true);
            Assert.AreSame(terms, cache.GetTerms(reader, "theRandomUnicodeString", true), "Second request to cache return same array");
            IBits bits = cache.GetDocsWithField(reader, "theRandomUnicodeString");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                terms.Get(i, br);
                BytesRef term;
                if (!bits.Get(i))
                {
                    term = null;
                }
                else
                {
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(unicodeStrings[i] == null || unicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i]);
            }

            // test bad field
            terms = cache.GetTerms(reader, "bogusfield", false);

            // getDocTermOrds
            SortedSetDocValues termOrds = cache.GetDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
            int numEntries = cache.GetCacheEntries().Length;
            // ask for it again, and check that we didnt create any additional entries:
            termOrds = cache.GetDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
            Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                termOrds.SetDocument(i);
                // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
                ISet <BytesRef> values = new JCG.LinkedHashSet <BytesRef>(multiValued[i]);
                foreach (BytesRef v in values)
                {
                    if (v == null)
                    {
                        // why does this test use null values... instead of an empty list: confusing
                        break;
                    }
                    long ord = termOrds.NextOrd();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                    }
                    BytesRef scratch = new BytesRef();
                    termOrds.LookupOrd(ord, scratch);
                    Assert.AreEqual(v, scratch);
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
            }

            // test bad field
            termOrds = cache.GetDocTermOrds(reader, "bogusfield");
            Assert.IsTrue(termOrds.ValueCount == 0);

            FieldCache.DEFAULT.PurgeByCacheKey(reader.CoreCacheKey);
        }

Ejemplo n.º 4

Mostrar archivo

Archivo: ThreadedIndexingAndSearchingTestCase.cs Proyecto: murugangs/lucenenet

 public override void Run()
 {
     if (Verbose)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (Environment.TickCount < stopTimeMS)
     {
         try
         {
             IndexSearcher s = outerInstance.GetCurrentSearcher();
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     diagnostics.TryGetValue("source", out string source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    // LUCENENET: ConditionalWeakTable doesn't have ContainsKey, so we normalize to TryGetValue
                                    !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.TryGetValue(segReader.core, out BooleanRef _));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetEnumerator();
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount / 30;
                         shift   = Random.Next(trigger);
                     }
                     while (Environment.TickCount < stopTimeMS)
                     {
                         if (!termsEnum.MoveNext())
                         {
                             totTermCount.Value = seenTermCount;
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", termsEnum.Term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t)
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.m_failed.Value = (true);
             Console.WriteLine(t.ToString());
             throw new Exception(t.ToString(), t);
         }
     }
 }

Ejemplo n.º 5

Mostrar archivo

Archivo: TestBlockPostingsFormat3.cs Proyecto: ywscr/lucenenet

        private void AssertTermsSeeking(Terms leftTerms, Terms rightTerms)
        {
            TermsEnum leftEnum  = null;
            TermsEnum rightEnum = null;

            // just an upper bound
            int    numTests = AtLeast(20);
            Random random   = Random;

            // collect this number of terms from the left side
            ISet <BytesRef> tests     = new JCG.HashSet <BytesRef>();
            int             numPasses = 0;

            while (numPasses < 10 && tests.Count < numTests)
            {
                leftEnum = leftTerms.GetEnumerator(leftEnum);
                BytesRef term = null;
                while (leftEnum.MoveNext())
                {
                    term = leftEnum.Term;
                    int code = random.Next(10);
                    if (code == 0)
                    {
                        // the term
                        tests.Add(BytesRef.DeepCopyOf(term));
                    }
                    else if (code == 1)
                    {
                        // truncated subsequence of term
                        term = BytesRef.DeepCopyOf(term);
                        if (term.Length > 0)
                        {
                            // truncate it
                            term.Length = random.Next(term.Length);
                        }
                    }
                    else if (code == 2)
                    {
                        // term, but ensure a non-zero offset
                        var newbytes = new byte[term.Length + 5];
                        Array.Copy(term.Bytes, term.Offset, newbytes, 5, term.Length);
                        tests.Add(new BytesRef(newbytes, 5, term.Length));
                    }
                }
                numPasses++;
            }

            IList <BytesRef> shuffledTests = new JCG.List <BytesRef>(tests);

            shuffledTests.Shuffle(Random);

            foreach (BytesRef b in shuffledTests)
            {
                leftEnum  = leftTerms.GetEnumerator(leftEnum);
                rightEnum = rightTerms.GetEnumerator(rightEnum);

                Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));
                Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));

                SeekStatus leftStatus;
                SeekStatus rightStatus;

                leftStatus  = leftEnum.SeekCeil(b);
                rightStatus = rightEnum.SeekCeil(b);
                Assert.AreEqual(leftStatus, rightStatus);
                if (leftStatus != SeekStatus.END)
                {
                    Assert.AreEqual(leftEnum.Term, rightEnum.Term);
                }

                leftStatus  = leftEnum.SeekCeil(b);
                rightStatus = rightEnum.SeekCeil(b);
                Assert.AreEqual(leftStatus, rightStatus);
                if (leftStatus != SeekStatus.END)
                {
                    Assert.AreEqual(leftEnum.Term, rightEnum.Term);
                }
            }
        }

Ejemplo n.º 6

Mostrar archivo

        public virtual void TestTransitionAPI()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random, dir);

            Documents.Document doc = new Documents.Document();
#pragma warning disable 612, 618
            doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
            doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
            doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
            doc.Add(new Field("binary", new byte[10]));
            doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
            doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
#pragma warning restore 612, 618
            w.AddDocument(doc);
            IndexReader r = w.GetReader();
            w.Dispose();

            doc = r.Document(0);
            // 4 stored fields
            Assert.AreEqual(4, doc.Fields.Count);
            Assert.AreEqual("abc", doc.Get("stored"));
            Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
            Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
            BytesRef br = doc.GetBinaryValue("binary");
            Assert.IsNotNull(br);
            Assert.AreEqual(10, br.Length);

            IndexSearcher s = new IndexSearcher(r);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
            Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

            foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
            {
                Fields tvFields = r.GetTermVectors(0);
                Terms  tvs      = tvFields.GetTerms(field);
                Assert.IsNotNull(tvs);
                Assert.AreEqual(2, tvs.Count);
                TermsEnum tvsEnum = tvs.GetEnumerator();
                Assert.IsTrue(tvsEnum.MoveNext());
                Assert.AreEqual(new BytesRef("abc"), tvsEnum.Term);
                DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
                if (field.Equals("tv", StringComparison.Ordinal))
                {
                    Assert.IsNull(dpEnum);
                }
                else
                {
                    Assert.IsNotNull(dpEnum);
                }
                Assert.IsTrue(tvsEnum.MoveNext());
                Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Term);
                Assert.IsFalse(tvsEnum.MoveNext());
            }

            r.Dispose();
            dir.Dispose();
        }

Ejemplo n.º 7

Mostrar archivo

        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random, indexStore);

            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);

            IndexReader   reader   = writer.GetReader();
            IndexSearcher searcher = NewSearcher(reader);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).GetTerms("body").GetEnumerator();

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term.Utf8ToString();
                if (s.StartsWith(prefix, StringComparison.Ordinal))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.MoveNext());

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();

            termsWithPrefix.Clear();
            prefix = "blue";
            te.SeekCeil(new BytesRef(prefix));

            do
            {
                if (te.Term.Utf8ToString().StartsWith(prefix, StringComparison.Ordinal))
                {
                    termsWithPrefix.AddLast(new Term("body", te.Term.Utf8ToString()));
                }
            } while (te.MoveNext());

            query3.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length); // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.Slop = 1;
            result      = searcher.Search(query3, null, 1000).ScoreDocs;

            // just make sure no exc:
            searcher.Explain(query3, 0);

            Assert.AreEqual(3, result.Length); // blueberry pizza, bluebird pizza, bluebird
            // foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();

            try
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
                Assert.Fail();
            }
            catch (Exception e) when(e.IsIllegalArgumentException())
            {
                // okay, all terms must belong to the same field
            }

            writer.Dispose();
            reader.Dispose();
            indexStore.Dispose();
        }

Ejemplo n.º 8

Mostrar archivo

        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader        = w.GetReader();
            AtomicReader    wrapper       = SlowCompositeReaderWrapper.Wrap(reader);
            string          field         = @"body";
            Terms           terms         = wrapper.GetTerms(field);
            var             lowFreqQueue  = new PriorityQueueAnonymousClass(5);
            var             highFreqQueue = new PriorityQueueAnonymousClass1(5);

            try
            {
                TermsEnum iterator = terms.GetEnumerator();
                while (iterator.MoveNext())
                {
                    if (highFreqQueue.Count < 5)
                    {
                        highFreqQueue.Add(new TermAndFreq(
                                              BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(
                                             BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List <TermAndFreq> highTerms = QueueToList(highFreqQueue);
                List <TermAndFreq> lowTerms  = QueueToList(lowFreqQueue);

                IndexSearcher    searcher     = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random);
                BooleanQuery     verifyQuery  = new BooleanQuery();
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random),
                                                                     lowFreqOccur, highFreq - 1, Random.NextBoolean());
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field,
                                                                             termAndFreq.term)), lowFreqOccur));
                }
                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);

                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                var hits = new JCG.HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);

                /*
                 *  need to force merge here since QueryUtils adds checks based
                 *  on leave readers which have different statistics than the top
                 *  level reader if we have more than one segment. This could
                 *  result in a different query / results.
                 */
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }

Ejemplo n.º 9

Mostrar archivo

Archivo: FieldTermStack.cs Proyecto: YAFNET/YAFNET

        //public static void main( string[] args ) throws Exception {
        //  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
        //  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,  "f", analyzer );
        //  Query query = parser.parse( "a x:b" );
        //  FieldQuery fieldQuery = new FieldQuery( query, true, false );

        //  Directory dir = new RAMDirectory();
        //  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
        //  Document doc = new Document();
        //  IndexableFieldType ft = new IndexableFieldType(TextField.TYPE_STORED);
        //  ft.setStoreTermVectors(true);
        //  ft.setStoreTermVectorOffsets(true);
        //  ft.setStoreTermVectorPositions(true);
        //  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
        //  doc.add( new Field( "f", ft, "b a b a f" ) );
        //  writer.addDocument( doc );
        //  writer.close();

        //  IndexReader reader = IndexReader.open(dir1);
        //  new FieldTermStack( reader, 0, "f", fieldQuery );
        //  reader.close();
        //}

        /// <summary>
        /// a constructor.
        /// </summary>
        /// <param name="reader"><see cref="IndexReader"/> of the index</param>
        /// <param name="docId">document id to be highlighted</param>
        /// <param name="fieldName">field of the document to be highlighted</param>
        /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery)
        {
            this.fieldName = fieldName;

            ISet <string> termSet = fieldQuery.GetTermSet(fieldName);

            // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
            if (termSet is null)
            {
                return;
            }

            Fields vectors = reader.GetTermVectors(docId);

            if (vectors is null)
            {
                // null snippet
                return;
            }

            Terms vector = vectors.GetTerms(fieldName);

            if (vector is null)
            {
                // null snippet
                return;
            }

            CharsRef             spare     = new CharsRef();
            TermsEnum            termsEnum = vector.GetEnumerator();
            DocsAndPositionsEnum dpEnum    = null;
            BytesRef             text;

            int numDocs = reader.MaxDoc;

            while (termsEnum.MoveNext())
            {
                text = termsEnum.Term;
                UnicodeUtil.UTF8toUTF16(text, spare);
                string term = spare.ToString();
                if (!termSet.Contains(term))
                {
                    continue;
                }
                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                if (dpEnum is null)
                {
                    // null snippet
                    return;
                }

                dpEnum.NextDoc();

                // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
                float weight = (float)(Math.Log(numDocs / (double)(reader.DocFreq(new Term(fieldName, text)) + 1)) + 1.0);

                int freq = dpEnum.Freq;

                for (int i = 0; i < freq; i++)
                {
                    int pos = dpEnum.NextPosition();
                    if (dpEnum.StartOffset < 0)
                    {
                        return; // no offsets, null snippet
                    }
                    termList.Add(new TermInfo(term, dpEnum.StartOffset, dpEnum.EndOffset, pos, weight));
                }
            }

            // sort by position
            CollectionUtil.TimSort(termList);

            // now look for dups at the same position, linking them together
            int      currentPos = -1;
            TermInfo previous   = null;
            TermInfo first      = null;

            for (int i = 0; i < termList.Count;)
            {
                TermInfo current = termList[i];
                if (current.Position == currentPos)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(previous != null);
                    }
                    previous.SetNext(current);
                    previous = current;
                    //iterator.Remove();

                    // LUCENENET NOTE: Remove, but don't advance the i position (since removing will advance to the next item)
                    termList.RemoveAt(i);
                }
                else
                {
                    if (previous != null)
                    {
                        previous.SetNext(first);
                    }
                    previous   = first = current;
                    currentPos = current.Position;

                    // LUCENENET NOTE: Only increment the position if we don't do a delete.
                    i++;
                }
            }

            if (previous != null)
            {
                previous.SetNext(first);
            }
        }

Ejemplo n.º 10

Mostrar archivo

        /// <summary>
        /// Low level api. Returns a token stream generated from a <see cref="Terms"/>. This
        /// can be used to feed the highlighter with a pre-parsed token
        /// stream.  The <see cref="Terms"/> must have offsets available.
        /// <para/>
        /// In my tests the speeds to recreate 1000 token streams using this method are:
        /// <list type="bullet">
        ///     <item><description>
        ///     with TermVector offset only data stored - 420  milliseconds
        ///     </description></item>
        ///     <item><description>
        ///     with TermVector offset AND position data stored - 271 milliseconds
        ///     (nb timings for TermVector with position data are based on a tokenizer with contiguous
        ///     positions - no overlaps or gaps)
        ///     </description></item>
        ///     <item><description>
        ///     The cost of not using TermPositionVector to store
        ///     pre-parsed content and using an analyzer to re-parse the original content:
        ///     - reanalyzing the original content - 980 milliseconds
        ///     </description></item>
        /// </list>
        ///
        /// The re-analyze timings will typically vary depending on -
        /// <list type="number">
        ///     <item><description>
        ///     The complexity of the analyzer code (timings above were using a
        ///     stemmer/lowercaser/stopword combo)
        ///     </description></item>
        ///     <item><description>
        ///     The  number of other fields (Lucene reads ALL fields off the disk
        ///     when accessing just one document field - can cost dear!)
        ///     </description></item>
        ///     <item><description>
        ///     Use of compression on field storage - could be faster due to compression (less disk IO)
        ///     or slower (more CPU burn) depending on the content.
        ///     </description></item>
        /// </list>
        /// </summary>
        /// <param name="tpv"></param>
        /// <param name="tokenPositionsGuaranteedContiguous">true if the token position numbers have no overlaps or gaps. If looking
        /// to eek out the last drops of performance, set to true. If in doubt, set to false.</param>
        /// <exception cref="ArgumentException">if no offsets are available</exception>
        public static TokenStream GetTokenStream(Terms tpv,
                                                 bool tokenPositionsGuaranteedContiguous)
        {
            if (!tpv.HasOffsets)
            {
                throw new ArgumentException("Cannot create TokenStream from Terms without offsets");
            }

            if (!tokenPositionsGuaranteedContiguous && tpv.HasPositions)
            {
                return(new TokenStreamFromTermPositionVector(tpv));
            }

            bool hasPayloads = tpv.HasPayloads;

            // code to reconstruct the original sequence of Tokens
            TermsEnum termsEnum   = tpv.GetEnumerator();
            int       totalTokens = 0;

            while (termsEnum.MoveNext())
            {
                totalTokens += (int)termsEnum.TotalTermFreq;
            }
            Token[]      tokensInOriginalOrder = new Token[totalTokens];
            List <Token> unsortedTokens        = null;

            termsEnum = tpv.GetEnumerator();
            DocsAndPositionsEnum dpEnum = null;

            while (termsEnum.MoveNext())
            {
                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                if (dpEnum == null)
                {
                    throw new ArgumentException("Required TermVector Offset information was not found");
                }
                string term = termsEnum.Term.Utf8ToString();

                dpEnum.NextDoc();
                int freq = dpEnum.Freq;
                for (int posUpto = 0; posUpto < freq; posUpto++)
                {
                    int pos = dpEnum.NextPosition();
                    if (dpEnum.StartOffset < 0)
                    {
                        throw new ArgumentException("Required TermVector Offset information was not found");
                    }
                    Token token = new Token(term, dpEnum.StartOffset, dpEnum.EndOffset);
                    if (hasPayloads)
                    {
                        // Must make a deep copy of the returned payload,
                        // since D&PEnum API is allowed to re-use on every
                        // call:
                        token.Payload = BytesRef.DeepCopyOf(dpEnum.GetPayload());
                    }

                    if (tokenPositionsGuaranteedContiguous && pos != -1)
                    {
                        // We have positions stored and a guarantee that the token position
                        // information is contiguous

                        // This may be fast BUT wont work if Tokenizers used which create >1
                        // token in same position or
                        // creates jumps in position numbers - this code would fail under those
                        // circumstances

                        // tokens stored with positions - can use this to index straight into
                        // sorted array
                        tokensInOriginalOrder[pos] = token;
                    }
                    else
                    {
                        // tokens NOT stored with positions or not guaranteed contiguous - must
                        // add to list and sort later
                        if (unsortedTokens == null)
                        {
                            unsortedTokens = new List <Token>();
                        }
                        unsortedTokens.Add(token);
                    }
                }
            }

            // If the field has been stored without position data we must perform a sort
            if (unsortedTokens != null)
            {
                tokensInOriginalOrder = unsortedTokens.ToArray();
                ArrayUtil.TimSort(tokensInOriginalOrder, new TokenComparer());
                //tokensInOriginalOrder = tokensInOriginalOrder
                //    .OrderBy(t => t, new TokenComparer() )
                //    .ToArray();
            }
            return(new StoredTokenStream(tokensInOriginalOrder));
        }

Ejemplo n.º 11

Mostrar archivo

        /// <summary>
        /// Safe (but, slowish) default method to write every
        /// vector field in the document.
        /// </summary>
        protected void AddAllDocVectors(Fields vectors, MergeState mergeState)
        {
            if (vectors == null)
            {
                StartDocument(0);
                FinishDocument();
                return;
            }

            int numFields = vectors.Count;

            if (numFields == -1)
            {
                // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
                numFields = 0;
                //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();)
                foreach (string it in vectors)
                {
                    numFields++;
                }
            }
            StartDocument(numFields);

            string lastFieldName = null;

            TermsEnum            termsEnum            = null;
            DocsAndPositionsEnum docsAndPositionsEnum = null;

            int fieldCount = 0;

            foreach (string fieldName in vectors)
            {
                fieldCount++;
                FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(lastFieldName == null || fieldName.CompareToOrdinal(lastFieldName) > 0, "lastFieldName={0} fieldName={1}", lastFieldName, fieldName);
                }
                lastFieldName = fieldName;

                Terms terms = vectors.GetTerms(fieldName);
                if (terms == null)
                {
                    // FieldsEnum shouldn't lie...
                    continue;
                }

                bool hasPositions = terms.HasPositions;
                bool hasOffsets   = terms.HasOffsets;
                bool hasPayloads  = terms.HasPayloads;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(!hasPayloads || hasPositions);
                }

                int numTerms = (int)terms.Count;
                if (numTerms == -1)
                {
                    // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
                    numTerms  = 0;
                    termsEnum = terms.GetEnumerator(termsEnum);
                    while (termsEnum.MoveNext())
                    {
                        numTerms++;
                    }
                }

                StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
                termsEnum = terms.GetEnumerator(termsEnum);

                int termCount = 0;
                while (termsEnum.MoveNext())
                {
                    termCount++;

                    int freq = (int)termsEnum.TotalTermFreq;

                    StartTerm(termsEnum.Term, freq);

                    if (hasPositions || hasOffsets)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docsAndPositionsEnum != null);
                        }

                        int docID = docsAndPositionsEnum.NextDoc();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
                            Debugging.Assert(docsAndPositionsEnum.Freq == freq);
                        }

                        for (int posUpto = 0; posUpto < freq; posUpto++)
                        {
                            int pos         = docsAndPositionsEnum.NextPosition();
                            int startOffset = docsAndPositionsEnum.StartOffset;
                            int endOffset   = docsAndPositionsEnum.EndOffset;

                            BytesRef payload = docsAndPositionsEnum.GetPayload();

                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(!hasPositions || pos >= 0);
                            }
                            AddPosition(pos, startOffset, endOffset, payload);
                        }
                    }
                    FinishTerm();
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termCount == numTerms);
                }
                FinishField();
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(fieldCount == numFields);
            }
            FinishDocument();
        }

Ejemplo n.º 12

Mostrar archivo

 protected internal override void NextTerm()
 {
     m_mergeTerm = tenum.MoveNext() ? tenum.Term : null;
 }

Ejemplo n.º 13

Mostrar archivo

Archivo: TestBlockPostingsFormat3.cs Proyecto: ywscr/lucenenet

        /// <summary>
        /// checks the terms enum sequentially
        /// if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
        /// </summary>
        public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
        {
            IBits randomBits = new RandomBits(MAXDOC, Random.NextDouble(), Random);
            DocsAndPositionsEnum leftPositions  = null;
            DocsAndPositionsEnum rightPositions = null;
            DocsEnum             leftDocs       = null;
            DocsEnum             rightDocs      = null;

            while (leftTermsEnum.MoveNext())
            {
                Assert.IsTrue(rightTermsEnum.MoveNext());
                Assert.AreEqual(leftTermsEnum.Term, rightTermsEnum.Term);
                AssertTermStats(leftTermsEnum, rightTermsEnum);
                if (deep)
                {
                    // with payloads + off
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));
                    // with payloads only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.PAYLOADS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.PAYLOADS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.PAYLOADS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.PAYLOADS));

                    // with offsets only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.OFFSETS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.OFFSETS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.OFFSETS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.OFFSETS));

                    // with positions only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.NONE));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.NONE));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.NONE));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.NONE));

                    // with freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsFlags.NONE));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsFlags.NONE));

                    // with freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsFlags.NONE));
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsFlags.NONE));
                }
            }
            Assert.IsFalse(rightTermsEnum.MoveNext());
        }

Ejemplo n.º 14

Mostrar archivo

        public virtual void TestPhrasePrefix()
        {
            Directory         indexStore = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random, indexStore);
            Document          doc1       = new Document();
            Document          doc2       = new Document();
            Document          doc3       = new Document();
            Document          doc4       = new Document();
            Document          doc5       = new Document();

            doc1.Add(NewTextField("body", "blueberry pie", Field.Store.YES));
            doc2.Add(NewTextField("body", "blueberry strudel", Field.Store.YES));
            doc3.Add(NewTextField("body", "blueberry pizza", Field.Store.YES));
            doc4.Add(NewTextField("body", "blueberry chewing gum", Field.Store.YES));
            doc5.Add(NewTextField("body", "piccadilly circus", Field.Store.YES));
            writer.AddDocument(doc1);
            writer.AddDocument(doc2);
            writer.AddDocument(doc3);
            writer.AddDocument(doc4);
            writer.AddDocument(doc5);
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // PhrasePrefixQuery query1 = new PhrasePrefixQuery();
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // PhrasePrefixQuery query2 = new PhrasePrefixQuery();
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            LinkedList <Term> termsWithPrefix = new LinkedList <Term>();

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            string    prefix = "pi";
            TermsEnum te     = MultiFields.GetFields(reader).GetTerms("body").GetEnumerator();

            te.SeekCeil(new BytesRef(prefix));
            do
            {
                string s = te.Term.Utf8ToString();
                if (s.StartsWith(prefix, StringComparison.Ordinal))
                {
                    termsWithPrefix.AddLast(new Term("body", s));
                }
                else
                {
                    break;
                }
            } while (te.MoveNext());

            query1.Add(termsWithPrefix.ToArray(/*new Term[0]*/));
            query2.Add(termsWithPrefix.ToArray(/*new Term[0]*/));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000).ScoreDocs;
            Assert.AreEqual(2, result.Length);

            result = searcher.Search(query2, null, 1000).ScoreDocs;
            Assert.AreEqual(0, result.Length);
            reader.Dispose();
            indexStore.Dispose();
        }

Ejemplo n.º 15

Mostrar archivo

        ///<summary>Constructor</summary>
        /// <param name="vector">
        /// Terms that contains the data for
        /// creating the <see cref="TokenStream"/>. Must have positions and offsets.
        /// </param>
        public TokenStreamFromTermPositionVector(Terms vector)
        {
            termAttribute = AddAttribute <ICharTermAttribute>();
            positionIncrementAttribute = AddAttribute <IPositionIncrementAttribute>();
            offsetAttribute            = AddAttribute <IOffsetAttribute>();
            payloadAttribute           = AddAttribute <IPayloadAttribute>();

            bool                 hasOffsets  = vector.HasOffsets;
            bool                 hasPayloads = vector.HasPayloads;
            TermsEnum            termsEnum   = vector.GetEnumerator();
            BytesRef             text;
            DocsAndPositionsEnum dpEnum = null;

            while (termsEnum.MoveNext())
            {
                text   = termsEnum.Term;
                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                dpEnum.NextDoc();
                int freq = dpEnum.Freq;
                for (int j = 0; j < freq; j++)
                {
                    int   pos = dpEnum.NextPosition();
                    Token token;
                    if (hasOffsets)
                    {
                        token = new Token(text.Utf8ToString(),
                                          dpEnum.StartOffset,
                                          dpEnum.EndOffset);
                    }
                    else
                    {
                        token = new Token();
                        token.SetEmpty().Append(text.Utf8ToString());
                    }
                    if (hasPayloads)
                    {
                        // Must make a deep copy of the returned payload,
                        // since D&PEnum API is allowed to re-use on every
                        // call:
                        token.Payload = BytesRef.DeepCopyOf(dpEnum.GetPayload());
                    }

                    // Yes - this is the position, not the increment! This is for
                    // sorting. This value
                    // will be corrected before use.
                    token.PositionIncrement = pos;
                    this.positionedTokens.Add(token);
                }
            }

            CollectionUtil.TimSort(this.positionedTokens, tokenComparer);

            int lastPosition = -1;

            foreach (Token token in this.positionedTokens)
            {
                int thisPosition = token.PositionIncrement;
                token.PositionIncrement = thisPosition - lastPosition;
                lastPosition            = thisPosition;
            }
            this.tokensAtCurrentPosition = this.positionedTokens.GetEnumerator();
        }

Ejemplo n.º 16

Mostrar archivo

        public virtual void Merge(MergeState mergeState, IndexOptions indexOptions, TermsEnum termsEnum)
        {
            BytesRef term;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termsEnum != null);
            }
            long        sumTotalTermFreq         = 0;
            long        sumDocFreq               = 0;
            long        sumDFsinceLastAbortCheck = 0;
            FixedBitSet visitedDocs              = new FixedBitSet(mergeState.SegmentInfo.DocCount);

            if (indexOptions == IndexOptions.DOCS_ONLY)
            {
                if (docsEnum == null)
                {
                    docsEnum = new MappingMultiDocsEnum();
                }
                docsEnum.MergeState = mergeState;

                MultiDocsEnum docsEnumIn = null;

                while (termsEnum.MoveNext())
                {
                    term = termsEnum.Term;
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, DocsFlags.NONE);
                    if (docsEnumIn != null)
                    {
                        docsEnum.Reset(docsEnumIn);
                        PostingsConsumer postingsConsumer = StartTerm(term);
                        TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, docsEnum, visitedDocs);
                        if (stats.DocFreq > 0)
                        {
                            FinishTerm(term, stats);
                            sumTotalTermFreq         += stats.DocFreq;
                            sumDFsinceLastAbortCheck += stats.DocFreq;
                            sumDocFreq += stats.DocFreq;
                            if (sumDFsinceLastAbortCheck > 60000)
                            {
                                mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                                sumDFsinceLastAbortCheck = 0;
                            }
                        }
                    }
                }
            }
            else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
            {
                if (docsAndFreqsEnum == null)
                {
                    docsAndFreqsEnum = new MappingMultiDocsEnum();
                }
                docsAndFreqsEnum.MergeState = mergeState;

                MultiDocsEnum docsAndFreqsEnumIn = null;

                while (termsEnum.MoveNext())
                {
                    term = termsEnum.Term;
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docsAndFreqsEnumIn != null);
                    }
                    docsAndFreqsEnum.Reset(docsAndFreqsEnumIn);
                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, docsAndFreqsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq         += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                if (postingsEnum == null)
                {
                    postingsEnum = new MappingMultiDocsAndPositionsEnum();
                }
                postingsEnum.MergeState = mergeState;
                MultiDocsAndPositionsEnum postingsEnumIn = null;
                while (termsEnum.MoveNext())
                {
                    term = termsEnum.Term;
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsFlags.PAYLOADS);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(postingsEnumIn != null);
                    }
                    postingsEnum.Reset(postingsEnumIn);

                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq         += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            else
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                }
                if (postingsEnum == null)
                {
                    postingsEnum = new MappingMultiDocsAndPositionsEnum();
                }
                postingsEnum.MergeState = mergeState;
                MultiDocsAndPositionsEnum postingsEnumIn = null;
                while (termsEnum.MoveNext())
                {
                    term = termsEnum.Term;
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(postingsEnumIn != null);
                    }
                    postingsEnum.Reset(postingsEnumIn);

                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats        stats            = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq         += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            Finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality());
        }

Ejemplo n.º 17

Mostrar archivo

        public virtual void TestSimple()
        {
            int numNodes = TestUtil.NextInt32(Random, 1, 10);

            double runTimeSec = AtLeast(3);

            int minDocsToMakeTerms = TestUtil.NextInt32(Random, 5, 20);

            int maxSearcherAgeSeconds = TestUtil.NextInt32(Random, 1, 3);

            if (Verbose)
            {
                Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
            }

            Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

            JCG.List <PreviousSearchState> priorSearches = new JCG.List <PreviousSearchState>();
            IList <BytesRef> terms = null;

            while (J2N.Time.NanoTime() < endTimeNanos)
            {
                bool doFollowon = priorSearches.Count > 0 && Random.Next(7) == 1;

                // Pick a random node; we will run the query on this node:
                int myNodeID = Random.Next(numNodes);

                NodeState.ShardIndexSearcher localShardSearcher;

                PreviousSearchState prevSearchState;

                if (doFollowon)
                {
                    // Pretend user issued a followon query:
                    prevSearchState = priorSearches[Random.Next(priorSearches.Count)];

                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: follow-on query age=" + ((J2N.Time.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
                    }

                    try
                    {
                        localShardSearcher = m_nodes[myNodeID].Acquire(prevSearchState.Versions);
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected, sometimes; in a "real" app we would
                        // either forward this error to the user ("too
                        // much time has passed; please re-run your
                        // search") or sneakily just switch to newest
                        // searcher w/o telling them...
                        if (Verbose)
                        {
                            Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                        }
                        priorSearches.Remove(prevSearchState);
                        continue;
                    }
                }
                else
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: fresh query");
                    }
                    // Do fresh query:
                    localShardSearcher = m_nodes[myNodeID].Acquire();
                    prevSearchState    = null;
                }

                IndexReader[] subs = new IndexReader[numNodes];

                PreviousSearchState searchState = null;

                try
                {
                    // Mock: now make a single reader (MultiReader) from all node
                    // searchers.  In a real shard env you can't do this... we
                    // do it to confirm results from the shard searcher
                    // are correct:
                    int docCount = 0;
                    try
                    {
                        for (int nodeID = 0; nodeID < numNodes; nodeID++)
                        {
                            long          subVersion = localShardSearcher.GetNodeVersions()[nodeID];
                            IndexSearcher sub        = m_nodes[nodeID].Searchers.Acquire(subVersion);
                            if (sub is null)
                            {
                                nodeID--;
                                while (nodeID >= 0)
                                {
                                    subs[nodeID].DecRef();
                                    subs[nodeID] = null;
                                    nodeID--;
                                }
                                throw new SearcherExpiredException("nodeID=" + nodeID + " version=" + subVersion);
                            }
                            subs[nodeID] = sub.IndexReader;
                            docCount    += subs[nodeID].MaxDoc;
                        }
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected
                        if (Verbose)
                        {
                            Console.WriteLine("  searcher expired during mock reader init: " + see);
                        }
                        continue;
                    }

                    IndexReader   mockReader   = new MultiReader(subs);
                    IndexSearcher mockSearcher = new IndexSearcher(mockReader);

                    Query query;
                    Sort  sort;

                    if (prevSearchState != null)
                    {
                        query = prevSearchState.Query;
                        sort  = prevSearchState.Sort;
                    }
                    else
                    {
                        if (terms is null && docCount > minDocsToMakeTerms)
                        {
                            // TODO: try to "focus" on high freq terms sometimes too
                            // TODO: maybe also periodically reset the terms...?
                            TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").GetEnumerator();
                            terms = new JCG.List <BytesRef>();
                            while (termsEnum.MoveNext())
                            {
                                terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
                            }
                            if (Verbose)
                            {
                                Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                            }
                            if (terms.Count == 0)
                            {
                                terms = null;
                            }
                        }

                        if (Verbose)
                        {
                            Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                        }

                        if (terms != null)
                        {
                            if (Random.NextBoolean())
                            {
                                query = new TermQuery(new Term("body", terms[Random.Next(terms.Count)]));
                            }
                            else
                            {
                                string t = terms[Random.Next(terms.Count)].Utf8ToString();
                                string prefix;
                                if (t.Length <= 1)
                                {
                                    prefix = t;
                                }
                                else
                                {
                                    prefix = t.Substring(0, TestUtil.NextInt32(Random, 1, 2));
                                }
                                query = new PrefixQuery(new Term("body", prefix));
                            }

                            if (Random.NextBoolean())
                            {
                                sort = null;
                            }
                            else
                            {
                                // TODO: sort by more than 1 field
                                int what = Random.Next(3);
                                if (what == 0)
                                {
                                    sort = new Sort(SortField.FIELD_SCORE);
                                }
                                else if (what == 1)
                                {
                                    // TODO: this sort doesn't merge
                                    // correctly... it's tricky because you
                                    // could have > 2.1B docs across all shards:
                                    //sort = new Sort(SortField.FIELD_DOC);
                                    sort = null;
                                }
                                else if (what == 2)
                                {
                                    sort = new Sort(new SortField[] { new SortField("docid", SortFieldType.INT32, Random.NextBoolean()) });
                                }
                                else
                                {
                                    sort = new Sort(new SortField[] { new SortField("title", SortFieldType.STRING, Random.NextBoolean()) });
                                }
                            }
                        }
                        else
                        {
                            query = null;
                            sort  = null;
                        }
                    }

                    if (query != null)
                    {
                        try
                        {
                            searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                        }
                        catch (SearcherExpiredException see)
                        {
                            // Expected; in a "real" app we would
                            // either forward this error to the user ("too
                            // much time has passed; please re-run your
                            // search") or sneakily just switch to newest
                            // searcher w/o telling them...
                            if (Verbose)
                            {
                                Console.WriteLine("  searcher expired during search: " + see);
                                Console.Out.Write(see.StackTrace);
                            }
                            // We can't do this in general: on a very slow
                            // computer it's possible the local searcher
                            // expires before we can finish our search:
                            // assert prevSearchState != null;
                            if (prevSearchState != null)
                            {
                                priorSearches.Remove(prevSearchState);
                            }
                        }
                    }
                }
                finally
                {
                    //m_nodes[myNodeID].Release(localShardSearcher);
                    NodeState.Release(localShardSearcher); // LUCENENET: Made Release() static per CA1822 for performance
                    foreach (IndexReader sub in subs)
                    {
                        if (sub != null)
                        {
                            sub.DecRef();
                        }
                    }
                }

                if (searchState != null && searchState.SearchAfterLocal != null && Random.Next(5) == 3)
                {
                    priorSearches.Add(searchState);
                    if (priorSearches.Count > 200)
                    {
                        priorSearches.Shuffle(Random);
                        priorSearches.RemoveRange(100, priorSearches.Count - 100); // LUCENENET: Converted end index to length
                    }
                }
            }

            Finish();
        }

Ejemplo n.º 18

Mostrar archivo

Archivo: Test10KPulsings.cs Proyecto: zalintyre/lucenenet

        public virtual void Test10kPulsed()
        {
            // we always run this test with pulsing codec.
            Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));

            DirectoryInfo        f   = CreateTempDir("10kpulsed");
            BaseDirectoryWrapper dir = NewFSDirectory(f);

            dir.CheckIndexOnDispose = false; // we do this ourselves explicitly
            RandomIndexWriter iw = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));

            Document  document = new Document();
            FieldType ft       = new FieldType(TextField.TYPE_STORED);

            switch (TestUtil.NextInt32(Random, 0, 2))
            {
            case 0:
                ft.IndexOptions = IndexOptions.DOCS_ONLY;
                break;

            case 1:
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                break;

            default:
                ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                break;
            }

            Field field = NewField("field", "", ft);

            document.Add(field);

            //NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));

            for (int i = 0; i < 10050; i++)
            {
                //field.StringValue = df.format(i);
                field.SetStringValue(i.ToString("00000", CultureInfo.InvariantCulture));
                iw.AddDocument(document);
            }

            IndexReader ir = iw.GetReader();

            iw.Dispose();

            TermsEnum te = MultiFields.GetTerms(ir, "field").GetEnumerator();
            DocsEnum  de = null;

            for (int i = 0; i < 10050; i++)
            {
                //string expected = df.format(i);
                string expected = i.ToString("00000", CultureInfo.InvariantCulture);
                te.MoveNext();
                assertEquals(expected, te.Term.Utf8ToString());
                de = TestUtil.Docs(Random, te, null, de, DocsFlags.NONE);
                assertTrue(de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.NextDoc());
            }
            ir.Dispose();

            TestUtil.CheckIndex(dir);
            dir.Dispose();
        }

Ejemplo n.º 19

Mostrar archivo

        private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
        {
            AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
            Fields       memFields  = memIndexReader.Fields;

            foreach (string field in competitor.Fields)
            {
                Terms memTerms = memFields.GetTerms(field);
                Terms iwTerms  = memIndexReader.GetTerms(field);
                if (iwTerms == null)
                {
                    assertNull(memTerms);
                }
                else
                {
                    NumericDocValues normValues    = competitor.GetNormValues(field);
                    NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
                    if (normValues != null)
                    {
                        // mem idx always computes norms on the fly
                        assertNotNull(memNormValues);
                        assertEquals(normValues.Get(0), memNormValues.Get(0));
                    }

                    assertNotNull(memTerms);
                    assertEquals(iwTerms.DocCount, memTerms.DocCount);
                    assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
                    assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
                    TermsEnum iwTermsIter  = iwTerms.GetEnumerator();
                    TermsEnum memTermsIter = memTerms.GetEnumerator();
                    if (iwTerms.HasPositions)
                    {
                        bool offsets = iwTerms.HasOffsets && memTerms.HasOffsets;

                        while (iwTermsIter.MoveNext())
                        {
                            assertTrue(memTermsIter.MoveNext());
                            assertEquals(iwTermsIter.Term, memTermsIter.Term);
                            DocsAndPositionsEnum iwDocsAndPos  = iwTermsIter.DocsAndPositions(null, null);
                            DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                                for (int i = 0; i < iwDocsAndPos.Freq; i++)
                                {
                                    assertEquals("term: " + iwTermsIter.Term.Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                                    if (offsets)
                                    {
                                        assertEquals(iwDocsAndPos.StartOffset, memDocsAndPos.StartOffset);
                                        assertEquals(iwDocsAndPos.EndOffset, memDocsAndPos.EndOffset);
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        while (iwTermsIter.MoveNext())
                        {
                            assertEquals(iwTermsIter.Term, memTermsIter.Term);
                            DocsEnum iwDocsAndPos  = iwTermsIter.Docs(null, null);
                            DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                            }
                        }
                    }
                }
            }
        }