Terms() public method

this may return null if the field does not exist.
public Terms ( string field ) : Lucene.Net.Index.Terms
field string
return Lucene.Net.Index.Terms
Beispiel #1
0
        public virtual void TestDocsEnumStart()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc    = new Document();

            doc.Add(NewStringField("foo", "bar", Field.Store.NO));
            writer.AddDocument(doc);
            DirectoryReader reader = writer.Reader;
            AtomicReader    r      = GetOnlySegmentReader(reader);
            DocsEnum        disi   = TestUtil.Docs(Random(), r, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
            int             docid  = disi.DocID();

            Assert.AreEqual(-1, docid);
            Assert.IsTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            // now reuse and check again
            TermsEnum te = r.Terms("foo").Iterator(null);

            Assert.IsTrue(te.SeekExact(new BytesRef("bar")));
            disi  = TestUtil.Docs(Random(), te, null, disi, DocsEnum.FLAG_NONE);
            docid = disi.DocID();
            Assert.AreEqual(-1, docid);
            Assert.IsTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            writer.Dispose();
            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestDocsAndPositionsEnumStart()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            Document          doc    = new Document();

            doc.Add(NewTextField("foo", "bar", Field.Store.NO));
            writer.AddDocument(doc);
            DirectoryReader      reader = writer.Reader;
            AtomicReader         r      = GetOnlySegmentReader(reader);
            DocsAndPositionsEnum disi   = r.TermPositionsEnum(new Term("foo", "bar"));
            int docid = disi.DocID();

            Assert.AreEqual(-1, docid);
            Assert.IsTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            // now reuse and check again
            TermsEnum te = r.Terms("foo").Iterator(null);

            Assert.IsTrue(te.SeekExact(new BytesRef("bar")));
            disi  = te.DocsAndPositions(null, disi);
            docid = disi.DocID();
            Assert.AreEqual(-1, docid);
            Assert.IsTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            writer.Dispose();
            r.Dispose();
            dir.Dispose();
        }
        public override void Warm(AtomicReader reader)
        {
            long startTime      = Environment.TickCount;
            int  indexedCount   = 0;
            int  docValuesCount = 0;
            int  normsCount     = 0;

            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.Indexed)
                {
                    reader.Terms(info.Name);
                    indexedCount++;

                    if (info.HasNorms())
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues())
                {
                    switch (info.DocValuesType)
                    {
                    case DocValuesType_e.NUMERIC:
                        reader.GetNumericDocValues(info.Name);
                        break;

                    case DocValuesType_e.BINARY:
                        reader.GetBinaryDocValues(info.Name);
                        break;

                    case DocValuesType_e.SORTED:
                        reader.GetSortedDocValues(info.Name);
                        break;

                    case DocValuesType_e.SORTED_SET:
                        reader.GetSortedSetDocValues(info.Name);
                        break;

                    default:
                        Debug.Assert(false);     // unknown dv type
                        break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (InfoStream.IsEnabled("SMSW"))
            {
                InfoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (Environment.TickCount - startTime));
            }
        }
        public override void Warm(AtomicReader reader)
        {
            long startTime = DateTime.Now.Millisecond;
            int indexedCount = 0;
            int docValuesCount = 0;
            int normsCount = 0;
            foreach (FieldInfo info in reader.FieldInfos)
            {
                if (info.Indexed)
                {
                    reader.Terms(info.Name);
                    indexedCount++;

                    if (info.HasNorms())
                    {
                        reader.GetNormValues(info.Name);
                        normsCount++;
                    }
                }

                if (info.HasDocValues())
                {
                    switch (info.DocValuesType)
                    {
                        case DocValuesType_e.NUMERIC:
                            reader.GetNumericDocValues(info.Name);
                            break;

                        case DocValuesType_e.BINARY:
                            reader.GetBinaryDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED:
                            reader.GetSortedDocValues(info.Name);
                            break;

                        case DocValuesType_e.SORTED_SET:
                            reader.GetSortedSetDocValues(info.Name);
                            break;

                        default:
                            Debug.Assert(false); // unknown dv type
                            break;
                    }
                    docValuesCount++;
                }
            }

            reader.Document(0);
            reader.GetTermVectors(0);

            if (InfoStream.IsEnabled("SMSW"))
            {
                InfoStream.Message("SMSW", "Finished warming segment: " + reader + ", indexed=" + indexedCount + ", docValues=" + docValuesCount + ", norms=" + normsCount + ", time=" + (DateTime.Now.Millisecond - startTime));
            }
        }
 public virtual DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, Bits liveDocs)
 {
     Terms terms = reader.Terms(FieldName);
     if (terms != null)
     {
         TermsEnum te = terms.Iterator(null);
         if (te.SeekExact(bytes))
         {
             return te.DocsAndPositions(liveDocs, null);
         }
     }
     return null;
 }
        public virtual DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, Bits liveDocs)
        {
            Terms terms = reader.Terms(FieldName);

            if (terms != null)
            {
                TermsEnum te = terms.Iterator(null);
                if (te.SeekExact(bytes))
                {
                    return(te.DocsAndPositions(liveDocs, null));
                }
            }
            return(null);
        }
Beispiel #7
0
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
            bool           isSimpleText   = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }
            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }
            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }

            postingsList = CollectionsHelper.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            Field     prototype = NewTextField("field", "", Field.Store.NO);
            FieldType fieldType = new FieldType((FieldType)prototype.FieldType);

            if (Random().NextBoolean())
            {
                fieldType.OmitNorms = true;
            }
            int options = Random().Next(3);

            if (options == 0)
            {
                fieldType.IndexOptions     = FieldInfo.IndexOptions.DOCS_AND_FREQS; // we dont actually need positions
                fieldType.StoreTermVectors = true;                                  // but enforce term vectors when we do this so we check SOMETHING
            }
            else if (options == 1 && !DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            // else just positions

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                Random   threadRandom = new Random(Random().Next());
                Document document     = new Document();
                Field    field        = new Field("field", "", fieldType);
                document.Add(field);
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, terms.Size());
            TermsEnum termsEnum = terms.Iterator(null);
            BytesRef  termBR;

            while ((termBR = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(termBR.Utf8ToString());
                Assert.AreEqual(value, termsEnum.TotalTermFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that totalTermFreq == total number of positions seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public virtual void Test()
        {
            IList <string> postingsList   = new List <string>();
            int            numTerms       = AtLeast(300);
            int            maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);

            bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
            {
                // Otherwise test can take way too long (> 2 hours)
                numTerms /= 2;
            }

            if (VERBOSE)
            {
                Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
                Console.WriteLine("numTerms=" + numTerms);
            }

            for (int i = 0; i < numTerms; i++)
            {
                string term = Convert.ToString(i);
                for (int j = 0; j < i; j++)
                {
                    postingsList.Add(term);
                }
            }
            postingsList = CollectionsHelper.Shuffle(postingsList);

            ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList);

            Directory         dir = NewFSDirectory(CreateTempDir("bagofpostings"));
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, iwc);

            int threadCount = TestUtil.NextInt(Random(), 1, 5);

            if (VERBOSE)
            {
                Console.WriteLine("config: " + iw.w.Config);
                Console.WriteLine("threadCount=" + threadCount);
            }

            ThreadClass[]  threads     = new ThreadClass[threadCount];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int threadID = 0; threadID < threadCount; threadID++)
            {
                threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun);
                threads[threadID].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }

            iw.ForceMerge(1);
            DirectoryReader ir = iw.Reader;

            Assert.AreEqual(1, ir.Leaves.Count);
            AtomicReader air   = (AtomicReader)ir.Leaves[0].Reader;
            Terms        terms = air.Terms("field");

            // numTerms-1 because there cannot be a term 0 with 0 postings:
            Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
            if (iwc.Codec is Lucene3xCodec == false)
            {
                Assert.AreEqual(numTerms - 1, terms.Size());
            }
            TermsEnum termsEnum = terms.Iterator(null);
            BytesRef  term_;

            while ((term_ = termsEnum.Next()) != null)
            {
                int value = Convert.ToInt32(term_.Utf8ToString());
                Assert.AreEqual(value, termsEnum.DocFreq());
                // don't really need to check more than this, as CheckIndex
                // will verify that docFreq == actual number of documents seen
                // from a docsAndPositionsEnum.
            }
            ir.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
        public virtual void TestIgnoreStoredFields()
        {
            Directory       dir1 = GetDir1(Random());
            Directory       dir2 = GetDir2(Random());
            CompositeReader ir1  = DirectoryReader.Open(dir1);
            CompositeReader ir2  = DirectoryReader.Open(dir2);

            // with overlapping
            ParallelCompositeReader pr = new ParallelCompositeReader(false, new CompositeReader[] { ir1, ir2 }, new CompositeReader[] { ir1 });

            Assert.AreEqual("v1", pr.Document(0).Get("f1"));
            Assert.AreEqual("v1", pr.Document(0).Get("f2"));
            Assert.IsNull(pr.Document(0).Get("f3"));
            Assert.IsNull(pr.Document(0).Get("f4"));
            // check that fields are there
            AtomicReader slow = SlowCompositeReaderWrapper.Wrap(pr);

            Assert.IsNotNull(slow.Terms("f1"));
            Assert.IsNotNull(slow.Terms("f2"));
            Assert.IsNotNull(slow.Terms("f3"));
            Assert.IsNotNull(slow.Terms("f4"));
            pr.Dispose();

            // no stored fields at all
            pr = new ParallelCompositeReader(false, new CompositeReader[] { ir2 }, new CompositeReader[0]);
            Assert.IsNull(pr.Document(0).Get("f1"));
            Assert.IsNull(pr.Document(0).Get("f2"));
            Assert.IsNull(pr.Document(0).Get("f3"));
            Assert.IsNull(pr.Document(0).Get("f4"));
            // check that fields are there
            slow = SlowCompositeReaderWrapper.Wrap(pr);
            Assert.IsNull(slow.Terms("f1"));
            Assert.IsNull(slow.Terms("f2"));
            Assert.IsNotNull(slow.Terms("f3"));
            Assert.IsNotNull(slow.Terms("f4"));
            pr.Dispose();

            // without overlapping
            pr = new ParallelCompositeReader(true, new CompositeReader[] { ir2 }, new CompositeReader[] { ir1 });
            Assert.AreEqual("v1", pr.Document(0).Get("f1"));
            Assert.AreEqual("v1", pr.Document(0).Get("f2"));
            Assert.IsNull(pr.Document(0).Get("f3"));
            Assert.IsNull(pr.Document(0).Get("f4"));
            // check that fields are there
            slow = SlowCompositeReaderWrapper.Wrap(pr);
            Assert.IsNull(slow.Terms("f1"));
            Assert.IsNull(slow.Terms("f2"));
            Assert.IsNotNull(slow.Terms("f3"));
            Assert.IsNotNull(slow.Terms("f4"));
            pr.Dispose();

            // no main readers
            try
            {
                new ParallelCompositeReader(true, new CompositeReader[0], new CompositeReader[] { ir1 });
                Assert.Fail("didn't get expected exception: need a non-empty main-reader array");
            }
            catch (System.ArgumentException iae)
            {
                // pass
            }

            dir1.Dispose();
            dir2.Dispose();
        }