public RegexTermEnum(IndexReader reader, Term term, IRegexCapabilities regexImpl)
		{
			_sField = term.Field();
			string sText = term.Text();
			
			_regexImpl = regexImpl;

			_regexImpl.Compile(sText);

			_sPre = _regexImpl.Prefix() ?? "";

			SetEnum(reader.Terms(new Term(term.Field(), _sPre)));
		}
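A brief, hedged usage sketch for the constructor above; it assumes the contrib CSharpRegexCapabilities implementation of IRegexCapabilities and an already-open IndexReader named reader, neither of which appears in the snippet:

		// Sketch only: enumerate every indexed term in the "body" field that matches a regex.
		Term pattern = new Term("body", "lu.*ne");
		RegexTermEnum regexEnum = new RegexTermEnum(reader, pattern, new CSharpRegexCapabilities());
		try
		{
			// FilteredTermEnum convention: the enum is already positioned on the first match.
			for (Term t = regexEnum.Term(); t != null; t = regexEnum.Next() ? regexEnum.Term() : null)
			{
				System.Console.WriteLine(t);
			}
		}
		finally
		{
			regexEnum.Close();
		}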
Example No. 2
        public virtual void  TestTerms()
        {
            TermEnum terms = reader.Terms();

            Assert.IsTrue(terms != null);
            while (terms.Next() == true)
            {
                Term term = terms.Term();
                Assert.IsTrue(term != null);
                //System.out.println("Term: " + term);
                System.String fieldValue = (System.String)DocHelper.nameValues[term.Field()];
                Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1);
            }

            TermDocs termDocs = reader.TermDocs();

            Assert.IsTrue(termDocs != null);
            termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
            Assert.IsTrue(termDocs.Next() == true);

            termDocs.Seek(new Term(DocHelper.NO_NORMS_KEY, DocHelper.NO_NORMS_TEXT));
            Assert.IsTrue(termDocs.Next() == true);


            TermPositions positions = reader.TermPositions();

            positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
            Assert.IsTrue(positions != null);
            Assert.IsTrue(positions.Doc() == 0);
            Assert.IsTrue(positions.NextPosition() >= 0);
        }
Example No. 3
		public virtual void  TestSimpleSkip()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Term term = new Term("test", "a");
			for (int i = 0; i < 5000; i++)
			{
				Document d1 = new Document();
				d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d1);
			}
			writer.Flush();
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
			SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
			tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);
			
			for (int i = 0; i < 2; i++)
			{
				counter = 0;
				tp.Seek(term);
				
				CheckSkipTo(tp, 14, 185); // no skips
				CheckSkipTo(tp, 17, 190); // one skip on level 0
				CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
				
				// this test would fail if we had only one skip level,
				// because then more bytes would be read from the freqStream
				CheckSkipTo(tp, 4800, 250); // one skip on level 2
			}
		}
Example No. 4
        public virtual void  TestTerms()
        {
            try
            {
                TermEnum terms = reader.Terms();
                Assert.IsTrue(terms != null);
                while (terms.Next() == true)
                {
                    Term term = terms.Term();
                    Assert.IsTrue(term != null);
                    //System.out.println("Term: " + term);
                    System.String fieldValue = (System.String)DocHelper.nameValues[term.Field()];
                    Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1);
                }

                TermDocs termDocs = reader.TermDocs();
                Assert.IsTrue(termDocs != null);
                termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
                Assert.IsTrue(termDocs.Next() == true);

                TermPositions positions = reader.TermPositions();
                positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
                Assert.IsTrue(positions != null);
                Assert.IsTrue(positions.Doc() == 0);
                Assert.IsTrue(positions.NextPosition() >= 0);
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.IsTrue(false);
            }
        }
Example No. 5
        public virtual void  TestSimpleSkip()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Term         term   = new Term("test", "a");

            for (int i = 0; i < 5000; i++)
            {
                Document d1 = new Document();
                d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(d1);
            }
            writer.Flush();
            writer.Optimize();
            writer.Close();

            IndexReader          reader = SegmentReader.GetOnlySegmentReader(dir);
            SegmentTermPositions tp     = (SegmentTermPositions)reader.TermPositions();

            tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);

            for (int i = 0; i < 2; i++)
            {
                counter = 0;
                tp.Seek(term);

                CheckSkipTo(tp, 14, 185);                 // no skips
                CheckSkipTo(tp, 17, 190);                 // one skip on level 0
                CheckSkipTo(tp, 287, 200);                // one skip on level 1, two on level 0

                // this test would fail if we had only one skip level,
                // because then more bytes would be read from the freqStream
                CheckSkipTo(tp, 4800, 250);                 // one skip on level 2
            }
        }
Example No. 6
        public virtual void TestSimpleSkip()
        {
            Directory   dir    = new CountingRAMDirectory(this, new RAMDirectory());
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())).SetMergePolicy(NewLogMergePolicy()));
            Term        term   = new Term("test", "a");

            for (int i = 0; i < 5000; i++)
            {
                Document d1 = new Document();
                d1.Add(NewTextField(term.Field(), term.Text(), Field.Store.NO));
                writer.AddDocument(d1);
            }
            writer.Commit();
            writer.ForceMerge(1);
            writer.Dispose();

            AtomicReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir));

            for (int i = 0; i < 2; i++)
            {
                Counter = 0;
                DocsAndPositionsEnum tp = reader.TermPositionsEnum(term);
                CheckSkipTo(tp, 14, 185);  // no skips
                CheckSkipTo(tp, 17, 190);  // one skip on level 0
                CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

                // this test would fail if we had only one skip level,
                // because then more bytes would be read from the freqStream
                CheckSkipTo(tp, 4800, 250); // one skip on level 2
            }
        }
Example No. 7
        /// <summary>
        /// Creates a <seealso cref="TermContext"/> from a top-level <seealso cref="IndexReaderContext"/> and the
        /// given <seealso cref="Term"/>. this method will look up the given term in all of the context's leaf readers
        /// and register each of the readers containing the term in the returned <seealso cref="TermContext"/>
        /// using the leaf reader's ordinal.
        /// <p>
        /// Note: the given context must be a top-level context.
        /// </summary>
        public static TermContext Build(IndexReaderContext context, Term term)
        {
            Debug.Assert(context != null && context.IsTopLevel);
            string      field = term.Field();
            BytesRef    bytes = term.Bytes();
            TermContext perReaderTermState = new TermContext(context);

            //if (DEBUG) System.out.println("prts.build term=" + term);
            foreach (AtomicReaderContext ctx in context.Leaves)
            {
                //if (DEBUG) System.out.println("  r=" + leaves[i].reader);
                Fields fields = ctx.AtomicReader.Fields;
                if (fields != null)
                {
                    Terms terms = fields.Terms(field);
                    if (terms != null)
                    {
                        TermsEnum termsEnum = terms.Iterator(null);
                        if (termsEnum.SeekExact(bytes))
                        {
                            TermState termState = termsEnum.TermState();
                            //if (DEBUG) System.out.println("    found");
                            perReaderTermState.Register(termState, ctx.Ord, termsEnum.DocFreq(), termsEnum.TotalTermFreq());
                        }
                    }
                }
            }
            return(perReaderTermState);
        }
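A minimal usage sketch for Build, assuming a DirectoryReader named reader; the exact accessor for the aggregated statistics (DocFreq here) may differ slightly between ports:

            // Sketch only: resolve the term once against the top-level context and reuse
            // the per-segment state, e.g. when building several scorers for the same term.
            Term term = new Term("content", "lucene");
            TermContext states = TermContext.Build(reader.Context, term);
            int docFreq = states.DocFreq; // document frequency summed over the leaves that registered the term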
Example No. 8
        /// <summary>
        /// Returns the total number of occurrences of the term
        /// <code>t</code> across all documents.  this method returns 0
        /// if the term or field does not exist.  this method does not
        /// take into account deleted documents that have not yet been
        /// merged away.
        /// </summary>
        public override sealed long TotalTermFreq(Term term)
        {
            Fields fields = Fields;

            if (fields == null)
            {
                return(0);
            }
            Terms terms = fields.Terms(term.Field());

            if (terms == null)
            {
                return(0);
            }
            TermsEnum termsEnum = terms.Iterator(null);

            if (termsEnum.SeekExact(term.Bytes()))
            {
                return(termsEnum.TotalTermFreq());
            }
            else
            {
                return(0);
            }
        }
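The caller's side of the two statistics, as a hedged sketch (assuming an open IndexReader named reader):

            // Sketch only: both calls return 0 when the field or term is absent.
            Term t = new Term("content", "lucene");
            int docFreq = reader.DocFreq(t);              // number of documents containing the term
            long totalTermFreq = reader.TotalTermFreq(t); // total number of occurrences of the term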
Example No. 9
        public override int DocFreq(Term term)
        {
            EnsureOpen();
            IndexReader reader = ((IndexReader)fieldToReader[term.Field()]);

            return(reader == null?0:reader.DocFreq(term));
        }
Example No. 10
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return(false);
                }

                // another term in this field?
                if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field)
                {
                    return(true);                 // yes, keep going
                }
                termEnum.Close();                 // close old termEnum

                // find the next field with terms, if any
                if (fieldIterator == null)
                {
                    System.Collections.Comparer   comparer = System.Collections.Comparer.Default;
                    System.Collections.SortedList newList  = new System.Collections.SortedList();
                    if (Enclosing_Instance.fieldToReader != null)
                    {
                        if (Enclosing_Instance.fieldToReader.Count > 0)
                        {
                            int index = 0;
                            while (comparer.Compare(Enclosing_Instance.fieldToReader.GetKey(index), field) < 0)
                            {
                                index++;
                            }
                            for (; index < Enclosing_Instance.fieldToReader.Count; index++)
                            {
                                newList.Add(Enclosing_Instance.fieldToReader.GetKey(index), Enclosing_Instance.fieldToReader[Enclosing_Instance.fieldToReader.GetKey(index)]);
                            }
                        }
                    }

                    fieldIterator = newList.Keys.GetEnumerator();
                    fieldIterator.MoveNext();
                    System.Object generatedAux = fieldIterator.Current;                     // Skip field to get next one
                }
                while (fieldIterator.MoveNext())
                {
                    field    = ((System.String)fieldIterator.Current);
                    termEnum = ((IndexReader)Enclosing_Instance.fieldToReader[field]).Terms(new Term(field));
                    Term term = termEnum.Term();
                    if (term != null && (System.Object)term.Field() == (System.Object)field)
                    {
                        return(true);
                    }
                    else
                    {
                        termEnum.Close();
                    }
                }

                return(false);                // no more fields
            }
Example No. 11
 public override bool TermCompare(Term term)
 {
     prefix = base.GetPrefixTerm();
     if ((System.Object)term.Field() == (System.Object)prefix.Field() && term.Text().Equals(prefix.Text()))
     {
         return true;
     }
     endEnum = true;
     return false;
 }
Example No. 12
 // used only by assert
 private bool CheckDeleteTerm(Term term)
 {
     if (term != null)
     {
         Debug.Assert(LastDeleteTerm == null || term.CompareTo(LastDeleteTerm) > 0, "lastTerm=" + LastDeleteTerm + " vs term=" + term);
     }
     // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
     LastDeleteTerm = term == null ? null : new Term(term.Field(), BytesRef.DeepCopyOf(term.Bytes_Renamed));
     return(true);
 }
Example No. 13
        /// <summary>
        /// Returns <seealso cref="DocsAndPositionsEnum"/> for the specified
        ///  term.  this will return null if the
        ///  field or term does not exist or positions weren't indexed. </summary>
        ///  <seealso cref= TermsEnum#docsAndPositions(Bits, DocsAndPositionsEnum)  </seealso>
        public DocsAndPositionsEnum TermPositionsEnum(Term term)
        {
            Debug.Assert(term.Field() != null);
            Debug.Assert(term.Bytes() != null);
            Fields fields = Fields;

            if (fields != null)
            {
                Terms terms = fields.Terms(term.Field());
                if (terms != null)
                {
                    TermsEnum termsEnum = terms.Iterator(null);
                    if (termsEnum.SeekExact(term.Bytes()))
                    {
                        return(termsEnum.DocsAndPositions(LiveDocs, null));
                    }
                }
            }
            return(null);
        }
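A short, hedged iteration sketch over the enum returned above (assuming an AtomicReader named reader and the same method-style accessors used in the snippet):

            // Sketch only: walk every posting and its positions for one term.
            DocsAndPositionsEnum dpe = reader.TermPositionsEnum(new Term("body", "lucene"));
            if (dpe != null) // null when the field/term is missing or positions were not indexed
            {
                while (dpe.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    int freq = dpe.Freq();
                    for (int i = 0; i < freq; i++)
                    {
                        int position = dpe.NextPosition(); // 0-based token position within the current doc
                    }
                }
            }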
Example No. 14
		/// <summary>Equality compare on the term </summary>
		public override bool TermCompare(Term term)
		{
			if (_sField == term.Field())
			{
				string sSearchText = term.Text();
				if (sSearchText.StartsWith(_sPre)) return _regexImpl.Match(sSearchText);
			} //eif

			_bEndEnum = true;
			return false;
		}
Example No. 15
            public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
            {
                InitBlock(enclosingInstance);
                field = term.Field();
                IndexReader reader = ((IndexReader)Enclosing_Instance.fieldToReader[field]);

                if (reader != null)
                {
                    termEnum = reader.Terms(term);
                }
            }
Example No. 16
 /// <param name="term"> The term documents need to have in order to be a match for this filter. </param>
 public TermFilter(Term term)
 {
     if (term == null)
     {
         throw new System.ArgumentException("Term must not be null");
     }
     else if (term.Field() == null)
     {
         throw new System.ArgumentException("Field must not be null");
     }
     this.term = term;
 }
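A hedged usage sketch for the filter; the IndexSearcher named searcher and the Query named userQuery are assumptions, not shown above:

     // Sketch only: restrict an arbitrary query to documents containing an exact term.
     Filter publishedOnly = new TermFilter(new Term("status", "published"));
     TopDocs hits = searcher.Search(userQuery, publishedOnly, 10);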
Example No. 17
        public virtual int[] ToDocsArray(Term term, Bits bits, IndexReader reader)
        {
            Fields    fields     = MultiFields.GetFields(reader);
            Terms     cterms     = fields.Terms(term.Field());
            TermsEnum ctermsEnum = cterms.Iterator(null);

            if (ctermsEnum.SeekExact(new BytesRef(term.Text())))
            {
                DocsEnum docsEnum = TestUtil.Docs(Random(), ctermsEnum, bits, null, DocsEnum.FLAG_NONE);
                return(ToArray(docsEnum));
            }
            return(null);
        }
Example No. 18
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return(false);
                }

                // another term in this field?
                if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field)
                {
                    return(true);                 // yes, keep going
                }
                termEnum.Close();                 // close old termEnum

                // find the next field with terms, if any
                if (fieldIterator == null)
                {
                    List <string> tmpList = new List <string>();
                    bool          m       = false;
                    //JAVA: fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
                    //JAVA: fieldIterator.next();  // Skip field to get next one
                    foreach (string key in Enclosing_Instance.fieldToReader.Keys)
                    {
                        if (key == field && m == false)
                        {
                            m = true;
                        }
                        if (m)
                        {
                            tmpList.Add(key);
                        }
                    }
                    fieldIterator = tmpList.GetEnumerator();
                }
                while (fieldIterator.MoveNext())
                {
                    field    = fieldIterator.Current;
                    termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
                    Term term = termEnum.Term();
                    if (term != null && (System.Object)term.Field() == (System.Object)field)
                    {
                        return(true);
                    }
                    else
                    {
                        termEnum.Close();
                    }
                }

                return(false);                // no more fields
            }
Example No. 19
        public void  Set(Term term)
        {
            if (term == null)
            {
                Reset();
                return;
            }

            // copy text into the buffer
            SetTextLength(term.Text().Length);
            text = term.Text().ToCharArray();

            this.field = term.Field();
            this.term  = term;
        }
Example No. 20
        public static int Count(Term t, IndexReader r)
        {
            int count = 0;
            DocsEnum td = TestUtil.Docs(Random(), r, t.Field(), new BytesRef(t.Text()), MultiFields.GetLiveDocs(r), null, 0);

            if (td != null)
            {
                while (td.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    td.DocID();
                    count++;
                }
            }
            return count;
        }
Example No. 21
 /// <summary>
 /// Gets the documents that contain the specified Term
 /// </summary>
 /// <param name="term"></param>
 /// <returns></returns>
 public IList<TermDoc> DocumentCount(Term term)
 {
     TermDocs docs = open.Reader.TermDocs(term);
     List<TermDoc> list = new List<TermDoc>();
     while (docs.Next()) {
         TermDoc doc2 = new TermDoc();
         doc2.Freq = docs.Freq();
         doc2.Doc = docs.Doc();
         doc2.Term = term;
         doc2.Norm = GetNorm(open.Reader, term.Field(), doc2.Doc);
         TermDoc item = doc2;
         list.Add(item);
     }
     docs.Close();
     return list;
 }
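A hedged usage sketch; the enclosing type is not shown above, so the indexInfo instance is hypothetical:

     // Sketch only: list the postings the method collects for one term.
     IList<TermDoc> postings = indexInfo.DocumentCount(new Term("title", "lucene"));
     foreach (TermDoc td in postings)
     {
         System.Console.WriteLine("doc=" + td.Doc + " freq=" + td.Freq + " norm=" + td.Norm);
     }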
Example No. 22
        public void  Set(Term term)
        {
            if (term == null)
            {
                Reset();
                return;
            }
            System.String termText = term.Text();
            int           termLen  = termText.Length;

            text.SetLength(termLen);
            SupportClass.TextSupport.GetCharsFromString(termText, 0, termLen, text.result, 0);
            dirty     = true;
            field     = term.Field();
            this.term = term;
        }
Example No. 23
        public void  Set(Term term)
        {
            if (term == null)
            {
                Reset();
                return;
            }
            string termText = term.Text();
            int    termLen  = termText.Length;

            text.setLength(termLen);
            for (int i = 0; i < termLen; i++)
            {
                text.result[i] = (char)termText[i];
            }
            dirty     = true;
            field     = term.Field();
            this.term = term;
        }
Example No. 24
        public void  Set(Term term)
        {
            if (term == null)
            {
                Reset();
                return;
            }

            // copy text into the buffer
            SetTextLength(term.Text().Length);

            System.String sourceString = term.Text();
            int           sourceEnd    = term.Text().Length;

            for (int i = 0; i < sourceEnd; i++)
            {
                text[i] = (char)sourceString[i];
            }

            this.field = term.Field();
            this.term  = term;
        }
Example No. 25
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return(false);
                }

                // another term in this field?
                if (termEnum.Next() && (System.Object)termEnum.Term().Field() == (System.Object)field)
                {
                    return(true);                 // yes, keep going
                }
                termEnum.Close();                 // close old termEnum

                // find the next field with terms, if any
                if (fieldIterator == null)
                {
                    fieldIterator = SupportClass.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
                    fieldIterator.MoveNext();                     // Skip field to get next one
                }
                while (fieldIterator.MoveNext())
                {
                    field    = ((System.String)fieldIterator.Current);
                    termEnum = ((IndexReader)Enclosing_Instance.fieldToReader[field]).Terms(new Term(field, ""));
                    Term term = termEnum.Term();
                    if (term != null && (System.Object)term.Field() == (System.Object)field)
                    {
                        return(true);
                    }
                    else
                    {
                        termEnum.Close();
                    }
                }

                return(false);                // no more fields
            }
Example No. 26
            public override bool Next()
            {
                if (termEnum == null)
                {
                    return(false);
                }

                // another term in this field?
                if (termEnum.Next() && (object)termEnum.Term().Field() == (object)field)
                {
                    return(true);                 // yes, keep going
                }
                termEnum.Close();                 // close old termEnum

                // find the next field with terms, if any
                if (fieldIterator == null)
                {
                    fieldIterator = SupportClass.CollectionsSupport.TailMap(Enclosing_Instance.fieldToReader, field).Keys.GetEnumerator();
                }
                while (fieldIterator.MoveNext())
                {
                    field    = fieldIterator.Current;
                    termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
                    Term term = termEnum.Term();
                    if (term != null && (object)term.Field() == (object)field)
                    {
                        return(true);
                    }
                    else
                    {
                        termEnum.Close();
                    }
                }

                return(false);                // no more fields
            }
Example No. 27
 //--
 public virtual Term VisitTerm(Term term)
 {
     var field = term.Field();
     var text = term.Text();
     var visitedField = VisitField(field);
     var visitedText = VisitFieldText(text);
     if (field == visitedField && text == visitedText)
         return term;
     return new Term(visitedField, visitedText);
 }
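A hypothetical subclass sketch showing how VisitTerm composes with the two hooks it calls; the base type name and the assumption that VisitFieldText is virtual are inferred from the method above, not confirmed by it:

 // Sketch only: lower-case the term text and leave the field name alone. VisitTerm then
 // returns the original Term instance whenever nothing actually changed.
 public class LowerCaseTermVisitor : SomeQueryVisitor // base type name is hypothetical
 {
     public override string VisitFieldText(string text)
     {
         return text == null ? null : text.ToLowerInvariant();
     }
 }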
Example No. 28
 private string TermToString(Term t)
 {
     var fieldName = t.Field();
     var value = t.Text();
     return String.Concat(fieldName, ":", value);
 }
Example No. 29
			public virtual void  Seek(Term term)
			{
				IndexReader reader = ((IndexReader) Enclosing_Instance.fieldToReader[term.Field()]);
				termDocs = reader != null?reader.TermDocs(term):null;
			}
Example No. 30
		public override int DocFreq(Term term)
		{
			EnsureOpen();
			IndexReader reader = ((IndexReader) fieldToReader[term.Field()]);
			return reader == null?0:reader.DocFreq(term);
		}
Example No. 31
 public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
 {
     InitBlock(enclosingInstance);
     field    = term.Field();
     termEnum = ((IndexReader)Enclosing_Instance.fieldToReader[field]).Terms(term);
 }
Example No. 32
        public virtual void TestSkipTo(int indexDivisor)
        {
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            Term ta = new Term("content", "aaa");
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, "aaa aaa aaa aaa");
            }

            Term tb = new Term("content", "bbb");
            for (int i = 0; i < 16; i++)
            {
                AddDoc(writer, "bbb bbb bbb bbb");
            }

            Term tc = new Term("content", "ccc");
            for (int i = 0; i < 50; i++)
            {
                AddDoc(writer, "ccc ccc ccc ccc");
            }

            // assure that we deal with a single segment
            writer.ForceMerge(1);
            writer.Dispose();

            IndexReader reader = DirectoryReader.Open(dir, indexDivisor);

            DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            // without optimization (assumption skipInterval == 16)

            // with next
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(2, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0);

            Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(11, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(12, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(27, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(28, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

            //without next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0);
            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

            reader.Dispose();
            dir.Dispose();
        }
Example No. 33
			public virtual void  Seek(Term term)
			{
				termDocs = ((IndexReader) Enclosing_Instance.fieldToReader[term.Field()]).TermDocs(term);
			}
Example No. 34
        public virtual void AddTerm(Term term, int docIDUpto)
        {
            int? current;

            Terms.TryGetValue(term, out current);
            if (current != null && docIDUpto < current)
            {
                // Only record the new number if it's greater than the
                // current one.  this is important because if multiple
                // threads are replacing the same doc at nearly the
                // same time, it's possible that one thread that got a
                // higher docID is scheduled before the other
                // threads.  If we blindly replace then we can
                // incorrectly get both docs indexed.
                return;
            }

            Terms[term] = Convert.ToInt32(docIDUpto);
            // note that if current != null then it means there's already a buffered
            // delete on that term, therefore we seem to over-count. this over-counting
            // is done to respect IndexWriterConfig.setMaxBufferedDeleteTerms.
            NumTermDeletes.IncrementAndGet();
            if (current == null)
            {
                BytesUsed.AddAndGet(BYTES_PER_DEL_TERM + term.Bytes_Renamed.Length + (RamUsageEstimator.NUM_BYTES_CHAR * term.Field().Length));
            }
        }
Example No. 35
 public virtual int[] ToDocsArray(Term term, Bits bits, IndexReader reader)
 {
     Fields fields = MultiFields.GetFields(reader);
     Terms cterms = fields.Terms(term.Field());
     TermsEnum ctermsEnum = cterms.Iterator(null);
     if (ctermsEnum.SeekExact(new BytesRef(term.Text())))
     {
         DocsEnum docsEnum = TestUtil.Docs(Random(), ctermsEnum, bits, null, DocsEnum.FLAG_NONE);
         return ToArray(docsEnum);
     }
     return null;
 }
Example No. 36
        public virtual void AddTerm(Term term, int docIDUpto)
        {
            int? current;
            Terms.TryGetValue(term, out current);
            if (current != null && docIDUpto < current)
            {
                // Only record the new number if it's greater than the
                // current one.  this is important because if multiple
                // threads are replacing the same doc at nearly the
                // same time, it's possible that one thread that got a
                // higher docID is scheduled before the other
                // threads.  If we blindly replace then we can
                // incorrectly get both docs indexed.
                return;
            }

            Terms[term] = Convert.ToInt32(docIDUpto);
            // note that if current != null then it means there's already a buffered
            // delete on that term, therefore we seem to over-count. this over-counting
            // is done to respect IndexWriterConfig.setMaxBufferedDeleteTerms.
            NumTermDeletes.IncrementAndGet();
            if (current == null)
            {
                BytesUsed.AddAndGet(BYTES_PER_DEL_TERM + term.Bytes_Renamed.Length + (RamUsageEstimator.NUM_BYTES_CHAR * term.Field().Length));
            }
        }
Example No. 37
        public virtual void TestSkipTo(int indexDivisor)
        {
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

            Term ta = new Term("content", "aaa");

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, "aaa aaa aaa aaa");
            }

            Term tb = new Term("content", "bbb");

            for (int i = 0; i < 16; i++)
            {
                AddDoc(writer, "bbb bbb bbb bbb");
            }

            Term tc = new Term("content", "ccc");

            for (int i = 0; i < 50; i++)
            {
                AddDoc(writer, "ccc ccc ccc ccc");
            }

            // assure that we deal with a single segment
            writer.ForceMerge(1);
            writer.Dispose();

            IndexReader reader = DirectoryReader.Open(dir, indexDivisor);

            DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            // without optimization (assumption skipInterval == 16)

            // with next
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(2, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0);

            Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(0, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(4, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(9, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(11, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(12, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // without next
            tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(10, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(15, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(24, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(25, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);

            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(27, tdocs.DocID());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(28, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

            //without next
            tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0);
            Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(26, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(40, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(57, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(74, tdocs.DocID());
            Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(75, tdocs.DocID());
            Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);

            reader.Dispose();
            dir.Dispose();
        }
Example No. 38
		public override int DocFreq(Term term)
		{
			return ((IndexReader) fieldToReader[term.Field()]).DocFreq(term);
		}
Example No. 39
			public override void  Seek(Term term)
			{
				termDocs = ((IndexReader) Enclosing_Instance.fieldToReader[term.Field()]).TermPositions(term);
			}
Example No. 40
            public virtual void  Seek(Term term)
            {
                IndexReader reader = ((IndexReader)Enclosing_Instance.fieldToReader[term.Field()]);

                termDocs = reader != null?reader.TermDocs(term) : null;
            }
Example No. 41
 internal virtual void AssertTermDocsCount(string msg, IndexReader reader, Term term, int expected)
 {
     DocsEnum tdocs = TestUtil.Docs(Random(), reader, term.Field(), new BytesRef(term.Text()), MultiFields.GetLiveDocs(reader), null, 0);
     int count = 0;
     if (tdocs != null)
     {
         while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
         {
             count++;
         }
     }
     Assert.AreEqual(expected, count, msg + ", count mismatch");
 }
Example No. 42
        public virtual void TestSimpleSkip()
        {
            Directory dir = new CountingRAMDirectory(this, new RAMDirectory());
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())).SetMergePolicy(NewLogMergePolicy()));
            Term term = new Term("test", "a");
            for (int i = 0; i < 5000; i++)
            {
                Document d1 = new Document();
                d1.Add(NewTextField(term.Field(), term.Text(), Field.Store.NO));
                writer.AddDocument(d1);
            }
            writer.Commit();
            writer.ForceMerge(1);
            writer.Dispose();

            AtomicReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir));

            for (int i = 0; i < 2; i++)
            {
                Counter = 0;
                DocsAndPositionsEnum tp = reader.TermPositionsEnum(term);
                CheckSkipTo(tp, 14, 185); // no skips
                CheckSkipTo(tp, 17, 190); // one skip on level 0
                CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

                // this test would fail if we had only one skip level,
                // because then more bytes would be read from the freqStream
                CheckSkipTo(tp, 4800, 250); // one skip on level 2
            }
        }
Example No. 43
        public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
        {
            Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
            bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

            int[] r2r1 = new int[r2.MaxDoc()];             // r2 id to r1 id mapping

            TermDocs termDocs1 = r1.TermDocs();
            TermDocs termDocs2 = r2.TermDocs();

            // create mapping from id2 space to id1 based on idField
            idField = StringHelper.Intern(idField);
            TermEnum termEnum = r1.Terms(new Term(idField, ""));

            do
            {
                Term term = termEnum.Term();
                if (term == null || (System.Object)term.Field() != (System.Object)idField)
                {
                    break;
                }

                termDocs1.Seek(termEnum);
                if (!termDocs1.Next())
                {
                    // This doc is deleted and wasn't replaced
                    termDocs2.Seek(termEnum);
                    Assert.IsFalse(termDocs2.Next());
                    continue;
                }

                int id1 = termDocs1.Doc();
                Assert.IsFalse(termDocs1.Next());

                termDocs2.Seek(termEnum);
                Assert.IsTrue(termDocs2.Next());
                int id2 = termDocs2.Doc();
                Assert.IsFalse(termDocs2.Next());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (System.Exception t)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
                    System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
                    throw t;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
                }
                catch (System.Exception e)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
                    System.Console.Out.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        for (int i = 0; i < tv1.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
                        }
                    }

                    TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
                    System.Console.Out.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        for (int i = 0; i < tv2.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
                        }
                    }

                    throw e;
                }
            }while (termEnum.Next());

            termEnum.Close();

            // Verify postings
            TermEnum termEnum1 = r1.Terms(new Term("", ""));
            TermEnum termEnum2 = r2.Terms(new Term("", ""));

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs()];
            long[] info2 = new long[r2.NumDocs()];

            for (; ;)
            {
                Term term1, term2;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1  = 0;
                    term1 = termEnum1.Term();
                    if (term1 == null)
                    {
                        break;
                    }
                    termDocs1.Seek(termEnum1);
                    while (termDocs1.Next())
                    {
                        int d1 = termDocs1.Doc();
                        int f1 = termDocs1.Freq();
                        info1[len1] = (((long)d1) << 32) | f1;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                    if (!termEnum1.Next())
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2  = 0;
                    term2 = termEnum2.Term();
                    if (term2 == null)
                    {
                        break;
                    }
                    termDocs2.Seek(termEnum2);
                    while (termDocs2.Next())
                    {
                        int d2 = termDocs2.Doc();
                        int f2 = termDocs2.Freq();
                        info2[len2] = (((long)r2r1[d2]) << 32) | f2;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                    if (!termEnum2.Next())
                    {
                        break;
                    }
                }

                if (!hasDeletes)
                {
                    Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0)
                {
                    break;                     // no more terms
                }
                Assert.AreEqual(term1, term2);

                // sort info2 to get it into ascending docid
                System.Array.Sort(info2, 0, len2 - 0);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i]);
                }

                termEnum1.Next();
                termEnum2.Next();
            }
        }
Example No. 44
		public void  Set(Term term)
		{
			if (term == null)
			{
				Reset();
				return ;
			}
			
			// copy text into the buffer
			SetTextLength(term.Text().Length);

			System.String sourceString = term.Text();
			int sourceEnd = term.Text().Length;
			for (int i = 0; i < sourceEnd; i++)
			{
				text[i] = (char) sourceString[i];
			}
			
			this.field = term.Field();
			this.term = term;
		}
Example No. 45
        private OpenBitSet FastBits(IndexReader reader)
        {

            OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
            bits.Set(0, reader.MaxDoc()); //assume all are valid
            Term startTerm = new Term(fieldName);
            TermEnum te = reader.Terms(startTerm);
            if (te != null)
            {
                Term currTerm = te.Term();

                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    if (te.DocFreq() > 1)
                    {
                        int lastDoc = -1;
                        //unset potential duplicates
                        TermDocs td = reader.TermDocs(currTerm);
                        td.Next();
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            td.Next();
                        }
                        do
                        {
                            lastDoc = td.Doc();
                            bits.Clear(lastDoc);
                        } while (td.Next());
                        if (keepMode == KM_USE_LAST_OCCURRENCE)
                        {
                            //restore the last bit
                            bits.Set(lastDoc);
                        }
                    }
                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            return bits;
        }
Example No. 46
 public override int DocFreq(Term term)
 {
     return(((IndexReader)fieldToReader[term.Field()]).DocFreq(term));
 }
Example No. 47
        private OpenBitSet CorrectBits(IndexReader reader)
        {

            OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid
            Term startTerm = new Term(fieldName);
            TermEnum te = reader.Terms(startTerm);
            if (te != null)
            {
                Term currTerm = te.Term();
                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    int lastDoc = -1;
                    //set non duplicates
                    TermDocs td = reader.TermDocs(currTerm);
                    if (td.Next())
                    {
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            bits.Set(td.Doc());
                        }
                        else
                        {
                            do
                            {
                                lastDoc = td.Doc();
                            } while (td.Next());
                            bits.Set(lastDoc);
                        }
                    }
                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            return bits;
        }
Example No. 48
			public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
			{
				InitBlock(enclosingInstance);
				field = term.Field();
				IndexReader reader = ((IndexReader) Enclosing_Instance.fieldToReader[field]);
				if (reader != null)
					termEnum = reader.Terms(term);
			}
Example No. 49
        private void contextMenuItemShowAll_Click(object sender, System.EventArgs e)
        {
            if (listTerms.SelectedItems == null) return;

            ListViewItem selItem = listTerms.SelectedItems[0];
            if (selItem == null) return;

            string field = selItem.SubItems[2].Text.Trim().Substring(1, selItem.SubItems[2].Text.Trim().Length - 2);
            string text = selItem.SubItems[3].Text;

            if (field == null || text == null)
                return;

            Term t = new Term(field, text);

            _luke.Search(t.Field() + ":" + t.Text());
        }
Example No. 50
			public override void  Seek(Term term)
			{
				IndexReader reader = ((IndexReader) Enclosing_Instance.fieldToReader[term.Field()]);
				termDocs = reader != null?reader.TermPositions(term):null;
			}
Example No. 51
 private void VerifyTermDocs(Directory dir, Term term, int numDocs)
 {
     IndexReader reader = DirectoryReader.Open(dir);
     DocsEnum docsEnum = TestUtil.Docs(Random(), reader, term.Field(), term.Bytes(), null, null, DocsEnum.FLAG_NONE);
     int count = 0;
     while (docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
     {
         count++;
     }
     Assert.AreEqual(numDocs, count);
     reader.Dispose();
 }
Example No. 52
        private string GetTermText(Term term)
        {
            var fieldName = term.Field();
            var fieldText = term.Text();
            if (fieldText == null)
                return null;

            var info = SenseNet.ContentRepository.Schema.ContentTypeManager.GetPerFieldIndexingInfo(fieldName);
            if (info == null)
            {
                var c = fieldText.ToCharArray();
                for (int i = 0; i < c.Length; i++)
                    if (c[i] < ' ')
                        c[i] = '.';
                return new String(c);
            }
            var fieldHandler = info.IndexFieldHandler;
            switch (fieldHandler.IndexFieldType)
            {
                case SenseNet.Search.Indexing.IndexFieldType.String:
                    return GetTermText(fieldText);
                case SenseNet.Search.Indexing.IndexFieldType.Int:
                    return Convert.ToString(NumericUtils.PrefixCodedToInt(fieldText), CultureInfo.InvariantCulture);
                case SenseNet.Search.Indexing.IndexFieldType.Long:
                    return Convert.ToString(NumericUtils.PrefixCodedToLong(fieldText), CultureInfo.InvariantCulture);
                case SenseNet.Search.Indexing.IndexFieldType.Float:
                    return Convert.ToString(NumericUtils.PrefixCodedToFloat(fieldText), CultureInfo.InvariantCulture);
                case SenseNet.Search.Indexing.IndexFieldType.Double:
                    return Convert.ToString(NumericUtils.PrefixCodedToDouble(fieldText), CultureInfo.InvariantCulture);
                case SenseNet.Search.Indexing.IndexFieldType.DateTime:
                    var d = new DateTime(NumericUtils.PrefixCodedToLong(fieldText));
                    if (d.Hour == 0 && d.Minute == 0 && d.Second == 0)
                        return GetTermText(d.ToString("yyyy-MM-dd"));
                    if (d.Second == 0)
                        return GetTermText(d.ToString("yyyy-MM-dd HH:mm"));
                    return GetTermText(d.ToString("yyyy-MM-dd HH:mm:ss"));
                default:
                    throw new NotImplementedException("Unknown IndexFieldType: " + fieldHandler.IndexFieldType);
            }
        }
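The numeric branches above simply undo Lucene's prefix coding. A quick round-trip illustration of the DateTime case; the sample date and the LongToPrefixCoded call are my own additions, not part of the snippet:

            // Hypothetical round trip: a date stored as prefix-coded ticks is decoded back to text.
            long ticks = new DateTime(2024, 1, 15).Ticks;
            string stored = NumericUtils.LongToPrefixCoded(ticks);               // what the index holds
            var decoded = new DateTime(NumericUtils.PrefixCodedToLong(stored));
            Console.WriteLine(decoded.ToString("yyyy-MM-dd"));                   // "2024-01-15"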
Example No. 53
 public void Set(Term term)
 {
     if (term == null)
     {
         Reset();
         return ;
     }
     System.String termText = term.Text();
     int termLen = termText.Length;
     text.SetLength(termLen);
     SupportClass.TextSupport.GetCharsFromString(termText, 0, termLen, text.result, 0);
     dirty = true;
     field = term.Field();
     this.term = term;
 }
Example No. 54
 public void Set(Term term)
 {
     if (term == null)
     {
         Reset();
         return ;
     }
     string termText = term.Text();
     int termLen = termText.Length;
     text.setLength(termLen);
     for (int i = 0; i < termLen; i++)
     {
         text.result[i] = (char) termText[i];
     }
     dirty = true;
     field = term.Field();
     this.term = term;
 }
Example No. 55
 public override void  Seek(Term term)
 {
     termDocs = ((IndexReader)Enclosing_Instance.fieldToReader[term.Field()]).TermPositions(term);
 }
Example No. 56
        // DocValues updates
        private void ApplyDocValuesUpdates <T1>(IEnumerable <T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
        {
            lock (this)
            {
                Fields fields = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return;
                }

                // TODO: we can process the updates per DV field, from last to first so that
                // if multiple terms affect same document for the same field, we add an update
                // only once (that of the last term). To do that, we can keep a bitset which
                // marks which documents have already been updated. So e.g. if term T1
                // updates doc 7, and then we process term T2 and it updates doc 7 as well,
                // we don't apply the update since we know T1 came last and therefore wins
                // the update.
                // We can also use that bitset as 'liveDocs' to pass to TermsEnum.Docs(), so
                // that these documents aren't even returned.

                string    currentField = null;
                TermsEnum termsEnum    = null;
                DocsEnum  docs         = null;

                //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
                foreach (DocValuesUpdate update in updates)
                {
                    Term term  = update.Term;
                    int  limit = update.DocIDUpto;

                    // TODO: we traverse the terms in update order (not term order) so that we
                    // apply the updates in the correct order, i.e. if two terms update the
                    // same document, the last one that came in wins, irrespective of the
                    // terms lexical order.
                    // we can apply the updates in terms order if we keep an updatesGen (and
                    // increment it with every update) and attach it to each NumericUpdate. Note
                    // that we cannot rely only on docIDUpto because an app may send two updates
                    // which will get same docIDUpto, yet will still need to respect the order
                    // those updates arrived.

                    if (!term.Field().Equals(currentField))
                    {
                        // if we change the code to process updates in terms order, enable this assert
                        //        assert currentField == null || currentField.compareTo(term.field()) < 0;
                        currentField = term.Field();
                        Terms terms = fields.Terms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.Iterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                            continue; // no terms in that field
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes()))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);

                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
                        if (dvUpdates == null)
                        {
                            dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
                        }
                        int doc;
                        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                            if (doc >= limit)
                            {
                                break; // no more docs that can be updated for this term
                            }
                            dvUpdates.Add(doc, update.Value);
                        }
                    }
                }
            }
        }
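The inner loop above is the standard 4.x postings walk: resolve the field's Terms, seek the TermsEnum to the exact term bytes, then drain a DocsEnum. A stripped-down sketch of just that walk; the "id" field and "doc-42" term are invented, and the Fields parameter stands in for the reader.Fields instance used in the method above.

        // Hypothetical helper showing the seek-then-iterate pattern in isolation.
        private static void VisitMatchingDocs(Fields fields)
        {
            Terms terms = fields.Terms("id");                  // invented field name
            if (terms == null)
            {
                return;                                        // field has no postings
            }
            TermsEnum termsEnum = terms.Iterator(null);
            if (!termsEnum.SeekExact(new BytesRef("doc-42")))  // invented term value
            {
                return;                                        // term not present
            }
            DocsEnum docsEnum = termsEnum.Docs(null, null, DocsEnum.FLAG_NONE);
            int doc;
            while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                // 'doc' is the segment-local id of a document containing id:doc-42
            }
        }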
Example No. 57
 public virtual void  Seek(Term term)
 {
     termDocs = ((IndexReader)Enclosing_Instance.fieldToReader[term.Field()]).TermDocs(term);
 }
Example No. 58
            public override void  Seek(Term term)
            {
                IndexReader reader = ((IndexReader)Enclosing_Instance.fieldToReader[term.Field()]);

                termDocs = reader != null ? reader.TermPositions(term) : null;
            }
Example No. 59
			public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
			{
				InitBlock(enclosingInstance);
				field = term.Field();
				termEnum = ((IndexReader) Enclosing_Instance.fieldToReader[field]).Terms(term);
			}