TermDocs() public abstract method

Returns an unpositioned Lucene.Net.Index.TermDocs enumerator.
public abstract TermDocs TermDocs()

Returns: TermDocs
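
Because the enumerator comes back unpositioned, a caller either asks for a positioned one with TermDocs(Term) or calls Seek(...) before iterating with Next(). Below is a minimal counting sketch, assuming an already-open Directory named dir and placeholder field/term values; note that older 2.x builds expose Doc()/Freq() as methods (used here), later versions turn them into the Doc/Freq properties, and some forks thread an extra IState argument through every call, as several of the examples below show:

IndexReader reader = IndexReader.Open(dir, true);          // read-only reader over an existing Directory
TermDocs td = reader.TermDocs(new Term("partnum", "Q36")); // enumerator positioned at the term
try
{
    int count = 0;
    while (td.Next())             // advance to the next document containing the term
    {
        int docId = td.Doc();     // current document id
        int freq = td.Freq();     // occurrences of the term within that document
        count++;
    }
}
finally
{
    td.Close();                   // the enumerator holds index file handles
    reader.Close();
}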
			internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms):base(similarity)
			{
				InitBlock(enclosingInstance);
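				// A null term yields an enumerator over all of the reader's non-deleted documents.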
				this.termDocs = reader.TermDocs(null);
				score = w.Value;
				this.norms = norms;
			}
Example #2
 internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms, IState state) : base(similarity)
 {
     InitBlock(enclosingInstance);
     this.termDocs = reader.TermDocs(null, state);
     score         = w.Value;
     this.norms    = norms;
 }
Example #3
            protected internal override string[] CreateValue(IndexReader reader, Entry entryKey, IState state)
            {
                System.String   field    = StringHelper.Intern(entryKey.field);
                System.String[] retArray = new System.String[reader.MaxDoc];
                TermDocs        termDocs = reader.TermDocs(state);
                TermEnum        termEnum = reader.Terms(new Term(field), state);

                try
                {
                    do
                    {
                        Term term = termEnum.Term;
                        if (term == null || (System.Object)term.Field != (System.Object)field)
                        {
                            break;
                        }
                        System.String termval = term.Text;
                        termDocs.Seek(termEnum, state);
                        while (termDocs.Next(state))
                        {
                            retArray[termDocs.Doc] = termval;
                        }
                    } while (termEnum.Next(state));
                }
                finally
                {
                    termDocs.Close();
                    termEnum.Close();
                }
                return(retArray);
            }
Example #4
            protected internal override float[] CreateValue(IndexReader reader, Entry entryKey, IState state)
            {
                Entry entry = entryKey;

                System.String field  = entry.field;
                FloatParser   parser = (FloatParser)entry.custom;

                if (parser == null)
                {
                    try
                    {
                        return(wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER, state));
                    }
                    catch (System.FormatException)
                    {
                        return(wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER, state));
                    }
                }
                float[]  retArray = null;
                TermDocs termDocs = reader.TermDocs(state);
                TermEnum termEnum = reader.Terms(new Term(field), state);

                try
                {
                    do
                    {
                        Term term = termEnum.Term;
                        if (term == null || (System.Object)term.Field != (System.Object)field)
                        {
                            break;
                        }
                        float termval = parser.ParseFloat(term.Text);
                        if (retArray == null)
                        {
                            // late init
                            retArray = new float[reader.MaxDoc];
                        }
                        termDocs.Seek(termEnum, state);
                        while (termDocs.Next(state))
                        {
                            retArray[termDocs.Doc] = termval;
                        }
                    } while (termEnum.Next(state));
                }
                catch (StopFillCacheException)
                {
                }
                finally
                {
                    termDocs.Close();
                    termEnum.Close();
                }
                if (retArray == null)
                {
                    // no values
                    retArray = new float[reader.MaxDoc];
                }
                return(retArray);
            }
 // constructor
 internal ValueSourceScorer(ValueSourceQuery enclosingInstance, Similarity similarity, IndexReader reader, ValueSourceWeight w)
     : base(similarity)
 {
     InitBlock(enclosingInstance);
     this.weight  = w;
     this.qWeight = w.Value;
     // this is when/where the values are first created.
     vals     = Enclosing_Instance.valSrc.GetValues(reader);
     termDocs = reader.TermDocs(null);
 }
Example #6
        public virtual void  TestMutipleDocument()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            Document     doc    = new Document();

            doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc, null);
            doc = new Document();
            doc.Add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc, null);
            writer.Close();

            IndexReader reader = IndexReader.Open((Directory)dir, true, null);
            TermDocs    td     = reader.TermDocs(new Term("partnum", "Q36"), null);

            Assert.IsTrue(td.Next(null));
            td = reader.TermDocs(new Term("partnum", "Q37"), null);
            Assert.IsTrue(td.Next(null));
        }
        public override BitArray Bits(IndexReader reader)
        {
            BitArray bitArray = new BitArray(reader.MaxDoc());
            TermDocs termDocs = reader.TermDocs(new Term("score", "5"));
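            // Set a bit for every document containing the term score:5.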
            while (termDocs.Next())
            {
                bitArray.Set(termDocs.Doc(), true);
            }

            return bitArray;
        }
		public static int Count(Term t, IndexReader r)
		{
			int count = 0;
			TermDocs td = r.TermDocs(t);
			while (td.Next())
			{
				td.Doc();
				count++;
			}
			td.Close();
			return count;
		}
        private void  VerifyTermDocs(Directory dir, Term term, int numDocs)
        {
            IndexReader reader   = IndexReader.Open(dir);
            TermDocs    termDocs = reader.TermDocs(term);
            int         count    = 0;

            while (termDocs.Next())
            {
                count++;
            }
            Assert.AreEqual(numDocs, count);
            reader.Close();
        }
Example #10
        public static int Count(Term t, IndexReader r)
        {
            int      count = 0;
            TermDocs td    = r.TermDocs(t, null);

            while (td.Next(null))
            {
                var d = td.Doc;
                count++;
            }
            td.Close();
            return(count);
        }
 public override DocIdSet GetDocIdSet(IndexReader reader)
 {
     OpenBitSet bitSet = new OpenBitSet(reader.NumDocs());
     TermDocs termDocs = reader.TermDocs(new Term("TenantId", _tenantId));
     while (termDocs.Next())
     {
         if (termDocs.Freq > 0)
         {
             bitSet.Set(termDocs.Doc);
         }
     }
     return bitSet;
 }
Example #12
        public static int Count(Term t, IndexReader r)
        {
            int      count = 0;
            TermDocs td    = r.TermDocs(t);

            while (td.Next())
            {
                td.Doc();
                count++;
            }
            td.Close();
            return(count);
        }
Example #13
        public virtual void  TestAllTermDocs()
        {
            IndexReader reader   = OpenReader();
            int         NUM_DOCS = 2;
            TermDocs    td       = reader.TermDocs(null);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(td.Next());
                Assert.AreEqual(i, td.Doc());
                Assert.AreEqual(1, td.Freq());
            }
            td.Close();
            reader.Close();
        }
Example #14
            protected internal override short[] CreateValue(IndexReader reader, Entry entryKey, IState state)
            {
                Entry entry = entryKey;

                System.String field  = entry.field;
                ShortParser   parser = (ShortParser)entry.custom;

                if (parser == null)
                {
                    return(wrapper.GetShorts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER, state));
                }
                short[]  retArray = new short[reader.MaxDoc];
                TermDocs termDocs = reader.TermDocs(state);
                TermEnum termEnum = reader.Terms(new Term(field), state);

                try
                {
                    do
                    {
                        Term term = termEnum.Term;
                        if (term == null || (System.Object)term.Field != (System.Object)field)
                        {
                            break;
                        }
                        short termval = parser.ParseShort(term.Text);
                        termDocs.Seek(termEnum, state);
                        while (termDocs.Next(state))
                        {
                            retArray[termDocs.Doc] = termval;
                        }
                    } while (termEnum.Next(state));
                }
                catch (StopFillCacheException)
                {
                }
                finally
                {
                    termDocs.Close();
                    termEnum.Close();
                }
                return(retArray);
            }
Example #15
        /// <summary>
        /// Get the DocIdSet.
        /// </summary>
        /// <param name="reader">Applcible reader.</param>
        /// <returns>The set.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSet result = new OpenBitSet(reader.MaxDoc());
            TermDocs td = reader.TermDocs();
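            // The enumerator starts unpositioned; Seek positions it at each term in turn.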
            try
            {
                foreach (Term t in this.terms)
                {
                    td.Seek(t);
                    while (td.Next())
                    {
                        result.Set(td.Doc());
                    }
                }
            }
            finally
            {
                td.Close();
            }

            return result;
        }
Example #16
        private static Dictionary<string, int[]> FillCache(IndexReader reader, int docBase, string field)
        {
            using (var termDocs = reader.TermDocs())
            {
                var items = new Dictionary<string, int[]>();
                var docsForTerm = new List<int>();

                using (var termEnum = reader.Terms(new Term(field)))
                {
                    do
                    {
                        if (termEnum.Term == null || field != termEnum.Term.Field)
                            break;

                        Term term = termEnum.Term;
                        if (LowPrecisionNumber(term.Field, term.Text))
                            continue;

                        var totalDocCountIncludedDeletes = termEnum.DocFreq();
                        termDocs.Seek(termEnum.Term);
                        while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                        {
                            var curDoc = termDocs.Doc;
                            totalDocCountIncludedDeletes -= 1;
                            if (reader.IsDeleted(curDoc))
                                continue;

                            docsForTerm.Add(curDoc + docBase);
                        }
                        docsForTerm.Sort();
                        items[term.Text] = docsForTerm.ToArray();
                        docsForTerm.Clear();
                    } while (termEnum.Next());
                }
                return items;
            }
        }
Example #17
            public override DocIdSetIterator Iterator()
            {
                // Synchronization needed because deleted docs BitVector
                // can change after call to hasDeletions until TermDocs creation.
                // We only use an iterator with termDocs when this was requested (e.g. range contains 0)
                // and the index has deletions
                TermDocs termDocs;

                lock (reader)
                {
                    termDocs = IsCacheable ? null : reader.TermDocs(null);
                }
                if (termDocs != null)
                {
                    // a DocIdSetIterator using TermDocs to iterate valid docIds
                    return(new AnonymousClassDocIdSetIterator(termDocs, this));
                }
                else
                {
                    // a DocIdSetIterator generating docIds by incrementing a variable -
                    // this one can be used if there are no deletions on the index
                    return(new AnonymousClassDocIdSetIterator1(this));
                }
            }
Example #18
		public override DocIdSet GetDocIdSet(IndexReader reader)
		{
			var bits = new OpenBitSet(reader.MaxDoc());

			TermDocs termDocs = reader.TermDocs();
			List<double> area = _shape.Area;
			int sz = area.Count;
			
			// iterate through each boxid
			for (int i = 0; i < sz; i++)
			{
				double boxId = area[i];
				termDocs.Seek(new Term(_fieldName, NumericUtils.DoubleToPrefixCoded(boxId)));

				// iterate through all documents
				// which have this boxId
				while (termDocs.Next())
				{
					bits.FastSet(termDocs.Doc());
				}
			}

			return bits;
		}
Example #19
		public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
		{
			Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
			bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());
			
			int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping
			
			TermDocs termDocs1 = r1.TermDocs();
			TermDocs termDocs2 = r2.TermDocs();
			
			// create mapping from id2 space to id1 based on idField
			idField = StringHelper.Intern(idField);
			TermEnum termEnum = r1.Terms(new Term(idField, ""));
			do 
			{
				Term term = termEnum.Term();
				if (term == null || (System.Object) term.Field() != (System.Object) idField)
					break;
				
				termDocs1.Seek(termEnum);
				if (!termDocs1.Next())
				{
					// This doc is deleted and wasn't replaced
					termDocs2.Seek(termEnum);
					Assert.IsFalse(termDocs2.Next());
					continue;
				}
				
				int id1 = termDocs1.Doc();
				Assert.IsFalse(termDocs1.Next());
				
				termDocs2.Seek(termEnum);
				Assert.IsTrue(termDocs2.Next());
				int id2 = termDocs2.Doc();
				Assert.IsFalse(termDocs2.Next());
				
				r2r1[id2] = id1;
				
				// verify stored fields are equivalent
				try
				{
					VerifyEquals(r1.Document(id1), r2.Document(id2));
				}
				catch (System.Exception t)
				{
					System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
					System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
					System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
					throw t;
				}
				
				try
				{
					// verify term vectors are equivalent        
					VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
				}
				catch (System.Exception e)
				{
					System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
					TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
					System.Console.Out.WriteLine("  d1=" + tv1);
					if (tv1 != null)
						for (int i = 0; i < tv1.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
						}
					
					TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
					System.Console.Out.WriteLine("  d2=" + tv2);
					if (tv2 != null)
						for (int i = 0; i < tv2.Length; i++)
						{
							System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
						}
					
					throw e;
				}
			}
			while (termEnum.Next());
			
			termEnum.Close();
			
			// Verify postings
			TermEnum termEnum1 = r1.Terms(new Term("", ""));
			TermEnum termEnum2 = r2.Terms(new Term("", ""));
			
			// pack both doc and freq into single element for easy sorting
			long[] info1 = new long[r1.NumDocs()];
			long[] info2 = new long[r2.NumDocs()];
			
			for (; ; )
			{
				Term term1, term2;
				
				// iterate until we get some docs
				int len1;
				for (; ; )
				{
					len1 = 0;
					term1 = termEnum1.Term();
					if (term1 == null)
						break;
					termDocs1.Seek(termEnum1);
					while (termDocs1.Next())
					{
						int d1 = termDocs1.Doc();
						int f1 = termDocs1.Freq();
						info1[len1] = (((long) d1) << 32) | f1;
						len1++;
					}
					if (len1 > 0)
						break;
					if (!termEnum1.Next())
						break;
				}
				
				// iterate until we get some docs
				int len2;
				for (; ; )
				{
					len2 = 0;
					term2 = termEnum2.Term();
					if (term2 == null)
						break;
					termDocs2.Seek(termEnum2);
					while (termDocs2.Next())
					{
						int d2 = termDocs2.Doc();
						int f2 = termDocs2.Freq();
						info2[len2] = (((long) r2r1[d2]) << 32) | f2;
						len2++;
					}
					if (len2 > 0)
						break;
					if (!termEnum2.Next())
						break;
				}
				
				if (!hasDeletes)
					Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
				
				Assert.AreEqual(len1, len2);
				if (len1 == 0)
					break; // no more terms
				
				Assert.AreEqual(term1, term2);
				
				// sort info2 to get it into ascending docid
				System.Array.Sort(info2, 0, len2 - 0);
				
				// now compare
				for (int i = 0; i < len1; i++)
				{
					Assert.AreEqual(info1[i], info2[i]);
				}
				
				termEnum1.Next();
				termEnum2.Next();
			}
		}
Example #20
        private int CheckDbAndIndex(DbDataReader dbreader, IndexReader ixreader, List<Difference> result)
        {
            var versionId = dbreader.GetInt32(0);
            var dbNodeTimestamp = dbreader.GetInt64(1);
            var dbVersionTimestamp = dbreader.GetInt64(2);

            var termDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.VersionId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(versionId)));
            Lucene.Net.Documents.Document doc = null;
            int docid = -1;
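            // A versionId should match exactly one index document; a second hit is reported as MoreDocument below.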
            if (termDocs.Next())
            {
                docid = termDocs.Doc();
                doc = ixreader.Document(docid);
                var indexNodeTimestamp = ParseLong(doc.Get(LucObject.FieldName.NodeTimestamp));
                var indexVersionTimestamp = ParseLong(doc.Get(LucObject.FieldName.VersionTimestamp));
                var nodeId = ParseInt(doc.Get(LucObject.FieldName.NodeId));
                var version = doc.Get(LucObject.FieldName.Version);
                var p = doc.Get(LucObject.FieldName.Path);
                if (termDocs.Next())
                {
                    result.Add(new Difference(IndexDifferenceKind.MoreDocument)
                    {
                        DocId = docid,
                        NodeId = nodeId,
                        VersionId = versionId,
                        Version = version,
                        Path = p,
                        DbNodeTimestamp = dbNodeTimestamp,
                        DbVersionTimestamp = dbVersionTimestamp,
                        IxNodeTimestamp = indexNodeTimestamp,
                        IxVersionTimestamp = indexVersionTimestamp,
                    });
                }
                if (dbVersionTimestamp != indexVersionTimestamp)
                {
                    result.Add(new Difference(IndexDifferenceKind.DifferentVersionTimestamp)
                    {
                        DocId = docid,
                        VersionId = versionId,
                        DbNodeTimestamp = dbNodeTimestamp,
                        DbVersionTimestamp = dbVersionTimestamp,
                        IxNodeTimestamp = indexNodeTimestamp,
                        IxVersionTimestamp = indexVersionTimestamp,
                        NodeId = nodeId,
                        Version = version,
                        Path = p
                    });
                }
                if (dbNodeTimestamp != indexNodeTimestamp)
                {
                    var ok = false;
                    var isLastDraft = doc.Get(LucObject.FieldName.IsLastDraft);
                    if (isLastDraft != BooleanIndexHandler.YES)
                    {
                        var latestDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.NodeId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(nodeId)));
                        Lucene.Net.Documents.Document latestDoc = null;
                        while (latestDocs.Next())
                        {
                            var latestdocid = latestDocs.Doc();
                            var d = ixreader.Document(latestdocid);
                            if (d.Get(LucObject.FieldName.IsLastDraft) != BooleanIndexHandler.YES)
                                continue;
                            latestDoc = d;
                            break;
                        }
                        var latestPath = latestDoc.Get(LucObject.FieldName.Path);
                        if (latestPath == p)
                            ok = true;
                    }
                    if (!ok)
                    {
                        result.Add(new Difference(IndexDifferenceKind.DifferentNodeTimestamp)
                        {
                            DocId = docid,
                            VersionId = versionId,
                            DbNodeTimestamp = dbNodeTimestamp,
                            DbVersionTimestamp = dbVersionTimestamp,
                            IxNodeTimestamp = indexNodeTimestamp,
                            IxVersionTimestamp = indexVersionTimestamp,
                            NodeId = nodeId,
                            Version = version,
                            Path = p
                        });
                    }
                }
            }
            else
            {
                result.Add(new Difference(IndexDifferenceKind.NotInIndex)
                {
                    DocId = docid,
                    VersionId = versionId,
                    DbNodeTimestamp = dbNodeTimestamp,
                    DbVersionTimestamp = dbVersionTimestamp,
                });
            }
            return docid;
        }
Example #21
        private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, List<string> fieldsToRead,IndexReader reader)
        {
            foreach (var field in fieldsToRead)
            {
                using (var termDocs = reader.TermDocs())
                {
                    using (var termEnum = reader.Terms(new Term(field)))
                    {
                        do
                        {
                            if (termEnum.Term == null || field != termEnum.Term.Field)
                                break;

                            if (LowPrecisionNumber(termEnum.Term))
                                continue;


                            var totalDocCountIncludedDeletes = termEnum.DocFreq();
                            termDocs.Seek(termEnum.Term);

                            while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                            {
                                totalDocCountIncludedDeletes -= 1;
                                if (reader.IsDeleted(termDocs.Doc))
                                    continue;

                                state.SetInCache(field, termDocs.Doc, termEnum.Term);
                            }
                        } while (termEnum.Next());
                    }
                }
            }
        }
Example #22
        private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, IEnumerable<string> fieldsToRead,IndexReader reader)
        {
            foreach (var field in fieldsToRead)
            {
	            var items = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>[reader.MaxDoc];
                using (var termDocs = reader.TermDocs())
                {
                    using (var termEnum = reader.Terms(new Term(field)))
                    {
                        do
                        {
                            if (termEnum.Term == null || field != termEnum.Term.Field)
                                break;

                            Term term = termEnum.Term;
                            if (LowPrecisionNumber(term.Field, term.Text))
                                continue;


                            var totalDocCountIncludedDeletes = termEnum.DocFreq();
                            termDocs.Seek(termEnum.Term);

                            while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                            {
                                totalDocCountIncludedDeletes -= 1;
                                if (reader.IsDeleted(termDocs.Doc))
                                    continue;
								if(items[termDocs.Doc] == null)
									items[termDocs.Doc] = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>();

	                            items[termDocs.Doc].AddLast(new IndexSearcherHolder.IndexSearcherHoldingState.CacheVal
	                            {
		                            Term = termEnum.Term
	                            });
                            }
                        } while (termEnum.Next());
                    }
                }
	            state.SetInCache(field, items);
            }
        }
Example #23
		// Apply buffered delete terms to the segment just flushed from ram
		// apply appropriately so that a delete term is only applied to
		// the documents buffered before it, not those buffered after it.
		private void  ApplyDeletesSelectively(System.Collections.Hashtable deleteTerms, System.Collections.IList deleteIds, IndexReader reader)
		{
			System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
			while (iter.MoveNext())
			{
				System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
				Term term = (Term) entry.Key;
				
				TermDocs docs = reader.TermDocs(term);
				if (docs != null)
				{
					int num = ((DocumentsWriter.Num) entry.Value).GetNum();
					try
					{
						while (docs.Next())
						{
							int doc = docs.Doc();
							if (doc >= num)
							{
								break;
							}
							reader.DeleteDocument(doc);
						}
					}
					finally
					{
						docs.Close();
					}
				}
			}
			
			if (deleteIds.Count > 0)
			{
				iter = deleteIds.GetEnumerator();
				while (iter.MoveNext())
				{
					reader.DeleteDocument(((System.Int32) iter.Current));
				}
			}
		}
        /// <summary>
        /// loads multi-value facet data. This method uses a workarea to prepare loading.
        /// </summary>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <param name="listFactory"></param>
        /// <param name="workArea"></param>
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            long t0 = Environment.TickCount;
            int maxdoc = reader.MaxDoc;
            BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            TermEnum tenum = null;
            TermDocs tdoc = null;
            ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList());
            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();
            OpenBitSet bitset = new OpenBitSet();
            int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int t = 0; // current term number
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;
            try
            {
                tdoc = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                            break;

                        string val = term.Text;

                        if (val != null)
                        {
                            list.Add(val);

                            tdoc.Seek(tenum);
                            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
                            int df = 0;
                            int minID = -1;
                            int maxID = -1;
                            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                    LogOverflow(fieldName);
                                minID = docid;
                                bitset.Set(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId))
                                        LogOverflow(fieldName);
                                    bitset.Set(docid);
                                }
                                maxID = docid;
                            }
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }

                        t++;
                    }
                    while (tenum.Next());
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
            }
            catch (System.IO.IOException e)
            {
                throw e;
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs = freqList.ToArray();
            this.minIDs = minIDList.ToArray();
            this.maxIDs = maxIDList.ToArray();

            int doc = 0;
            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
Example #25
        public virtual void  testSkipTo(int indexDivisor)
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Term ta = new Term("content", "aaa");

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, "aaa aaa aaa aaa");
            }

            Term tb = new Term("content", "bbb");

            for (int i = 0; i < 16; i++)
            {
                AddDoc(writer, "bbb bbb bbb bbb");
            }

            Term tc = new Term("content", "ccc");

            for (int i = 0; i < 50; i++)
            {
                AddDoc(writer, "ccc ccc ccc ccc");
            }

            // assure that we deal with a single segment
            writer.Optimize();
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, null, true, indexDivisor);

            TermDocs tdocs = reader.TermDocs();

            // without optimization (assumption skipInterval == 16)

            // with next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(0, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(1, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(2, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(10));

            // without next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(0, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(10));

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tb);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(10, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(11, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(12, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(26));

            // without next
            tdocs.Seek(tb);
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(10, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(26));

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tc);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(26, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(27, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(28, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(76));

            //without next
            tdocs.Seek(tc);
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(26, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(76));

            tdocs.Close();
            reader.Close();
            dir.Close();
        }
Example #26
		private void  AssertTermDocsCount(System.String msg, IndexReader reader, Term term, int expected)
		{
			TermDocs tdocs = null;
			
			try
			{
				tdocs = reader.TermDocs(term);
				Assert.IsNotNull(tdocs, msg + ", null TermDocs");
				int count = 0;
				while (tdocs.Next())
				{
					count++;
				}
				Assert.AreEqual(expected, count, msg + ", count mismatch");
			}
			finally
			{
				if (tdocs != null)
					tdocs.Close();
			}
		}
 public MatchAllDocIdSetIterator(IndexReader reader)
 {
     _termDocs = reader.TermDocs(null);
     _docid = -1;
 }
        public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            long t0 = System.Environment.TickCount;
            int maxdoc = reader.MaxDoc;
            BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);
            BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);

            TermEnum tenum = null;
            TermDocs tdoc = null;
            var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();
            OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
            int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int t = 0; // current term number
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;

            string pre = null;

            int df = 0;
            int minID = -1;
            int maxID = -1;
            int valId = 0;

            try
            {
                tdoc = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                            break;

                        string val = term.Text;

                        if (val != null)
                        {
                            int weight = 0;
                            string[] split = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                            if (split.Length > 1)
                            {
                                val = split[0];
                                weight = int.Parse(split[split.Length - 1]);
                            }
                            if (pre == null || !val.Equals(pre))
                            {
                                if (pre != null)
                                {
                                    freqList.Add(df);
                                    minIDList.Add(minID);
                                    maxIDList.Add(maxID);
                                }

                                list.Add(val);

                                df = 0;
                                minID = -1;
                                maxID = -1;
                                valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                                t++;
                            }

                            tdoc.Seek(tenum);
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                                else weightLoader.Add(docid, weight);

                                if (docid < minID) minID = docid;
                                bitset.FastSet(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId)) LogOverflow(fieldName);
                                    else weightLoader.Add(docid, weight);

                                    bitset.FastSet(docid);
                                }
                                if (docid > maxID) maxID = docid;
                            }
                            pre = val;
                        }

                    }
                    while (tenum.Next());
                    if (pre != null)
                    {
                        freqList.Add(df);
                        minIDList.Add(minID);
                        maxIDList.Add(maxID);
                    }
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
                _weightArray.Load(maxdoc + 1, weightLoader);
            }
            catch (System.IO.IOException e)
            {
                throw e;
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs = freqList.ToArray();
            this.minIDs = minIDList.ToArray();
            this.maxIDs = maxIDList.ToArray();

            int doc = 0;
            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();   
        }
Example #29
 public override TermDocs TermDocs()
 {
     return(in_Renamed.TermDocs());
 }
Example #30
 public override TermDocs TermDocs(IState state)
 {
     EnsureOpen();
     return(in_Renamed.TermDocs(state));
 }
Example #31
            public virtual void  Seek(Term term)
            {
                IndexReader reader = ((IndexReader)Enclosing_Instance.fieldToReader[term.Field()]);

                termDocs = reader != null ? reader.TermDocs(term) : null;
            }
Example #32
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
        {
            string field = string.Intern(fieldName);
            int maxDoc = reader.MaxDoc;

            if (orderArray == null) // we want to reuse the memory
            {
                orderArray = NewInstance(termCountSize, maxDoc);
            }
            else
            {
                orderArray.EnsureCapacity(maxDoc); // no need to fill to 0, we are resetting the data anyway
            }

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();

            int length = maxDoc + 1;
            ITermValueList list = listFactory == null ? new TermStringList() : listFactory.CreateTermList();
            TermDocs termDocs = reader.TermDocs();
            TermEnum termEnum = reader.Terms(new Term(field));
            int t = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            //int df = 0;
            t++;
            try
            {
                do
                {
                    Term term = termEnum.Term;
                    if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                        break;

                    if (t >= orderArray.MaxValue())
                    {
                        throw new System.IO.IOException("maximum number of value cannot exceed: " + orderArray.MaxValue());
                    }
                    // Alexey: well, we could now get more than one term per document. Effectively, we could build a facet against a tokenized field
                    /*// we expect that there is at most one term per document
                    if (t >= length)
                    {
                        throw new RuntimeException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
                    }*/
                    // store term text
                    list.Add(term.Text);
                    termDocs.Seek(termEnum);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
                    int minID = -1;
                    int maxID = -1;
                    int df = 0;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        orderArray.Add(docid, t);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            orderArray.Add(docid, t);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);

                    t++;
                } while (termEnum.Next());
            }
            finally
            {
                termDocs.Dispose();
                termEnum.Dispose();
            }
            list.Seal();

            this.valArray = list;
            this.freqs = freqList.ToArray();
            this.minIDs = minIDList.ToArray();
            this.maxIDs = maxIDList.ToArray();
        }
Example #33
 protected internal virtual TermDocs TermDocs(IndexReader reader)
 {
     return(reader.TermDocs());
 }
Example #34
		// Apply buffered delete terms, queries and docIDs to the
		// provided reader
		private bool ApplyDeletes(IndexReader reader, int docIDStart)
		{
			lock (this)
			{
				
				int docEnd = docIDStart + reader.MaxDoc();
				bool any = false;
				
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

				// Delete by term
				TermDocs docs = reader.TermDocs();
				try
				{
                    foreach(KeyValuePair<Term,BufferedDeletes.Num> entry in deletesFlushed.terms)
					{
						Term term = entry.Key;
						// LUCENE-2086: we should be iterating a TreeMap,
                        // here, so terms better be in order:
                        System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
						docs.Seek(term);
						int limit = entry.Value.GetNum();
						while (docs.Next())
						{
							int docID = docs.Doc();
							if (docIDStart + docID >= limit)
								break;
							reader.DeleteDocument(docID);
							any = true;
						}
					}
				}
				finally
				{
					docs.Close();
				}
				
				// Delete by docID
                foreach(int docID in deletesFlushed.docIDs)
				{
					if (docID >= docIDStart && docID < docEnd)
					{
						reader.DeleteDocument(docID - docIDStart);
						any = true;
					}
				}
				
				// Delete by query
				IndexSearcher searcher = new IndexSearcher(reader);
                foreach(KeyValuePair<Query,int> entry in new Support.Dictionary<Query,int>(deletesFlushed.queries))
				{
					Query query = entry.Key;
					int limit = entry.Value;
					Weight weight = query.Weight(searcher);
					Scorer scorer = weight.Scorer(reader, true, false);
					if (scorer != null)
					{
						while (true)
						{
							int doc = scorer.NextDoc();
							if (((long) docIDStart) + doc >= limit)
								break;
							reader.DeleteDocument(doc);
							any = true;
						}
					}
				}
				searcher.Close();
				return any;
			}
		}
Example #35
		protected internal virtual TermDocs TermDocs(IndexReader reader)
		{
			return reader.TermDocs();
		}
Example #36
		// There are two ways we can determine the max_results
		// most recent items:  
		//
		// One is to instantiate Lucene documents for each of
		// the document IDs in primary_matches.  This is a
		// fairly expensive operation.
		//
		// The other is to walk through the list of all
		// document IDs in descending time order.  This is
		// a less expensive operation, but adds up over time
		// on large data sets.
		//
		// We can walk about 2.5 docs for every Document we
		// instantiate.  So what we'll do, if we have more
		// matches than available hits, is walk (m * 1.25)
		// docs to see if we can fill out the top 100 hits.
		// If not, we'll fall back to creating documents
		// for all of them.

		private static ArrayList ScanRecentDocs (IndexReader	    primary_reader,
						    IndexReader		    secondary_reader,
						    BetterBitArray	    primary_matches,
						    Dictionary<int, Hit>    hits_by_id,
						    int			    max_results,
						    ref int		    total_number_of_matches,
						    HitFilter		    hit_filter,
						    string		    index_name)
		{
			Stopwatch a = new Stopwatch ();
			a.Start ();

			TermDocs docs = primary_reader.TermDocs ();
			TermEnum enumerator = primary_reader.Terms (new Term ("InvertedTimestamp", String.Empty));
			ArrayList results = new ArrayList (max_results);
			int docs_found = 0;
			int docs_walked = 0;
			int hit_filter_removed = 0;
			int max_docs = (int) (primary_matches.TrueCount * 1.25);

			Term term;
			TermDocs secondary_term_docs = null;
			if (secondary_reader != null)
				secondary_term_docs = secondary_reader.TermDocs ();

			do {
				term = enumerator.Term ();
			
				if (term.Field () != "InvertedTimestamp")
					break;

				docs.Seek (enumerator);

				while (docs.Next ()
				       && docs_found < max_results
				       && docs_walked < max_docs) {
					int doc_id = docs.Doc ();

					if (primary_matches.Get (doc_id)) {
						Document doc = primary_reader.Document (doc_id);
						Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs);

						// If we have a HitFilter, apply it.
						if (hit_filter != null && ! hit_filter (hit)) {
							if (Debug)
								Log.Debug ("Filtered out {0}", hit.Uri);
							hit_filter_removed ++;
							continue;
						}
						hits_by_id [doc_id] = hit;
						// Add the result, last modified first
						results.Add (hit);
						docs_found++;
					}
			
					docs_walked++;
				}
			} while (enumerator.Next ()
				 && docs_found < max_results
				 && docs_walked < max_docs);

			docs.Close ();
			if (secondary_term_docs != null)
				secondary_term_docs.Close ();

			// If we've found all the docs we can return in a subset!
			// Fantastic, we've probably short circuited a slow search.
			if (docs_found != max_results) {
				// Otherwise bad luck! Not all docs found
				// Start afresh - this time traversing all results
				results = null;
			} else {
				// Adjust total_number_of_matches. We need to do this to avoid scenarios like the following:
				// max_hits = 100. Matched 100 results. But hit filter removed 30. So 70 results will be returned.
				// We want to avoid saying "Showing top 70 of 100". Note that since we are not passing
				// every document in the index through the hit_filter, when we say "Showing top 100 of 1234", the
				// 1234 could actually be much less. But since max_hits was 100, that will not mislead the user.
				total_number_of_matches -= hit_filter_removed;
			}

			a.Stop ();
			if (Debug) {
				Log.Debug (">>> {0}: Walked {1} items, populated an enum with {2} items in {3}", index_name, docs_walked, docs_found, a);
				
				if (docs_found == max_results)
					Log.Debug (">>> {0}: Successfully short circuited timestamp ordering!", index_name);
			}

			return results;
		}
Example #37
        public static void  VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
        {
            Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
            bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

            int[] r2r1 = new int[r2.MaxDoc()];             // r2 id to r1 id mapping

            TermDocs termDocs1 = r1.TermDocs();
            TermDocs termDocs2 = r2.TermDocs();

            // create mapping from id2 space to id1 based on idField
            idField = StringHelper.Intern(idField);
            TermEnum termEnum = r1.Terms(new Term(idField, ""));

            do
            {
                Term term = termEnum.Term();
                if (term == null || (System.Object)term.Field() != (System.Object)idField)
                {
                    break;
                }

                termDocs1.Seek(termEnum);
                if (!termDocs1.Next())
                {
                    // This doc is deleted and wasn't replaced
                    termDocs2.Seek(termEnum);
                    Assert.IsFalse(termDocs2.Next());
                    continue;
                }

                int id1 = termDocs1.Doc();
                Assert.IsFalse(termDocs1.Next());

                termDocs2.Seek(termEnum);
                Assert.IsTrue(termDocs2.Next());
                int id2 = termDocs2.Doc();
                Assert.IsFalse(termDocs2.Next());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (System.Exception t)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
                    System.Console.Out.WriteLine("  d2=" + r2.Document(id2));
                    throw t;
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
                }
                catch (System.Exception e)
                {
                    System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
                    System.Console.Out.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        for (int i = 0; i < tv1.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
                        }
                    }

                    TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
                    System.Console.Out.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        for (int i = 0; i < tv2.Length; i++)
                        {
                            System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
                        }
                    }

                    throw e;
                }
            } while (termEnum.Next());

            termEnum.Close();

            // Verify postings
            TermEnum termEnum1 = r1.Terms(new Term("", ""));
            TermEnum termEnum2 = r2.Terms(new Term("", ""));

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs()];
            long[] info2 = new long[r2.NumDocs()];

            for (; ;)
            {
                Term term1, term2;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1  = 0;
                    term1 = termEnum1.Term();
                    if (term1 == null)
                    {
                        break;
                    }
                    termDocs1.Seek(termEnum1);
                    while (termDocs1.Next())
                    {
                        int d1 = termDocs1.Doc();
                        int f1 = termDocs1.Freq();
                        info1[len1] = (((long)d1) << 32) | f1;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                    if (!termEnum1.Next())
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2  = 0;
                    term2 = termEnum2.Term();
                    if (term2 == null)
                    {
                        break;
                    }
                    termDocs2.Seek(termEnum2);
                    while (termDocs2.Next())
                    {
                        int d2 = termDocs2.Doc();
                        int f2 = termDocs2.Freq();
                        info2[len2] = (((long)r2r1[d2]) << 32) | f2;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                    if (!termEnum2.Next())
                    {
                        break;
                    }
                }

                if (!hasDeletes)
                {
                    Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0)
                {
                    break;                     // no more terms
                }
                Assert.AreEqual(term1, term2);

                // sort info2 to get it into ascending docid
                System.Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i]);
                }

                termEnum1.Next();
                termEnum2.Next();
            }
        }
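The (((long)d1) << 32) | f1 expressions above pack a doc id and a frequency into a single long, with the doc id in the high 32 bits and the frequency in the low 32, so one plain Array.Sort orders entries by doc id. A round-trip sketch with hypothetical helper names:

    // Hypothetical helpers mirroring the packing used in info1/info2 above.
    static long Pack(int doc, int freq)
    {
        // doc id in the high 32 bits, freq in the low 32 bits.
        return (((long)doc) << 32) | (uint)freq;
    }

    static void Unpack(long packed, out int doc, out int freq)
    {
        doc  = (int)(packed >> 32);
        freq = (int)(packed & 0xFFFFFFFF);
    }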
Example No. 38
 protected internal virtual TermDocs TermDocs(IndexReader reader)
 {
     // A null term matches all documents, so return the all-docs enumerator;
     // otherwise return an unpositioned TermDocs for the caller to Seek().
     return term == null ? reader.TermDocs(null) : reader.TermDocs();
 }
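Passing null to TermDocs is how Lucene enumerates every non-deleted document. A minimal sketch of a caller, assuming reader is an open IndexReader with the 2.x-style method API used in most examples on this page:

    TermDocs all = reader.TermDocs(null); // null term => every non-deleted document
    try
    {
        while (all.Next())
        {
            int doc = all.Doc(); // Freq() reports 1 for the all-docs enumerator
            // ... consume doc ...
        }
    }
    finally
    {
        all.Close();
    }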
Example No. 39
			protected internal double[] ComputeDistances(IndexReader reader)
			{
				double[] retArray = null;
				var termDocs = reader.TermDocs();
				var termEnum = reader.Terms(new Term(Constants.SpatialShapeFieldName));
				try
				{
					do
					{
						Term term = termEnum.Term();
						if (term == null)
							break;

						Debug.Assert(Constants.SpatialShapeFieldName.Equals(term.Field()));

						Shape termval;
						try
						{
							termval = SpatialIndex.RavenSpatialContext.ReadShape(term.Text()); // read shape
						}
						catch (InvalidShapeException)
						{
							continue;
						}

						var pt = termval as Point;
						if (pt == null)
							continue;

						var distance = SpatialIndex.RavenSpatialContext.GetDistCalc().Distance(pt, originPt);

						if (retArray == null)
							// late init
							retArray = new double[reader.MaxDoc()];
						termDocs.Seek(termEnum);
						while (termDocs.Next())
						{
							retArray[termDocs.Doc()] = distance;
						}
					} while (termEnum.Next());
				}
				finally
				{
					termDocs.Close();
					termEnum.Close();
				}
				return retArray ?? new double[reader.MaxDoc()];
			}
Example No. 40
        private OpenBitSet FastBits(IndexReader reader)
        {

            OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
            bits.Set(0, reader.MaxDoc()); //assume all are valid
            Term startTerm = new Term(fieldName);
            TermEnum te = reader.Terms(startTerm);
            if (te != null)
            {
                Term currTerm = te.Term();

                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    if (te.DocFreq() > 1)
                    {
                        int lastDoc = -1;
                        //unset potential duplicates
                        TermDocs td = reader.TermDocs(currTerm);
                        td.Next();
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            td.Next();
                        }
                        do
                        {
                            lastDoc = td.Doc();
                            bits.Clear(lastDoc);
                        } while (td.Next());
                        if (keepMode == KM_USE_LAST_OCCURRENCE)
                        {
                            //restore the last bit
                            bits.Set(lastDoc);
                        }
                    }
                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            return bits;
        }
Example No. 41
		// Apply buffered delete terms, queries and docIDs to the
		// provided reader
		private bool ApplyDeletes(IndexReader reader, int docIDStart)
		{
			lock (this)
			{
				
				int docEnd = docIDStart + reader.MaxDoc();
				bool any = false;
				
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

				// Delete by term
				System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator();
				TermDocs docs = reader.TermDocs();
				try
				{
					while (iter.MoveNext())
					{
						System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
						Term term = (Term) entry.Key;
						// LUCENE-2086: we should be iterating a TreeMap,
                        // here, so terms better be in order:
                        System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
						docs.Seek(term);
						int limit = ((BufferedDeletes.Num) entry.Value).GetNum();
						while (docs.Next())
						{
							int docID = docs.Doc();
							if (docIDStart + docID >= limit)
								break;
							reader.DeleteDocument(docID);
							any = true;
						}
					}
				}
				finally
				{
					docs.Close();
				}
				
				// Delete by docID
				iter = deletesFlushed.docIDs.GetEnumerator();
				while (iter.MoveNext())
				{
					int docID = ((System.Int32) iter.Current);
					if (docID >= docIDStart && docID < docEnd)
					{
						reader.DeleteDocument(docID - docIDStart);
						any = true;
					}
				}
				
				// Delete by query
				IndexSearcher searcher = new IndexSearcher(reader);
				iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator();
				while (iter.MoveNext())
				{
					System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
					Query query = (Query) entry.Key;
					int limit = ((System.Int32) entry.Value);
					Weight weight = query.Weight(searcher);
					Scorer scorer = weight.Scorer(reader, true, false);
					if (scorer != null)
					{
						while (true)
						{
							int doc = scorer.NextDoc();
							// NextDoc() returns NO_MORE_DOCS (int.MaxValue) once exhausted; the
							// long cast keeps the sum from overflowing, so this check also ends
							// the loop at the end of the scorer.
							if (((long) docIDStart) + doc >= limit)
								break;
							reader.DeleteDocument(doc);
							any = true;
						}
					}
				}
				searcher.Close();
				return any;
			}
		}
Example No. 42
        private OpenBitSet CorrectBits(IndexReader reader)
        {

            OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); //assume all are INvalid
            Term startTerm = new Term(fieldName);
            TermEnum te = reader.Terms(startTerm);
            if (te != null)
            {
                Term currTerm = te.Term();
                while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) //term fieldnames are interned
                {
                    int lastDoc = -1;
                    //set non duplicates
                    TermDocs td = reader.TermDocs(currTerm);
                    if (td.Next())
                    {
                        if (keepMode == KM_USE_FIRST_OCCURRENCE)
                        {
                            bits.Set(td.Doc());
                        }
                        else
                        {
                            do
                            {
                                lastDoc = td.Doc();
                            } while (td.Next());
                            bits.Set(lastDoc);
                        }
                    }
                    if (!te.Next())
                    {
                        break;
                    }
                    currTerm = te.Term();
                }
            }
            return bits;
        }
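FastBits (Example No. 40) and CorrectBits above are the two strategies of Lucene's DuplicateFilter: the first assumes most documents are unique and clears the duplicates, the second assumes nothing and sets only the survivors. A sketch of how a filter might choose between them; the processingMode field and PM_FAST_INVALIDATION constant are assumptions modeled on the classic DuplicateFilter, not code shown on this page:

    public override DocIdSet GetDocIdSet(IndexReader reader)
    {
        // OpenBitSet implements DocIdSet, so either bit set can be returned directly.
        if (processingMode == PM_FAST_INVALIDATION)
            return FastBits(reader);
        return CorrectBits(reader);
    }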
Example No. 43
            protected internal override StringIndex CreateValue(IndexReader reader, Entry entryKey, IState state)
            {
                System.String field           = StringHelper.Intern(entryKey.field);
                int[]         retArray        = new int[reader.MaxDoc];
                int[]         retArrayOrdered = new int[reader.MaxDoc];
                for (int i = 0; i < retArrayOrdered.Length; i++)
                {
                    retArrayOrdered[i] = -1;
                }

                var length = reader.MaxDoc + 1;
                UnmanagedStringArray mterms   = new UnmanagedStringArray(length);
                TermDocs             termDocs = reader.TermDocs(state);

                SegmentTermEnum termEnum = (SegmentTermEnum)reader.Terms(new Term(field), state);
                int             t        = 0; // current term number
                int             docIndex = 0;

                // an entry for documents that have no terms in this field
                // should a document with no terms be at top or bottom?
                // this puts them at the top - if it is changed, FieldDocSortedHitQueue
                // needs to change as well.
                t++;

                try
                {
                    do
                    {
                        if (termEnum.termBuffer.Field != field || t >= length)
                        {
                            break;
                        }

                        // store term text
                        mterms.Add(termEnum.termBuffer.TextAsSpan);

                        termDocs.Seek(termEnum, state);
                        while (termDocs.Next(state))
                        {
                            var pt = retArray[termDocs.Doc];
                            retArray[termDocs.Doc] = t;

                            if (pt == 0)
                            {
                                retArrayOrdered[docIndex++] = termDocs.Doc;
                            }
                        }

                        t++;
                    }while (termEnum.Next(state));
                }
                finally
                {
                    termDocs.Close();
                    termEnum.Close();
                }

                StringIndex value_Renamed = new StringIndex(retArray, retArrayOrdered, mterms);

                return(value_Renamed);
            }
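In the index built above, retArray[doc] is a 1-based ordinal into mterms (ordinal 0 is reserved for documents with no term in the field, which is why t is incremented once before the loop), so ordering documents by this field reduces to comparing ints. A self-contained sketch of the ordinal scheme over plain arrays (hypothetical helper, not part of the code above):

    // ord[doc] is a 1-based index into terms[]; 0 marks "no term in this field".
    static string TermFor(int[] ord, string[] terms, int doc)
    {
        return ord[doc] == 0 ? null : terms[ord[doc]];
    }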
Example No. 44
            public virtual void  Seek(Term term, IState state)
            {
                IndexReader reader = Enclosing_Instance.fieldToReader[term.Field];

                termDocs = reader != null ? reader.TermDocs(term, state) : null;
            }
Example No. 45
        public static RavenJObject[] ReadAllEntriesFromIndex(IndexReader reader)
        {
            if (reader.MaxDoc > 512 * 1024)
            {
                throw new InvalidOperationException("Refusing to extract all index entries from an index with " + reader.MaxDoc +
                                                    " entries, because of the probable time / memory costs associated with that." +
                                                    Environment.NewLine +
                                                    "Viewing Index Entries are a debug tool, and should not be used on indexes of this size. You might want to try Luke, instead.");
            }
            var results = new RavenJObject[reader.MaxDoc];
            using (var termDocs = reader.TermDocs())
            using (var termEnum = reader.Terms())
            {
                while (termEnum.Next())
                {
                    var term = termEnum.Term;
                    if (term == null)
                        break;

                    var text = term.Text;

                    termDocs.Seek(termEnum);
                    for (int i = 0; i < termEnum.DocFreq() && termDocs.Next(); i++)
                    {
                        RavenJObject result = results[termDocs.Doc];
                        if (result == null)
                            results[termDocs.Doc] = result = new RavenJObject();
                        var propertyName = term.Field;
                        if (propertyName.EndsWith("_ConvertToJson") ||
                            propertyName.EndsWith("_IsArray"))
                            continue;
                        if (result.ContainsKey(propertyName))
                        {
                            switch (result[propertyName].Type)
                            {
                                case JTokenType.Array:
                                    ((RavenJArray)result[propertyName]).Add(text);
                                    break;
                                case JTokenType.String:
                                    result[propertyName] = new RavenJArray
									{
										result[propertyName],
										text
									};
                                    break;
                                default:
                                    throw new ArgumentException("No idea how to handle " + result[propertyName].Type);
                            }
                        }
                        else
                        {
                            result[propertyName] = text;
                        }
                    }
                }
            }
            return results;
        }
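A minimal, hypothetical call site for the helper above; the directory path is a placeholder, and the Lucene.Net IndexReader.Open(Directory, bool) read-only overload is assumed:

    using (var dir = FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"))) // hypothetical path
    using (var reader = IndexReader.Open(dir, true)) // true = open read-only
    {
        foreach (RavenJObject entry in ReadAllEntriesFromIndex(reader))
            System.Console.WriteLine(entry);
    }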
Example No. 46
        // Apply buffered delete terms, queries and docIDs to the
        // provided reader
        private bool ApplyDeletes(IndexReader reader, int docIDStart)
        {
            lock (this)
            {
                int docEnd = docIDStart + reader.MaxDoc();
                bool any = false;

                // Delete by term
                IEnumerator<KeyValuePair<object, object>> iter = deletesFlushed.terms.GetEnumerator();
                while (iter.MoveNext())
                {
                    KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current;
                    Term term = (Term)entry.Key;

                    TermDocs docs = reader.TermDocs(term);
                    if (docs != null)
                    {
                        int limit = ((BufferedDeletes.Num)entry.Value).GetNum();
                        try
                        {
                            while (docs.Next())
                            {
                                int docID = docs.Doc();
                                if (docIDStart + docID >= limit)
                                    break;
                                reader.DeleteDocument(docID);
                                any = true;
                            }
                        }
                        finally
                        {
                            docs.Close();
                        }
                    }
                }

                // Delete by docID
                IEnumerator<object> iter2 = deletesFlushed.docIDs.GetEnumerator();
                while (iter2.MoveNext())
                {
                    int docID = (int)iter2.Current;
                    if (docID >= docIDStart && docID < docEnd)
                    {
                        reader.DeleteDocument(docID - docIDStart);
                        any = true;
                    }
                }

                // Delete by query
                IndexSearcher searcher = new IndexSearcher(reader);
                iter = deletesFlushed.queries.GetEnumerator();
                while (iter.MoveNext())
                {
                    KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current;
                    Query query = (Query)entry.Key;
                    int limit = (int)entry.Value;
                    Weight weight = query.Weight(searcher);
                    Scorer scorer = weight.Scorer(reader);
                    while (scorer.Next())
                    {
                        int docID = scorer.Doc();
                        if (docIDStart + docID >= limit)
                            break;
                        reader.DeleteDocument(docID);
                        any = true;
                    }
                }
                searcher.Close();
                return any;
            }
        }
Example No. 47
 public override TermDocs TermDocs()
 {
     EnsureOpen();
     return(in_Renamed.TermDocs());
 }
Example No. 48
		////////////////////////////////////////////////////////////////

		static private void ScoreHits (Dictionary<int, Hit>   hits_by_id,
					       IndexReader reader,
					       ICollection term_list)
		{
			LNS.Similarity similarity;
			similarity = LNS.Similarity.GetDefault ();

			TermDocs term_docs = reader.TermDocs ();
			Hit hit;

			foreach (Term term in term_list) {

				double idf;
				idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ());

				int hit_count;
				hit_count = hits_by_id.Count;

				term_docs.Seek (term);
				while (term_docs.Next () && hit_count > 0) {
					
					int id;
					id = term_docs.Doc ();

					if (hits_by_id.TryGetValue (id, out hit)) {
						double tf;
						tf = similarity.Tf (term_docs.Freq ());
						hit.Score += tf * idf;
						--hit_count;
					}
				}
			}

			term_docs.Close ();
		}
Example No. 49
 // constructor
 internal ValueSourceScorer(ValueSourceQuery enclosingInstance, Similarity similarity, IndexReader reader, ValueSourceWeight w)
     : base(similarity)
 {
     InitBlock(enclosingInstance);
     this.weight = w;
     this.qWeight = w.Value;
     // this is when/where the values are first created.
     vals = Enclosing_Instance.valSrc.GetValues(reader);
     termDocs = reader.TermDocs(null);
 }
Example No. 50
		private static ArrayList   FindRecentResults (IndexReader	    primary_reader,
							      IndexReader	    secondary_reader,
							      BetterBitArray	    primary_matches,
							      Dictionary<int, Hit>  hits_by_id,
							      int		    max_results,
							      ref int		    total_number_of_matches,
							      HitFilter		    hit_filter,
							      string		    index_name)
		{
			Stopwatch b = new Stopwatch ();
			b.Start ();
			
			int count = 0;
			Document doc;

			ArrayList all_docs = null;
			TopScores top_docs = null;
			TermDocs term_docs = null;

			if (primary_matches.TrueCount > max_results)
				top_docs = new TopScores (max_results);
			else
				all_docs = new ArrayList (primary_matches.TrueCount);

			if (secondary_reader != null)
				term_docs = secondary_reader.TermDocs ();

			for (int match_index = primary_matches.Count; ; match_index --) {
				// Walk across the matches backwards, since newer
				// documents are more likely to be at the end of
				// the index.
				match_index = primary_matches.GetPreviousTrueIndex (match_index);
				if (match_index < 0)
					break;

				count++;

				doc = primary_reader.Document (match_index, fields_timestamp_uri);

				// Check the timestamp --- if we have already reached our
				// limit, we might be able to reject it immediately.
				string timestamp_str;
				long timestamp_num = 0;

				timestamp_str = doc.Get ("Timestamp");
				if (timestamp_str == null) {
					Logger.Log.Warn ("No timestamp on {0}!", GetUriFromDocument (doc));
				} else {
					timestamp_num = Int64.Parse (doc.Get ("Timestamp"));
					if (top_docs != null && ! top_docs.WillAccept (timestamp_num))
						continue;
				}

				// Get the actual hit now
				// doc was created with only 2 fields, so first get the complete lucene document for the primary document.
				// Also run our hit_filter now, if we have one. Since we insist on returning the max_results
				// most recent hits, any filtering of hits should happen now and not later.
				Hit hit = CreateHit (primary_reader.Document (match_index), secondary_reader, term_docs);
				if (hit_filter != null && ! hit_filter (hit)) {
					if (Debug)
						Log.Debug ("Filtered out {0}", hit.Uri);
					total_number_of_matches --;
					continue;
				}

				hits_by_id [match_index] = hit;

				// Add the document to the appropriate data structure.
				// We use the timestamp_num as the score, so high
				// scores correspond to more-recent timestamps.
				if (all_docs != null)
					all_docs.Add (hit);
				else
					top_docs.Add (timestamp_num, hit);
			}

			if (term_docs != null)
				term_docs.Close ();

			b.Stop ();

			if (Debug)
				Log.Debug (">>> {0}: Instantiated and scanned {1} documents in {2}", index_name, count, b);

			if (all_docs != null) {
				// Sort results before sending
				all_docs.Sort ();
				return all_docs;
			} else {
				return top_docs.TopScoringObjects;
			}
		}