internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms)
    : base(similarity)
{
    InitBlock(enclosingInstance);
    this.termDocs = reader.TermDocs(null);
    score = w.Value;
    this.norms = norms;
}
internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms, IState state)
    : base(similarity)
{
    InitBlock(enclosingInstance);
    this.termDocs = reader.TermDocs(null, state);
    score = w.Value;
    this.norms = norms;
}
protected internal override string[] CreateValue(IndexReader reader, Entry entryKey, IState state)
{
    System.String field = StringHelper.Intern(entryKey.field);
    System.String[] retArray = new System.String[reader.MaxDoc];
    TermDocs termDocs = reader.TermDocs(state);
    TermEnum termEnum = reader.Terms(new Term(field), state);
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || (System.Object)term.Field != (System.Object)field)
            {
                break;
            }
            System.String termval = term.Text;
            termDocs.Seek(termEnum, state);
            while (termDocs.Next(state))
            {
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next(state));
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return retArray;
}
protected internal override float[] CreateValue(IndexReader reader, Entry entryKey, IState state)
{
    Entry entry = entryKey;
    System.String field = entry.field;
    FloatParser parser = (FloatParser)entry.custom;
    if (parser == null)
    {
        try
        {
            return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER, state);
        }
        catch (System.FormatException)
        {
            return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER, state);
        }
    }
    float[] retArray = null;
    TermDocs termDocs = reader.TermDocs(state);
    TermEnum termEnum = reader.Terms(new Term(field), state);
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || (System.Object)term.Field != (System.Object)field)
            {
                break;
            }
            float termval = parser.ParseFloat(term.Text);
            if (retArray == null)
            {
                // late init
                retArray = new float[reader.MaxDoc];
            }
            termDocs.Seek(termEnum, state);
            while (termDocs.Next(state))
            {
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next(state));
    }
    catch (StopFillCacheException)
    {
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    if (retArray == null)
    {
        // no values
        retArray = new float[reader.MaxDoc];
    }
    return retArray;
}
// constructor
internal ValueSourceScorer(ValueSourceQuery enclosingInstance, Similarity similarity, IndexReader reader, ValueSourceWeight w)
    : base(similarity)
{
    InitBlock(enclosingInstance);
    this.weight = w;
    this.qWeight = w.Value;
    // this is when/where the values are first created.
    vals = Enclosing_Instance.valSrc.GetValues(reader);
    termDocs = reader.TermDocs(null);
}
public virtual void TestMutipleDocument()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc, null);
    doc = new Document();
    doc.Add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc, null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)dir, true, null);
    TermDocs td = reader.TermDocs(new Term("partnum", "Q36"), null);
    Assert.IsTrue(td.Next(null));
    td = reader.TermDocs(new Term("partnum", "Q37"), null);
    Assert.IsTrue(td.Next(null));
}
public override BitArray Bits(IndexReader reader)
{
    BitArray bitArray = new BitArray(reader.MaxDoc());
    TermDocs termDocs = reader.TermDocs(new Term("score", "5"));
    while (termDocs.Next())
    {
        bitArray.Set(termDocs.Doc(), true);
    }
    return bitArray;
}
public static int Count(Term t, IndexReader r)
{
    int count = 0;
    TermDocs td = r.TermDocs(t);
    while (td.Next())
    {
        td.Doc();
        count++;
    }
    td.Close();
    return count;
}
private void VerifyTermDocs(Directory dir, Term term, int numDocs)
{
    IndexReader reader = IndexReader.Open(dir);
    TermDocs termDocs = reader.TermDocs(term);
    int count = 0;
    while (termDocs.Next())
    {
        count++;
    }
    Assert.AreEqual(numDocs, count);
    reader.Close();
}
public static int Count(Term t, IndexReader r)
{
    int count = 0;
    TermDocs td = r.TermDocs(t, null);
    while (td.Next(null))
    {
        var d = td.Doc;
        count++;
    }
    td.Close();
    return count;
}
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSet bitSet = new OpenBitSet(reader.NumDocs());
    TermDocs termDocs = reader.TermDocs(new Term("TenantId", _tenantId));
    while (termDocs.Next())
    {
        if (termDocs.Freq > 0)
        {
            bitSet.Set(termDocs.Doc);
        }
    }
    return bitSet;
}
public virtual void TestAllTermDocs()
{
    IndexReader reader = OpenReader();
    int NUM_DOCS = 2;
    TermDocs td = reader.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();
    reader.Close();
}
protected internal override short[] CreateValue(IndexReader reader, Entry entryKey, IState state)
{
    Entry entry = entryKey;
    System.String field = entry.field;
    ShortParser parser = (ShortParser)entry.custom;
    if (parser == null)
    {
        return wrapper.GetShorts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER, state);
    }
    short[] retArray = new short[reader.MaxDoc];
    TermDocs termDocs = reader.TermDocs(state);
    TermEnum termEnum = reader.Terms(new Term(field), state);
    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || (System.Object)term.Field != (System.Object)field)
            {
                break;
            }
            short termval = parser.ParseShort(term.Text);
            termDocs.Seek(termEnum, state);
            while (termDocs.Next(state))
            {
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next(state));
    }
    catch (StopFillCacheException)
    {
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return retArray;
}
/// <summary>
/// Get the DocIdSet.
/// </summary>
/// <param name="reader">Applicable reader.</param>
/// <returns>The set.</returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSet result = new OpenBitSet(reader.MaxDoc());
    TermDocs td = reader.TermDocs();
    try
    {
        foreach (Term t in this.terms)
        {
            td.Seek(t);
            while (td.Next())
            {
                result.Set(td.Doc());
            }
        }
    }
    finally
    {
        td.Close();
    }
    return result;
}
private static Dictionary<string, int[]> FillCache(IndexReader reader, int docBase, string field)
{
    using (var termDocs = reader.TermDocs())
    {
        var items = new Dictionary<string, int[]>();
        var docsForTerm = new List<int>();

        using (var termEnum = reader.Terms(new Term(field)))
        {
            do
            {
                if (termEnum.Term == null || field != termEnum.Term.Field)
                    break;

                Term term = termEnum.Term;
                if (LowPrecisionNumber(term.Field, term.Text))
                    continue;

                var totalDocCountIncludedDeletes = termEnum.DocFreq();
                termDocs.Seek(termEnum.Term);
                while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                {
                    var curDoc = termDocs.Doc;
                    totalDocCountIncludedDeletes -= 1;
                    if (reader.IsDeleted(curDoc))
                        continue;

                    docsForTerm.Add(curDoc + docBase);
                }

                docsForTerm.Sort();
                items[term.Text] = docsForTerm.ToArray();
                docsForTerm.Clear();
            } while (termEnum.Next());
        }
        return items;
    }
}
public override DocIdSetIterator Iterator()
{
    // Synchronization needed because the deleted docs BitVector
    // can change after the call to hasDeletions until TermDocs creation.
    // We only use an iterator with termDocs when this was requested
    // (e.g. the range contains 0) and the index has deletions.
    TermDocs termDocs;
    lock (reader)
    {
        termDocs = IsCacheable ? null : reader.TermDocs(null);
    }
    if (termDocs != null)
    {
        // a DocIdSetIterator using TermDocs to iterate valid docIds
        return new AnonymousClassDocIdSetIterator(termDocs, this);
    }
    else
    {
        // a DocIdSetIterator generating docIds by incrementing a variable -
        // this one can be used if there are no deletions in the index
        return new AnonymousClassDocIdSetIterator1(this);
    }
}
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    var bits = new OpenBitSet(reader.MaxDoc());
    TermDocs termDocs = reader.TermDocs();
    List<double> area = _shape.Area;
    int sz = area.Count;

    // iterate through each boxid
    for (int i = 0; i < sz; i++)
    {
        double boxId = area[i];
        termDocs.Seek(new Term(_fieldName, NumericUtils.DoubleToPrefixCoded(boxId)));

        // iterate through all documents which have this boxId
        while (termDocs.Next())
        {
            bits.FastSet(termDocs.Doc());
        }
    }
    return bits;
}
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
{
    Assert.AreEqual(r1.NumDocs(), r2.NumDocs());
    bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());
    int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping

    TermDocs termDocs1 = r1.TermDocs();
    TermDocs termDocs2 = r2.TermDocs();

    // create mapping from id2 space to id2 based on idField
    idField = StringHelper.Intern(idField);
    TermEnum termEnum = r1.Terms(new Term(idField, ""));
    do
    {
        Term term = termEnum.Term();
        if (term == null || (System.Object) term.Field() != (System.Object) idField)
            break;

        termDocs1.Seek(termEnum);
        if (!termDocs1.Next())
        {
            // This doc is deleted and wasn't replaced
            termDocs2.Seek(termEnum);
            Assert.IsFalse(termDocs2.Next());
            continue;
        }

        int id1 = termDocs1.Doc();
        Assert.IsFalse(termDocs1.Next());

        termDocs2.Seek(termEnum);
        Assert.IsTrue(termDocs2.Next());
        int id2 = termDocs2.Doc();
        Assert.IsFalse(termDocs2.Next());

        r2r1[id2] = id1;

        // verify stored fields are equivalent
        try
        {
            VerifyEquals(r1.Document(id1), r2.Document(id2));
        }
        catch (System.Exception t)
        {
            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
            System.Console.Out.WriteLine(" d1=" + r1.Document(id1));
            System.Console.Out.WriteLine(" d2=" + r2.Document(id2));
            throw t;
        }

        try
        {
            // verify term vectors are equivalent
            VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
        }
        catch (System.Exception e)
        {
            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
            TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
            System.Console.Out.WriteLine(" d1=" + tv1);
            if (tv1 != null)
                for (int i = 0; i < tv1.Length; i++)
                {
                    System.Console.Out.WriteLine(" " + i + ": " + tv1[i]);
                }
            TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
            System.Console.Out.WriteLine(" d2=" + tv2);
            if (tv2 != null)
                for (int i = 0; i < tv2.Length; i++)
                {
                    System.Console.Out.WriteLine(" " + i + ": " + tv2[i]);
                }
            throw e;
        }
    }
    while (termEnum.Next());

    termEnum.Close();

    // Verify postings
    TermEnum termEnum1 = r1.Terms(new Term("", ""));
    TermEnum termEnum2 = r2.Terms(new Term("", ""));

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.NumDocs()];
    long[] info2 = new long[r2.NumDocs()];

    for (; ; )
    {
        Term term1, term2;

        // iterate until we get some docs
        int len1;
        for (; ; )
        {
            len1 = 0;
            term1 = termEnum1.Term();
            if (term1 == null)
                break;
            termDocs1.Seek(termEnum1);
            while (termDocs1.Next())
            {
                int d1 = termDocs1.Doc();
                int f1 = termDocs1.Freq();
                info1[len1] = (((long) d1) << 32) | f1;
                len1++;
            }
            if (len1 > 0)
                break;
            if (!termEnum1.Next())
                break;
        }

        // iterate until we get some docs
        int len2;
        for (; ; )
        {
            len2 = 0;
            term2 = termEnum2.Term();
            if (term2 == null)
                break;
            termDocs2.Seek(termEnum2);
            while (termDocs2.Next())
            {
                int d2 = termDocs2.Doc();
                int f2 = termDocs2.Freq();
                info2[len2] = (((long) r2r1[d2]) << 32) | f2;
                len2++;
            }
            if (len2 > 0)
                break;
            if (!termEnum2.Next())
                break;
        }

        if (!hasDeletes)
            Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());

        Assert.AreEqual(len1, len2);
        if (len1 == 0)
            break; // no more terms

        Assert.AreEqual(term1, term2);

        // sort info2 to get it into ascending docid
        System.Array.Sort(info2, 0, len2 - 0);

        // now compare
        for (int i = 0; i < len1; i++)
        {
            Assert.AreEqual(info1[i], info2[i]);
        }

        termEnum1.Next();
        termEnum2.Next();
    }
}
private int CheckDbAndIndex(DbDataReader dbreader, IndexReader ixreader, List<Difference> result) { var versionId = dbreader.GetInt32(0); var dbNodeTimestamp = dbreader.GetInt64(1); var dbVersionTimestamp = dbreader.GetInt64(2); var termDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.VersionId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(versionId))); Lucene.Net.Documents.Document doc = null; int docid = -1; if (termDocs.Next()) { docid = termDocs.Doc(); doc = ixreader.Document(docid); var indexNodeTimestamp = ParseLong(doc.Get(LucObject.FieldName.NodeTimestamp)); var indexVersionTimestamp = ParseLong(doc.Get(LucObject.FieldName.VersionTimestamp)); var nodeId = ParseInt(doc.Get(LucObject.FieldName.NodeId)); var version = doc.Get(LucObject.FieldName.Version); var p = doc.Get(LucObject.FieldName.Path); if (termDocs.Next()) { result.Add(new Difference(IndexDifferenceKind.MoreDocument) { DocId = docid, NodeId = nodeId, VersionId = versionId, Version = version, Path = p, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp, }); } if (dbVersionTimestamp != indexVersionTimestamp) { result.Add(new Difference(IndexDifferenceKind.DifferentVersionTimestamp) { DocId = docid, VersionId = versionId, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp, NodeId = nodeId, Version = version, Path = p }); } if (dbNodeTimestamp != indexNodeTimestamp) { var ok = false; var isLastDraft = doc.Get(LucObject.FieldName.IsLastDraft); if (isLastDraft != BooleanIndexHandler.YES) { var latestDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.NodeId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(nodeId))); Lucene.Net.Documents.Document latestDoc = null; while (latestDocs.Next()) { var latestdocid = latestDocs.Doc(); var d = ixreader.Document(latestdocid); if (d.Get(LucObject.FieldName.IsLastDraft) != BooleanIndexHandler.YES) continue; latestDoc = d; break; } var latestPath = latestDoc.Get(LucObject.FieldName.Path); if (latestPath == p) ok = true; } if (!ok) { result.Add(new Difference(IndexDifferenceKind.DifferentNodeTimestamp) { DocId = docid, VersionId = versionId, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp, NodeId = nodeId, Version = version, Path = p }); } } } else { result.Add(new Difference(IndexDifferenceKind.NotInIndex) { DocId = docid, VersionId = versionId, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, }); } return docid; }
private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, List<string> fieldsToRead, IndexReader reader)
{
    foreach (var field in fieldsToRead)
    {
        using (var termDocs = reader.TermDocs())
        {
            using (var termEnum = reader.Terms(new Term(field)))
            {
                do
                {
                    if (termEnum.Term == null || field != termEnum.Term.Field)
                        break;

                    if (LowPrecisionNumber(termEnum.Term))
                        continue;

                    var totalDocCountIncludedDeletes = termEnum.DocFreq();
                    termDocs.Seek(termEnum.Term);
                    while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                    {
                        totalDocCountIncludedDeletes -= 1;
                        if (reader.IsDeleted(termDocs.Doc))
                            continue;

                        state.SetInCache(field, termDocs.Doc, termEnum.Term);
                    }
                } while (termEnum.Next());
            }
        }
    }
}
private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, IEnumerable<string> fieldsToRead, IndexReader reader)
{
    foreach (var field in fieldsToRead)
    {
        var items = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>[reader.MaxDoc];
        using (var termDocs = reader.TermDocs())
        {
            using (var termEnum = reader.Terms(new Term(field)))
            {
                do
                {
                    if (termEnum.Term == null || field != termEnum.Term.Field)
                        break;

                    Term term = termEnum.Term;
                    if (LowPrecisionNumber(term.Field, term.Text))
                        continue;

                    var totalDocCountIncludedDeletes = termEnum.DocFreq();
                    termDocs.Seek(termEnum.Term);
                    while (termDocs.Next() && totalDocCountIncludedDeletes > 0)
                    {
                        totalDocCountIncludedDeletes -= 1;
                        if (reader.IsDeleted(termDocs.Doc))
                            continue;

                        if (items[termDocs.Doc] == null)
                            items[termDocs.Doc] = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>();
                        items[termDocs.Doc].AddLast(new IndexSearcherHolder.IndexSearcherHoldingState.CacheVal
                        {
                            Term = termEnum.Term
                        });
                    }
                } while (termEnum.Next());
            }
        }
        state.SetInCache(field, items);
    }
}
// Apply buffered delete terms to the segment just flushed from ram.
// Apply appropriately so that a delete term is only applied to
// the documents buffered before it, not those buffered after it.
private void ApplyDeletesSelectively(System.Collections.Hashtable deleteTerms, System.Collections.IList deleteIds, IndexReader reader)
{
    System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
    while (iter.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
        Term term = (Term) entry.Key;

        TermDocs docs = reader.TermDocs(term);
        if (docs != null)
        {
            int num = ((DocumentsWriter.Num) entry.Value).GetNum();
            try
            {
                while (docs.Next())
                {
                    int doc = docs.Doc();
                    if (doc >= num)
                    {
                        break;
                    }
                    reader.DeleteDocument(doc);
                }
            }
            finally
            {
                docs.Close();
            }
        }
    }

    if (deleteIds.Count > 0)
    {
        iter = deleteIds.GetEnumerator();
        while (iter.MoveNext())
        {
            reader.DeleteDocument(((System.Int32) iter.Current));
        }
    }
}
/// <summary> /// loads multi-value facet data. This method uses a workarea to prepare loading. /// </summary> /// <param name="fieldName"></param> /// <param name="reader"></param> /// <param name="listFactory"></param> /// <param name="workArea"></param> public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea) { long t0 = Environment.TickCount; int maxdoc = reader.MaxDoc; BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea); TermEnum tenum = null; TermDocs tdoc = null; ITermValueList list = (listFactory == null ? (ITermValueList)new TermStringList() : listFactory.CreateTermList()); List<int> minIDList = new List<int>(); List<int> maxIDList = new List<int>(); List<int> freqList = new List<int>(); OpenBitSet bitset = new OpenBitSet(); int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName)); int t = 0; // current term number list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); t++; _overflow = false; try { tdoc = reader.TermDocs(); tenum = reader.Terms(new Term(fieldName, "")); if (tenum != null) { do { Term term = tenum.Term; if (term == null || !fieldName.Equals(term.Field)) break; string val = term.Text; if (val != null) { list.Add(val); tdoc.Seek(tenum); //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs int df = 0; int minID = -1; int maxID = -1; int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; if (tdoc.Next()) { df++; int docid = tdoc.Doc; if (!loader.Add(docid, valId)) LogOverflow(fieldName); minID = docid; bitset.Set(docid); while (tdoc.Next()) { df++; docid = tdoc.Doc; if (!loader.Add(docid, valId)) LogOverflow(fieldName); bitset.Set(docid); } maxID = docid; } freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); } t++; } while (tenum.Next()); } } finally { try { if (tdoc != null) { tdoc.Dispose(); } } finally { if (tenum != null) { tenum.Dispose(); } } } list.Seal(); try { _nestedArray.Load(maxdoc + 1, loader); } catch (System.IO.IOException e) { throw e; } catch (Exception e) { throw new RuntimeException("failed to load due to " + e.ToString(), e); } this.valArray = list; this.freqs = freqList.ToArray(); this.minIDs = minIDList.ToArray(); this.maxIDs = maxIDList.ToArray(); int doc = 0; while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true)) { ++doc; } if (doc <= maxdoc) { this.minIDs[0] = doc; doc = maxdoc; while (doc > 0 && !_nestedArray.Contains(doc, 0, true)) { --doc; } if (doc > 0) { this.maxIDs[0] = doc; } } this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality(); }
public virtual void testSkipTo(int indexDivisor) { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); Term ta = new Term("content", "aaa"); for (int i = 0; i < 10; i++) { AddDoc(writer, "aaa aaa aaa aaa"); } Term tb = new Term("content", "bbb"); for (int i = 0; i < 16; i++) { AddDoc(writer, "bbb bbb bbb bbb"); } Term tc = new Term("content", "ccc"); for (int i = 0; i < 50; i++) { AddDoc(writer, "ccc ccc ccc ccc"); } // assure that we deal with a single segment writer.Optimize(); writer.Close(); IndexReader reader = IndexReader.Open(dir, null, true, indexDivisor); TermDocs tdocs = reader.TermDocs(); // without optimization (assumption skipInterval == 16) // with next tdocs.Seek(ta); Assert.IsTrue(tdocs.Next()); Assert.AreEqual(0, tdocs.Doc()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.Next()); Assert.AreEqual(1, tdocs.Doc()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.SkipTo(0)); Assert.AreEqual(2, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(4)); Assert.AreEqual(4, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(9)); Assert.AreEqual(9, tdocs.Doc()); Assert.IsFalse(tdocs.SkipTo(10)); // without next tdocs.Seek(ta); Assert.IsTrue(tdocs.SkipTo(0)); Assert.AreEqual(0, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(4)); Assert.AreEqual(4, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(9)); Assert.AreEqual(9, tdocs.Doc()); Assert.IsFalse(tdocs.SkipTo(10)); // exactly skipInterval documents and therefore with optimization // with next tdocs.Seek(tb); Assert.IsTrue(tdocs.Next()); Assert.AreEqual(10, tdocs.Doc()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.Next()); Assert.AreEqual(11, tdocs.Doc()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.SkipTo(5)); Assert.AreEqual(12, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(15)); Assert.AreEqual(15, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(24)); Assert.AreEqual(24, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(25)); Assert.AreEqual(25, tdocs.Doc()); Assert.IsFalse(tdocs.SkipTo(26)); // without next tdocs.Seek(tb); Assert.IsTrue(tdocs.SkipTo(5)); Assert.AreEqual(10, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(15)); Assert.AreEqual(15, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(24)); Assert.AreEqual(24, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(25)); Assert.AreEqual(25, tdocs.Doc()); Assert.IsFalse(tdocs.SkipTo(26)); // much more than skipInterval documents and therefore with optimization // with next tdocs.Seek(tc); Assert.IsTrue(tdocs.Next()); Assert.AreEqual(26, tdocs.Doc()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.Next()); Assert.AreEqual(27, tdocs.Doc()); Assert.AreEqual(4, tdocs.Freq()); Assert.IsTrue(tdocs.SkipTo(5)); Assert.AreEqual(28, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(40)); Assert.AreEqual(40, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(57)); Assert.AreEqual(57, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(74)); Assert.AreEqual(74, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(75)); Assert.AreEqual(75, tdocs.Doc()); Assert.IsFalse(tdocs.SkipTo(76)); //without next tdocs.Seek(tc); Assert.IsTrue(tdocs.SkipTo(5)); Assert.AreEqual(26, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(40)); Assert.AreEqual(40, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(57)); Assert.AreEqual(57, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(74)); Assert.AreEqual(74, tdocs.Doc()); Assert.IsTrue(tdocs.SkipTo(75)); Assert.AreEqual(75, tdocs.Doc()); Assert.IsFalse(tdocs.SkipTo(76)); tdocs.Close(); reader.Close(); dir.Close(); }
private void AssertTermDocsCount(System.String msg, IndexReader reader, Term term, int expected)
{
    TermDocs tdocs = null;
    try
    {
        tdocs = reader.TermDocs(term);
        Assert.IsNotNull(tdocs, msg + ", null TermDocs");
        int count = 0;
        while (tdocs.Next())
        {
            count++;
        }
        Assert.AreEqual(expected, count, msg + ", count mismatch");
    }
    finally
    {
        if (tdocs != null)
            tdocs.Close();
    }
}
public MatchAllDocIdSetIterator(IndexReader reader)
{
    _termDocs = reader.TermDocs(null);
    _docid = -1;
}
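// NOTE: a hypothetical sketch, not taken from the original examples. The constructor
// above (and the scorer constructors earlier) only captures reader.TermDocs(null);
// this shows how such a match-all DocIdSetIterator typically advances it, assuming
// the Lucene.Net 3.x API where DocIdSetIterator exposes DocID()/NextDoc()/Advance()
// and TermDocs.Doc is a property. The class and member names are illustrative only.
public class MatchAllDocIdSetIteratorSketch : DocIdSetIterator
{
    private readonly TermDocs _termDocs;
    private int _docid = -1;

    public MatchAllDocIdSetIteratorSketch(IndexReader reader)
    {
        // TermDocs(null) enumerates every non-deleted document in the reader.
        _termDocs = reader.TermDocs(null);
    }

    public override int DocID()
    {
        return _docid;
    }

    public override int NextDoc()
    {
        // TermDocs.Next() returns false once the enumeration is exhausted.
        _docid = _termDocs.Next() ? _termDocs.Doc : NO_MORE_DOCS;
        return _docid;
    }

    public override int Advance(int target)
    {
        // SkipTo() positions on the first document whose id is >= target, if any.
        _docid = _termDocs.SkipTo(target) ? _termDocs.Doc : NO_MORE_DOCS;
        return _docid;
    }
}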
public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea) { long t0 = System.Environment.TickCount; int maxdoc = reader.MaxDoc; BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea); BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null); TermEnum tenum = null; TermDocs tdoc = null; var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList()); List<int> minIDList = new List<int>(); List<int> maxIDList = new List<int>(); List<int> freqList = new List<int>(); OpenBitSet bitset = new OpenBitSet(maxdoc + 1); int negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName)); int t = 0; // current term number list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); t++; _overflow = false; string pre = null; int df = 0; int minID = -1; int maxID = -1; int valId = 0; try { tdoc = reader.TermDocs(); tenum = reader.Terms(new Term(fieldName, "")); if (tenum != null) { do { Term term = tenum.Term; if (term == null || !fieldName.Equals(term.Field)) break; string val = term.Text; if (val != null) { int weight = 0; string[] split = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries); if (split.Length > 1) { val = split[0]; weight = int.Parse(split[split.Length - 1]); } if (pre == null || !val.Equals(pre)) { if (pre != null) { freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); } list.Add(val); df = 0; minID = -1; maxID = -1; valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; t++; } tdoc.Seek(tenum); if (tdoc.Next()) { df++; int docid = tdoc.Doc; if (!loader.Add(docid, valId)) LogOverflow(fieldName); else weightLoader.Add(docid, weight); if (docid < minID) minID = docid; bitset.FastSet(docid); while (tdoc.Next()) { df++; docid = tdoc.Doc; if (!loader.Add(docid, valId)) LogOverflow(fieldName); else weightLoader.Add(docid, weight); bitset.FastSet(docid); } if (docid > maxID) maxID = docid; } pre = val; } } while (tenum.Next()); if (pre != null) { freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); } } } finally { try { if (tdoc != null) { tdoc.Dispose(); } } finally { if (tenum != null) { tenum.Dispose(); } } } list.Seal(); try { _nestedArray.Load(maxdoc + 1, loader); _weightArray.Load(maxdoc + 1, weightLoader); } catch (System.IO.IOException e) { throw e; } catch (Exception e) { throw new RuntimeException("failed to load due to " + e.ToString(), e); } this.valArray = list; this.freqs = freqList.ToArray(); this.minIDs = minIDList.ToArray(); this.maxIDs = maxIDList.ToArray(); int doc = 0; while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true)) { ++doc; } if (doc <= maxdoc) { this.minIDs[0] = doc; doc = maxdoc; while (doc > 0 && !_nestedArray.Contains(doc, 0, true)) { --doc; } if (doc > 0) { this.maxIDs[0] = doc; } } this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality(); }
public override TermDocs TermDocs()
{
    return in_Renamed.TermDocs();
}
public override TermDocs TermDocs(IState state)
{
    EnsureOpen();
    return in_Renamed.TermDocs(state);
}
public virtual void Seek(Term term)
{
    IndexReader reader = ((IndexReader)Enclosing_Instance.fieldToReader[term.Field()]);
    termDocs = reader != null ? reader.TermDocs(term) : null;
}
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory) { string field = string.Intern(fieldName); int maxDoc = reader.MaxDoc; if (orderArray == null) // we want to reuse the memory { orderArray = NewInstance(termCountSize, maxDoc); } else { orderArray.EnsureCapacity(maxDoc); // no need to fill to 0, we are reseting the data anyway } List<int> minIDList = new List<int>(); List<int> maxIDList = new List<int>(); List<int> freqList = new List<int>(); int length = maxDoc + 1; ITermValueList list = listFactory == null ? new TermStringList() : listFactory.CreateTermList(); TermDocs termDocs = reader.TermDocs(); TermEnum termEnum = reader.Terms(new Term(field)); int t = 0; // current term number list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); //int df = 0; t++; try { do { Term term = termEnum.Term; if (term == null || string.CompareOrdinal(term.Field, field) != 0) break; if (t >= orderArray.MaxValue()) { throw new System.IO.IOException("maximum number of value cannot exceed: " + orderArray.MaxValue()); } // Alexey: well, we could get now more than one term per document. Effectively, we could build facet againsts tokenized field /*// we expect that there is at most one term per document if (t >= length) { throw new RuntimeException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields"); }*/ // store term text list.Add(term.Text); termDocs.Seek(termEnum); // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs int minID = -1; int maxID = -1; int df = 0; if (termDocs.Next()) { df++; int docid = termDocs.Doc; orderArray.Add(docid, t); minID = docid; while (termDocs.Next()) { df++; docid = termDocs.Doc; orderArray.Add(docid, t); } maxID = docid; } freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); t++; } while (termEnum.Next()); } finally { termDocs.Dispose(); termEnum.Dispose(); } list.Seal(); this.valArray = list; this.freqs = freqList.ToArray(); this.minIDs = minIDList.ToArray(); this.maxIDs = maxIDList.ToArray(); }
protected internal virtual TermDocs TermDocs(IndexReader reader)
{
    return reader.TermDocs();
}
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; System.Diagnostics.Debug.Assert(CheckDeleteTerm(null)); // Delete by term TermDocs docs = reader.TermDocs(); try { foreach(KeyValuePair<Term,BufferedDeletes.Num> entry in deletesFlushed.terms) { Term term = entry.Key; // LUCENE-2086: we should be iterating a TreeMap, // here, so terms better be in order: System.Diagnostics.Debug.Assert(CheckDeleteTerm(term)); docs.Seek(term); int limit = entry.Value.GetNum(); while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } } finally { docs.Close(); } // Delete by docID foreach(int docID in deletesFlushed.docIDs) { if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); foreach(KeyValuePair<Query,int> entry in new Support.Dictionary<Query,int>(deletesFlushed.queries)) { Query query = entry.Key; int limit = entry.Value; Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader, true, false); if (scorer != null) { while (true) { int doc = scorer.NextDoc(); if (((long) docIDStart) + doc >= limit) break; reader.DeleteDocument(doc); any = true; } } } searcher.Close(); return any; } }
// There are two ways we can determine the max_results // most recent items: // // One is to instantiate Lucene documents for each of // the document IDs in primary_matches. This is a // fairly expensive operation. // // The other is to walk through the list of all // document IDs in descending time order. This is // a less expensive operation, but adds up over time // on large data sets. // // We can walk about 2.5 docs for every Document we // instantiate. So what we'll do, if we have more // matches than available hits, is walk (m * 1.25) // docs to see if we can fill out the top 100 hits. // If not, we'll fall back to creating documents // for all of them. private static ArrayList ScanRecentDocs (IndexReader primary_reader, IndexReader secondary_reader, BetterBitArray primary_matches, Dictionary<int, Hit> hits_by_id, int max_results, ref int total_number_of_matches, HitFilter hit_filter, string index_name) { Stopwatch a = new Stopwatch (); a.Start (); TermDocs docs = primary_reader.TermDocs (); TermEnum enumerator = primary_reader.Terms (new Term ("InvertedTimestamp", String.Empty)); ArrayList results = new ArrayList (max_results); int docs_found = 0; int docs_walked = 0; int hit_filter_removed = 0; int max_docs = (int) (primary_matches.TrueCount * 1.25); Term term; TermDocs secondary_term_docs = null; if (secondary_reader != null) secondary_term_docs = secondary_reader.TermDocs (); do { term = enumerator.Term (); if (term.Field () != "InvertedTimestamp") break; docs.Seek (enumerator); while (docs.Next () && docs_found < max_results && docs_walked < max_docs) { int doc_id = docs.Doc (); if (primary_matches.Get (doc_id)) { Document doc = primary_reader.Document (doc_id); Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs); // If we have a HitFilter, apply it. if (hit_filter != null && ! hit_filter (hit)) { if (Debug) Log.Debug ("Filtered out {0}", hit.Uri); hit_filter_removed ++; continue; } hits_by_id [doc_id] = hit; // Add the result, last modified first results.Add (hit); docs_found++; } docs_walked++; } } while (enumerator.Next () && docs_found < max_results && docs_walked < max_docs); docs.Close (); if (secondary_term_docs != null) secondary_term_docs.Close (); // If we've found all the docs we can return in a subset! // Fantastic, we've probably short circuited a slow search. if (docs_found != max_results) { // Otherwise bad luck! Not all docs found // Start afresh - this time traversing all results results = null; } else { // Adjust total_number_of_matches. We need to do this to avoid scenarios like the following: // max_hits = 100. Matched 100 results. But hit filter removed 30. So 70 results will be returned. // We want to avoid saying "Showing top 70 of 100". Note that since we are not passing // every document in the index through the hit_filter, when we say "Showing top 100 of 1234", the // 1234 could actually be much less. But since max_hits was 100, that will not mislead the user. total_number_of_matches -= hit_filter_removed; } a.Stop (); if (Debug) { Log.Debug (">>> {0}: Walked {1} items, populated an enum with {2} items in {3}", index_name, docs_walked, docs_found, a); if (docs_found == max_results) Log.Debug (">>> {0}: Successfully short circuited timestamp ordering!", index_name); } return results; }
protected internal virtual TermDocs TermDocs(IndexReader reader)
{
    return term == null ? reader.TermDocs(null) : reader.TermDocs();
}
protected internal double[] ComputeDistances(IndexReader reader)
{
    double[] retArray = null;
    var termDocs = reader.TermDocs();
    var termEnum = reader.Terms(new Term(Constants.SpatialShapeFieldName));
    try
    {
        do
        {
            Term term = termEnum.Term();
            if (term == null)
                break;

            Debug.Assert(Constants.SpatialShapeFieldName.Equals(term.Field()));

            Shape termval;
            try
            {
                termval = SpatialIndex.RavenSpatialContext.ReadShape(term.Text()); // read shape
            }
            catch (InvalidShapeException)
            {
                continue;
            }

            var pt = termval as Point;
            if (pt == null)
                continue;

            var distance = SpatialIndex.RavenSpatialContext.GetDistCalc().Distance(pt, originPt);

            if (retArray == null) // late init
                retArray = new double[reader.MaxDoc()];

            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc()] = distance;
            }
        } while (termEnum.Next());
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return retArray ?? new double[reader.MaxDoc()];
}
private OpenBitSet FastBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc());
    bits.Set(0, reader.MaxDoc()); // assume all are valid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        Term currTerm = te.Term();
        while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) // term fieldnames are interned
        {
            if (te.DocFreq() > 1)
            {
                int lastDoc = -1;
                // unset potential duplicates
                TermDocs td = reader.TermDocs(currTerm);
                td.Next();
                if (keepMode == KM_USE_FIRST_OCCURRENCE)
                {
                    td.Next();
                }
                do
                {
                    lastDoc = td.Doc();
                    bits.Clear(lastDoc);
                } while (td.Next());
                if (keepMode == KM_USE_LAST_OCCURRENCE)
                {
                    // restore the last bit
                    bits.Set(lastDoc);
                }
            }
            if (!te.Next())
            {
                break;
            }
            currTerm = te.Term();
        }
    }
    return bits;
}
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; System.Diagnostics.Debug.Assert(CheckDeleteTerm(null)); // Delete by term //System.Collections.IEnumerator iter = new System.Collections.Hashtable(deletesFlushed.terms).GetEnumerator(); System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator(); TermDocs docs = reader.TermDocs(); try { while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; Term term = (Term) entry.Key; // LUCENE-2086: we should be iterating a TreeMap, // here, so terms better be in order: System.Diagnostics.Debug.Assert(CheckDeleteTerm(term)); docs.Seek(term); int limit = ((BufferedDeletes.Num) entry.Value).GetNum(); while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } } finally { docs.Close(); } // Delete by docID iter = deletesFlushed.docIDs.GetEnumerator(); while (iter.MoveNext()) { int docID = ((System.Int32) iter.Current); if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; Query query = (Query) entry.Key; int limit = ((System.Int32) entry.Value); Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader, true, false); if (scorer != null) { while (true) { int doc = scorer.NextDoc(); if (((long) docIDStart) + doc >= limit) break; reader.DeleteDocument(doc); any = true; } } } searcher.Close(); return any; } }
private OpenBitSet CorrectBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc()); // assume all are INvalid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        Term currTerm = te.Term();
        while ((currTerm != null) && (currTerm.Field() == startTerm.Field())) // term fieldnames are interned
        {
            int lastDoc = -1;
            // set non duplicates
            TermDocs td = reader.TermDocs(currTerm);
            if (td.Next())
            {
                if (keepMode == KM_USE_FIRST_OCCURRENCE)
                {
                    bits.Set(td.Doc());
                }
                else
                {
                    do
                    {
                        lastDoc = td.Doc();
                    } while (td.Next());
                    bits.Set(lastDoc);
                }
            }
            if (!te.Next())
            {
                break;
            }
            currTerm = te.Term();
        }
    }
    return bits;
}
protected internal override StringIndex CreateValue(IndexReader reader, Entry entryKey, IState state)
{
    System.String field = StringHelper.Intern(entryKey.field);
    int[] retArray = new int[reader.MaxDoc];
    int[] retArrayOrdered = new int[reader.MaxDoc];
    for (int i = 0; i < retArrayOrdered.Length; i++)
    {
        retArrayOrdered[i] = -1;
    }
    var length = reader.MaxDoc + 1;
    UnmanagedStringArray mterms = new UnmanagedStringArray(length);
    TermDocs termDocs = reader.TermDocs(state);
    SegmentTermEnum termEnum = (SegmentTermEnum)reader.Terms(new Term(field), state);
    int t = 0; // current term number
    int docIndex = 0;

    // an entry for documents that have no terms in this field
    // should a document with no terms be at top or bottom?
    // this puts them at the top - if it is changed, FieldDocSortedHitQueue
    // needs to change as well.
    t++;

    try
    {
        do
        {
            if (termEnum.termBuffer.Field != field || t >= length)
            {
                break;
            }

            // store term text
            mterms.Add(termEnum.termBuffer.TextAsSpan);

            termDocs.Seek(termEnum, state);
            while (termDocs.Next(state))
            {
                var pt = retArray[termDocs.Doc];
                retArray[termDocs.Doc] = t;
                if (pt == 0)
                {
                    retArrayOrdered[docIndex++] = termDocs.Doc;
                }
            }

            t++;
        }
        while (termEnum.Next(state));
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }

    StringIndex value_Renamed = new StringIndex(retArray, retArrayOrdered, mterms);
    return value_Renamed;
}
public virtual void Seek(Term term, IState state)
{
    IndexReader reader = Enclosing_Instance.fieldToReader[term.Field];
    termDocs = reader != null ? reader.TermDocs(term, state) : null;
}
public static RavenJObject[] ReadAllEntriesFromIndex(IndexReader reader)
{
    if (reader.MaxDoc > 512 * 1024)
    {
        throw new InvalidOperationException("Refusing to extract all index entires from an index with " + reader.MaxDoc +
                                            " entries, because of the probable time / memory costs associated with that." +
                                            Environment.NewLine +
                                            "Viewing Index Entries are a debug tool, and should not be used on indexes of this size. You might want to try Luke, instead.");
    }
    var results = new RavenJObject[reader.MaxDoc];
    using (var termDocs = reader.TermDocs())
    using (var termEnum = reader.Terms())
    {
        while (termEnum.Next())
        {
            var term = termEnum.Term;
            if (term == null)
                break;

            var text = term.Text;

            termDocs.Seek(termEnum);
            for (int i = 0; i < termEnum.DocFreq() && termDocs.Next(); i++)
            {
                RavenJObject result = results[termDocs.Doc];
                if (result == null)
                    results[termDocs.Doc] = result = new RavenJObject();
                var propertyName = term.Field;
                if (propertyName.EndsWith("_ConvertToJson") || propertyName.EndsWith("_IsArray"))
                    continue;
                if (result.ContainsKey(propertyName))
                {
                    switch (result[propertyName].Type)
                    {
                        case JTokenType.Array:
                            ((RavenJArray)result[propertyName]).Add(text);
                            break;
                        case JTokenType.String:
                            result[propertyName] = new RavenJArray
                            {
                                result[propertyName],
                                text
                            };
                            break;
                        default:
                            throw new ArgumentException("No idea how to handle " + result[propertyName].Type);
                    }
                }
                else
                {
                    result[propertyName] = text;
                }
            }
        }
    }
    return results;
}
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; // Delete by term IEnumerator<KeyValuePair<object, object>> iter = deletesFlushed.terms.GetEnumerator(); while (iter.MoveNext()) { KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current; Term term = (Term)entry.Key; TermDocs docs = reader.TermDocs(term); if (docs != null) { int limit = ((BufferedDeletes.Num)entry.Value).GetNum(); try { while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } finally { docs.Close(); } } } // Delete by docID IEnumerator<object> iter2 = deletesFlushed.docIDs.GetEnumerator(); while (iter2.MoveNext()) { int docID = (int)iter2.Current; if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); iter = deletesFlushed.queries.GetEnumerator(); while (iter.MoveNext()) { KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current; Query query = (Query)entry.Key; int limit = (int)entry.Value; Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader); while (scorer.Next()) { int docID = scorer.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } searcher.Close(); return any; } }
public override TermDocs TermDocs()
{
    EnsureOpen();
    return in_Renamed.TermDocs();
}
////////////////////////////////////////////////////////////////

static private void ScoreHits (Dictionary<int, Hit> hits_by_id,
                               IndexReader reader,
                               ICollection term_list)
{
    LNS.Similarity similarity;
    similarity = LNS.Similarity.GetDefault ();

    TermDocs term_docs = reader.TermDocs ();
    Hit hit;

    foreach (Term term in term_list) {

        double idf;
        idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ());

        int hit_count;
        hit_count = hits_by_id.Count;

        term_docs.Seek (term);
        while (term_docs.Next () && hit_count > 0) {
            int id;
            id = term_docs.Doc ();

            if (hits_by_id.TryGetValue (id, out hit)) {
                double tf;
                tf = similarity.Tf (term_docs.Freq ());
                hit.Score += tf * idf;
                --hit_count;
            }
        }
    }

    term_docs.Close ();
}
private static ArrayList FindRecentResults (IndexReader primary_reader, IndexReader secondary_reader, BetterBitArray primary_matches, Dictionary<int, Hit> hits_by_id, int max_results, ref int total_number_of_matches, HitFilter hit_filter, string index_name) { Stopwatch b = new Stopwatch (); b.Start (); int count = 0; Document doc; ArrayList all_docs = null; TopScores top_docs = null; TermDocs term_docs = null; if (primary_matches.TrueCount > max_results) top_docs = new TopScores (max_results); else all_docs = new ArrayList (primary_matches.TrueCount); if (secondary_reader != null) term_docs = secondary_reader.TermDocs (); for (int match_index = primary_matches.Count; ; match_index --) { // Walk across the matches backwards, since newer // documents are more likely to be at the end of // the index. match_index = primary_matches.GetPreviousTrueIndex (match_index); if (match_index < 0) break; count++; doc = primary_reader.Document (match_index, fields_timestamp_uri); // Check the timestamp --- if we have already reached our // limit, we might be able to reject it immediately. string timestamp_str; long timestamp_num = 0; timestamp_str = doc.Get ("Timestamp"); if (timestamp_str == null) { Logger.Log.Warn ("No timestamp on {0}!", GetUriFromDocument (doc)); } else { timestamp_num = Int64.Parse (doc.Get ("Timestamp")); if (top_docs != null && ! top_docs.WillAccept (timestamp_num)) continue; } // Get the actual hit now // doc was created with only 2 fields, so first get the complete lucene document for primary document. // Also run our hit_filter now, if we have one. Since we insist of returning max_results // most recent hits, any hits that would be filtered out should happen now and not later. Hit hit = CreateHit (primary_reader.Document (match_index), secondary_reader, term_docs); if (hit_filter != null && ! hit_filter (hit)) { if (Debug) Log.Debug ("Filtered out {0}", hit.Uri); total_number_of_matches --; continue; } hits_by_id [match_index] = hit; // Add the document to the appropriate data structure. // We use the timestamp_num as the score, so high // scores correspond to more-recent timestamps. if (all_docs != null) all_docs.Add (hit); else top_docs.Add (timestamp_num, hit); } if (term_docs != null) term_docs.Close (); b.Stop (); if (Debug) Log.Debug (">>> {0}: Instantiated and scanned {1} documents in {2}", index_name, count, b); if (all_docs != null) { // Sort results before sending all_docs.Sort (); return all_docs; } else { return top_docs.TopScoringObjects; } }