public virtual void TestTerms()
{
    // Every term stored in the index must appear in the matching DocHelper value.
    TermEnum terms = reader.Terms();
    Assert.IsTrue(terms != null);
    while (terms.Next())
    {
        Term term = terms.Term();
        Assert.IsTrue(term != null);
        System.String expected = (System.String)DocHelper.nameValues[term.Field()];
        Assert.IsTrue(expected.IndexOf(term.Text()) != -1);
    }

    // Seeking known indexed terms must yield at least one posting each.
    TermDocs termDocs = reader.TermDocs();
    Assert.IsTrue(termDocs != null);
    termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
    Assert.IsTrue(termDocs.Next());
    termDocs.Seek(new Term(DocHelper.NO_NORMS_KEY, DocHelper.NO_NORMS_TEXT));
    Assert.IsTrue(termDocs.Next());

    // Positions for "field" must start at document 0 with a valid position.
    TermPositions positions = reader.TermPositions();
    positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
    Assert.IsTrue(positions != null);
    Assert.IsTrue(positions.Doc() == 0);
    Assert.IsTrue(positions.NextPosition() >= 0);
}
public override int Advance(int target)
{
    // Jump to the first candidate at or beyond target, then scan forward
    // until we land on a document that is present in the filter's doc set.
    if (!_td.SkipTo(target))
    {
        doc = DocIdSetIterator.NO_MORE_DOCS;
        return doc;
    }

    doc = _td.Doc;
    while (!_docset.Get(doc))
    {
        if (!_td.Next())
        {
            doc = DocIdSetIterator.NO_MORE_DOCS;
            break;
        }
        doc = _td.Doc;
    }
    return doc;
}
/// <summary>
/// Builds the filter bits for a revision range: a document passes when its
/// last revision is >= _revFirst AND its first revision is <= _revLast.
/// </summary>
public override BitArray Bits(IndexReader reader)
{
    // reader.GetVersion could be used to cache
    // Debug.WriteLine(reader.GetVersion()); // could be used to cache
    // if (cached reader == reader && _revFirst ==
    if (_revFirst == All || _revLast == All) // optimization
    {
        // Unbounded range: every document matches.
        return(new BitArray(reader.MaxDoc(), true));
    }
    // Pass 1: set a bit for every document whose last revision >= _revFirst.
    BitArray last_bits = new BitArray(reader.MaxDoc(), false);
    TermEnum t = reader.Terms(new Term(FieldName.RevisionLast, _revFirst.ToString(RevFormat)));
    TermDocs d = reader.TermDocs();
    //if (t.SkipTo((new Term(FieldName.RevisionLast, revision.ToString(RevFormat))))) // extremely slow
    if (t.Term() != null)
    {
        // Walk terms in order until the enumerator leaves the RevisionLast field.
        while (t.Term().Field() == FieldName.RevisionLast)
        {
            d.Seek(t);
            while (d.Next())
            {
                last_bits[d.Doc()] = true;
            }
            if (!t.Next())
            {
                break;
            }
        }
    }
    // optimization, skip if we just using the head revision
    if (_revLast == Head)
    {
        return(last_bits);
    }
    // Pass 2: clear documents whose first revision is > _revLast.
    BitArray first_bits = new BitArray(reader.MaxDoc(), true);
    // NOTE(review): literal "rev_first" here vs the FieldName.RevisionLast
    // constant above — presumably a FieldName.RevisionFirst constant exists;
    // confirm and unify.
    t = reader.Terms(new Term("rev_first", (_revLast + 1).ToString(RevFormat)));
    //if (t.SkipTo((new Term("rev_first", (revision + 1).ToString(RevFormat))))) // extremely slow
    if (t.Term() != null)
    {
        while (t.Term().Field() == "rev_first")
        {
            d.Seek(t);
            while (d.Next())
            {
                first_bits[d.Doc()] = false;
            }
            if (!t.Next())
            {
                break;
            }
        }
    }
    // NOTE(review): t and d are never closed — verify whether the caller owns
    // their lifetime.
    return(last_bits.And(first_bits));
}
public virtual void TestTerms()
{
    // Walk the full term dictionary; each term text must occur inside the
    // corresponding DocHelper value for its field.
    TermEnum terms = _reader.Terms();
    Assert.IsTrue(terms != null);
    while (terms.Next())
    {
        Term term = terms.Term;
        Assert.IsTrue(term != null);
        System.String expected = (System.String)DocHelper.NameValues[term.Field];
        Assert.IsTrue(expected.IndexOf(term.Text) != -1);
    }

    // Known terms must produce at least one posting when sought.
    TermDocs termDocs = _reader.TermDocs();
    Assert.IsTrue(termDocs != null);
    termDocs.Seek(new Term(DocHelper.TextField1Key, "field"));
    Assert.IsTrue(termDocs.Next());
    termDocs.Seek(new Term(DocHelper.NoNormsKey, DocHelper.NoNormsText));
    Assert.IsTrue(termDocs.Next());

    // Positions for "field" must start at document 0 with a valid position.
    TermPositions positions = _reader.TermPositions();
    positions.Seek(new Term(DocHelper.TextField1Key, "field"));
    Assert.IsTrue(positions != null);
    Assert.IsTrue(positions.Doc == 0);
    Assert.IsTrue(positions.NextPosition() >= 0);
}
/// <summary>
/// FieldCache entry builder: returns a string[] of size MaxDoc mapping each
/// document id to the term value it holds in <c>entryKey.field</c>.
/// </summary>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    // Intern the field name so the loop below can bail out of the term walk
    // with a cheap reference comparison.
    System.String field = StringHelper.Intern(entryKey.field);
    System.String[] retArray = new System.String[reader.MaxDoc];
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term;
            // Reference comparison is intentional: both strings are interned,
            // so '!=' on the object casts detects walking past this field.
            if (term == null || (System.Object)term.Field != (System.Object)field)
            {
                break;
            }
            System.String termval = term.Text;
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                // For multi-valued documents, later terms overwrite earlier ones.
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next());
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    return(retArray);
}
public virtual void TestTerms()
{
    try
    {
        // Each term in the dictionary must occur inside the DocHelper value
        // recorded for its field.
        TermEnum terms = reader.Terms();
        Assert.IsTrue(terms != null);
        while (terms.Next())
        {
            Term term = terms.Term();
            Assert.IsTrue(term != null);
            System.String expected = (System.String)DocHelper.nameValues[term.Field()];
            Assert.IsTrue(expected.IndexOf(term.Text()) != -1);
        }

        // A known term must have at least one posting.
        TermDocs termDocs = reader.TermDocs();
        Assert.IsTrue(termDocs != null);
        termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
        Assert.IsTrue(termDocs.Next());

        // Positions must start at document 0 with a valid position.
        TermPositions positions = reader.TermPositions();
        positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
        Assert.IsTrue(positions != null);
        Assert.IsTrue(positions.Doc() == 0);
        Assert.IsTrue(positions.NextPosition() >= 0);
    }
    catch (System.IO.IOException e)
    {
        // Legacy style: log the failure and fail the test explicitly.
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
/// <summary>Deletes every document in which the given <code>term</code> is
/// indexed. Useful when a document field holds a unique ID string: construct
/// a term for that field and ID and pass it here. See
/// {@link #DeleteDocument(int)} for when the deletion becomes effective.
/// </summary>
/// <param name="term">the term identifying the documents to delete</param>
/// <returns>the number of documents deleted</returns>
/// <throws> StaleReaderException if the index has changed </throws>
/// <summary> since this reader was opened
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> LockObtainFailedException if another writer </throws>
/// <summary> has this index open (<code>write.lock</code> could not
/// be obtained)
/// </summary>
/// <throws> IOException if there is a low-level IO error </throws>
public int DeleteDocuments(Term term)
{
    EnsureOpen();

    TermDocs docs = TermDocs(term);
    if (docs == null)
    {
        return 0;
    }

    int deleted = 0;
    try
    {
        // Delete every posting for the term; the enumerator is always closed.
        while (docs.Next())
        {
            DeleteDocument(docs.Doc());
            deleted++;
        }
    }
    finally
    {
        docs.Close();
    }
    return deleted;
}
/// <summary>
/// Micro-benchmark: builds a throwaway RAM index of <paramref name="ndocs"/>
/// docs and times <paramref name="iter"/> full TermDocs iterations over the
/// "foo":"val" term. Returns the accumulated doc-id sum (prevents the loop
/// from being optimized away).
/// </summary>
public virtual int doTest(int iter, int ndocs, int maxTF, float percentDocs)
{
    Directory dir = new RAMDirectory();

    long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
    AddDocs(dir, ndocs, "foo", "val", maxTF, percentDocs);
    long end = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
    System.Console.Out.WriteLine("milliseconds for creation of " + ndocs + " docs = " + (end - start));

    IndexReader reader = IndexReader.Open(dir);
    TermEnum tenum = reader.Terms(new Term("foo", "val"));
    TermDocs tdocs = reader.TermDocs();
    int ret = 0;
    try
    {
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        for (int i = 0; i < iter; i++)
        {
            tdocs.Seek(tenum);
            while (tdocs.Next())
            {
                ret += tdocs.Doc();
            }
        }
        end = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        System.Console.Out.WriteLine("milliseconds for " + iter + " TermDocs iteration: " + (end - start));
    }
    finally
    {
        // FIX: the original leaked the TermDocs, TermEnum and IndexReader.
        tdocs.Close();
        tenum.Close();
        reader.Close();
    }
    return ret;
}
/// <summary>
/// Counts the postings for <paramref name="term"/> and asserts the count
/// equals <paramref name="expected"/>; <paramref name="msg"/> prefixes the
/// failure messages.
/// </summary>
private void AssertTermDocsCount(System.String msg, IndexReader reader, Term term, int expected)
{
    TermDocs tdocs = null;
    try
    {
        tdocs = reader.TermDocs(term);
        Assert.IsNotNull(tdocs, msg + ", null TermDocs");
        int count = 0;
        while (tdocs.Next())
        {
            count++;
        }
        Assert.AreEqual(expected, count, msg + ", count mismatch");
    }
    finally
    {
        if (tdocs != null)
        {
            // FIX: dropped the unused exception variable (CS0168 warning).
            try
            {
                tdocs.Close();
            }
            catch (System.Exception)
            {
                // Best-effort close; a failure here must not mask the assert result.
            }
        }
    }
}
/// <summary>
/// One-time indexer setup: hashes the descriptor to a version token, checks
/// its document frequency, and enumerates all existing documents.
/// </summary>
/// <returns>Failure when already set up, otherwise Okay.</returns>
public IndexerSetupResult Setup(IXDescriptor descriptor)
{
    if (setup)
    {
        return IndexerSetupResult.Failure;
    }

    hashFactory = new System.Security.Cryptography.SHA256Managed();
    string version = Convert.ToBase64String(
        hashFactory.ComputeHash(System.Text.UTF8Encoding.UTF8.GetBytes(descriptor.ToString())));
    // NOTE(review): df and docs are computed but never used below — presumably
    // later setup steps consume them; confirm before removing.
    int df = indexSearcher.DocFreq(new Term(indexerDocumentDescriptorVersion, version));

    // set up searcher
    TermDocs term = indexSearcher.IndexReader.TermDocs();
    List<Document> docs = new List<Document>();
    try
    {
        while (term.Next())
        {
            docs.Add(indexSearcher.Doc(term.Doc));
        }
    }
    finally
    {
        // FIX: the original leaked the TermDocs enumerator.
        term.Dispose();
    }
    return IndexerSetupResult.Okay;
}
private static Hit CreateHit(Document primary_doc, IndexReader secondary_reader, TermDocs term_docs, FieldSelector fields)
{
    Hit hit = DocumentToHit(primary_doc);

    // Without a secondary index the primary document alone forms the hit.
    if (secondary_reader == null)
    {
        return hit;
    }

    // Look up the secondary document by the stringified URI, exactly as it
    // was stored in the index, and move to the first (and only) match.
    term_docs.Seek(new Term("Uri", primary_doc.Get("Uri")));
    term_docs.Next();

    Document secondary_doc =
        (fields == null)
            ? secondary_reader.Document(term_docs.Doc())
            : secondary_reader.Document(term_docs.Doc(), fields);

    // Merge the secondary index's properties into the hit.
    AddPropertiesToHit(hit, secondary_doc, false);
    return hit;
}
/// <summary>
/// FieldCache entry builder: parses every term of the field as a float and
/// returns a float[] of size MaxDoc mapping each document id to its value.
/// </summary>
protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
{
    Entry entry = entryKey;
    System.String field = entry.field;
    FloatParser parser = (FloatParser)entry.custom;
    if (parser == null)
    {
        // No custom parser: try the plain-text float parser first, falling
        // back to the NumericUtils encoding on a format error.
        try
        {
            return(wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER));
        }
        catch (System.FormatException)
        {
            return(wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER));
        }
    }
    float[] retArray = null;
    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field));
    try
    {
        do
        {
            Term term = termEnum.Term;
            // Reference comparison on the field name — presumably both sides
            // are interned (Lucene interns field names); confirm the caller
            // interns 'entry.field'.
            if (term == null || (System.Object)term.Field != (System.Object)field)
            {
                break;
            }
            float termval = parser.ParseFloat(term.Text);
            if (retArray == null)
            {
                // late init
                retArray = new float[reader.MaxDoc];
            }
            termDocs.Seek(termEnum);
            while (termDocs.Next())
            {
                retArray[termDocs.Doc] = termval;
            }
        }
        while (termEnum.Next());
    }
    catch (StopFillCacheException)
    {
        // The parser signalled that remaining terms should be skipped.
    }
    finally
    {
        termDocs.Close();
        termEnum.Close();
    }
    if (retArray == null)
    {
        // no values
        retArray = new float[reader.MaxDoc];
    }
    return(retArray);
}
/// <summary>
/// Merges reader1 and reader2 into a new segment and verifies the merged
/// segment: document count, stored-field counts, postings, indexed field
/// names, and the term-vector contents of the second text field.
/// </summary>
public virtual void TestMerge()
{
    //System.out.println("----------------TestMerge------------------");
    SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment, false);
    merger.Add(reader1);
    merger.Add(reader2);
    try
    {
        int docsMerged = merger.Merge();
        merger.CloseReaders();
        Assert.IsTrue(docsMerged == 2);
        //Should be able to open a new SegmentReader against the new directory
        SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
        Assert.IsTrue(mergedReader != null);
        Assert.IsTrue(mergedReader.NumDocs() == 2);
        Document newDoc1 = mergedReader.Document(0);
        Assert.IsTrue(newDoc1 != null);
        //There are 2 unstored fields on the document
        Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - 2);
        Document newDoc2 = mergedReader.Document(1);
        Assert.IsTrue(newDoc2 != null);
        Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - 2);
        // The merged postings for a known term must be seekable.
        TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field"));
        Assert.IsTrue(termDocs != null);
        Assert.IsTrue(termDocs.Next() == true);
        System.Collections.ICollection stored = mergedReader.GetIndexedFieldNames(true);
        Assert.IsTrue(stored != null);
        //System.out.println("stored size: " + stored.size());
        Assert.IsTrue(stored.Count == 2);
        // Term vector of doc 0 must carry 3 terms with the expected frequencies.
        TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
        Assert.IsTrue(vector != null);
        System.String[] terms = vector.GetTerms();
        Assert.IsTrue(terms != null);
        //System.out.println("Terms size: " + terms.length);
        Assert.IsTrue(terms.Length == 3);
        int[] freqs = vector.GetTermFrequencies();
        Assert.IsTrue(freqs != null);
        //System.out.println("Freqs size: " + freqs.length);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            int freq = freqs[i];
            //System.out.println("Term: " + term + " Freq: " + freq);
            Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
            Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
        }
    }
    catch (System.IO.IOException e)
    {
        // Legacy style: log and fail the test explicitly.
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
    //System.out.println("---------------------end TestMerge-------------------");
}
/// <summary>
/// Merges reader1 and reader2 into a new segment and verifies the merged
/// segment: document count, stored-field counts, postings, fields indexed
/// with term vectors, term-vector contents, and norms.
/// </summary>
public virtual void TestMerge()
{
    SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment);
    merger.Add(reader1);
    merger.Add(reader2);
    int docsMerged = merger.Merge();
    merger.CloseReaders();
    Assert.IsTrue(docsMerged == 2);
    //Should be able to open a new SegmentReader against the new directory
    SegmentReader mergedReader = SegmentReader.Get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true));
    Assert.IsTrue(mergedReader != null);
    Assert.IsTrue(mergedReader.NumDocs() == 2);
    Document newDoc1 = mergedReader.Document(0);
    Assert.IsTrue(newDoc1 != null);
    //There are 2 unstored fields on the document
    Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count);
    Document newDoc2 = mergedReader.Document(1);
    Assert.IsTrue(newDoc2 != null);
    Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count);
    // The merged postings for a known term must be seekable.
    TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "field"));
    Assert.IsTrue(termDocs != null);
    Assert.IsTrue(termDocs.Next() == true);
    System.Collections.Generic.ICollection <string> stored = mergedReader.GetFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
    Assert.IsTrue(stored != null);
    //System.out.println("stored size: " + stored.size());
    Assert.IsTrue(stored.Count == 4, "We do not have 4 fields that were indexed with term vector");
    // Term vector of doc 0 must carry 3 terms with the expected frequencies,
    // and must include positions.
    TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
    Assert.IsTrue(vector != null);
    System.String[] terms = vector.GetTerms();
    Assert.IsTrue(terms != null);
    //System.out.println("Terms size: " + terms.length);
    Assert.IsTrue(terms.Length == 3);
    int[] freqs = vector.GetTermFrequencies();
    Assert.IsTrue(freqs != null);
    //System.out.println("Freqs size: " + freqs.length);
    Assert.IsTrue(vector is TermPositionVector == true);
    for (int i = 0; i < terms.Length; i++)
    {
        System.String term = terms[i];
        int freq = freqs[i];
        //System.out.println("Term: " + term + " Freq: " + freq);
        Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
        Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
    }
    TestSegmentReader.CheckNorms(mergedReader);
}
public virtual bool Next()
{
    // Delegate to the wrapped TermDocs; an absent enumerator yields no docs.
    return termDocs != null && termDocs.Next();
}
public virtual bool Next(IState state)
{
    // Delegate to the wrapped TermDocs; an absent enumerator yields no docs.
    return termDocs != null && termDocs.Next(state);
}
/// <summary>
/// Fast duplicate-filter path: starts with all bits set, then clears the
/// bits of duplicate occurrences, keeping either the first or the last
/// occurrence of each key term depending on keepMode.
/// </summary>
private OpenBitSet FastBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc);
    bits.Set(0, reader.MaxDoc); //assume all are valid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        Term currTerm = te.Term;
        while ((currTerm != null) && (currTerm.Field == startTerm.Field)) //term fieldnames are interned
        {
            // Only terms appearing in more than one document can have duplicates.
            if (te.DocFreq() > 1)
            {
                int lastDoc = -1;
                //unset potential duplicates
                TermDocs td = reader.TermDocs(currTerm);
                td.Next();
                if (keepMode == KM_USE_FIRST_OCCURRENCE)
                {
                    // Step past the first occurrence so its bit survives.
                    td.Next();
                }
                do
                {
                    lastDoc = td.Doc;
                    bits.Clear(lastDoc);
                }
                while (td.Next());
                if (keepMode == KM_USE_LAST_OCCURRENCE)
                {
                    //restore the last bit
                    bits.Set(lastDoc);
                }
                // NOTE(review): the return values of the two leading td.Next()
                // calls are ignored, and td is never closed; with deletions
                // DocFreq can exceed live postings, so td.Doc could be read
                // without a successful Next() — confirm this cannot occur here.
            }
            if (!te.Next())
            {
                break;
            }
            currTerm = te.Term;
        }
    }
    return(bits);
}
/// <summary>
/// Reads the stored value of a named property: seeks the property's id term
/// and returns the ValueField of its (single) document, or null when the
/// property is not present in the index.
/// </summary>
static string GetProperty(IndexReader reader, string property)
{
    TermDocs td = reader.TermDocs(GetPropertyId(property));
    try
    {
        if (!td.Next())
        {
            return null; // property not present
        }
        return reader.Document(td.Doc()).Get(ValueField);
    }
    finally
    {
        // FIX: the original leaked the TermDocs on both return paths.
        td.Close();
    }
}
/// <summary>
/// Exercises a FilterIndexReader subclass (TestReader): indexes three docs,
/// then verifies the filtered term enum (only terms containing 'e'), the
/// filtered positions (only odd doc ids for "one"), and plain TermDocs
/// iteration over all documents.
/// </summary>
public virtual void TestFilterIndexReader_Renamed()
{
    RAMDirectory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document d1 = new Document();
    d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);
    Document d2 = new Document();
    d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d2);
    Document d3 = new Document();
    d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d3);
    writer.Close();
    // Wrap the real reader in the filtering TestReader under test.
    IndexReader reader = new TestReader(IndexReader.Open(directory));
    Assert.IsTrue(reader.IsOptimized());
    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
    }
    terms.Close();
    // NOTE(review): 'positions' and 'td' rely on reader.Close() for cleanup;
    // positions is never closed explicitly.
    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }
    int NUM_DOCS = 3;
    // A null term means "iterate all documents".
    TermDocs td = reader.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();
    reader.Close();
    directory.Close();
}
private static List <Document> GetDocumentsFromTermDocs(TermDocs termDocs, IndexReaderFrame readerFrame)
{
    // Materialize every matching document, then order the list by version.
    var result = new List <Document>();
    while (termDocs.Next())
    {
        result.Add(readerFrame.IndexReader.Document(termDocs.Doc()));
    }
    result.Sort(new DocumentVersionComparer());
    return result;
}
/// <summary>
/// Filter bits: marks every document whose "score" field is exactly "5".
/// </summary>
public override BitArray Bits(IndexReader reader)
{
    BitArray bitArray = new BitArray(reader.MaxDoc());
    TermDocs termDocs = reader.TermDocs(new Term("score", "5"));
    try
    {
        while (termDocs.Next())
        {
            bitArray.Set(termDocs.Doc(), true);
        }
    }
    finally
    {
        // FIX: the original leaked the TermDocs enumerator.
        termDocs.Close();
    }
    return bitArray;
}
/// <summary>
/// DocIdSet variant of the score filter: marks every document whose "score"
/// field is exactly "5".
/// </summary>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    BitArray bitArray = new BitArray(reader.MaxDoc);
    TermDocs termDocs = reader.TermDocs(new Term("score", "5"));
    try
    {
        while (termDocs.Next())
        {
            bitArray.Set(termDocs.Doc, true);
        }
    }
    finally
    {
        // FIX: the original leaked the TermDocs enumerator.
        termDocs.Dispose();
    }
    return new DocIdBitSet(bitArray);
}
public override int NextDoc()
{
    // Advance the wrapped TermDocs; once exhausted, dispose it and latch the
    // sentinel so callers keep seeing NO_MORE_DOCS.
    if (!_td.Next())
    {
        _td.Dispose();
        _doc = DocIdSetIterator.NO_MORE_DOCS;
        return _doc;
    }
    _doc = _td.Doc;
    return _doc;
}
/// <summary>
/// Opens a fresh reader over <paramref name="dir"/> and asserts that the
/// given term has exactly <paramref name="numDocs"/> postings.
/// </summary>
private void VerifyTermDocs(Directory dir, Term term, int numDocs)
{
    IndexReader reader = IndexReader.Open(dir);
    try
    {
        TermDocs termDocs = reader.TermDocs(term);
        try
        {
            int count = 0;
            while (termDocs.Next())
            {
                count++;
            }
            Assert.AreEqual(numDocs, count);
        }
        finally
        {
            // FIX: the original never closed the TermDocs.
            termDocs.Close();
        }
    }
    finally
    {
        // FIX: the original leaked the reader when the assertion failed.
        reader.Close();
    }
}
/// <summary>Returns the number of documents containing term <paramref name="t"/>.</summary>
public static int Count(Term t, IndexReader r)
{
    int count = 0;
    TermDocs td = r.TermDocs(t);
    try
    {
        while (td.Next())
        {
            td.Doc();
            count++;
        }
    }
    finally
    {
        // FIX: close the enumerator even when Next() throws.
        td.Close();
    }
    return count;
}
/// <summary>Returns the number of documents containing term <paramref name="t"/>.</summary>
public static int Count(Term t, IndexReader r)
{
    int count = 0;
    TermDocs td = r.TermDocs(t, null);
    try
    {
        while (td.Next(null))
        {
            // FIX: replaced the unused local with a discard.
            _ = td.Doc;
            count++;
        }
    }
    finally
    {
        // FIX: close the enumerator even when Next() throws.
        td.Close();
    }
    return count;
}
/// <summary>
/// Correct duplicate-filter path: starts with all bits clear and sets exactly
/// one bit per key term — the first or the last occurrence, per keepMode.
/// </summary>
private OpenBitSet CorrectBits(IndexReader reader)
{
    OpenBitSet bits = new OpenBitSet(reader.MaxDoc); //assume all are INvalid
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.Terms(startTerm);
    if (te != null)
    {
        Term currTerm = te.Term;
        while ((currTerm != null) && (currTerm.Field == startTerm.Field)) //term fieldnames are interned
        {
            int lastDoc = -1;
            //set non duplicates
            TermDocs td = reader.TermDocs(currTerm);
            if (td.Next())
            {
                if (keepMode == KM_USE_FIRST_OCCURRENCE)
                {
                    bits.Set(td.Doc);
                }
                else
                {
                    // Walk to the final posting and keep only that one.
                    do
                    {
                        lastDoc = td.Doc;
                    }
                    while (td.Next());
                    bits.Set(lastDoc);
                }
            }
            if (!te.Next())
            {
                break;
            }
            currTerm = te.Term;
        }
    }
    // NOTE(review): te and the per-term TermDocs are never closed — confirm
    // the reader owns their lifetime.
    return(bits);
}
/// <summary>
/// Tenant filter: marks every document whose "TenantId" field equals the
/// configured tenant id.
/// </summary>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    // FIX: size by MaxDoc, not NumDocs — document ids range up to MaxDoc-1,
    // and NumDocs is smaller whenever the index contains deletions.
    OpenBitSet bitSet = new OpenBitSet(reader.MaxDoc);
    TermDocs termDocs = reader.TermDocs(new Term("TenantId", _tenantId));
    try
    {
        while (termDocs.Next())
        {
            if (termDocs.Freq > 0)
            {
                bitSet.Set(termDocs.Doc);
            }
        }
    }
    finally
    {
        // FIX: the original leaked the TermDocs enumerator.
        termDocs.Dispose();
    }
    return bitSet;
}
/// <summary>
/// Verifies that a MultiReader's TermDocs can be seeked with a TermEnum that
/// came from a DIFFERENT MultiReader (different sub-reader composition) over
/// SqlServerDirectory-backed indexes, without crashing or returning nothing.
/// </summary>
public virtual void TestMultiTermDocs()
{
    SqlServerDirectory.ProvisionDatabase(Connection, "test1", true);
    SqlServerDirectory.ProvisionDatabase(Connection, "test2", true);
    SqlServerDirectory.ProvisionDatabase(Connection, "test3", true);
    var ramDir1 = new SqlServerDirectory(Connection, new Options()
    {
        SchemaName = "test1"
    });
    AddDoc(ramDir1, "test foo", true);
    var ramDir2 = new SqlServerDirectory(Connection, new Options()
    {
        SchemaName = "test2"
    });
    AddDoc(ramDir2, "test blah", true);
    var ramDir3 = new SqlServerDirectory(Connection, new Options()
    {
        SchemaName = "test3"
    });
    AddDoc(ramDir3, "test wow", true);
    // Two MultiReaders with overlapping but different sub-reader sets.
    IndexReader[] readers1 = new[] { IndexReader.Open(ramDir1, false), IndexReader.Open(ramDir3, false) };
    IndexReader[] readers2 = new[] { IndexReader.Open(ramDir1, false), IndexReader.Open(ramDir2, false), IndexReader.Open(ramDir3, false) };
    MultiReader mr2 = new MultiReader(readers1);
    MultiReader mr3 = new MultiReader(readers2);
    // test mixing up TermDocs and TermEnums from different readers.
    TermDocs td2 = mr2.TermDocs();
    TermEnum te3 = mr3.Terms(new Term("body", "wow"));
    td2.Seek(te3);
    int ret = 0;
    // This should blow up if we forget to check that the TermEnum is from the same
    // reader as the TermDocs.
    while (td2.Next())
    {
        ret += td2.Doc;
    }
    td2.Close();
    te3.Close();
    // really a dummy assert to ensure that we got some docs and to ensure that
    // nothing is optimized out.
    Assert.IsTrue(ret > 0);
    // NOTE(review): the IndexReaders and MultiReaders are never closed —
    // confirm teardown handles them.
}
/// <summary>
/// Deletes a number of documents that conform to the specified Term-s
/// </summary>
/// <param name="terms">Term-s to be deleted</param>
/// <returns>A number of documents deleted</returns>
public int OptimizedDeleteDocuments(Term[] terms)
{
    int n = 0;
    // NOTE(review): locks on 'this' — kept because sibling members of this
    // reader presumably synchronize on the same instance; verify before
    // switching to a private lock object.
    lock (this)
    {
        // Only the directory owner needs to take the index write lock.
        if (directoryOwner)
        {
            AquireWriteLock();
        }
        foreach (Term term in terms)
        {
            TermDocs docs = TermDocs(term);
            if (docs == null)
            {
                continue;
            }
            try
            {
                // Delete every posting of this term; mark the reader dirty.
                while (docs.Next())
                {
                    DoDelete(docs.Doc());
                    hasChanges = true;
                    n++;
                }
            }
            finally
            {
                docs.Close();
            }
        }
        // Release the lock ASAP if there are no changes
        if (!hasChanges && writeLock != null)
        {
            writeLock.Release();
            writeLock = null;
        }
    }
    return(n);
}
/// <summary>
/// Click handler: positions the shared termDocs enumerator on the currently
/// selected term and displays its first matching document.
/// </summary>
private void buttonShowFirstDoc_Click(object sender, System.EventArgs e)
{
    if (term == null) return;
    if (_luke.IndexReader == null)
    {
        _luke.ShowStatus(_luke.resources.GetString("NoIndex"));
        return;
    }
    try
    {
        termDocs = _luke.IndexReader.TermDocs(term);
        // NOTE(review): the Next() return value is ignored — if the term has
        // no postings, _ShowTermDoc runs on an unpositioned enumerator;
        // confirm _ShowTermDoc tolerates that.
        termDocs.Next();
        labelDocNum.Text = "1";
        _ShowTermDoc(termDocs);
    }
    catch (Exception exc)
    {
        // Surface any failure in the status bar rather than crashing the UI.
        _luke.ShowStatus(exc.Message);
    }
}
/// <summary>
/// Builds a Hit from the primary document; when a secondary reader is given,
/// looks up the matching secondary document by URI and merges its properties
/// into the hit.
/// </summary>
private static Hit CreateHit ( Document primary_doc, IndexReader secondary_reader, TermDocs term_docs, FieldSelector fields)
{
    Hit hit = DocumentToHit (primary_doc);

    // Without a secondary index, the primary document alone forms the hit.
    if (secondary_reader == null)
        return hit;

    // Get the stringified version of the URI
    // exactly as it comes out of the index.
    Term term = new Term ("Uri", primary_doc.Get ("Uri"));
    term_docs.Seek (term);

    // Move to the first (and only) matching term doc
    // NOTE(review): the Next() return value is ignored — if no document
    // matches the URI, Doc() below reads an unpositioned enumerator; confirm
    // a match is guaranteed by the caller.
    term_docs.Next ();
    Document secondary_doc =
        (fields == null)
            ? secondary_reader.Document (term_docs.Doc ())
            : secondary_reader.Document (term_docs.Doc (), fields);

    // If we are using the secondary index, now we need to
    // merge the properties from the secondary index
    AddPropertiesToHit (hit, secondary_doc, false);

    return hit;
}