public Document(int n) : Lucene.Net.Documents.Document

| Name | Type | Description |
| --- | --- | --- |
| n | int | |
| return | Lucene.Net.Documents.Document | |
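Before the collected examples, here is a minimal usage sketch of the typical call pattern: open a reader, skip deleted documents, and read stored field values from the returned Document. The index path and the "title" field name are illustrative assumptions, and the sketch assumes the Lucene.NET 3.x-style members (IndexReader.Open, MaxDoc as a property, IsDeleted); member names vary slightly across versions, as the examples below show.

using System;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

// Minimal sketch (assumed Lucene.NET 3.x-style API; path and field name are illustrative).
Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\my-index"));
using (IndexReader reader = IndexReader.Open(dir, true)) // true = open read-only
{
    for (int n = 0; n < reader.MaxDoc; n++)
    {
        if (reader.IsDeleted(n))
        {
            continue; // Document(n) is only meaningful for live documents
        }
        Document doc = reader.Document(n); // loads the stored fields of document n
        string title = doc.Get("title");   // null if "title" was not stored for this doc
        Console.WriteLine("{0}: {1}", n, title);
    }
}
dir.Dispose();

Note that Document(n) returns only fields that were stored at index time; indexed-but-unstored fields are not present on the returned Document, a point several of the tests below assert explicitly.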
public virtual void TestCompressionTools() { IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES); IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES); Document doc = new Document(); doc.Add(binaryFldCompressed); doc.Add(stringFldCompressed); /* add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null); writer.AddDocument(doc, null); writer.Close(); /* open a reader and fetch the document */ IndexReader reader = IndexReader.Open((Directory)dir, false, null); Document docFromReader = reader.Document(0, null); Assert.IsTrue(docFromReader != null); /* fetch the binary compressed field and compare it's content with the original one */ System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null)))); Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed)); Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed)); reader.Close(); dir.Close(); }
public override IComparable Generate(IndexReader reader, int doc) { var ravenDoc = reader.Document(doc); var payingTagField = ravenDoc.GetField("PayingTag_" + CustomTagId); var queriedPayingTag = payingTagField != null && Boolean.Parse(payingTagField.StringValue); var tagValue = Int32.Parse(ravenDoc.GetField("TagId").StringValue); var pointsValue = Int32.Parse(ravenDoc.GetField("Points").StringValue); CustomerDocumentOrderWithRandomEffect.OrderCategory cat; if (tagValue == CustomTagId && queriedPayingTag ) { cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.TagAndPaying; } else if (queriedPayingTag) { cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.OnlyPaying; } else if (tagValue == CustomTagId ) { cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.OnlyTag; } else { cat = CustomerDocumentOrderWithRandomEffect.OrderCategory.NoneOfTheAbove; } return new CustomerDocumentOrderWithRandomEffect() { Category = cat, Points = pointsValue }; }
private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader, IState state) { int docCount = 0; int maxDoc = reader.MaxDoc; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc;) { if (reader.IsDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) { break; } if (reader.IsDeleted(j)) { j++; break; } }while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs, state); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs, state); docCount += numDocs; checkAbort.Work(300 * numDocs, state); } } else { for (int j = 0; j < maxDoc; j++) { if (reader.IsDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(j, state); fieldsWriter.AddDocument(doc, state); docCount++; checkAbort.Work(300, state); } } return(docCount); }
public virtual void TestSkipToFirsttimeMiss() { DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f); dq.Add(Tq("id", "d1")); dq.Add(Tq("dek", "DOES_NOT_EXIST")); QueryUtils.Check(dq, s); Weight dw = dq.Weight(s, null); Scorer ds = dw.Scorer(r, true, false, null); bool skipOk = ds.Advance(3, null) != DocIdSetIterator.NO_MORE_DOCS; if (skipOk) { Assert.Fail("firsttime skipTo found a match? ... " + r.Document(ds.DocID(), null).Get("id", null)); } }
public virtual void DoTestDocument() { sis.Read(dir); IndexReader reader = OpenReader(); Assert.IsTrue(reader != null); Document newDoc1 = reader.Document(0); Assert.IsTrue(newDoc1 != null); Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count); Document newDoc2 = reader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count); TermFreqVector vector = reader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(vector != null); TestSegmentReader.CheckNorms(reader); }
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass public virtual void TestIndexedBit() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, ClassEnvRule.similarity, ClassEnvRule.timeZone); Document doc = new Document(); FieldType onlyStored = new FieldType(); onlyStored.IsStored = true; doc.Add(new Field("field", "value", onlyStored)); doc.Add(new StringField("field2", "value", Field.Store.YES)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); Assert.IsFalse(r.Document(0).GetField("field").FieldType.IsIndexed); Assert.IsTrue(r.Document(0).GetField("field2").FieldType.IsIndexed); r.Dispose(); dir.Dispose(); }
private void DoTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader)
{
    int hitCount = hits.Length;
    Assert.AreEqual(expectedCount, hitCount, "wrong number of hits");
    for (int i = 0; i < hitCount; i++)
    {
        reader.Document(hits[i].Doc);
        reader.GetTermVectors(hits[i].Doc);
    }
}
private void CheckContents(IndexReader ir, string indexname)
{
    IBits liveDocs = MultiFields.GetLiveDocs(ir);
    for (int i = 0; i < ir.MaxDoc; i++)
    {
        if (liveDocs == null || liveDocs.Get(i))
        {
            assertEquals(indexname, ir.Document(i).Get("indexname"));
        }
    }
}
public virtual void Test() { Assert.IsTrue(Dir != null); Assert.IsTrue(FieldInfos != null); IndexReader reader = DirectoryReader.Open(Dir); Document doc = reader.Document(0); Assert.IsTrue(doc != null); Assert.IsTrue(doc.GetField(DocHelper.TEXT_FIELD_1_KEY) != null); Field field = (Field)doc.GetField(DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(field != null); Assert.IsTrue(field.FieldType.StoreTermVectors); Assert.IsFalse(field.FieldType.OmitNorms); Assert.IsTrue(field.FieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); field = (Field)doc.GetField(DocHelper.TEXT_FIELD_3_KEY); Assert.IsTrue(field != null); Assert.IsFalse(field.FieldType.StoreTermVectors); Assert.IsTrue(field.FieldType.OmitNorms); Assert.IsTrue(field.FieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); field = (Field)doc.GetField(DocHelper.NO_TF_KEY); Assert.IsTrue(field != null); Assert.IsFalse(field.FieldType.StoreTermVectors); Assert.IsFalse(field.FieldType.OmitNorms); Assert.IsTrue(field.FieldType.IndexOptions == FieldInfo.IndexOptions.DOCS_ONLY); DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY); reader.Document(0, visitor); IList <IndexableField> fields = visitor.Document.Fields; Assert.AreEqual(1, fields.Count); Assert.AreEqual(DocHelper.TEXT_FIELD_3_KEY, fields[0].Name); reader.Dispose(); }
internal virtual void loadDoc(IndexReader ir)
{
    // beware of deleted docs in the future
    Document doc = ir.Document(rand.Next(ir.MaxDoc), new AnonymousClassFieldSelector(this), null);
    var fields = doc.GetFields();
    for (int i = 0; i < fields.Count; i++)
    {
        IFieldable f = fields[i];
        Enclosing_Instance.ValidateField(f);
    }
}
public virtual void TestTermVectorCorruption() { // LUCENENET specific - log the current locking strategy used and HResult values // for assistance troubleshooting problems on Linux/macOS LogNativeFSFactoryDebugInfo(); Directory dir = NewDirectory(); for (int iter = 0; iter < 2; iter++) { IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy())); Document document = new Document(); FieldType customType = new FieldType(); customType.IsStored = true; Field storedField = NewField("stored", "stored", customType); document.Add(storedField); writer.AddDocument(document); writer.AddDocument(document); document = new Document(); document.Add(storedField); FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED); customType2.StoreTermVectors = true; customType2.StoreTermVectorPositions = true; customType2.StoreTermVectorOffsets = true; Field termVectorField = NewField("termVector", "termVector", customType2); document.Add(termVectorField); writer.AddDocument(document); writer.ForceMerge(1); writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); for (int i = 0; i < reader.NumDocs; i++) { reader.Document(i); reader.GetTermVectors(i); } reader.Dispose(); writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy())); Directory[] indexDirs = new Directory[] { new MockDirectoryWrapper(Random, new RAMDirectory(dir, NewIOContext(Random))) }; writer.AddIndexes(indexDirs); writer.ForceMerge(1); writer.Dispose(); } dir.Dispose(); }
// append fields from storedFieldReaders
public override Document Document(int n)
{
    Document result = new Document();
    for (int i = 0; i < storedFieldReaders.Count; i++)
    {
        IndexReader reader = (IndexReader)storedFieldReaders[i];
        foreach (Field field in reader.Document(n).Fields())
        {
            result.Add(field);
        }
    }
    return result;
}
public virtual void TestTermVectorCorruption() { Directory dir = NewDirectory(); for (int iter = 0; iter < 2; iter++) { IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy())); Document document = new Document(); FieldType customType = new FieldType(); customType.Stored = true; Field storedField = NewField("stored", "stored", customType); document.Add(storedField); writer.AddDocument(document); writer.AddDocument(document); document = new Document(); document.Add(storedField); FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED); customType2.StoreTermVectors = true; customType2.StoreTermVectorPositions = true; customType2.StoreTermVectorOffsets = true; Field termVectorField = NewField("termVector", "termVector", customType2); document.Add(termVectorField); writer.AddDocument(document); writer.ForceMerge(1); writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); for (int i = 0; i < reader.NumDocs; i++) { reader.Document(i); reader.GetTermVectors(i); } reader.Dispose(); writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy())); Directory[] indexDirs = new Directory[] { new MockDirectoryWrapper(Random(), new RAMDirectory(dir, NewIOContext(Random()))) }; writer.AddIndexes(indexDirs); writer.ForceMerge(1); writer.Dispose(); } dir.Dispose(); }
static void MakeLatestVersionLookupPerReader(IDictionary<string, Tuple<NuGetVersion, string, int>> lookup, IndexReader reader, string readerName, bool includePrerelease, bool includeUnlisted) { for (int n = 0; n < reader.MaxDoc; n++) { if (reader.IsDeleted(n)) { continue; } Document document = reader.Document(n); NuGetVersion version = GetVersion(document); if (version == null) { continue; } bool isListed = GetListed(document); if (isListed || includeUnlisted) { if (!version.IsPrerelease || includePrerelease) { string id = GetId(document); if (id == null) { continue; } Tuple<NuGetVersion, string, int> existingVersion; if (lookup.TryGetValue(id, out existingVersion)) { if (version > existingVersion.Item1) { lookup[id] = Tuple.Create(version, readerName, n); } } else { lookup.Add(id, Tuple.Create(version, readerName, n)); } } } } }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're // in merge mode, we use this FieldSelector FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) { if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge)); docCount++; } } } } finally { fieldsWriter.Close(); } return(docCount); }
public virtual void TestBinaryFieldInIndex() { IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES); IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); // binary fields with store off are not allowed Assert.Throws <ArgumentException>( () => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO)); Document doc = new Document(); doc.Add(binaryFldStored); doc.Add(stringFldStored); /* test for field count */ Assert.AreEqual(2, doc.fields_ForNUnit.Count); /* add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null); writer.AddDocument(doc, null); writer.Close(); /* open a reader and fetch the document */ IndexReader reader = IndexReader.Open((Directory)dir, false, null); Document docFromReader = reader.Document(0, null); Assert.IsTrue(docFromReader != null); /* fetch the binary stored field and compare it's content with the original one */ System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored", null))); Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored)); /* fetch the string field and compare it's content with the original one */ System.String stringFldStoredTest = docFromReader.Get("stringStored", null); Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored)); /* delete the document from index */ reader.DeleteDocument(0, null); Assert.AreEqual(0, reader.NumDocs()); reader.Close(); dir.Close(); }
public virtual void TestCloseInnerReader() { Directory dir1 = GetDir1(Random); CompositeReader ir1 = DirectoryReader.Open(dir1); Assert.AreEqual(1, ir1.GetSequentialSubReaders()[0].RefCount); // with overlapping ParallelCompositeReader pr = new ParallelCompositeReader(true, new CompositeReader[] { ir1 }, new CompositeReader[] { ir1 }); IndexReader psub = pr.GetSequentialSubReaders()[0]; Assert.AreEqual(1, psub.RefCount); ir1.Dispose(); Assert.AreEqual(1, psub.RefCount, "refCount of synthetic subreader should be unchanged"); try { psub.Document(0); Assert.Fail("Subreader should be already closed because inner reader was closed!"); } #pragma warning disable 168 catch (ObjectDisposedException e) #pragma warning restore 168 { // pass } try { pr.Document(0); Assert.Fail("ParallelCompositeReader should be already closed because inner reader was closed!"); } #pragma warning disable 168 catch (ObjectDisposedException e) #pragma warning restore 168 { // pass } // noop: pr.Dispose(); Assert.AreEqual(0, psub.RefCount); dir1.Dispose(); }
} // doIndex private static void removeAllDuplicateAndDeletedFiles(IndexableFileInfo[] fileInfos, string LuceneIndexDir, IndexCreationMode indexCreationMode) { if (indexCreationMode != IndexCreationMode.AppendToExistingIndex) { return; } IndexReader reader = IndexReader.Open(LuceneIndexDir); try { int numDocs = reader.NumDocs(); for (int i = 0; i < numDocs; i++) { Document docToCheck = reader.Document(i); bool removeDocFromIndex = true; string filenameField = docToCheck.GetField("filename").StringValue(); string lastModified = (docToCheck.GetField("LastModified").StringValue()); foreach (IndexableFileInfo fi in fileInfos) { if (String.Compare(fi.Filename, filenameField, true) == 0 && DateTools.DateToString(fi.LastModified, DateTools.Resolution.SECOND) == lastModified) { removeDocFromIndex = false; break; } } // foreach if (removeDocFromIndex) { reader.DeleteDocument(i); if (!reader.HasDeletions()) { throw new Exception("error: deletion failed!!"); } } } // for each lucene doc } finally { reader.Close(); } LuceneIndexer indexer = new LuceneIndexer(LuceneIndexDir, indexCreationMode); // open up the index again indexer.CloseIndexWriter(OptimizeMode.DoOptimization); // just to optimize the index (which removes deleted items). }
void ProcessReader(IndexReader indexReader, string readerName, ref int perIndexDocumentNumber) { for (int perReaderDocumentNumber = 0; perReaderDocumentNumber < indexReader.MaxDoc; perReaderDocumentNumber++) { if (indexReader.IsDeleted(perReaderDocumentNumber)) { ProcessDocument(indexReader, readerName, perReaderDocumentNumber, perIndexDocumentNumber, null, isDelete: true); } else { Document document = indexReader.Document(perReaderDocumentNumber); ProcessDocument(indexReader, readerName, perReaderDocumentNumber, perIndexDocumentNumber, document, isDelete: false); } perIndexDocumentNumber++; } }
public DeleteThreads(TestIndexWriterReader enclosingInstance, IndexWriter mainWriter) { InitBlock(enclosingInstance); this.mainWriter = mainWriter; IndexReader reader = mainWriter.GetReader(); int maxDoc = reader.MaxDoc(); random = Enclosing_Instance.NewRandom(); int iter = random.Next(maxDoc); for (int x = 0; x < iter; x++) { int doc = random.Next(iter); System.String id = reader.Document(doc).Get("id"); toDeleteTerms.Add(new Term("id", id)); } }
public virtual void DoTest(int[] docs) { if (dataset.Count == 0) { for (int i = 0; i < data.Length; i++) { dataset.Add(data[i], data[i]); } } Directory dir = MakeIndex(); IndexReader reader = IndexReader.Open(dir); for (int i = 0; i < docs.Length; i++) { Document d = reader.Document(docs[i], SELECTOR); d.Get(MAGIC_FIELD); System.Collections.IList fields = d.GetFields(); for (System.Collections.IEnumerator fi = fields.GetEnumerator(); fi.MoveNext();) { Fieldable f = null; try { f = (Fieldable)fi.Current; System.String fname = f.Name(); System.String fval = f.StringValue(); Assert.IsNotNull(docs[i] + " FIELD: " + fname, fval); System.String[] vals = fval.Split('#'); if (!dataset.Contains(vals[0]) || !dataset.Contains(vals[1])) { Assert.Fail("FIELD:" + fname + ",VAL:" + fval); } } catch (System.Exception e) { throw new Exception(docs[i] + " WTF: " + f.Name(), e); } } } reader.Close(); }
/// <summary> </summary> /// <returns> The number of documents in all of the readers /// </returns> /// <throws> IOException </throws> private int MergeFields() { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false); AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false); fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false); } fieldInfos.Write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { for (int i = 0; i < readers.Count; i++) { IndexReader reader = (IndexReader)readers[i]; int maxDoc = reader.MaxDoc(); for (int j = 0; j < maxDoc; j++) { if (!reader.IsDeleted(j)) { // skip deleted docs fieldsWriter.AddDocument(reader.Document(j)); docCount++; } } } } finally { fieldsWriter.Close(); } return(docCount); }
private bool VerifyIndex(Directory directory, int startAt) { bool fail = false; IndexReader reader = DirectoryReader.Open(directory); int max = reader.MaxDoc; for (int i = 0; i < max; i++) { Document temp = reader.Document(i); //System.out.println("doc "+i+"="+temp.GetField("count").StringValue); //compare the index doc number to the value that it should be if (!temp.GetField("count").GetStringValue().Equals((i + startAt) + "", StringComparison.Ordinal)) { fail = true; Console.WriteLine("Document " + (i + startAt) + " is returning document " + temp.GetField("count").GetStringValue()); } } reader.Dispose(); return(fail); }
private void CheckExpecteds(System.Collections.BitArray expecteds) { IndexReader r = IndexReader.Open(dir); //Perhaps not the most efficient approach but meets our needs here. for (int i = 0; i < r.MaxDoc(); i++) { if (!r.IsDeleted(i)) { System.String sval = r.Document(i).Get(FIELD_RECORD_ID); if (sval != null) { int val = System.Int32.Parse(sval); Assert.IsTrue(expecteds.Get(val), "Did not expect document #" + val); expecteds.Set(val, false); } } } r.Close(); Assert.AreEqual(0, SupportClass.BitSetSupport.Cardinality(expecteds), "Should have 0 docs remaining "); }
public override IEnumerable<Row> Execute(IEnumerable<Row> rows) { if (_indexDirectory == null) yield break; try { _reader = IndexReader.Open(_indexDirectory, true); } catch (Exception) { Warn("Failed to open lucene index in {0}.", _indexDirectory.Directory.FullName); yield break; } var docCount = _reader.NumDocs(); Info("Found {0} documents in lucene index.", docCount); for (var i = 0; i < docCount; i++) { if (_reader.IsDeleted(i)) continue; var doc = _reader.Document(i); var row = new Row(); foreach (var field in doc.GetFields().Where(field => field.IsStored)) { switch (field.Name) { case "dropped": row[field.Name] = Convert.ToBoolean(field.StringValue); break; default: row[field.Name] = field.StringValue; break; } } yield return row; } }
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass // LUCENE-1727: make sure doc fields are stored in order public virtual void TestStoredFieldsOrder() { Directory d = NewDirectory(); IndexWriter w = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Document doc = new Document(); FieldType customType = new FieldType(); customType.IsStored = true; doc.Add(NewField("zzz", "a b c", customType)); doc.Add(NewField("aaa", "a b c", customType)); doc.Add(NewField("zzz", "1 2 3", customType)); w.AddDocument(doc); IndexReader r = w.GetReader(); Document doc2 = r.Document(0); IEnumerator <IIndexableField> it = doc2.Fields.GetEnumerator(); Assert.IsTrue(it.MoveNext()); Field f = (Field)it.Current; Assert.AreEqual(f.Name, "zzz"); Assert.AreEqual(f.GetStringValue(), "a b c"); Assert.IsTrue(it.MoveNext()); f = (Field)it.Current; Assert.AreEqual(f.Name, "aaa"); Assert.AreEqual(f.GetStringValue(), "a b c"); Assert.IsTrue(it.MoveNext()); f = (Field)it.Current; Assert.AreEqual(f.Name, "zzz"); Assert.AreEqual(f.GetStringValue(), "1 2 3"); Assert.IsFalse(it.MoveNext()); r.Dispose(); w.Dispose(); d.Dispose(); }
private void CheckExpecteds(BitArray expecteds) { IndexReader r = DirectoryReader.Open(Dir); //Perhaps not the most efficient approach but meets our //needs here. IBits liveDocs = MultiFields.GetLiveDocs(r); for (int i = 0; i < r.MaxDoc; i++) { if (liveDocs == null || liveDocs.Get(i)) { string sval = r.Document(i).Get(FIELD_RECORD_ID); if (sval != null) { int val = Convert.ToInt32(sval); Assert.IsTrue(expecteds.SafeGet(val), "Did not expect document #" + val); expecteds.SafeSet(val, false); } } } r.Dispose(); Assert.AreEqual(0, expecteds.Cardinality(), "Should have 0 docs remaining "); }
public override IComparable Generate(IndexReader reader, int doc)
{
    var document = reader.Document(doc);
    var name = document.GetField("Name").StringValue;
    return name.Substring(name.Length - len, len);
}
public virtual void TestUpdateDocument() { bool optimize = true; Directory dir1 = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); // create the index CreateIndexNoClose(!optimize, "index1", writer); // writer.flush(false, true, true); // get a reader IndexReader r1 = writer.GetReader(); Assert.IsTrue(r1.IsCurrent()); System.String id10 = r1.Document(10).GetField("id").StringValue(); Document newDoc = r1.Document(10); newDoc.RemoveField("id"); newDoc.Add(new Field("id", System.Convert.ToString(8000), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.UpdateDocument(new Term("id", id10), newDoc); Assert.IsFalse(r1.IsCurrent()); IndexReader r2 = writer.GetReader(); Assert.IsTrue(r2.IsCurrent()); Assert.AreEqual(0, Count(new Term("id", id10), r2)); Assert.AreEqual(1, Count(new Term("id", System.Convert.ToString(8000)), r2)); r1.Close(); writer.Close(); Assert.IsTrue(r2.IsCurrent()); IndexReader r3 = IndexReader.Open(dir1); Assert.IsTrue(r3.IsCurrent()); Assert.IsTrue(r2.IsCurrent()); Assert.AreEqual(0, Count(new Term("id", id10), r3)); Assert.AreEqual(1, Count(new Term("id", System.Convert.ToString(8000)), r3)); writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(doc); Assert.IsTrue(r2.IsCurrent()); Assert.IsTrue(r3.IsCurrent()); writer.Close(); Assert.IsFalse(r2.IsCurrent()); Assert.IsTrue(!r3.IsCurrent()); r2.Close(); r3.Close(); dir1.Close(); }
/// <summary> /// Executes the search query build by the parser or another source. /// </summary> /// <param name="query">Query to execute on the index</param> /// <param name="indexReader">The reader used to read from the index</param> /// <returns>Returns the found search results</returns> private IEnumerable<SearchHit> PerformSearchQuery(Query query, IndexReader indexReader) { var foundItems = new List<SearchHit>(); IndexSearcher searcher = new IndexSearcher(indexReader); var results = searcher.Search(query, Int16.MaxValue); foreach (var scoreDoc in results.ScoreDocs) { var document = indexReader.Document(scoreDoc.Doc); // Collect the search results from the index. foundItems.Add(new SearchHit() { Id = Int32.Parse(document.Get("id")), Name = document.Get("recipe-name"), Description = document.Get("recipe-description") }); } return foundItems; }
public override Document Document(int n, FieldSelector fieldSelector)
{
    EnsureOpen();
    return in_Renamed.Document(n, fieldSelector);
}
// convenience method
public static TokenStream GetTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
{
    Document doc = reader.Document(docId);
    return GetTokenStream(doc, field, analyzer);
}
// There are two ways we can determine the max_results // most recent items: // // One is to instantiate Lucene documents for each of // the document IDs in primary_matches. This is a // fairly expensive operation. // // The other is to walk through the list of all // document IDs in descending time order. This is // a less expensive operation, but adds up over time // on large data sets. // // We can walk about 2.5 docs for every Document we // instantiate. So what we'll do, if we have more // matches than available hits, is walk (m * 1.25) // docs to see if we can fill out the top 100 hits. // If not, we'll fall back to creating documents // for all of them. private static ArrayList ScanRecentDocs (IndexReader primary_reader, IndexReader secondary_reader, BetterBitArray primary_matches, Dictionary<int, Hit> hits_by_id, int max_results, ref int total_number_of_matches, HitFilter hit_filter, string index_name) { Stopwatch a = new Stopwatch (); a.Start (); TermDocs docs = primary_reader.TermDocs (); TermEnum enumerator = primary_reader.Terms (new Term ("InvertedTimestamp", String.Empty)); ArrayList results = new ArrayList (max_results); int docs_found = 0; int docs_walked = 0; int hit_filter_removed = 0; int max_docs = (int) (primary_matches.TrueCount * 1.25); Term term; TermDocs secondary_term_docs = null; if (secondary_reader != null) secondary_term_docs = secondary_reader.TermDocs (); do { term = enumerator.Term (); if (term.Field () != "InvertedTimestamp") break; docs.Seek (enumerator); while (docs.Next () && docs_found < max_results && docs_walked < max_docs) { int doc_id = docs.Doc (); if (primary_matches.Get (doc_id)) { Document doc = primary_reader.Document (doc_id); Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs); // If we have a HitFilter, apply it. if (hit_filter != null && ! hit_filter (hit)) { if (Debug) Log.Debug ("Filtered out {0}", hit.Uri); hit_filter_removed ++; continue; } hits_by_id [doc_id] = hit; // Add the result, last modified first results.Add (hit); docs_found++; } docs_walked++; } } while (enumerator.Next () && docs_found < max_results && docs_walked < max_docs); docs.Close (); if (secondary_term_docs != null) secondary_term_docs.Close (); // If we've found all the docs we can return in a subset! // Fantastic, we've probably short circuited a slow search. if (docs_found != max_results) { // Otherwise bad luck! Not all docs found // Start afresh - this time traversing all results results = null; } else { // Adjust total_number_of_matches. We need to do this to avoid scenarios like the following: // max_hits = 100. Matched 100 results. But hit filter removed 30. So 70 results will be returned. // We want to avoid saying "Showing top 70 of 100". Note that since we are not passing // every document in the index through the hit_filter, when we say "Showing top 100 of 1234", the // 1234 could actually be much less. But since max_hits was 100, that will not mislead the user. total_number_of_matches -= hit_filter_removed; } a.Stop (); if (Debug) { Log.Debug (">>> {0}: Walked {1} items, populated an enum with {2} items in {3}", index_name, docs_walked, docs_found, a); if (docs_found == max_results) Log.Debug (">>> {0}: Successfully short circuited timestamp ordering!", index_name); } return results; }
private void CompareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults) { assertEquals(results.TotalHits, joinResults.TotalHits); assertEquals(results.ScoreDocs.Length, joinResults.ScoreDocs.Length); for (int hitCount = 0; hitCount < results.ScoreDocs.Length; hitCount++) { ScoreDoc hit = results.ScoreDocs[hitCount]; ScoreDoc joinHit = joinResults.ScoreDocs[hitCount]; Document doc1 = r.Document(hit.Doc); Document doc2 = joinR.Document(joinHit.Doc); assertEquals("hit " + hitCount + " differs", doc1.Get("childID"), doc2.Get("childID")); // don't compare scores -- they are expected to differ assertTrue(hit is FieldDoc); assertTrue(joinHit is FieldDoc); FieldDoc hit0 = (FieldDoc)hit; FieldDoc joinHit0 = (FieldDoc)joinHit; assertArrayEquals(hit0.Fields, joinHit0.Fields); } }
private int CheckDbAndIndex(DbDataReader dbreader, IndexReader ixreader, List<Difference> result) { var versionId = dbreader.GetInt32(0); var dbNodeTimestamp = dbreader.GetInt64(1); var dbVersionTimestamp = dbreader.GetInt64(2); var termDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.VersionId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(versionId))); Lucene.Net.Documents.Document doc = null; int docid = -1; if (termDocs.Next()) { docid = termDocs.Doc(); doc = ixreader.Document(docid); var indexNodeTimestamp = ParseLong(doc.Get(LucObject.FieldName.NodeTimestamp)); var indexVersionTimestamp = ParseLong(doc.Get(LucObject.FieldName.VersionTimestamp)); var nodeId = ParseInt(doc.Get(LucObject.FieldName.NodeId)); var version = doc.Get(LucObject.FieldName.Version); var p = doc.Get(LucObject.FieldName.Path); if (termDocs.Next()) { result.Add(new Difference(IndexDifferenceKind.MoreDocument) { DocId = docid, NodeId = nodeId, VersionId = versionId, Version = version, Path = p, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp, }); } if (dbVersionTimestamp != indexVersionTimestamp) { result.Add(new Difference(IndexDifferenceKind.DifferentVersionTimestamp) { DocId = docid, VersionId = versionId, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp, NodeId = nodeId, Version = version, Path = p }); } if (dbNodeTimestamp != indexNodeTimestamp) { var ok = false; var isLastDraft = doc.Get(LucObject.FieldName.IsLastDraft); if (isLastDraft != BooleanIndexHandler.YES) { var latestDocs = ixreader.TermDocs(new Lucene.Net.Index.Term(LucObject.FieldName.NodeId, Lucene.Net.Util.NumericUtils.IntToPrefixCoded(nodeId))); Lucene.Net.Documents.Document latestDoc = null; while (latestDocs.Next()) { var latestdocid = latestDocs.Doc(); var d = ixreader.Document(latestdocid); if (d.Get(LucObject.FieldName.IsLastDraft) != BooleanIndexHandler.YES) continue; latestDoc = d; break; } var latestPath = latestDoc.Get(LucObject.FieldName.Path); if (latestPath == p) ok = true; } if (!ok) { result.Add(new Difference(IndexDifferenceKind.DifferentNodeTimestamp) { DocId = docid, VersionId = versionId, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, IxNodeTimestamp = indexNodeTimestamp, IxVersionTimestamp = indexVersionTimestamp, NodeId = nodeId, Version = version, Path = p }); } } } else { result.Add(new Difference(IndexDifferenceKind.NotInIndex) { DocId = docid, VersionId = versionId, DbNodeTimestamp = dbNodeTimestamp, DbVersionTimestamp = dbVersionTimestamp, }); } return docid; }
private static Hit CreateHit ( Document primary_doc, IndexReader secondary_reader, TermDocs term_docs, FieldSelector fields) { Hit hit = DocumentToHit (primary_doc); if (secondary_reader == null) return hit; // Get the stringified version of the URI // exactly as it comes out of the index. Term term = new Term ("Uri", primary_doc.Get ("Uri")); term_docs.Seek (term); // Move to the first (and only) matching term doc term_docs.Next (); Document secondary_doc = (fields == null) ? secondary_reader.Document (term_docs.Doc ()) : secondary_reader.Document (term_docs.Doc (), fields); // If we are using the secondary index, now we need to // merge the properties from the secondary index AddPropertiesToHit (hit, secondary_doc, false); return hit; }
private void CheckContents(IndexReader ir, string indexname) { Bits liveDocs = MultiFields.GetLiveDocs(ir); for (int i = 0; i < ir.MaxDoc; i++) { if (liveDocs == null || liveDocs.Get(i)) { assertEquals(indexname, ir.Document(i).Get("indexname")); } } }
private int CopyFieldsWithDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int docCount = 0; int maxDoc = reader.MaxDoc(); if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc; ) { if (reader.IsDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) break; if (reader.IsDeleted(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; checkAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (reader.IsDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(j, fieldSelectorMerge); fieldsWriter.AddDocument(doc); docCount++; checkAbort.Work(300); } } return docCount; }
private void CompareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<int> joinResults) { // results is 'complete'; joinResults is a subset int resultUpto = 0; int joinGroupUpto = 0; ScoreDoc[] hits = results.ScoreDocs; IGroupDocs<int>[] groupDocs = joinResults.Groups; while (joinGroupUpto < groupDocs.Length) { IGroupDocs<int> group = groupDocs[joinGroupUpto++]; ScoreDoc[] groupHits = group.ScoreDocs; assertNotNull(group.GroupValue); Document parentDoc = joinR.Document(group.GroupValue); string parentID = parentDoc.Get("parentID"); //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc); assertNotNull(parentID); assertTrue(groupHits.Length > 0); for (int hitIDX = 0; hitIDX < groupHits.Length; hitIDX++) { Document nonJoinHit = r.Document(hits[resultUpto++].Doc); Document joinHit = joinR.Document(groupHits[hitIDX].Doc); assertEquals(parentID, nonJoinHit.Get("parentID")); assertEquals(joinHit.Get("childID"), nonJoinHit.Get("childID")); } if (joinGroupUpto < groupDocs.Length) { // Advance non-join hit to the next parentID: //System.out.println(" next joingroupUpto=" + joinGroupUpto + " gd.Length=" + groupDocs.Length + " parentID=" + parentID); while (true) { assertTrue(resultUpto < hits.Length); if (!parentID.Equals(r.Document(hits[resultUpto].Doc).Get("parentID"))) { break; } resultUpto++; } } } }
private int CopyFieldsNoDeletions(FieldSelector fieldSelectorMerge, FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) { int maxDoc = reader.MaxDoc(); int docCount = 0; if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len); fieldsWriter.AddRawDocuments(stream, rawDocLengths, len); docCount += len; checkAbort.Work(300 * len); } } else { for (; docCount < maxDoc; docCount++) { // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.Document(docCount, fieldSelectorMerge); fieldsWriter.AddDocument(doc); checkAbort.Work(300); } } return docCount; }
private void assertCompressedFields29(Directory dir, bool shouldStillBeCompressed) { int count = 0; int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.Length * 2; // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields: int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.Length; IndexReader reader = IndexReader.Open(dir, true); try { // look into sub readers and check if raw merge is on/off var readers = new System.Collections.Generic.List <IndexReader>(); ReaderUtil.GatherSubReaders(readers, reader); foreach (IndexReader ir in readers) { FieldsReader fr = ((SegmentReader)ir).GetFieldsReader(); Assert.IsTrue(shouldStillBeCompressed != fr.CanReadRawDocs(), "for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index"); } // test that decompression works correctly for (int i = 0; i < reader.MaxDoc; i++) { if (!reader.IsDeleted(i)) { Document d = reader.Document(i); if (d.Get("content3") != null) { continue; } count++; IFieldable compressed = d.GetFieldable("compressed"); if (int.Parse(d.Get("id")) % 2 == 0) { Assert.IsFalse(compressed.IsBinary); Assert.AreEqual(TEXT_TO_COMPRESS, compressed.StringValue, "incorrectly decompressed string"); } else { Assert.IsTrue(compressed.IsBinary); Assert.IsTrue(BINARY_TO_COMPRESS.SequenceEqual(compressed.GetBinaryValue()), "incorrectly decompressed binary"); } } } //check if field was decompressed after optimize for (int i = 0; i < reader.MaxDoc; i++) { if (!reader.IsDeleted(i)) { Document d = reader.Document(i, new AnonymousFieldSelector()); if (d.Get("content3") != null) { continue; } count++; // read the size from the binary value using BinaryReader (this prevents us from doing the shift ops ourselves): // ugh, Java uses Big-Endian streams, so we need to do it manually. byte[] encodedSize = d.GetFieldable("compressed").GetBinaryValue().Take(4).Reverse().ToArray(); int actualSize = BitConverter.ToInt32(encodedSize, 0); int compressedSize = int.Parse(d.Get("compressedSize")); bool binary = int.Parse(d.Get("id")) % 2 > 0; int shouldSize = shouldStillBeCompressed ? compressedSize : (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH); Assert.AreEqual(shouldSize, actualSize, "size incorrect"); if (!shouldStillBeCompressed) { Assert.IsFalse(compressedSize == actualSize, "uncompressed field should have another size than recorded in index"); } } } Assert.AreEqual(34 * 2, count, "correct number of tests"); } finally { reader.Dispose(); } }
protected override Field[] GetFields(IndexReader reader, int docId, string fieldName)
{
    var field = ContentFieldName ?? fieldName;
    var doc = reader.Document(docId, new MapFieldSelector(new[] { field }));
    return doc.GetFields(field); // according to Document class javadoc, this never returns null
}
public virtual void searchIndex(System.String dirName, System.String oldName) { //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer()); //Query query = parser.parse("handle:1"); dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); IndexSearcher searcher = new IndexSearcher(dir, true); IndexReader reader = searcher.IndexReader; _TestUtil.CheckIndex(dir); for (int i = 0; i < 35; i++) { if (!reader.IsDeleted(i)) { Document d = reader.Document(i); var fields = d.GetFields(); if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22.")) { if (d.GetField("content3") == null) { int numFields = oldName.StartsWith("29.") ? 7 : 5; Assert.AreEqual(numFields, fields.Count); Field f = d.GetField("id"); Assert.AreEqual("" + i, f.StringValue); f = (Field)d.GetField("utf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue); f = (Field)d.GetField("autf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue); f = (Field)d.GetField("content2"); Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue); f = (Field)d.GetField("fie\u2C77ld"); Assert.AreEqual("field with non-ascii name", f.StringValue); } } } // Only ID 7 is deleted else { Assert.AreEqual(7, i); } } ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; // First document should be #21 since it's norm was // increased: Document d2 = searcher.Doc(hits[0].Doc); Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first"); TestHits(hits, 34, searcher.IndexReader); if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22.")) { // Test on indices >= 2.3 hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); } searcher.Close(); dir.Close(); }
/// <summary> /// checks that stored fields of all documents are the same /// </summary> public void AssertStoredFieldsEquals(string info, IndexReader leftReader, IndexReader rightReader) { Debug.Assert(leftReader.MaxDoc == rightReader.MaxDoc); for (int i = 0; i < leftReader.MaxDoc; i++) { Document leftDoc = leftReader.Document(i); Document rightDoc = rightReader.Document(i); // TODO: I think this is bogus because we don't document what the order should be // from these iterators, etc. I think the codec/IndexReader should be free to order this stuff // in whatever way it wants (e.g. maybe it packs related fields together or something) // To fix this, we sort the fields in both documents by name, but // we still assume that all instances with same name are in order: Comparison<IndexableField> comp = (a, b) => String.Compare(a.Name(), b.Name(), StringComparison.Ordinal); leftDoc.Fields.Sort(comp); rightDoc.Fields.Sort(comp); var leftIterator = leftDoc.GetEnumerator(); var rightIterator = rightDoc.GetEnumerator(); while (leftIterator.MoveNext()) { Assert.IsTrue(rightIterator.MoveNext(), info); AssertStoredFieldEquals(info, leftIterator.Current, rightIterator.Current); } Assert.IsFalse(rightIterator.MoveNext(), info); } }
public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField) { Assert.AreEqual(r1.NumDocs(), r2.NumDocs()); bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc()); int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping TermDocs termDocs1 = r1.TermDocs(); TermDocs termDocs2 = r2.TermDocs(); // create mapping from id2 space to id2 based on idField idField = StringHelper.Intern(idField); TermEnum termEnum = r1.Terms(new Term(idField, "")); do { Term term = termEnum.Term(); if (term == null || (System.Object) term.Field() != (System.Object) idField) break; termDocs1.Seek(termEnum); if (!termDocs1.Next()) { // This doc is deleted and wasn't replaced termDocs2.Seek(termEnum); Assert.IsFalse(termDocs2.Next()); continue; } int id1 = termDocs1.Doc(); Assert.IsFalse(termDocs1.Next()); termDocs2.Seek(termEnum); Assert.IsTrue(termDocs2.Next()); int id2 = termDocs2.Doc(); Assert.IsFalse(termDocs2.Next()); r2r1[id2] = id1; // verify stored fields are equivalent try { VerifyEquals(r1.Document(id1), r2.Document(id2)); } catch (System.Exception t) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); System.Console.Out.WriteLine(" d1=" + r1.Document(id1)); System.Console.Out.WriteLine(" d2=" + r2.Document(id2)); throw t; } try { // verify term vectors are equivalent VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2)); } catch (System.Exception e) { System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1); System.Console.Out.WriteLine(" d1=" + tv1); if (tv1 != null) for (int i = 0; i < tv1.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv1[i]); } TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2); System.Console.Out.WriteLine(" d2=" + tv2); if (tv2 != null) for (int i = 0; i < tv2.Length; i++) { System.Console.Out.WriteLine(" " + i + ": " + tv2[i]); } throw e; } } while (termEnum.Next()); termEnum.Close(); // Verify postings TermEnum termEnum1 = r1.Terms(new Term("", "")); TermEnum termEnum2 = r2.Terms(new Term("", "")); // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.NumDocs()]; long[] info2 = new long[r2.NumDocs()]; for (; ; ) { Term term1, term2; // iterate until we get some docs int len1; for (; ; ) { len1 = 0; term1 = termEnum1.Term(); if (term1 == null) break; termDocs1.Seek(termEnum1); while (termDocs1.Next()) { int d1 = termDocs1.Doc(); int f1 = termDocs1.Freq(); info1[len1] = (((long) d1) << 32) | f1; len1++; } if (len1 > 0) break; if (!termEnum1.Next()) break; } // iterate until we get some docs int len2; for (; ; ) { len2 = 0; term2 = termEnum2.Term(); if (term2 == null) break; termDocs2.Seek(termEnum2); while (termDocs2.Next()) { int d2 = termDocs2.Doc(); int f2 = termDocs2.Freq(); info2[len2] = (((long) r2r1[d2]) << 32) | f2; len2++; } if (len2 > 0) break; if (!termEnum2.Next()) break; } if (!hasDeletes) Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq()); Assert.AreEqual(len1, len2); if (len1 == 0) break; // no more terms Assert.AreEqual(term1, term2); // sort info2 to get it into ascending docid System.Array.Sort(info2, 0, len2 - 0); // now compare for (int i = 0; i < len1; i++) { Assert.AreEqual(info1[i], info2[i]); } termEnum1.Next(); termEnum2.Next(); } }
// Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 2; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } MockDirectoryWrapper dir = NewMockDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)) .SetMaxBufferedDocs(2) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(4)); IndexWriter writer = new IndexWriter(dir, config); var scheduler = config.mergeScheduler as IConcurrentMergeScheduler; if (scheduler != null) { scheduler.SetSuppressExceptions(); } IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, true, NewField); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } Thread.Sleep(10); dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Dispose(false); success = true; } catch (IOException) { failure.ClearDoFail(); writer.Dispose(false); } if (VERBOSE) { Console.WriteLine("TEST: success=" + success); } if (success) { IndexReader reader = DirectoryReader.Open(dir); IBits delDocs = MultiFields.GetLiveDocs(reader); for (int j = 0; j < reader.MaxDoc; j++) { if (delDocs == null || !delDocs.Get(j)) { reader.Document(j); reader.GetTermVectors(j); } } reader.Dispose(); } dir.Dispose(); } }
private static ArrayList FindRecentResults (IndexReader primary_reader, IndexReader secondary_reader, BetterBitArray primary_matches, Dictionary<int, Hit> hits_by_id, int max_results, ref int total_number_of_matches, HitFilter hit_filter, string index_name) { Stopwatch b = new Stopwatch (); b.Start (); int count = 0; Document doc; ArrayList all_docs = null; TopScores top_docs = null; TermDocs term_docs = null; if (primary_matches.TrueCount > max_results) top_docs = new TopScores (max_results); else all_docs = new ArrayList (primary_matches.TrueCount); if (secondary_reader != null) term_docs = secondary_reader.TermDocs (); for (int match_index = primary_matches.Count; ; match_index --) { // Walk across the matches backwards, since newer // documents are more likely to be at the end of // the index. match_index = primary_matches.GetPreviousTrueIndex (match_index); if (match_index < 0) break; count++; doc = primary_reader.Document (match_index, fields_timestamp_uri); // Check the timestamp --- if we have already reached our // limit, we might be able to reject it immediately. string timestamp_str; long timestamp_num = 0; timestamp_str = doc.Get ("Timestamp"); if (timestamp_str == null) { Logger.Log.Warn ("No timestamp on {0}!", GetUriFromDocument (doc)); } else { timestamp_num = Int64.Parse (doc.Get ("Timestamp")); if (top_docs != null && ! top_docs.WillAccept (timestamp_num)) continue; } // Get the actual hit now // doc was created with only 2 fields, so first get the complete lucene document for primary document. // Also run our hit_filter now, if we have one. Since we insist of returning max_results // most recent hits, any hits that would be filtered out should happen now and not later. Hit hit = CreateHit (primary_reader.Document (match_index), secondary_reader, term_docs); if (hit_filter != null && ! hit_filter (hit)) { if (Debug) Log.Debug ("Filtered out {0}", hit.Uri); total_number_of_matches --; continue; } hits_by_id [match_index] = hit; // Add the document to the appropriate data structure. // We use the timestamp_num as the score, so high // scores correspond to more-recent timestamps. if (all_docs != null) all_docs.Add (hit); else top_docs.Add (timestamp_num, hit); } if (term_docs != null) term_docs.Close (); b.Stop (); if (Debug) Log.Debug (">>> {0}: Instantiated and scanned {1} documents in {2}", index_name, count, b); if (all_docs != null) { // Sort results before sending all_docs.Sort (); return all_docs; } else { return top_docs.TopScoringObjects; } }
public virtual void SearchIndex(Directory dir, string oldName)
{
    //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
    //Query query = parser.parse("handle:1");

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    TestUtil.CheckIndex(dir);

    // true if this is a 4.0+ index
    bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
    // true if this is a 4.2+ index
    bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

    Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

    Bits liveDocs = MultiFields.GetLiveDocs(reader);

    for (int i = 0; i < 35; i++)
    {
        if (liveDocs.Get(i))
        {
            Document d = reader.Document(i);
            IList<IndexableField> fields = d.Fields;
            bool isProxDoc = d.GetField("content3") == null;
            if (isProxDoc)
            {
                int numFields = is40Index ? 7 : 5;
                Assert.AreEqual(numFields, fields.Count);

                IndexableField f = d.GetField("id");
                Assert.AreEqual("" + i, f.StringValue);

                f = d.GetField("utf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = d.GetField("autf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = d.GetField("content2");
                Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                f = d.GetField("fie\u2C77ld");
                Assert.AreEqual("field with non-ascii name", f.StringValue);
            }

            Fields tfvFields = reader.GetTermVectors(i);
            Assert.IsNotNull(tfvFields, "i=" + i);
            Terms tfv = tfvFields.Terms("utf8");
            Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
        }
        else
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, i);
        }
    }

    if (is40Index)
    {
        // check docvalues fields
        NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
        BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
        BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
        SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
        SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
        BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
        BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
        NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
        NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
        NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
        NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
        NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
        NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");
        SortedSetDocValues dvSortedSet = null;
        if (is42Index)
        {
            dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
        }

        for (int i = 0; i < 35; i++)
        {
            int id = Convert.ToInt32(reader.Document(i).Get("id"));
            Assert.AreEqual(id, dvByte.Get(i));

            sbyte[] bytes = new sbyte[]
            {
                (sbyte)((int)((uint)id >> 24)),
                (sbyte)((int)((uint)id >> 16)),
                (sbyte)((int)((uint)id >> 8)),
                (sbyte)id
            };
            BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
            BytesRef scratch = new BytesRef();

            dvBytesDerefFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesDerefVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);

            Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
            Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
            Assert.AreEqual(id, dvInt.Get(i));
            Assert.AreEqual(id, dvLong.Get(i));
            Assert.AreEqual(id, dvPacked.Get(i));
            Assert.AreEqual(id, dvShort.Get(i));

            if (is42Index)
            {
                dvSortedSet.Document = i;
                long ord = dvSortedSet.NextOrd();
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                dvSortedSet.LookupOrd(ord, scratch);
                Assert.AreEqual(expectedRef, scratch);
            }
        }
    }

    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

    // First document should be #0
    Document doc = searcher.IndexReader.Document(hits[0].Doc);
    assertEquals("didn't get the right document first", "0", doc.Get("id"));

    DoTestHits(hits, 34, searcher.IndexReader);

    if (is40Index)
    {
        hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);

        hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);
    }

    hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);

    reader.Dispose();
}
protected virtual Field[] GetFields(IndexReader reader, int docId, String fieldName)
{
    // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
    Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
    return doc.GetFields(fieldName); // according to Document class javadoc, this never returns null
}
public virtual void DoTestNumbers(bool withPayloads)
{
    Directory dir = NewDirectory();
    Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random);
    iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc);

    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    if (Random.NextBoolean())
    {
        ft.StoreTermVectors = true;
        ft.StoreTermVectorOffsets = Random.NextBoolean();
        ft.StoreTermVectorPositions = Random.NextBoolean();
    }

    int numDocs = AtLeast(500);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft));
        doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
        doc.Add(new StringField("id", "" + i, Field.Store.NO));
        w.AddDocument(doc);
    }

    IndexReader reader = w.GetReader();
    w.Dispose();

    string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

    foreach (string term in terms)
    {
        DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
        int doc;
        while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            string storedNumbers = reader.Document(doc).Get("numbers");
            int freq = dp.Freq;
            for (int i = 0; i < freq; i++)
            {
                dp.NextPosition();
                int start = dp.StartOffset;
                if (Debugging.AssertsEnabled) Debugging.Assert(start >= 0);
                int end = dp.EndOffset;
                if (Debugging.AssertsEnabled) Debugging.Assert(end >= 0 && end >= start);

                // check that the offsets correspond to the term in the source text
                Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal));

                if (withPayloads)
                {
                    // check that we have a payload and it starts with "pos"
                    Assert.IsNotNull(dp.GetPayload());
                    BytesRef payload = dp.GetPayload();
                    Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                }
                // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
            }
        }
    }

    // check we can skip correctly
    int numSkippingTests = AtLeast(50);

    for (int j = 0; j < numSkippingTests; j++)
    {
        int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999));
        DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
        int doc = dp.Advance(num);
        Assert.AreEqual(num, doc);
        int freq = dp.Freq;
        for (int i = 0; i < freq; i++)
        {
            string storedNumbers = reader.Document(doc).Get("numbers");
            dp.NextPosition();
            int start = dp.StartOffset;
            if (Debugging.AssertsEnabled) Debugging.Assert(start >= 0);
            int end = dp.EndOffset;
            if (Debugging.AssertsEnabled) Debugging.Assert(end >= 0 && end >= start);

            // check that the offsets correspond to the term in the source text
            Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal));

            if (withPayloads)
            {
                // check that we have a payload and it starts with "pos"
                Assert.IsNotNull(dp.GetPayload());
                BytesRef payload = dp.GetPayload();
                Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
            }
            // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
        }
    }

    // check that other fields (without offsets) work correctly
    for (int i = 0; i < numDocs; i++)
    {
        DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
        Assert.AreEqual(i, dp.NextDoc());
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
    }

    reader.Dispose();
    dir.Dispose();
}
protected virtual String[] GetFieldValues(IndexReader reader, int docId, String fieldName)
{
    Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
    return doc.GetValues(fieldName); // according to Document class javadoc, this never returns null
}
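Both helpers above restrict the stored-field load to a single field through a MapFieldSelector; they differ only in whether the caller gets the Field objects or just their stored string values. A rough inline sketch of the same idea, assuming a reader over an index whose documents store a multi-valued "tags" field (the field name and document id are made up for illustration):

// Load only the "tags" field of document 0, then read it both ways.
Document doc = reader.Document(0, new MapFieldSelector(new[] { "tags" }));

Field[] tagFields = doc.GetFields("tags");   // Field instances: name, flags, value
string[] tagValues = doc.GetValues("tags");  // just the stored string values

for (int i = 0; i < tagValues.Length; i++)
    Console.WriteLine("{0} = {1}", tagFields[i].Name, tagValues[i]);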
private void ApplyFacetValueHit(FacetValue facetValue, Facet value, int docId, ParsedRange parsedRange, IndexReader indexReader)
{
    facetValue.Hits++;
    if (IndexQuery.IsDistinct == false &&
        (value.Aggregation == FacetAggregation.Count || value.Aggregation == FacetAggregation.None))
    {
        return;
    }

    FacetValueState set;
    if (matches.TryGetValue(facetValue, out set) == false)
    {
        matches[facetValue] = set = new FacetValueState
        {
            Docs = new HashSet<int>(),
            Facet = value,
            Range = parsedRange
        };
    }

    if (IndexQuery.IsDistinct)
    {
        if (IndexQuery.FieldsToFetch.Length == 0)
            throw new InvalidOperationException("Cannot process distinct facet query without specifying which fields to distinct upon.");

        if (set.AlreadySeen == null)
            set.AlreadySeen = new HashSet<StringCollectionValue>();

        var document = indexReader.Document(docId);
        var fields = new List<string>();
        foreach (var fieldName in IndexQuery.FieldsToFetch)
        {
            foreach (var field in document.GetFields(fieldName))
            {
                if (field.StringValue == null)
                    continue;
                fields.Add(field.StringValue);
            }
        }

        if (fields.Count == 0)
            throw new InvalidOperationException("Cannot apply distinct facet on [" + string.Join(", ", IndexQuery.FieldsToFetch) +
                                                "], did you forget to store them in the index? ");

        if (set.AlreadySeen.Add(new StringCollectionValue(fields)) == false)
        {
            facetValue.Hits--; // already seen, cancel this
            return;
        }
    }

    set.Docs.Add(docId);
}
public static void AssertIndexEquals(IndexReader index1, IndexReader index2)
{
    Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
    Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
    Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
    Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");

    // check field names
    System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
    System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL); // was index1, which made the comparison a no-op

    System.Collections.Generic.ICollection<IFieldable> fields1 = null;
    System.Collections.Generic.ICollection<IFieldable> fields2 = null;

    Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");

    System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
    System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
    while (it1.MoveNext() && it2.MoveNext())
    {
        Assert.AreEqual((System.String)it1.Current, (System.String)it2.Current, "Different field names.");
    }

    // check norms
    it1 = fieldsNames1.GetEnumerator();
    while (it1.MoveNext())
    {
        System.String curField = (System.String)it1.Current;
        byte[] norms1 = index1.Norms(curField);
        byte[] norms2 = index2.Norms(curField);
        if (norms1 != null && norms2 != null)
        {
            Assert.AreEqual(norms1.Length, norms2.Length);
            for (int i = 0; i < norms1.Length; i++)
            {
                Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
            }
        }
        else
        {
            Assert.AreSame(norms1, norms2);
        }
    }

    // check deletions
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
    }

    // check stored fields
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        if (!index1.IsDeleted(i))
        {
            Document doc1 = index1.Document(i);
            Document doc2 = index2.Document(i);
            fields1 = doc1.GetFields();
            fields2 = doc2.GetFields();
            Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");

            it1 = fields1.GetEnumerator();
            it2 = fields2.GetEnumerator();
            while (it1.MoveNext() && it2.MoveNext())
            {
                Field curField1 = (Field)it1.Current;
                Field curField2 = (Field)it2.Current;
                Assert.AreEqual(curField1.Name, curField2.Name, "Different field names for doc " + i + ".");
                Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
            }
        }
    }

    // check dictionary and posting lists
    TermEnum enum1 = index1.Terms();
    TermEnum enum2 = index2.Terms();
    TermPositions tp1 = index1.TermPositions();
    TermPositions tp2 = index2.TermPositions();
    while (enum1.Next())
    {
        Assert.IsTrue(enum2.Next());
        Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
        tp1.Seek(enum1.Term);
        tp2.Seek(enum1.Term);
        while (tp1.Next())
        {
            Assert.IsTrue(tp2.Next());
            Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in posting list of term " + enum1.Term + ".");
            Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequency in posting list of term " + enum1.Term + ".");
            for (int i = 0; i < tp1.Freq; i++)
            {
                Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in posting list of term " + enum1.Term + ".");
            }
        }
    }
}
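A rough usage sketch for the assertion above, assuming two directories (placeholder variables dir1 and dir2) that are expected to contain equivalent indexes:

// Open both indexes read-only and verify they match document by document.
using (IndexReader index1 = IndexReader.Open(dir1, true))
using (IndexReader index2 = IndexReader.Open(dir2, true))
{
    AssertIndexEquals(index1, index2);
}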