public virtual void TestNewestSegment_Mem()
{
    Directory directory = NewDirectory();
    IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    Assert.IsNull(writer.NewestSegment());
    writer.Dispose();
    directory.Dispose();
}
public virtual void TestAddDocument()
{
    Document testDoc = new Document();
    DocHelper.SetupDoc(testDoc);
    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    writer.AddDocument(testDoc);
    writer.Commit();
    SegmentCommitInfo info = writer.NewestSegment();
    writer.Dispose();

    // After adding the document, we should be able to read it back in
    SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
    Assert.IsTrue(reader != null);
    Document doc = reader.Document(0);
    Assert.IsTrue(doc != null);

    //System.out.println("Document: " + doc);
    IndexableField[] fields = doc.GetFields("textField2");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_2_TEXT));
    Assert.IsTrue(fields[0].FieldType().StoreTermVectors);

    fields = doc.GetFields("textField1");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_1_TEXT));
    Assert.IsFalse(fields[0].FieldType().StoreTermVectors);

    fields = doc.GetFields("keyField");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.KEYWORD_TEXT));

    fields = doc.GetFields(DocHelper.NO_NORMS_KEY);
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.NO_NORMS_TEXT));

    fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY);
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_3_TEXT));

    // test that the norms are not present in the segment if
    // omitNorms is true
    foreach (FieldInfo fi in reader.FieldInfos)
    {
        if (fi.Indexed)
        {
            Assert.IsTrue(fi.OmitsNorms() == (reader.GetNormValues(fi.Name) == null));
        }
    }
    reader.Dispose();
}
private SegmentInfo IndexDoc(IndexWriter writer, System.String fileName)
{
    System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, fileName));
    Document doc = FileDocument.Document(file);
    writer.AddDocument(doc);
    writer.Flush();
    return writer.NewestSegment();
}
private SegmentInfo IndexDoc(IndexWriter writer, System.String fileName)
{
    System.IO.DirectoryInfo file = new System.IO.DirectoryInfo(System.IO.Path.Combine(workDir.FullName, fileName));
    Document doc = FileDocument.Document(file);
    doc.Add(new Field("contents", new System.IO.StreamReader(file.FullName)));
    writer.AddDocument(doc);
    writer.Commit();
    return writer.NewestSegment();
}
/// <summary>
/// Writes the document to the directory using the analyzer
/// and the similarity score; returns the SegmentInfo
/// describing the new segment
/// </summary>
public static SegmentCommitInfo WriteDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
{
    IndexWriter writer = new IndexWriter(dir, (new IndexWriterConfig(Util.LuceneTestCase.TEST_VERSION_CURRENT, analyzer)).SetSimilarity(similarity ?? IndexSearcher.DefaultSimilarity)); // LuceneTestCase.newIndexWriterConfig(random,
    //writer.SetNoCFSRatio(0.0);
    writer.AddDocument(doc);
    writer.Commit();
    SegmentCommitInfo info = writer.NewestSegment();
    writer.Dispose();
    return info;
}
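// Hedged usage sketch (not from the original test suite): one way the WriteDoc helper above
// might be called from a LuceneTestCase-derived test. The local names "dir" and "doc" are
// hypothetical; passing null for similarity falls back to IndexSearcher.DefaultSimilarity.
//
//   Directory dir = NewDirectory();
//   Document doc = new Document();
//   doc.Add(NewTextField("body", "some sample text", Field.Store.NO));
//   SegmentCommitInfo info = WriteDoc(Random(), dir, new MockAnalyzer(Random()), null, doc);
//   // info now describes the freshly flushed single-document segment
//   dir.Dispose();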
public virtual void TestPreAnalyzedField()
{
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("preanalyzed", new AnonymousClassTokenStream(this), TermVector.NO));
    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    SegmentReader reader = SegmentReader.Get(info);

    TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(0, termPositions.NextPosition());

    termPositions.Seek(new Term("preanalyzed", "term2"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(2, termPositions.Freq());
    Assert.AreEqual(1, termPositions.NextPosition());
    Assert.AreEqual(3, termPositions.NextPosition());

    termPositions.Seek(new Term("preanalyzed", "term3"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(2, termPositions.NextPosition());
}
private SegmentCommitInfo IndexDoc(IndexWriter writer, string fileName)
{
    FileInfo file = new FileInfo(Path.Combine(WorkDir.FullName, fileName));
    Document doc = new Document();
    StreamReader @is = new StreamReader(file.FullName);
    doc.Add(new TextField("contents", @is));
    writer.AddDocument(doc);
    writer.Commit();
    @is.Dispose();
    return writer.NewestSegment();
}
public virtual void TestPositionIncrementGap()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    SegmentReader reader = SegmentReader.Get(info);

    TermPositions termPositions = reader.TermPositions(new Term("repeated", "repeated"));
    Assert.IsTrue(termPositions.Next());
    int freq = termPositions.Freq();
    Assert.AreEqual(2, freq);
    Assert.AreEqual(0, termPositions.NextPosition());
    Assert.AreEqual(502, termPositions.NextPosition());
}
public override void SetUp()
{
    base.SetUp();
    /*
     * for (int i = 0; i < testFields.Length; i++)
     * {
     *     fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
     * }
     */

    Array.Sort(TestTerms);
    // create the Random once, outside the loops, so successive NextDouble() calls
    // actually advance the sequence instead of re-seeding each iteration
    Random random = new Random(1);
    int tokenUpto = 0;
    for (int i = 0; i < TestTerms.Length; i++)
    {
        Positions[i] = new int[TERM_FREQ];
        // first position must be 0
        for (int j = 0; j < TERM_FREQ; j++)
        {
            // positions are always sorted in increasing order
            Positions[i][j] = (int)(j * 10 + random.NextDouble() * 10);
            TestToken token = Tokens[tokenUpto++] = new TestToken(this);
            token.Text = TestTerms[i];
            token.Pos = Positions[i][j];
            token.StartOffset = j * 10;
            token.EndOffset = j * 10 + TestTerms[i].Length;
        }
    }
    Array.Sort(Tokens);

    Dir = NewDirectory();
    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false));

    Document doc = new Document();
    for (int i = 0; i < TestFields.Length; i++)
    {
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        if (TestFieldsStorePos[i] && TestFieldsStoreOff[i])
        {
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets = true;
        }
        else if (TestFieldsStorePos[i] && !TestFieldsStoreOff[i])
        {
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
        }
        else if (!TestFieldsStorePos[i] && TestFieldsStoreOff[i])
        {
            customType.StoreTermVectors = true;
            customType.StoreTermVectorOffsets = true;
        }
        else
        {
            customType.StoreTermVectors = true;
        }
        doc.Add(new Field(TestFields[i], "", customType));
    }

    // Create 5 documents for testing, they all have the same terms
    for (int j = 0; j < 5; j++)
    {
        writer.AddDocument(doc);
    }
    writer.Commit();
    Seg = writer.NewestSegment();
    writer.Dispose();

    FieldInfos = SegmentReader.ReadFieldInfos(Seg);
}
public void TestNewestSegment_Renamed()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    Assert.IsNull(writer.NewestSegment());
}
public virtual void TestTokenReuse()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);
    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    doc.Add(NewTextField("f1", "a 5 a a", Field.Store.YES));
    writer.AddDocument(doc);
    writer.Commit();
    SegmentCommitInfo info = writer.NewestSegment();
    writer.Dispose();
    SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

    DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, reader.LiveDocs, "f1", new BytesRef("a"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    int freq = termPositions.Freq();
    Assert.AreEqual(3, freq);
    Assert.AreEqual(0, termPositions.NextPosition());
    Assert.IsNotNull(termPositions.Payload);
    Assert.AreEqual(6, termPositions.NextPosition());
    Assert.IsNull(termPositions.Payload);
    Assert.AreEqual(7, termPositions.NextPosition());
    Assert.IsNull(termPositions.Payload);
    reader.Dispose();
}
public virtual void TestPreAnalyzedField()
{
    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    Document doc = new Document();
    doc.Add(new TextField("preanalyzed", new TokenStreamAnonymousInnerClassHelper(this)));
    writer.AddDocument(doc);
    writer.Commit();
    SegmentCommitInfo info = writer.NewestSegment();
    writer.Dispose();
    SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

    DocsAndPositionsEnum termPositions = reader.TermPositionsEnum(new Term("preanalyzed", "term1"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(0, termPositions.NextPosition());

    termPositions = reader.TermPositionsEnum(new Term("preanalyzed", "term2"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(2, termPositions.Freq());
    Assert.AreEqual(1, termPositions.NextPosition());
    Assert.AreEqual(3, termPositions.NextPosition());

    termPositions = reader.TermPositionsEnum(new Term("preanalyzed", "term3"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(2, termPositions.NextPosition());
    reader.Dispose();
}
public virtual void TestPositionIncrementGap()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    doc.Add(NewTextField("repeated", "repeated one", Field.Store.YES));
    doc.Add(NewTextField("repeated", "repeated two", Field.Store.YES));
    writer.AddDocument(doc);
    writer.Commit();
    SegmentCommitInfo info = writer.NewestSegment();
    writer.Dispose();
    SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

    DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "repeated", new BytesRef("repeated"));
    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    int freq = termPositions.Freq();
    Assert.AreEqual(2, freq);
    Assert.AreEqual(0, termPositions.NextPosition());
    Assert.AreEqual(502, termPositions.NextPosition());
    reader.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    /*
     * for (int i = 0; i < testFields.length; i++)
     * {
     *     fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
     * }
     */

    System.Array.Sort(testTerms);
    // create the Random once, outside the loops, so successive NextDouble() calls
    // actually advance the sequence instead of re-instantiating Random each iteration
    System.Random random = new System.Random();
    int tokenUpto = 0;
    for (int i = 0; i < testTerms.Length; i++)
    {
        positions[i] = new int[TERM_FREQ];
        offsets[i] = new TermVectorOffsetInfo[TERM_FREQ];
        // first position must be 0
        for (int j = 0; j < TERM_FREQ; j++)
        {
            // positions are always sorted in increasing order
            positions[i][j] = (int)(j * 10 + random.NextDouble() * 10);
            // offsets are always sorted in increasing order
            offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length);
            TestToken token = tokens[tokenUpto++] = new TestToken(this);
            token.text = testTerms[i];
            token.pos = positions[i][j];
            token.startOffset = offsets[i][j].GetStartOffset();
            token.endOffset = offsets[i][j].GetEndOffset();
        }
    }
    System.Array.Sort(tokens);

    IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetUseCompoundFile(false);
    Document doc = new Document();
    for (int i = 0; i < testFields.Length; i++)
    {
        Field.TermVector tv;
        if (testFieldsStorePos[i] && testFieldsStoreOff[i])
            tv = Field.TermVector.WITH_POSITIONS_OFFSETS;
        else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
            tv = Field.TermVector.WITH_POSITIONS;
        else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
            tv = Field.TermVector.WITH_OFFSETS;
        else
            tv = Field.TermVector.YES;
        doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv));
    }

    // Create 5 documents for testing, they all have the same terms
    for (int j = 0; j < 5; j++)
        writer.AddDocument(doc);
    writer.Flush();
    seg = writer.NewestSegment().name;
    writer.Close();

    fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
}
public virtual void TestLazyPerformance()
{
    System.String tmpIODir = SupportClass.AppSettings.Get("tempDir", "");
    System.String userName = System.Environment.UserName;
    System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
    System.IO.FileInfo file = new System.IO.FileInfo(path);
    _TestUtil.RmDir(file);
    FSDirectory tmpDir = FSDirectory.GetDirectory(file);
    Assert.IsTrue(tmpDir != null);

    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true);
    writer.SetUseCompoundFile(false);
    writer.AddDocument(testDoc);
    writer.Close();
    segmentName = writer.NewestSegment().name;

    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader;
    long lazyTime = 0;
    long regularTime = 0;
    int length = 50;
    System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LARGE_LAZY_FIELD_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(new System.Collections.Hashtable(), lazyFieldNames);

    for (int i = 0; i < length; i++)
    {
        reader = new FieldsReader(tmpDir, segmentName, fieldInfos);
        Assert.IsTrue(reader != null);
        Assert.IsTrue(reader.Size() == 1);

        Lucene.Net.Documents.Document doc;
        doc = reader.Doc(0, null); //Load all of them
        Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
        Fieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy() == false, "field is lazy");
        System.String value_Renamed;
        long start;
        long finish;
        // measure elapsed time in whole-tick milliseconds; DateTime.Now.Millisecond only
        // returns the millisecond component (0-999) and wraps across second boundaries
        start = System.DateTime.Now.Ticks / System.TimeSpan.TicksPerMillisecond; //On my machine this was always 0ms.
        value_Renamed = field.StringValue();
        finish = System.DateTime.Now.Ticks / System.TimeSpan.TicksPerMillisecond;
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        regularTime += (finish - start);
        reader.Close();
        reader = null;
        doc = null;

        //Hmmm, are we still in cache???
        System.GC.Collect();
        reader = new FieldsReader(tmpDir, segmentName, fieldInfos);
        doc = reader.Doc(0, fieldSelector);
        field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy() == true, "field is not lazy");
        start = System.DateTime.Now.Ticks / System.TimeSpan.TicksPerMillisecond;
        //On my machine this took around 50 - 70ms
        value_Renamed = field.StringValue();
        finish = System.DateTime.Now.Ticks / System.TimeSpan.TicksPerMillisecond;
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        lazyTime += (finish - start);
        reader.Close();
    }

    System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
    System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
public virtual void TestAddDocument()
{
    Document testDoc = new Document();
    DocHelper.SetupDoc(testDoc);
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(testDoc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();

    //After adding the document, we should be able to read it back in
    SegmentReader reader = SegmentReader.Get(info);
    Assert.IsTrue(reader != null);
    Document doc = reader.Document(0);
    Assert.IsTrue(doc != null);

    //System.out.println("Document: " + doc);
    Fieldable[] fields = doc.GetFields("textField2");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_2_TEXT));
    Assert.IsTrue(fields[0].IsTermVectorStored());

    fields = doc.GetFields("textField1");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_1_TEXT));
    Assert.IsFalse(fields[0].IsTermVectorStored());

    fields = doc.GetFields("keyField");
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.KEYWORD_TEXT));

    fields = doc.GetFields(DocHelper.NO_NORMS_KEY);
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.NO_NORMS_TEXT));

    fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY);
    Assert.IsTrue(fields != null && fields.Length == 1);
    Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_3_TEXT));

    // test that the norms are not present in the segment if
    // omitNorms is true
    for (int i = 0; i < reader.core_ForNUnit.fieldInfos_ForNUnit.Size(); i++)
    {
        FieldInfo fi = reader.core_ForNUnit.fieldInfos_ForNUnit.FieldInfo(i);
        if (fi.isIndexed_ForNUnit)
        {
            Assert.IsTrue(fi.omitNorms_ForNUnit == !reader.HasNorms(fi.name_ForNUnit));
        }
    }
}
public override void SetUp()
{
    base.SetUp();
    fieldInfos = new FieldInfos();
    DocHelper.SetupDoc(testDoc);
    fieldInfos.Add(testDoc);
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.SetUseCompoundFile(false);
    writer.AddDocument(testDoc);
    writer.Close();
    segmentName = writer.NewestSegment().name;
}
public virtual void TestTokenReuse()
{
    Analyzer analyzer = new AnonymousClassAnalyzer1(this);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    SegmentReader reader = SegmentReader.Get(info);

    TermPositions termPositions = reader.TermPositions(new Term("f1", "a"));
    Assert.IsTrue(termPositions.Next());
    int freq = termPositions.Freq();
    Assert.AreEqual(3, freq);
    Assert.AreEqual(0, termPositions.NextPosition());
    Assert.AreEqual(true, termPositions.IsPayloadAvailable());
    Assert.AreEqual(6, termPositions.NextPosition());
    Assert.AreEqual(false, termPositions.IsPayloadAvailable());
    Assert.AreEqual(7, termPositions.NextPosition());
    Assert.AreEqual(false, termPositions.IsPayloadAvailable());
}
/// <summary> Writes the document to the directory using the analyzer
/// and the similarity score; returns the SegmentInfo
/// describing the new segment
/// </summary>
/// <param name="dir"></param>
/// <param name="analyzer"></param>
/// <param name="similarity"></param>
/// <param name="doc"></param>
/// <throws> IOException </throws>
public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
{
    IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.setUseCompoundFile(false);
    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    return info;
}
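// Hedged usage sketch (assumption, not part of the original file): the older WriteDoc overload
// above could be exercised roughly like this. The names "ramDir" and "d" are hypothetical, and
// the availability of Similarity.GetDefault() in this version of the port is assumed.
//
//   Directory ramDir = new RAMDirectory();
//   Document d = new Document();
//   d.Add(new Field("body", "some sample text", Field.Store.NO, Field.Index.ANALYZED));
//   SegmentInfo si = WriteDoc(ramDir, new WhitespaceAnalyzer(), Similarity.GetDefault(), d);
//   // si names the newly flushed segment containing the single document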
public virtual void TestLiveChangeToCFS()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetMergePolicy(NewLogMergePolicy(true));
    // Start false:
    iwc.SetUseCompoundFile(false);
    iwc.MergePolicy.NoCFSRatio = 0.0d;
    IndexWriter w = new IndexWriter(dir, iwc);

    // Change to true:
    w.Config.SetUseCompoundFile(true);

    Document doc = new Document();
    doc.Add(NewStringField("field", "foo", Store.NO));
    w.AddDocument(doc);
    w.Commit();
    Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after commit");

    doc.Add(NewStringField("field", "foo", Store.NO));
    w.AddDocument(doc);
    w.Commit();
    w.ForceMerge(1);
    w.Commit();

    // no compound files after merge
    Assert.IsFalse(w.NewestSegment().Info.UseCompoundFile, "Expected Non-CFS after merge");

    MergePolicy lmp = w.Config.MergePolicy;
    lmp.NoCFSRatio = 1.0;
    lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity;

    w.AddDocument(doc);
    w.ForceMerge(1);
    w.Commit();
    Assert.IsTrue(w.NewestSegment().Info.UseCompoundFile, "Expected CFS after merge");
    w.Dispose();
    dir.Dispose();
}
public override void SetUp()
{
    base.SetUp();
    /*
     * for (int i = 0; i < testFields.length; i++)
     * {
     *     fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
     * }
     */

    System.Array.Sort(testTerms);
    // create the Random once so repeated NextDouble() calls advance the sequence
    System.Random random = new System.Random();
    int tokenUpto = 0;
    for (int i = 0; i < testTerms.Length; i++)
    {
        positions[i] = new int[TERM_FREQ];
        offsets[i] = new TermVectorOffsetInfo[TERM_FREQ];
        // first position must be 0
        for (int j = 0; j < TERM_FREQ; j++)
        {
            // positions are always sorted in increasing order
            positions[i][j] = (int)(j * 10 + random.NextDouble() * 10);
            // offsets are always sorted in increasing order
            offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length);
            TestToken token = tokens[tokenUpto++] = new TestToken(this);
            token.text = testTerms[i];
            token.pos = positions[i][j];
            token.startOffset = offsets[i][j].StartOffset;
            token.endOffset = offsets[i][j].EndOffset;
        }
    }
    System.Array.Sort(tokens);

    IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.UseCompoundFile = false;
    Document doc = new Document();
    for (int i = 0; i < testFields.Length; i++)
    {
        Field.TermVector tv;
        if (testFieldsStorePos[i] && testFieldsStoreOff[i])
        {
            tv = Field.TermVector.WITH_POSITIONS_OFFSETS;
        }
        else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
        {
            tv = Field.TermVector.WITH_POSITIONS;
        }
        else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
        {
            tv = Field.TermVector.WITH_OFFSETS;
        }
        else
        {
            tv = Field.TermVector.YES;
        }
        doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv));
    }

    // Create 5 documents for testing, they all have the same terms
    for (int j = 0; j < 5; j++)
    {
        writer.AddDocument(doc, null);
    }
    writer.Commit(null);
    seg = writer.NewestSegment().name;
    writer.Close();

    fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION, null);
}