void Index()
{
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    Lucene.Net.Documents.Document doc = null;
    Lucene.Net.Documents.Field f = null;

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    wr.Close();
}
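// A minimal usage sketch, not part of the original source: querying the small index built by
// Index() above. It assumes the same `dir` Directory field and the Lucene.Net 2.9/3.0-era API
// used throughout these examples; exact member names (e.g. TopDocs.TotalHits) may differ
// slightly between Lucene.Net versions.
void SearchExample()
{
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
    Lucene.Net.Search.TopDocs hits = searcher.Search(
        new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "a")), 10);
    // Three of the four documents indexed above contain the term "a".
    System.Console.WriteLine(hits.TotalHits);
    searcher.Close();
}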
protected internal static Document Doc(Field[] fields)
{
    Document doc = new Document();
    for (int i = 0; i < fields.Length; i++)
    {
        doc.Add(fields[i]);
    }
    return doc;
}
public virtual void TestFlushExceptions()
{
    MockRAMDirectory directory = new MockRAMDirectory();
    FailOnlyOnFlush failure = new FailOnlyOnFlush();
    directory.FailOn(failure);

    IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    writer.SetMergeScheduler(cms);
    writer.SetMaxBufferedDocs(2);

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);

    for (int i = 0; i < 10; i++)
    {
        for (int j = 0; j < 20; j++)
        {
            idField.SetValue(System.Convert.ToString(i * 20 + j));
            writer.AddDocument(doc);
        }

        writer.AddDocument(doc);
        failure.SetDoFail();
        try
        {
            writer.Flush();
            Assert.Fail("failed to hit IOException");
        }
        catch (System.IO.IOException ioe)
        {
            failure.ClearDoFail();
        }
    }

    writer.Close();
    IndexReader reader = IndexReader.Open(directory);
    Assert.AreEqual(200, reader.NumDocs());
    reader.Close();
    directory.Close();
}
private static void AssignFieldValues(SearchResult result, Field uriField, List<SitecoreItem> items)
{
    var itemInfo = new SitecoreItem(new ItemUri(uriField.StringValue()));
    foreach (Field field in result.Document.GetFields())
    {
        itemInfo.Fields[field.Name()] = field.StringValue();
    }
    items.Add(itemInfo);
}
private static Document MakeDocument(System.String docText)
{
    Document doc = new Document();
    Field f = new Field("f", docText, Field.Store.NO, Field.Index.ANALYZED);
    f.SetOmitNorms(true);
    doc.Add(f);
    return doc;
}
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        Field noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
        //noPayloadField.setBoost(0);
        doc.Add(noPayloadField);
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("multiField", English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory);
    searcher.SetSimilarity(similarity);
}
private void AddNoProxDoc(IndexWriter writer)
{
    Document doc = new Document();
    Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
    f.SetOmitTf(true);
    doc.Add(f);
    f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
    f.SetOmitTf(true);
    doc.Add(f);
    writer.AddDocument(doc);
}
public virtual void TestMixedRAM()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    writer.MergeFactor = 2;

    Document d = new Document();
    // this field will have Tf
    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f1);
    // this field will NOT have Tf
    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f2);

    for (int i = 0; i < 5; i++)
        writer.AddDocument(d);

    f2.OmitTermFreqAndPositions = true;

    for (int i = 0; i < 20; i++)
        writer.AddDocument(d);

    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    _TestUtil.CheckIndex(ram);

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
    FieldInfos fi = reader.FieldInfos();
    Assert.IsTrue(!fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    reader.Close();
    ram.Close();
}
private void AddDoc(IndexWriter writer, System.String text)
{
    Document d = new Document();
    Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer.AddDocument(d);
}
public virtual void TestTermVectorCorruption3()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer());
    writer.SetMaxBufferedDocs(2);
    writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.SetMergePolicy(new LogDocMergePolicy());

    Document document = new Document();
    Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
    document.Add(storedField);
    Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    document.Add(termVectorField);
    for (int i = 0; i < 10; i++)
        writer.AddDocument(document);
    writer.Close();

    writer = new IndexWriter(dir, false, new StandardAnalyzer());
    writer.SetMaxBufferedDocs(2);
    writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.SetMergePolicy(new LogDocMergePolicy());
    for (int i = 0; i < 6; i++)
        writer.AddDocument(document);
    writer.Optimize();
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    for (int i = 0; i < 10; i++)
    {
        reader.GetTermFreqVectors(i);
        reader.Document(i);
    }
    reader.Close();
    dir.Close();
}
public virtual void TestNoWaitClose()
{
    RAMDirectory directory = new MockRAMDirectory();

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);

    for (int pass = 0; pass < 2; pass++)
    {
        bool autoCommit = pass == 0;
        IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true);

        for (int iter = 0; iter < 10; iter++)
        {
            ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
            writer.SetMergeScheduler(cms);
            writer.SetMaxBufferedDocs(2);
            writer.SetMergeFactor(100);

            for (int j = 0; j < 201; j++)
            {
                idField.SetValue(System.Convert.ToString(iter * 201 + j));
                writer.AddDocument(doc);
            }

            int delID = iter * 201;
            for (int j = 0; j < 20; j++)
            {
                writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID)));
                delID += 5;
            }

            // Force a bunch of merge threads to kick off so we
            // stress out aborting them on close:
            writer.SetMergeFactor(3);
            writer.AddDocument(doc);
            writer.Flush();

            writer.Close(false);

            IndexReader reader = IndexReader.Open(directory);
            Assert.AreEqual((1 + iter) * 182, reader.NumDocs());
            reader.Close();

            // Reopen
            writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
        }
        writer.Close();
    }
    directory.Close();
}
public virtual void TestEnablingNorms()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.SetMaxBufferedDocs(10);

    // Enable norms for only 1 doc, pre flush
    for (int j = 0; j < 10; j++)
    {
        Document doc = new Document();
        Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
        if (j != 8)
        {
            f.SetOmitNorms(true);
        }
        doc.Add(f);
        writer.AddDocument(doc);
    }
    writer.Close();

    Term searchTerm = new Term("field", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.Search(new TermQuery(searchTerm));
    Assert.AreEqual(10, hits.Length());
    searcher.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.SetMaxBufferedDocs(10);

    // Enable norms for only 1 doc, post flush
    for (int j = 0; j < 27; j++)
    {
        Document doc = new Document();
        Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
        if (j != 26)
        {
            f.SetOmitNorms(true);
        }
        doc.Add(f);
        writer.AddDocument(doc);
    }
    writer.Close();

    searcher = new IndexSearcher(dir);
    hits = searcher.Search(new TermQuery(searchTerm));
    Assert.AreEqual(27, hits.Length());
    searcher.Close();

    IndexReader reader = IndexReader.Open(dir);
    reader.Close();

    dir.Close();
}
public virtual void TestTermVectorCorruption2()
{
    Directory dir = new MockRAMDirectory();
    for (int iter = 0; iter < 4; iter++)
    {
        bool autoCommit = 1 == iter / 2;
        IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
        writer.SetMaxBufferedDocs(2);
        writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        writer.SetMergePolicy(new LogDocMergePolicy());

        Document document = new Document();
        Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
        document.Add(storedField);
        writer.AddDocument(document);
        writer.AddDocument(document);

        document = new Document();
        document.Add(storedField);
        Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        document.Add(termVectorField);
        writer.AddDocument(document);
        writer.Optimize();
        writer.Close();

        IndexReader reader = IndexReader.Open(dir);
        Assert.IsTrue(reader.GetTermFreqVectors(0) == null);
        Assert.IsTrue(reader.GetTermFreqVectors(1) == null);
        Assert.IsTrue(reader.GetTermFreqVectors(2) != null);
        reader.Close();
    }
    dir.Close();
}
public void TestDeletesNumDocs()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "a b c", Field.Store.NO, Field.Index.ANALYZED));
    Field id = new Field("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(id);
    id.SetValue("0");
    w.AddDocument(doc);
    id.SetValue("1");
    w.AddDocument(doc);

    IndexReader r = w.GetReader();
    Assert.AreEqual(2, r.NumDocs());
    r.Close();

    w.DeleteDocuments(new Term("id", "0"));
    r = w.GetReader();
    Assert.AreEqual(1, r.NumDocs());
    r.Close();

    w.DeleteDocuments(new Term("id", "1"));
    r = w.GetReader();
    Assert.AreEqual(0, r.NumDocs());
    r.Close();

    w.Close();
    dir.Close();
}
public virtual void TestBasic()
{
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.MergeFactor = 2;
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());

    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document d = new Document();
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();

        Field noTf = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.OmitTermFreqAndPositions = true;
        d.Add(noTf);

        Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        d.Add(tf);

        writer.AddDocument(d);
        //System.out.println(d);
    }

    writer.Optimize();
    // flush
    writer.Close();

    _TestUtil.CheckIndex(dir);

    /*
     * Verify the index
     */
    Searcher searcher = new IndexSearcher(dir, true);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d2 = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d2);

    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);

    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    searcher.Close();
    dir.Close();
}
public virtual void TestNoPrxFile()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(3);
    writer.MergeFactor = 2;
    writer.UseCompoundFile = false;

    Document d = new Document();
    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    f1.OmitTermFreqAndPositions = true;
    d.Add(f1);

    for (int i = 0; i < 30; i++)
        writer.AddDocument(d);

    writer.Commit();

    AssertNoPrx(ram);

    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    AssertNoPrx(ram);
    _TestUtil.CheckIndex(ram);
    ram.Close();
}
private void Create()
{
    // NOTE: put seed in here to make failures
    // deterministic, but do not commit with a seed (to
    // better test):
    dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(17);

    Document doc = new Document();
    Document doc2 = new Document();

    Field id = new Field("id", "", Field.Store.YES, Field.Index.NO);
    doc.Add(id);
    doc2.Add(id);

    Field contents = new Field("contents", "", Field.Store.NO, Field.Index.ANALYZED);
    doc.Add(contents);
    doc2.Add(contents);

    Field byteField = new Field("byte", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(byteField);
    doc2.Add(byteField);

    Field shortField = new Field("short", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(shortField);
    doc2.Add(shortField);

    Field intField = new Field("int", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(intField);
    doc2.Add(intField);

    Field longField = new Field("long", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(longField);
    doc2.Add(longField);

    Field floatField = new Field("float", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(floatField);
    doc2.Add(floatField);

    Field doubleField = new Field("double", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(doubleField);
    doc2.Add(doubleField);

    // we use two diff string fields so our FieldCache usage
    // is less suspicious to cache inspection
    Field stringField = new Field("string", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(stringField);
    Field stringFieldIdx = new Field("stringIdx", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(stringFieldIdx);
    // doc2 doesn't have stringField or stringFieldIdx, so we get nulls

    for (int i = 0; i < NUM_DOCS; i++)
    {
        id.SetValue("" + i);

        if (i % 1000 == 0) contents.SetValue("a b c z");
        else if (i % 100 == 0) contents.SetValue("a b c y");
        else if (i % 10 == 0) contents.SetValue("a b c x");
        else contents.SetValue("a b c");

        byteField.SetValue("" + NextInt((sbyte) System.SByte.MinValue, (sbyte) System.SByte.MaxValue));

        if (NextInt(10) == 3) shortField.SetValue("" + System.Int16.MinValue);
        else if (NextInt(10) == 7) shortField.SetValue("" + System.Int16.MaxValue);
        else shortField.SetValue("" + NextInt(System.Int16.MinValue, System.Int16.MaxValue));

        if (NextInt(10) == 3) intField.SetValue("" + System.Int32.MinValue);
        else if (NextInt(10) == 7) intField.SetValue("" + System.Int32.MaxValue);
        else intField.SetValue("" + this.r.Next());

        if (NextInt(10) == 3) longField.SetValue("" + System.Int64.MinValue);
        else if (NextInt(10) == 7) longField.SetValue("" + System.Int64.MaxValue);
        else longField.SetValue("" + this.r.Next(System.Int32.MaxValue));

        floatField.SetValue("" + (float) this.r.NextDouble());
        doubleField.SetValue("" + this.r.NextDouble());

        if (i % 197 == 0)
        {
            writer.AddDocument(doc2);
        }
        else
        {
            System.String r = RandomString(NextInt(20));
            stringField.SetValue(r);
            stringFieldIdx.SetValue(r);
            writer.AddDocument(doc);
        }
    }

    writer.Close();

    searcherMultiSegment = new IndexSearcher(dir);
    searcherMultiSegment.SetDefaultFieldSortScoring(true, true);

    dir2 = new MockRAMDirectory(dir);
    writer = new IndexWriter(dir2, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.Optimize();
    writer.Close();

    searcherSingleSegment = new IndexSearcher(dir2);
    searcherSingleSegment.SetDefaultFieldSortScoring(true, true);

    dir3 = new MockRAMDirectory(dir);
    writer = new IndexWriter(dir3, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.Optimize(3);
    writer.Close();

    searcherFewSegment = new IndexSearcher(dir3);
    searcherFewSegment.SetDefaultFieldSortScoring(true, true);
}
private void AddDoc(System.String text, IndexWriter iw, float boost)
{
    Document doc = new Document();
    Field f = new Field("key", text, Field.Store.YES, Field.Index.ANALYZED);
    f.SetBoost(boost);
    doc.Add(f);
    iw.AddDocument(doc);
}
public virtual void TestDeleteMerging()
{
    RAMDirectory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    writer.SetMergeScheduler(cms);

    LogDocMergePolicy mp = new LogDocMergePolicy(writer);
    writer.SetMergePolicy(mp);

    // Force degenerate merging so we can get a mix of
    // merging of segments with and without deletes at the
    // start:
    mp.SetMinMergeDocs(1000);

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);
    for (int i = 0; i < 10; i++)
    {
        for (int j = 0; j < 100; j++)
        {
            idField.SetValue(System.Convert.ToString(i * 100 + j));
            writer.AddDocument(doc);
        }

        int delID = i;
        while (delID < 100 * (1 + i))
        {
            writer.DeleteDocuments(new Term("id", "" + delID));
            delID += 10;
        }

        writer.Flush();
    }

    writer.Close();
    IndexReader reader = IndexReader.Open(directory);
    // Verify that we did not lose any deletes...
    Assert.AreEqual(450, reader.NumDocs());
    reader.Close();
    directory.Close();
}
public virtual void TestSubclassConcurrentMergeScheduler()
{
    MockRAMDirectory dir = new MockRAMDirectory();
    dir.FailOn(new FailOnlyOnMerge());

    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    MyMergeScheduler ms = new MyMergeScheduler(this);
    writer.SetMergeScheduler(ms);
    writer.SetMaxBufferedDocs(2);
    writer.SetRAMBufferSizeMB(Lucene.Net.Index.IndexWriter.DISABLE_AUTO_FLUSH);
    for (int i = 0; i < 20; i++)
        writer.AddDocument(doc);

    ms.Sync();
    writer.Close();

    Assert.IsTrue(mergeThreadCreated);
    Assert.IsTrue(mergeCalled);
    Assert.IsTrue(excCalled);
    dir.Close();
    Assert.IsTrue(ConcurrentMergeScheduler.AnyUnhandledExceptions());
}
public virtual void TestOptimizeOverMerge()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer());
    writer.SetMaxBufferedDocs(2);
    writer.SetMergeFactor(100);
    writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);

    Document document = new Document();
    Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
    document.Add(storedField);
    Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    document.Add(termVectorField);
    for (int i = 0; i < 170; i++)
        writer.AddDocument(document);

    writer.Close();
    MyIndexWriter myWriter = new MyIndexWriter(this, dir);
    myWriter.Optimize();
    Assert.AreEqual(10, myWriter.mergeCount);
}
// Tests whether the DocumentWriter correctly enables the
// omitTermFreqAndPositions bit in the FieldInfo
public virtual void TestOmitTermFreqAndPositions()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();

    // this field will have Tf
    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f1);

    // this field will NOT have Tf
    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.OmitTermFreqAndPositions = true;
    d.Add(f2);

    writer.AddDocument(d);
    writer.Optimize();

    // now we add another document which has term freq for field f2 and not for f1,
    // and verify that the SegmentMerger keeps things consistent
    d = new Document();

    // Reverse
    f1.OmitTermFreqAndPositions = true;
    d.Add(f1);

    f2.OmitTermFreqAndPositions = false;
    d.Add(f2);

    writer.AddDocument(d);
    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    _TestUtil.CheckIndex(ram);

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
    FieldInfos fi = reader.FieldInfos();
    Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    reader.Close();
    ram.Close();
}
// create the next document
private Document NewDoc()
{
    Document d = new Document();
    float boost = NextNorm();
    for (int i = 0; i < 10; i++)
    {
        Field f = new Field("f" + i, "v" + i, Field.Store.NO, Field.Index.NOT_ANALYZED);
        f.SetBoost(boost);
        d.Add(f);
    }
    return d;
}
static DocHelper()
{
    textField1 = Field.Text(TEXT_FIELD_1_KEY, FIELD_1_TEXT, false);
    textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true);
    keyField = Field.Keyword(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
    unIndField = Field.UnIndexed(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
    unStoredField1 = Field.UnStored(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, false);
    unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true);
    {
        nameValues = new System.Collections.Hashtable();
        nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
        nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
        nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
        nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
        nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
        nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
    }
}
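// A hedged aside, not part of the original DocHelper: the deprecated factory methods used above
// correspond roughly to the Field constructor form used by the other examples (older Lucene
// 1.9/2.x naming; the trailing boolean on Text/UnStored is the term-vector flag). The field
// names and values below are illustrative only.
static void FactoryEquivalentsSketch()
{
    Field text = new Field("text", "value", Field.Store.YES, Field.Index.TOKENIZED);      // ~ Field.Text(name, value, false)
    Field keyword = new Field("key", "value", Field.Store.YES, Field.Index.UN_TOKENIZED); // ~ Field.Keyword(name, value)
    Field unIndexed = new Field("raw", "value", Field.Store.YES, Field.Index.NO);         // ~ Field.UnIndexed(name, value)
    Field unStored = new Field("body", "value", Field.Store.NO, Field.Index.TOKENIZED);   // ~ Field.UnStored(name, value, false)
}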
public virtual void TestMultiSearcher()
{
    //setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer1.AddDocument(d);
    writer1.Optimize();
    writer1.Close();
    IndexReader reader1 = IndexReader.Open(ramDir1);

    //setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer2.AddDocument(d);
    writer2.Optimize();
    writer2.Close();
    IndexReader reader2 = IndexReader.Open(ramDir2);

    IndexSearcher[] searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1);
    searchers[1] = new IndexSearcher(ramDir2);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.Parse("multi*");
    System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));

    //at this point the multisearcher calls combine(query[])
    hits = multiSearcher.Search(query);

    //query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
    Query[] expandedQueries = new Query[2];
    expandedQueries[0] = query.Rewrite(reader1);
    expandedQueries[1] = query.Rewrite(reader2);
    query = query.Combine(expandedQueries);

    //create an instance of the highlighter with the tags used to surround highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));

    for (int i = 0; i < hits.Length(); i++)
    {
        System.String text = hits.Doc(i).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlightedText = highlighter.GetBestFragment(tokenStream, text);
        System.Console.Out.WriteLine(highlightedText);
    }
    Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector)"/>
/// <deprecated> Replaced by <see cref="GetFieldNames(IndexReader.FieldOption)"/>
/// </deprecated>
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    bool storedTermVector;
    bool storePositionWithTermVector;
    bool storeOffsetWithTermVector;

    if (tvSpec == Field.TermVector.NO)
    {
        storedTermVector = false; storePositionWithTermVector = false; storeOffsetWithTermVector = false;
    }
    else if (tvSpec == Field.TermVector.YES)
    {
        storedTermVector = true; storePositionWithTermVector = false; storeOffsetWithTermVector = false;
    }
    else if (tvSpec == Field.TermVector.WITH_POSITIONS)
    {
        storedTermVector = true; storePositionWithTermVector = true; storeOffsetWithTermVector = false;
    }
    else if (tvSpec == Field.TermVector.WITH_OFFSETS)
    {
        storedTermVector = true; storePositionWithTermVector = false; storeOffsetWithTermVector = true;
    }
    else if (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS)
    {
        storedTermVector = true; storePositionWithTermVector = true; storeOffsetWithTermVector = true;
    }
    else
    {
        throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
    }

    // maintain a unique set of field names
    System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && fi.storeTermVector == storedTermVector && fi.storePositionWithTermVector == storePositionWithTermVector && fi.storeOffsetWithTermVector == storeOffsetWithTermVector)
        {
            fieldSet.Add(fi.name, fi.name);
        }
    }
    return fieldSet;
}
/*internal*/ public Document Doc(int n)
{
    indexStream.Seek(n * 8L);
    long position = indexStream.ReadLong();
    fieldsStream.Seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.ReadVInt();
    for (int i = 0; i < numFields; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt();
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);

        byte bits = fieldsStream.ReadByte();

        bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

        if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
        {
            byte[] b = new byte[fieldsStream.ReadVInt()];
            fieldsStream.ReadBytes(b, 0, b.Length);
            if (compressed)
                doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
            else
                doc.Add(new Field(fi.name, b, Field.Store.YES));
        }
        else
        {
            Field.Index index;
            Field.Store store = Field.Store.YES;

            if (fi.isIndexed && tokenize)
                index = Field.Index.TOKENIZED;
            else if (fi.isIndexed && !tokenize)
                index = Field.Index.UN_TOKENIZED;
            else
                index = Field.Index.NO;

            Field.TermVector termVector = null;
            if (fi.storeTermVector)
            {
                if (fi.storeOffsetWithTermVector)
                {
                    if (fi.storePositionWithTermVector)
                        termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                    else
                        termVector = Field.TermVector.WITH_OFFSETS;
                }
                else if (fi.storePositionWithTermVector)
                    termVector = Field.TermVector.WITH_POSITIONS;
                else
                    termVector = Field.TermVector.YES;
            }
            else
            {
                termVector = Field.TermVector.NO;
            }

            if (compressed)
            {
                store = Field.Store.COMPRESS;
                byte[] b = new byte[fieldsStream.ReadVInt()];
                fieldsStream.ReadBytes(b, 0, b.Length);
                Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                f.SetOmitNorms(fi.omitNorms);
                doc.Add(f);
            }
            else
            {
                Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                f.SetOmitNorms(fi.omitNorms);
                doc.Add(f);
            }
        }
    }

    return doc;
}
private static Document CreateDocument(System.String text, long time)
{
    Document document = new Document();

    // Add the text field.
    Field textField = new Field(TEXT_FIELD, text, Field.Store.YES, Field.Index.ANALYZED);
    document.Add(textField);

    // Add the date/time field.
    System.String dateTimeString = DateTools.TimeToString(time, DateTools.Resolution.SECOND);
    Field dateTimeField = new Field(DATE_TIME_FIELD, dateTimeString, Field.Store.YES, Field.Index.NOT_ANALYZED);
    document.Add(dateTimeField);

    return document;
}
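// A hedged follow-on sketch, not part of the original source: because the date/time field above
// is indexed as a DateTools string, a span of times can be matched with a TermRangeQuery built
// from the same DateTools formatting. DATE_TIME_FIELD comes from the example above; the method
// name and parameters here are illustrative, and Query/TermRangeQuery are assumed to be in
// scope via Lucene.Net.Search.
private static Query CreateDateRangeQuery(long startTime, long endTime)
{
    System.String lower = DateTools.TimeToString(startTime, DateTools.Resolution.SECOND);
    System.String upper = DateTools.TimeToString(endTime, DateTools.Resolution.SECOND);
    return new TermRangeQuery(DATE_TIME_FIELD, lower, upper, true, true);
}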
public virtual void TestLUCENE_1590()
{
    Document doc = new Document();
    // f1 has no norms
    doc.Add(new Field("f1", "v1", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
    doc.Add(new Field("f1", "v2", Field.Store.YES, Field.Index.NO));
    // f2 has no TF
    Field f = new Field("f2", "v1", Field.Store.NO, Field.Index.ANALYZED);
    f.SetOmitTermFreqAndPositions(true);
    doc.Add(f);
    doc.Add(new Field("f2", "v2", Field.Store.YES, Field.Index.NO));

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(doc);
    writer.Optimize(); // be sure to have a single segment
    writer.Close();

    _TestUtil.CheckIndex(dir);

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(dir);
    FieldInfos fi = reader.FieldInfos();
    // f1
    Assert.IsFalse(reader.HasNorms("f1"), "f1 should have no norms");
    Assert.IsFalse(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should not be set for f1");
    // f2
    Assert.IsTrue(reader.HasNorms("f2"), "f2 should have norms");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "omitTermFreqAndPositions field bit should be set for f2");
}
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    return in_Renamed.GetIndexedFieldNames(tvSpec);
}