// Creates index based on selection of analyzer
public void CreateIndex(string indexPath, string name)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

    if (name == "WhitespaceAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    }
    if (name == "SimpleAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    }
    if (name == "StandardAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
    }
    if (name == "StopAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
    }

    // Create the writer with whichever analyzer was selected above
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
    writer.SetSimilarity(customSimilarity);
}
protected override void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
    writer.SetSimilarity(similarity);
}
/// <summary>
/// Creates the index at a given path
/// </summary>
/// <param name="indexPath">The pathname to create the index</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath + "/IndexStoredPosition");
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
    writer.SetSimilarity(mySimilarity);
}
/// <summary>
/// Creates the index at a given path
/// </summary>
/// <param name="indexPath">The pathname to create the index</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
    writer.SetSimilarity(new NewSimilarity()); // for similarity measure
}
const int MaxMergeDocs = 7999; // Except never merge segments that have more docs than this

public static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory, bool create)
{
    IndexWriter indexWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriter.MergeFactor = MergeFactor;
    indexWriter.MaxMergeDocs = MaxMergeDocs;
    indexWriter.SetSimilarity(new CustomSimilarity());
    return indexWriter;
}
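Several of these snippets pass a custom Similarity to the writer (customSimilarity, newSimilarity, new CustomSimilarity(), and so on) without showing its definition. As a rough sketch of what such a class can look like (the class name and the choice of overridden methods here are assumptions, not the original implementations), one typically derives from DefaultSimilarity and overrides individual scoring factors:

using Lucene.Net.Search;

// Hypothetical custom similarity; the CustomSimilarity / newSimilarity
// instances used in the surrounding snippets are defined elsewhere and may differ.
public class ExampleSimilarity : DefaultSimilarity
{
    // Flatten the term-frequency contribution so repeated terms do not dominate the score.
    public override float Tf(float freq)
    {
        return freq > 0 ? 1.0f : 0.0f;
    }

    // Ignore field length so short and long fields are scored alike.
    public override float LengthNorm(string fieldName, int numTerms)
    {
        return 1.0f;
    }

    // Idf, Coord, QueryNorm, etc. keep the DefaultSimilarity behaviour.
}

An instance of such a class is then handed to the writer via writer.SetSimilarity(...), exactly as the snippets do.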
private void CreateIndex(Directory dir)
{
    IndexWriter iw = new IndexWriter(dir, anlzr, true, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.SetSimilarity(similarityOne);
    iw.SetUseCompoundFile(true);
    iw.Close();
}
} // constructor which is used to initialize the objects

// Create index
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    analyzerstandard = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
    analyzerkeyword = new Lucene.Net.Analysis.KeywordAnalyzer();
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    analysor = new PerFieldAnalyzerWrapper(analyzerstandard);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analysor, true, mfl);
    writer.SetSimilarity(customSimilarity); // for task 6
}
public override void SetUp()
{
    base.SetUp();
    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetSimilarity(sim);

    // hed is the most important field, dek is secondary

    // d1 is an "ok" match for: albino elephant
    {
        Document d1 = new Document();
        d1.Add(new Field("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED));    // Field.Keyword("id", "d1")
        d1.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant")
        d1.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("dek", "elephant")
        writer.AddDocument(d1, null);
    }

    // d2 is a "good" match for: albino elephant
    {
        Document d2 = new Document();
        d2.Add(new Field("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED));    // Field.Keyword("id", "d2")
        d2.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant")
        d2.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));   // Field.Text("dek", "albino")
        d2.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("dek", "elephant")
        writer.AddDocument(d2, null);
    }

    // d3 is a "better" match for: albino elephant
    {
        Document d3 = new Document();
        d3.Add(new Field("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED));    // Field.Keyword("id", "d3")
        d3.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));   // Field.Text("hed", "albino")
        d3.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant")
        writer.AddDocument(d3, null);
    }

    // d4 is the "best" match for: albino elephant
    {
        Document d4 = new Document();
        d4.Add(new Field("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED));    // Field.Keyword("id", "d4")
        d4.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));   // Field.Text("hed", "albino")
        d4.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("hed", "elephant")
        d4.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));   // Field.Text("dek", "albino")
        writer.AddDocument(d4, null);
    }

    writer.Close();
    r = IndexReader.Open(index, true, null);
    s = new IndexSearcher(r);
    s.Similarity = sim;
}
/// <summary>
/// Creates the index at indexPath
/// </summary>
/// <param name="indexPath">Directory path to create the index</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    spellCheckIndexStorage = Lucene.Net.Store.FSDirectory.Open(indexPath + @"\spell");
    autoCompleteIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath + @"\autocomplete");
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);

    // changes to Lucene score
    writer.SetSimilarity(newSimilarity);
}
/// <summary> Writes the document to the directory using the analyzer
/// and the similarity score; returns the SegmentInfo
/// describing the new segment
/// </summary>
/// <param name="dir"></param>
/// <param name="analyzer"></param>
/// <param name="similarity"></param>
/// <param name="doc"></param>
/// <throws> IOException </throws>
public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
{
    IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.SetUseCompoundFile(false);
    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    return info;
}
private void AddDocs(Directory dir, int ndocs, bool compound)
{
    IndexWriter iw = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.SetSimilarity(similarityOne);
    iw.SetUseCompoundFile(compound);
    for (int i = 0; i < ndocs; i++)
    {
        iw.AddDocument(NewDoc());
    }
    iw.Close();
}
public void CreateIndex(string collectionPath, string indexPath)
{
    HashSet<string> stopWordsSet = new HashSet<string>(STOP_WORDS);
    this.indexPath = indexPath;
    this.collectionPath = collectionPath;
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(this.indexPath);
    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English", stopWordsSet);
    shingleAnalyzer = new Lucene.Net.Analysis.Shingle.ShingleAnalyzerWrapper(analyzer, MAX_SHINGLE_SIZE);
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, shingleAnalyzer, true, mfl);
    writer.SetSimilarity(newSimilarity);
    IndexCollection();
}
public virtual void TestBasic()
{
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergeFactor(2);
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());

    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document d = new Document();
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();
        Field noTf = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.SetOmitTermFreqAndPositions(true);
        d.Add(noTf);
        Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        d.Add(tf);
        writer.AddDocument(d);
        //System.out.println(d);
    }

    writer.Optimize(); // flush
    writer.Close();
    _TestUtil.CheckIndex(dir);

    /*
     * Verify the index
     */
    Searcher searcher = new IndexSearcher(dir);
    searcher.SetSimilarity(new SimpleSimilarity());

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d2 = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d2);

    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    //System.out.println(CountingHitCollector.getCount());
    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    //System.out.println(CountingHitCollector.getCount());
    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    //System.out.println(CountingHitCollector.getCount());
    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);
    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    searcher.Close();
    dir.Close();
}
// Creates the index writer
public void CreateWriter()
{
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
    writer.SetSimilarity(newSimilarity);
}
public virtual void TestBasic()
{
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.MergeFactor = 2;
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());

    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document d = new Document();
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();
        Field noTf = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.OmitTermFreqAndPositions = true;
        d.Add(noTf);
        Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        d.Add(tf);
        writer.AddDocument(d);
        //System.out.println(d);
    }

    writer.Optimize(); // flush
    writer.Close();
    _TestUtil.CheckIndex(dir);

    /*
     * Verify the index
     */
    Searcher searcher = new IndexSearcher(dir, true);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d2 = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d2);

    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    //System.out.println(CountingHitCollector.getCount());
    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    //System.out.println(CountingHitCollector.getCount());
    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    //System.out.println(CountingHitCollector.getCount());
    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);
    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.IsTrue(15 == CountingHitCollector.GetCount());

    searcher.Close();
    dir.Close();
}
internal static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory)
{
    bool create = !IndexReader.IndexExists(directory);
    directory.EnsureOpen();

    if (!create)
    {
        if (IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }
    }

    IndexWriter indexWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriter.MergeFactor = MergeFactor;
    indexWriter.MaxMergeDocs = MaxMergeDocs;
    indexWriter.SetSimilarity(new CustomSimilarity());
    return indexWriter;
}
private static IndexWriter CreateIndexWriter(Lucene.Net.Store.Directory directory)
{
    var create = !IndexReader.IndexExists(directory);
    directory.EnsureOpen();

    if (!create)
    {
        if (IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }
    }

    var indexWriter = new IndexWriter(directory, new PackageAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
    NuGetMergePolicyApplyer.ApplyTo(indexWriter);
    indexWriter.SetSimilarity(new CustomSimilarity());
    return indexWriter;
}
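At query time the same Similarity normally has to be installed on the searcher as well, as the test snippets above do with searcher.SetSimilarity(...) or searcher.Similarity; otherwise the index-time norms and the query-time weights are computed with different formulas. A minimal sketch of the search-side counterpart, assuming the Lucene.Net 3.x API and reusing the CustomSimilarity class from the snippets above (the helper name and hit count are placeholders):

using Lucene.Net.Search;
using Lucene.Net.Store;

// Hypothetical search-side counterpart: reuse the same Similarity class that
// was passed to IndexWriter.SetSimilarity when the index was built.
public static TopDocs SearchWithCustomSimilarity(string indexPath, Query query)
{
    Directory directory = FSDirectory.Open(indexPath);
    IndexSearcher searcher = new IndexSearcher(directory, true); // read-only searcher
    searcher.Similarity = new CustomSimilarity();                // same class as at index time
    return searcher.Search(query, 10);                           // top 10 hits
}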