public void TestRollbackIntegrityWithBufferFlush() {
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 5; i++) {
        Document doc = new Document();
        doc.Add(new Field("pk", i.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        w.AddDocument(doc);
    }
    w.Close();

    // If buffer size is small enough to cause a flush, errors ensue...
    w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    w.SetMaxBufferedDocs(2);

    Term pkTerm = new Term("pk", "");
    for (int i = 0; i < 3; i++) {
        Document doc = new Document();
        String value = i.ToString();
        doc.Add(new Field("pk", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        doc.Add(new Field("text", "foo", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
        w.UpdateDocument(pkTerm.CreateTerm(value), doc);
    }
    w.Rollback();

    IndexReader r = IndexReader.Open(dir, true);
    Assert.AreEqual(5, r.NumDocs(), "index should contain same number of docs post rollback");
    r.Close();
    dir.Close();
}
public void SetUp() {
    var writer = new IndexWriter(store, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

    var doc = new Document();
    doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();
}
public ActionResult Process() {
    GetCoursesMessage message = new GetCoursesMessage();
    MvcContrib.Bus.Send(message);
    List<Course> courses = (message.Result.Data as List<Course>);
    if (courses == null)
        return RedirectToAction("Index");

    Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo(Server.MapPath("~/Data/Index")));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        foreach (Course course in courses) {
            Document document = new Document();
            document.Add(new Field("Type", "Course", Field.Store.YES, Field.Index.NO));
            document.Add(new Field("ID", course.Id.ToString(), Field.Store.YES, Field.Index.NO));
            document.Add(new Field("Name", course.Name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            document.Add(new Field("Owner", course.Owner, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            writer.AddDocument(document);

            GetNodesMessage courseMessage = new GetNodesMessage { Input = new GetNodesInput { CourseId = course.Id } };
            MvcContrib.Bus.Send(courseMessage);
            List<Node> nodes = (courseMessage.Result.Data as List<Node>);
            foreach (Node node in nodes) {
                ProcessNode(writer, node);
            }
        }
    }
    finally {
        // Optimize and close the writer on both the success and failure paths.
        // (The original caught Exception and used "throw e;", which resets the
        // stack trace; a finally block with the implicit rethrow preserves it.)
        writer.Optimize();
        writer.Close();
    }
    return RedirectToAction("Index");
}
// Delete the entire index
public void delAllIndex() {
    if (System.IO.Directory.Exists(indexPath) == false) {
        System.IO.Directory.CreateDirectory(indexPath);
    }
    FSDirectory fsDirectory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    if (!IndexReader.IndexExists(fsDirectory)) {
        return;
    }
    else if (IndexReader.IsLocked(fsDirectory)) {
        IndexReader.Unlock(fsDirectory);
    }
    Lucene.Net.Index.IndexWriter iw = new Lucene.Net.Index.IndexWriter(indexPath, new PanGuAnalyzer(), false);
    // iw.DeleteDocuments(new Lucene.Net.Index.Term("Key", key));
    iw.DeleteAll();
    // Deleted documents are not removed from disk right away; a .del file is
    // written instead, and Optimize must be called to purge them. Before the
    // purge, UndeleteAll can still restore the deleted documents.
    iw.Optimize();
    iw.Close();
}
public void HelloWorldTest() {
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
    IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

    Document doc = new Document();
    doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("postBody", "sample test", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Commit();
    writer.Close();

    QueryParser parser = new QueryParser(Version.LUCENE_29, "postBody", analyzer);
    Query query = parser.Parse("sample test");

    // Setup searcher
    IndexSearcher searcher = new IndexSearcher(directory, true);

    // Do the search. Iterate over the returned ScoreDocs rather than TotalHits:
    // TotalHits can exceed the requested page size (10), which would make
    // hits.ScoreDocs[i] go out of range.
    var hits = searcher.Search(query, null, 10);
    for (int i = 0; i < hits.ScoreDocs.Length; i++) {
        var doc1 = hits.ScoreDocs[i];
    }
    searcher.Close();
    directory.Close();
}
public override void SetUp() {
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    long theLong = System.Int64.MaxValue;
    double theDouble = System.Double.MaxValue;
    sbyte theByte = (sbyte)System.SByte.MaxValue;
    short theShort = System.Int16.MaxValue;
    int theInt = System.Int32.MaxValue;
    float theFloat = System.Single.MaxValue;
    for (int i = 0; i < NUM_DOCS; i++) {
        Document doc = new Document();
        doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theByte", System.Convert.ToString((sbyte)theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Close();
    reader = IndexReader.Open((Directory)directory, true, null);
}
internal virtual void BuildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen) {
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    iw.SetMaxBufferedDocs(10);
    for (int j = 0; j < nDocs; j++) {
        Document d = new Document();
        int nFields = r.Next(maxFields);
        for (int i = 0; i < nFields; i++) {
            int flen = r.Next(maxFieldLen);
            System.Text.StringBuilder sb = new System.Text.StringBuilder("^ ");
            while (sb.Length < flen) {
                sb.Append(' ').Append(words[r.Next(words.Length)]);
            }
            sb.Append(" $");
            Field.Store store = Field.Store.YES; // make random later
            Field.Index index = Field.Index.ANALYZED; // make random later
            d.Add(new Field("f" + i, sb.ToString(), store, index));
        }
        iw.AddDocument(d, null);
    }
    iw.Close();
}
static void Main(string[] args) {
    var indexWriter = new IndexWriter(Configuration.IndexDirectory, new StandardAnalyzer(PortugueseStopWords), true, IndexWriter.MaxFieldLength.UNLIMITED);
    foreach (var filepath in Directory.GetFiles(Configuration.FilesDirectory, "*.txt")) {
        var document = new Document();
        var title = Path.GetFileNameWithoutExtension(filepath);

        var idField = new Field(Configuration.Fields.ID, filepath, Field.Store.YES, Field.Index.NO);
        document.Add(idField);

        var titleField = new Field(Configuration.Fields.Title, title, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        document.Add(titleField);

        // Read the file contents; the original leaked the StreamReader, so
        // dispose it deterministically here.
        string text;
        using (var reader = new StreamReader(filepath)) {
            text = reader.ReadToEnd();
        }
        var textField = new Field(Configuration.Fields.Text, text, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        document.Add(textField);

        indexWriter.AddDocument(document);
    }
    indexWriter.Close();
}
void Index() {
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    Lucene.Net.Documents.Document doc = null;
    Lucene.Net.Documents.Field f = null;

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    doc = new Lucene.Net.Documents.Document();
    f = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f);
    wr.AddDocument(doc);

    wr.Close();
}
public virtual void TestNPESpanQuery() {
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>()), IndexWriter.MaxFieldLength.LIMITED, null);

    // Add documents
    AddDoc(writer, "1", "the big dogs went running to the market");
    AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

    // Commit
    writer.Close();

    // Get searcher
    IndexReader reader = IndexReader.Open(dir, true, null);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Control (make sure docs indexed)
    Assert.AreEqual(2, HitCount(searcher, "the"));
    Assert.AreEqual(1, HitCount(searcher, "cat"));
    Assert.AreEqual(1, HitCount(searcher, "dogs"));
    Assert.AreEqual(0, HitCount(searcher, "rabbit"));

    // This throws exception (it shouldn't)
    Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10, null).TotalHits);

    reader.Close();
    dir.Close();
}
private void button1_Click(object sender, EventArgs e) {
    Directory index = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    // The two-argument IndexWriter constructor does not exist in this API
    // version; pass the create flag and a MaxFieldLength explicitly.
    IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    addDoc(w, "Lucene in Action");
    addDoc(w, "Lucene for Dummies");
    addDoc(w, "Managing Gigabytes");
    addDoc(w, "The Art of Computer Science");
    w.Close();

    String querystr = "Lucene in Action";
    Query q = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "title", analyzer).Parse(querystr);

    int hitsPerPage = 10;
    IndexReader reader = IndexReader.Open(index, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.Create(hitsPerPage, true);
    searcher.Search(q, collector);
    ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

    System.Console.WriteLine("Found {0} Hits", hits.Length);
    foreach (var item in hits) {
        int docId = item.Doc;
        Document d = searcher.Doc(docId);
        System.Console.WriteLine(d.Get("title") + " " + item.Score);
    }
}
public virtual void TestLucene() {
    int num = 100;
    Directory indexA = new MockRAMDirectory();
    Directory indexB = new MockRAMDirectory();

    FillIndex(indexA, 0, num);
    bool fail = VerifyIndex(indexA, 0);
    if (fail) {
        Assert.Fail("Index a is invalid");
    }

    FillIndex(indexB, num, num);
    fail = VerifyIndex(indexB, num);
    if (fail) {
        Assert.Fail("Index b is invalid");
    }

    Directory merged = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(merged, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergeFactor(2);
    writer.AddIndexes(new Directory[] { indexA, indexB });
    writer.Close();

    fail = VerifyIndex(merged, 0);
    merged.Close();
    Assert.IsFalse(fail, "The merged index is invalid");
}
public void testMissingTerms() {
    String fieldName = "field1";
    Directory rd = new RAMDirectory();
    var w = new IndexWriter(rd, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 100; i++) {
        var doc = new Document();
        int term = i * 10; // terms are units of 10
        doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.ANALYZED));
        w.AddDocument(doc);
    }
    IndexReader reader = w.GetReader();
    w.Close();

    TermsFilter tf = new TermsFilter();
    tf.AddTerm(new Term(fieldName, "19"));
    FixedBitSet bits = (FixedBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(0, bits.Cardinality(), "Must match nothing");

    tf.AddTerm(new Term(fieldName, "20"));
    bits = (FixedBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(1, bits.Cardinality(), "Must match 1");

    tf.AddTerm(new Term(fieldName, "10"));
    bits = (FixedBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

    tf.AddTerm(new Term(fieldName, "00"));
    bits = (FixedBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

    reader.Close();
    rd.Close();
}
public void IndexFile(string filePath) {
    PropertyDescriptors descriptors = new PropertyDescriptors();
    descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");
    Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
    bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
    IndexWriter iw = new IndexWriter(_idxDir, a, create);
    iw.SetUseCompoundFile(true);

    AdDataStream adStream = new AdDataStream(filePath);
    adStream.LoadData();
    foreach (Advert ad in adStream.FetchAd()) {
        Document doc = new Document();
        foreach (string s in ad.GetDictionary().Keys) {
            string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]);
            doc.Add(Field.Text(s, temp));
        }
        iw.AddDocument(doc);
        if (_updateCallback != null) {
            _updateCallback("Added Document: " + ad["Title"]);
        }
    }
    iw.Optimize();
    iw.Close();
}
public override void SetUp() {
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
        Document doc = new Document();
        Field.TermVector termVector;
        int mod3 = i % 3;
        int mod2 = i % 2;
        if (mod2 == 0 && mod3 == 0) {
            termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        }
        else if (mod2 == 0) {
            termVector = Field.TermVector.WITH_POSITIONS;
        }
        else if (mod3 == 0) {
            termVector = Field.TermVector.WITH_OFFSETS;
        }
        else {
            termVector = Field.TermVector.YES;
        }
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true);
}
private Directory MakeIndex() {
    Directory dir = new RAMDirectory();
    try {
        System.Random r = NewRandom();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        writer.SetUseCompoundFile(false);
        for (int d = 1; d <= NUM_DOCS; d++) {
            Document doc = new Document();
            for (int f = 1; f <= NUM_FIELDS; f++) {
                doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
            }
            writer.AddDocument(doc);
        }
        writer.Close();
    }
    catch (System.Exception e) {
        throw new System.SystemException("", e);
    }
    return dir;
}
public virtual void TestPhrasePrefixWithBooleanQuery() {
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>()), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Add("This is a test", "object", writer);
    Add("a note", "note", writer);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

    // This query will be equivalent to +type:note +body:"a t*"
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("type", "note")), Occur.MUST);

    MultiPhraseQuery trouble = new MultiPhraseQuery();
    trouble.Add(new Term("body", "a"));
    trouble.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });
    q.Add(trouble, Occur.MUST);

    // exception will be thrown here without fix for #35626:
    ScoreDoc[] hits = searcher.Search(q, null, 1000, null).ScoreDocs;
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");
    searcher.Close();
}
public void CreateIndex(List<ISearchEntity> CreateEntities) {
    Analyzer analyzer = new StandardAnalyzer();
    // Third argument: whether to recreate the index. True wipes the index and
    // rebuilds it from scratch; False adds to the existing index incrementally.
    IndexWriter writer = new IndexWriter(ConfigElement.IndexDirectory, analyzer, true);

    foreach (ISearchEntity IndexEntity in CreateEntities) {
        ProductModel product = (ProductModel)IndexEntity;
        Document doc = new Document();
        doc.Add(new Field("productid", Convert.ToString(product.EntityIdentity), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("productname", Convert.ToString(product.ProductName), Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("cateid", Convert.ToString(product.CategoryID), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("catepath", Convert.ToString(product.CategoryPath), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("keywords", Convert.ToString(product.Keywords), Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("description", Convert.ToString(product.Description), Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("price", Convert.ToString(product.Price), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("createtime", Convert.ToString(product.CreateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("updatetime", Convert.ToString(product.UpdateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("mainimage", Convert.ToString(product.ProductImage), Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.AddDocument(doc);
        Console.WriteLine("created index for {0}:{1}", product.EntityIdentity, product.ProductName);
    }
    writer.Optimize();
    writer.Close();
}
public virtual void TestRAMDirectorySize() {
    Directory dir = FSDirectory.Open(indexDir);
    MockRAMDirectory ramDir = new MockRAMDirectory(dir);
    dir.Close();

    IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.Optimize(null);
    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

    ThreadClass[] threads = new ThreadClass[numThreads];
    for (int i = 0; i < numThreads; i++) {
        int num = i;
        threads[i] = new AnonymousClassThread(num, writer, ramDir, this);
    }
    for (int i = 0; i < numThreads; i++) {
        threads[i].Start();
    }
    for (int i = 0; i < numThreads; i++) {
        threads[i].Join();
    }

    writer.Optimize(null);
    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());
    writer.Close();
}
private void CreateIndex() {
    string sresult = "";
    try {
        // Read the data from the database
        SqlDataReader myred = ExecuteQuery();
        // Build the index fields
        //Lucene.Net.Analysis.Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
        StockFooAnalyzer analyzer = new StockFooAnalyzer(System.Configuration.ConfigurationManager.AppSettings["AnalyzerPath"].ToString());
        FSDirectory dy = FSDirectory.Open(new DirectoryInfo(Server.MapPath("IndexDirectory")));
        IndexWriter writer = new IndexWriter(dy, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        while (myred.Read()) {
            AddDocument(writer, myred["title"].ToString(), myred["url"].ToString(), myred["site"].ToString(), myred["body"].ToString(), myred["publish_time"].ToString());
        }
        myred.Close();
        myred.Dispose();
        writer.Optimize();
        writer.Close();
        sresult = "ok";
    }
    catch (Exception ex) {
        sresult = ex.Message;
    }
    Response.Write(sresult);
    Response.Flush();
    Response.End();
}
public virtual void TestFarsi() {
    /* build an index */
    RAMDirectory farsiIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.AddDocument(doc, null);
    writer.Optimize(null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)farsiIndex, true, null);
    IndexSearcher search = new IndexSearcher(reader);
    Query q = new TermQuery(new Term("body", "body"));

    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi is not supported).
    int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000, null).TotalHits;
    Assert.AreEqual(0, numHits, "The index Term should not be included.");

    numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000, null).TotalHits;
    Assert.AreEqual(1, numHits, "The index Term should be included.");
    search.Close();
}
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs) {
    RAMDirectory d = new RAMDirectory();
    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null);
    for (int i = 0; i < numDeletedDocs; i++) {
        w.AddDocument(new Document(), null);
    }
    w.Commit(null);
    w.DeleteDocuments(null, new MatchAllDocsQuery());
    w.Commit(null);

    if (0 < numDeletedDocs) {
        Assert.IsTrue(w.HasDeletions(null), "writer has no deletions");
    }
    Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
    Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs");
    w.Close();

    IndexReader r = IndexReader.Open((Directory)d, true, null);
    Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
    r.Close();
    return (d);
}
private void CreateIndex2() {
    IndexWriter iw = null;
    iw = new IndexWriter("D:\\lucene", anay, true);
    DataTable dt = SqlHelper2.QueryTable("select a_id, b_name,u_nickname,a_title,a_content,b_id from v_article");
    foreach (DataRow dr in dt.Rows) {
        Document doc = new Document();
        string title = dr["a_title"].ToString();
        string content = dr["a_content"].ToString();
        string nickname = dr["u_nickname"].ToString();
        string bname = dr["b_name"].ToString();
        string bid = dr["b_id"].ToString();
        string aid = dr["a_id"].ToString();
        if (aid == "5938") {
            doc.SetBoost(100);
        }
        doc.Add(Field.Keyword("title", title));
        doc.Add(Field.Keyword("content", content));
        doc.Add(Field.Keyword("nick", nickname));
        doc.Add(Field.Text("bname", bname));
        doc.Add(Field.Keyword("bid", bid));
        doc.Add(Field.Keyword("aid", aid));
        iw.AddDocument(doc);
    }
    iw.Optimize();
    iw.Close();
    Response.Write("<script>alert('Index build complete!');</script>");
}
public virtual void TestDanish() {
    /* build an index */
    RAMDirectory danishIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
    for (int docnum = 0; docnum < words.Length; ++docnum) {
        Document doc = new Document();
        doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Optimize(null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)danishIndex, true, null);
    IndexSearcher search = new IndexSearcher(reader);

    System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo;

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
    AssertEquals("The index Term should be included.", 1, result.Length);

    result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
    AssertEquals("The index Term should not be included.", 0, result.Length);
    search.Close();
}
private static Directory MakeIndex() {
    Directory dir = new RAMDirectory();
    try {
        System.Random r = new System.Random((System.Int32)(BASE_SEED + 42));
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(dir, analyzer, true);
        writer.SetUseCompoundFile(false);
        for (int d = 1; d <= NUM_DOCS; d++) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            for (int f = 1; f <= NUM_FIELDS; f++) {
                doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED));
            }
            writer.AddDocument(doc);
        }
        writer.Close();
    }
    catch (System.Exception e) {
        throw new System.SystemException("", e);
    }
    return dir;
}
public virtual void TestCachingWorks() {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true);
    MockFilter filter = new MockFilter();
    CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

    // first time, nested filter is called
    cacher.GetDocIdSet(reader);
    Assert.IsTrue(filter.WasCalled(), "first time");

    // make sure no exception if cache is holding the wrong docIdSet
    cacher.GetDocIdSet(reader);

    // second time, nested filter should not be called
    filter.Clear();
    cacher.GetDocIdSet(reader);
    Assert.IsFalse(filter.WasCalled(), "second time");

    reader.Close();
}
public virtual void TestLucene() {
    int num = 100;
    Directory indexA = new MockRAMDirectory();
    Directory indexB = new MockRAMDirectory();

    FillIndex(indexA, 0, num);
    Assert.IsFalse(VerifyIndex(indexA, 0), "Index a is invalid");

    FillIndex(indexB, num, num);
    Assert.IsFalse(VerifyIndex(indexB, num), "Index b is invalid");

    Directory merged = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(merged, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.MergeFactor = 2;
    writer.AddIndexesNoOptimize(new[] { indexA, indexB });
    writer.Optimize();
    writer.Close();

    var fail = VerifyIndex(merged, 0);
    merged.Close();
    Assert.IsFalse(fail, "The merged index is invalid");
}
// Rolls back index to a chosen ID
private void RollBackLast(int id) {
    // System.out.println("Attempting to rollback to "+id);
    System.String ids = "-" + id;
    IndexCommit last = null;
    IList<IndexCommit> commits = IndexReader.ListCommits(dir);
    foreach (IndexCommit commit in commits) {
        System.Collections.Generic.IDictionary<string, string> ud = commit.GetUserData();
        if (ud.Count > 0 && ((System.String)ud["index"]).EndsWith(ids))
            last = commit;
    }
    if (last == null)
        throw new System.SystemException("Couldn't find commit point " + id);

    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), new RollbackDeletionPolicy(this, id), MaxFieldLength.UNLIMITED, last);
    System.Collections.Generic.IDictionary<string, string> data = new System.Collections.Generic.Dictionary<string, string>();
    data["index"] = "Rolled back to 1-" + id;
    w.Commit(data);
    w.Close();
}
public virtual void TestCompressionTools() {
    IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
    IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

    Document doc = new Document();
    doc.Add(binaryFldCompressed);
    doc.Add(stringFldCompressed);

    /* add the doc to a ram index */
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.AddDocument(doc, null);
    writer.Close();

    /* open a reader and fetch the document */
    IndexReader reader = IndexReader.Open((Directory)dir, false, null);
    Document docFromReader = reader.Document(0, null);
    Assert.IsTrue(docFromReader != null);

    /* fetch the binary compressed field and compare its content with the original one */
    System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null))));
    Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
    Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed));

    reader.Close();
    dir.Close();
}
public void SetUp() {
    IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);
    Document doc;

    doc = new Document();
    doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();
    writer.Close();
}
public void CreateIndex(List<ISearchEntity> CreateEntities) {
    Analyzer analyzer = new StandardAnalyzer();
    // Third argument: whether to recreate the index. True wipes the index and
    // rebuilds it from scratch; False adds to the existing index incrementally.
    IndexWriter writer = new IndexWriter(ConfigElement.IndexDirectory, analyzer, true);

    foreach (ISearchEntity IndexEntity in CreateEntities) {
        NewsModel news = (NewsModel)IndexEntity;
        Document doc = new Document();
        doc.Add(new Field("newsid", Convert.ToString(news.EntityIdentity), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("title", Convert.ToString(news.Title), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("content", Convert.ToString(news.Content), Field.Store.YES, Field.Index.TOKENIZED));
        doc.Add(new Field("keywords", Convert.ToString(news.Keywords), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("catepath", Convert.ToString(news.CategoryPath), Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.Add(new Field("createtime", Convert.ToString(news.CreateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.AddDocument(doc);
        Console.WriteLine("created index for {0}:{1}", news.EntityIdentity, news.Title);
    }
    writer.Optimize();
    writer.Close();
}
public override void SetUp() {
    base.SetUp();
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null) {
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    }
    indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

    Directory dir = FSDirectory.Open(indexDir);
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

    // add some documents
    Document doc = null;
    for (int i = 0; i < docsToAdd; i++) {
        doc = new Document();
        doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    Assert.AreEqual(docsToAdd, writer.MaxDoc());
    writer.Close();
    dir.Close();
}
public void MrsJones() {
    var dir = new RAMDirectory();
    var analyzer = new LowerCaseKeywordAnalyzer();
    var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

    var document = new Lucene.Net.Documents.Document();
    document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
    writer.AddDocument(document);
    writer.Close(true);

    var searcher = new IndexSearcher(dir, true);
    var termEnum = searcher.GetIndexReader().Terms();
    while (termEnum.Next()) {
        var buffer = termEnum.Term().Text();
        Console.WriteLine(buffer);
    }

    var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
    var query = queryParser.Parse("Name:\"MRS. S*\"");
    Console.WriteLine(query);

    var result = searcher.Search(query, 10);
    Assert.NotEqual(0, result.TotalHits);
}
public virtual void TestBooleanQueryContainingSingleTermPrefixQuery() {
    // this tests against bug 33161 (now fixed)
    // In order to cause the bug, the outer query must have more than one term
    // and all terms required.
    // The contained MultiPhraseQuery must contain exactly one term array.
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Add("blueberry pie", writer);
    Add("blueberry chewing gum", writer);
    Add("blue raspberry pie", writer);
    writer.Optimize(null);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

    // This query will be equivalent to +body:pie +body:"blue*"
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("body", "pie")), Occur.MUST);

    MultiPhraseQuery trouble = new MultiPhraseQuery();
    trouble.Add(new Term[] { new Term("body", "blueberry"), new Term("body", "blue") });
    q.Add(trouble, Occur.MUST);

    // exception will be thrown here without fix
    ScoreDoc[] hits = searcher.Search(q, null, 1000, null).ScoreDocs;
    Assert.AreEqual(2, hits.Length, "Wrong number of hits");
    searcher.Close();
}
public override void SetUp() {
    base.SetUp();
    System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" };

    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < data.Length; i++) {
        Document doc = new Document();
        doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id",String.valueOf(i)));
        doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("all","all"));
        if (null != data[i]) {
            doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("data",data[i]));
        }
        writer.AddDocument(doc, null);
    }
    writer.Optimize(null);
    writer.Close();

    r = IndexReader.Open(index, true, null);
    s = new IndexSearcher(r);
    //System.out.println("Set up " + getName());
}
public void TestSpanRegex() {
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    // doc.Add(new Field("field", "the quick brown fox jumps over the lazy dog",
    //     Field.Store.NO, Field.Index.ANALYZED));
    // writer.AddDocument(doc);
    // doc = new Document();
    doc.Add(new Field("field", "auto update", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new Field("field", "first auto update", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory, true);
    SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*"));
    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
    // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6, true);
    int numHits = searcher.Search(sfq, null, 1000).TotalHits;
    Assert.AreEqual(1, numHits);
}
public virtual void TestSorting() {
    Directory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(2);
    writer.SetMergeFactor(1000);
    writer.AddDocument(Adoc(new System.String[] { "id", "a", "title", "ipod", "str_s", "a" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "b", "title", "ipod ipod", "str_s", "b" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "c", "title", "ipod ipod ipod", "str_s", "c" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "x", "title", "boosted", "str_s", "x" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "y", "title", "boosted boosted", "str_s", "y" }));
    writer.AddDocument(Adoc(new System.String[] { "id", "z", "title", "boosted boosted boosted", "str_s", "z" }));

    IndexReader r = writer.GetReader();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(r);
    RunTest(searcher, true);
    RunTest(searcher, false);

    searcher.Close();
    r.Close();
    directory.Close();
}
public void Initialize(String directoryProviderName, IDictionary<string, string> properties, ISearchFactoryImplementor searchFactory) {
    DirectoryInfo indexDir = DirectoryProviderHelper.DetermineIndexDir(directoryProviderName, (IDictionary)properties);
    try {
        bool create = !IndexReader.IndexExists(indexDir.FullName);
        indexName = indexDir.FullName;
        directory = FSDirectory.GetDirectory(indexName, create);

        if (create) {
            IndexWriter iw = new IndexWriter(directory, new StandardAnalyzer(), create, new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.UNLIMITED);
            iw.Close();
        }
        //searchFactory.RegisterDirectoryProviderForLocks(this);
    }
    catch (IOException e) {
        throw new HibernateException("Unable to initialize index: " + directoryProviderName, e);
    }
}
public virtual void TestSimpleSkip() {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++) {
        Document d1 = new Document();
        d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(d1);
    }
    writer.Flush();
    writer.Optimize();
    writer.Close();

    IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
    SegmentTermPositions tp = (SegmentTermPositions)reader.TermPositions();
    tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);

    for (int i = 0; i < 2; i++) {
        counter = 0;
        tp.Seek(term);
        CheckSkipTo(tp, 14, 185); // no skips
        CheckSkipTo(tp, 17, 190); // one skip on level 0
        CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

        // this test would fail if we had only one skip level,
        // because then more bytes would be read from the freqStream
        CheckSkipTo(tp, 4800, 250); // one skip on level 2
    }
}
private void btnInitAnalyzer_Click(object sender, EventArgs e) {
    using (new BusyObject(this)) {
        if (analyzer == null) {
            string hspellPath = SelectProjectFolder("Select a path to HSpell data files", "hspell-data-files" + System.IO.Path.DirectorySeparatorChar);
            if (hspellPath == null)
                return;

            MorphAnalyzer a = new MorphAnalyzer(hspellPath);
            if (!a.IsInitialized) {
                MessageBox.Show("Error while trying to create a morphological analyzer object; please check the existence of the required data files and try again");
                return;
            }
            analyzer = a;
        }

        // Recreate the index
        IndexWriter writer = new IndexWriter(FSDirectory.Open(tempPath), new Lucene.Net.Analysis.SimpleAnalyzer(), true, new IndexWriter.MaxFieldLength(10));
        writer.Close();
    }
    btnIndexAddFolder.Enabled = true;
    btnRunAutoTests.Enabled = true;
    btnExecuteSearch.Enabled = true;
}
public virtual void TestTermEnum() {
    IndexWriter writer = null;
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

    // add 100 documents with term : aaa
    // add 100 documents with terms: aaa bbb
    // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100
    for (int i = 0; i < 100; i++) {
        AddDoc(writer, "aaa");
        AddDoc(writer, "aaa bbb");
    }
    writer.Close();

    // verify document frequency of terms in an unoptimized index
    VerifyDocFreq();

    // merge segments by optimizing the index
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    writer.Optimize();
    writer.Close();

    // verify document frequency of terms in an optimized index
    VerifyDocFreq();
}
public override void SetUp() {
    base.SetUp();
    Document doc;

    RAMDirectory rd1 = new RAMDirectory();
    IndexWriter iw1 = new IndexWriter(rd1, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("field1", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field2", "the quick brown fox jumps", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field4", "", Field.Store.NO, Field.Index.ANALYZED));
    iw1.AddDocument(doc);
    iw1.Close();

    RAMDirectory rd2 = new RAMDirectory();
    IndexWriter iw2 = new IndexWriter(rd2, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    doc = new Document();
    doc.Add(new Field("field0", "", Field.Store.NO, Field.Index.ANALYZED));
    doc.Add(new Field("field1", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("field3", "the fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED));
    iw2.AddDocument(doc);
    iw2.Close();

    this.ir1 = IndexReader.Open(rd1, true);
    this.ir2 = IndexReader.Open(rd2, true);
}
public void MissingTerms_Test() {
    string fieldName = "field1";
    RAMDirectory rd = new RAMDirectory();
    IndexWriter w = new IndexWriter(rd, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        int term = i * 10; // terms are units of 10
        doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED));
        w.AddDocument(doc);
    }
    w.Close();
    IndexReader reader = IndexReader.Open(rd, true);

    TermsFilter tf = new TermsFilter();
    tf.AddTerm(new Term(fieldName, "19"));
    OpenBitSet bits = (OpenBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(0, bits.Cardinality(), "Must match nothing");

    tf.AddTerm(new Term(fieldName, "20"));
    bits = (OpenBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(1, bits.Cardinality(), "Must match 1");

    tf.AddTerm(new Term(fieldName, "10"));
    bits = (OpenBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

    tf.AddTerm(new Term(fieldName, "00"));
    bits = (OpenBitSet)tf.GetDocIdSet(reader);
    Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

    // release the reader and directory (the original leaked both)
    reader.Close();
    rd.Close();
}
public virtual void TestMultiValueSource() {
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(f);
    for (int i = 0; i < 17; i++) {
        f.SetValue("" + i);
        w.AddDocument(doc);
        w.Commit();
    }

    IndexReader r = w.GetReader();
    w.Close();
    Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);

    ValueSource s1 = new IntFieldSource("field");
    DocValues v1 = s1.GetValues(r);
    DocValues v2 = new MultiValueSource(s1).GetValues(r);
    for (int i = 0; i < r.MaxDoc(); i++) {
        Assert.AreEqual(v1.IntVal(i), i);
        Assert.AreEqual(v2.IntVal(i), i);
    }

    Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();
    r.Close();
    dir.Close();
}
void CloseIndex() {
    if (writer != null) {
        writer.Close();
        writer = null;
    }
}
public virtual void TestPhrasePrefix() {
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();
    Document doc4 = new Document();
    Document doc5 = new Document();
    doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
    doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
    doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
    doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
    doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc1, null);
    writer.AddDocument(doc2, null);
    writer.AddDocument(doc3, null);
    writer.AddDocument(doc4, null);
    writer.AddDocument(doc5, null);
    writer.Optimize(null);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

    //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
    MultiPhraseQuery query1 = new MultiPhraseQuery();
    //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
    MultiPhraseQuery query2 = new MultiPhraseQuery();
    query1.Add(new Term("body", "blueberry"));
    query2.Add(new Term("body", "strawberry"));

    System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
    IndexReader ir = IndexReader.Open((Directory)indexStore, true, null);

    // this TermEnum gives "piccadilly", "pie" and "pizza".
    System.String prefix = "pi";
    TermEnum te = ir.Terms(new Term("body", prefix + "*"), null);
    do {
        if (te.Term.Text.StartsWith(prefix)) {
            termsWithPrefix.Add(te.Term);
        }
    } while (te.Next(null));

    query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
    query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));

    ScoreDoc[] result;
    result = searcher.Search(query1, null, 1000, null).ScoreDocs;
    Assert.AreEqual(2, result.Length);

    result = searcher.Search(query2, null, 1000, null).ScoreDocs;
    Assert.AreEqual(0, result.Length);
}
public override void SetUp() {
    base.SetUp();
    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.SetSimilarity(sim);

    // hed is the most important field, dek is secondary

    // d1 is an "ok" match for: albino elephant
    {
        Document d1 = new Document();
        d1.Add(new Field("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d1"));
        d1.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        d1.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "elephant"));
        writer.AddDocument(d1, null);
    }

    // d2 is a "good" match for: albino elephant
    {
        Document d2 = new Document();
        d2.Add(new Field("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d2"));
        d2.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        d2.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "albino"));
        d2.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "elephant"));
        writer.AddDocument(d2, null);
    }

    // d3 is a "better" match for: albino elephant
    {
        Document d3 = new Document();
        d3.Add(new Field("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d3"));
        d3.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "albino"));
        d3.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        writer.AddDocument(d3, null);
    }

    // d4 is the "best" match for: albino elephant
    {
        Document d4 = new Document();
        d4.Add(new Field("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d4"));
        d4.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "albino"));
        d4.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
        d4.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "albino"));
        writer.AddDocument(d4, null);
    }

    writer.Close();
    r = IndexReader.Open(index, true, null);
    s = new IndexSearcher(r);
    s.Similarity = sim;
}
public void Test_Index_ReusableStringReader() {
    Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(new Lucene.Net.Store.RAMDirectory(), new TestAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    Lucene.Net.Documents.Field f1 = new Lucene.Net.Documents.Field("f1", TEST_STRING, Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED);
    doc.Add(f1);
    wr.AddDocument(doc);
    wr.Close();
}
public override void SetUp() {
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < docFields.Length; i++) {
        Document document = new Document();
        document.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document, null);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true, null);

    // Make big index
    dir2 = new MockRAMDirectory(directory);

    // First multiply small test index:
    mulFactor = 1;
    int docCount = 0;
    do {
        Directory copy = new RAMDirectory(dir2, null);
        IndexWriter indexWriter = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
        indexWriter.AddIndexesNoOptimize(null, new[] { copy });
        docCount = indexWriter.MaxDoc();
        indexWriter.Close();
        mulFactor *= 2;
    } while (docCount < 3000);

    IndexWriter w = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED));
    for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) {
        w.AddDocument(doc, null);
    }
    doc = new Document();
    doc.Add(new Field("field2", "big bad bug", Field.Store.NO, Field.Index.ANALYZED));
    for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) {
        w.AddDocument(doc, null);
    }

    // optimize to 1 segment
    w.Optimize(null);
    reader = w.GetReader(null);
    w.Close();
    bigSearcher = new IndexSearcher(reader);
}
public virtual void TestSetBufferSize() {
    System.IO.DirectoryInfo indexDir = new System.IO.DirectoryInfo(System.IO.Path.Combine(AppSettings.Get("tempDir", ""), "testSetBufferSize"));
    MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom());
    try {
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
        writer.UseCompoundFile = false;
        for (int i = 0; i < 37; i++) {
            Document doc = new Document();
            doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc, null);
        }
        writer.Close();

        dir.allIndexInputs.Clear();

        IndexReader reader = IndexReader.Open((Directory)dir, false, null);
        Term aaa = new Term("content", "aaa");
        Term bbb = new Term("content", "bbb");
        Term ccc = new Term("content", "ccc");
        Assert.AreEqual(37, reader.DocFreq(ccc, null));
        reader.DeleteDocument(0, null);
        Assert.AreEqual(37, reader.DocFreq(aaa, null));
        dir.tweakBufferSizes();
        reader.DeleteDocument(4, null);
        Assert.AreEqual(reader.DocFreq(bbb, null), 37);
        dir.tweakBufferSizes();

        IndexSearcher searcher = new IndexSearcher(reader);
        ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000, null).ScoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(35, hits.Length);
        dir.tweakBufferSizes();
        hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000, null).ScoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(1, hits.Length);
        hits = searcher.Search(new TermQuery(aaa), null, 1000, null).ScoreDocs;
        dir.tweakBufferSizes();
        Assert.AreEqual(35, hits.Length);
        searcher.Close();
        reader.Close();
    }
    finally {
        _TestUtil.RmDir(indexDir);
    }
}
public override void SetUp() {
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < docFields.Length; i++) {
        Document doc = new Document();
        doc.Add(new Field(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true, null);
}
public override void SetUp() {
    base.SetUp();
    // create test index
    IndexWriter writer = new IndexWriter(mDirectory, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED, null);
    addDocument(writer, "A", "Should we, could we, would we?");
    addDocument(writer, "B", "it should. Should it?");
    addDocument(writer, "C", "it shouldn't.");
    addDocument(writer, "D", "Should we, should we, should we.");
    writer.Close();

    // re-open the searcher since we added more docs
    searcher2 = new IndexSearcher(mDirectory, true, null);
}
public override void SetUp() {
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    //writer.setUseCompoundFile(false);
    //writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        IFieldable fld = new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES);
        doc.Add(fld);
        writer.AddDocument(doc, null);
    }
    writer.Close();
}
public override void SetUp() {
    base.SetUp();
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc, null);
    writer.Close();
    searcher = new IndexSearcher(directory, true, null);
}
// Test using a sparse index (with deleted docs). The DocIdSet should not be
// cacheable, as it uses TermDocs if the range contains 0.
public void TestSparseIndex() {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int d = -20; d <= 20; d++) {
        Document doc = new Document();
        doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Optimize(null);
    writer.DeleteDocuments(null, new Term("id", "0"));
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)dir, true, null);
    IndexSearcher Search = new IndexSearcher(reader);
    Assert.True(reader.HasDeletions);

    ScoreDoc[] result;
    Query q = new TermQuery(new Term("body", "body"));
    FieldCacheRangeFilter<sbyte?> fcrf;

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100, null).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(40, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100, null).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100, null).ScoreDocs;
    Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be not cacheable");
    Assert.AreEqual(20, result.Length, "find all");

    // Ranges that do not contain 0 avoid TermDocs, so their DocIdSets are cacheable.
    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100, null).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");

    result = Search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100, null).ScoreDocs;
    Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be cacheable");
    Assert.AreEqual(11, result.Length, "find all");
}
public virtual void TestReadOnlyCloneAfterOptimize() {
    Directory dir1 = new MockRAMDirectory();
    TestIndexReaderReopen.CreateIndex(dir1, true);
    IndexReader reader1 = IndexReader.Open(dir1, false, null);
    IndexWriter w = new IndexWriter(dir1, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    w.Optimize(null);
    w.Close();
    IndexReader reader2 = reader1.Clone(true, null);
    Assert.IsTrue(IsReadOnly(reader2));
    reader1.Close();
    reader2.Close();
    dir1.Close();
}
public virtual void TestBinaryFieldInIndex() {
    IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES);
    IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);

    // binary fields with store off are not allowed
    Assert.Throws<ArgumentException>(() => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO));

    Document doc = new Document();
    doc.Add(binaryFldStored);
    doc.Add(stringFldStored);

    /* test for field count */
    Assert.AreEqual(2, doc.fields_ForNUnit.Count);

    /* add the doc to a ram index */
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.AddDocument(doc, null);
    writer.Close();

    /* open a reader and fetch the document */
    IndexReader reader = IndexReader.Open((Directory)dir, false, null);
    Document docFromReader = reader.Document(0, null);
    Assert.IsTrue(docFromReader != null);

    /* fetch the binary stored field and compare its content with the original one */
    System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored", null)));
    Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored));

    /* fetch the string field and compare its content with the original one */
    System.String stringFldStoredTest = docFromReader.Get("stringStored", null);
    Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored));

    /* delete the document from index */
    reader.DeleteDocument(0, null);
    Assert.AreEqual(0, reader.NumDocs());

    reader.Close();
    dir.Close();
}
static TestNumericRangeQuery32() {
    try {
        // set the theoretical maximum term count for 8bit (see docs for the number)
        BooleanQuery.MaxClauseCount = 3 * 255 * 2 + 255;

        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED, null);

        NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true),
            field4 = new NumericField("field4", 4, Field.Store.YES, true),
            field2 = new NumericField("field2", 2, Field.Store.YES, true),
            fieldNoTrie = new NumericField("field" + System.Int32.MaxValue, System.Int32.MaxValue, Field.Store.YES, true),
            ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
            ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
            ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);

        Document doc = new Document();
        // add fields, that have a distance to test general functionality
        doc.Add(field8);
        doc.Add(field4);
        doc.Add(field2);
        doc.Add(fieldNoTrie);
        // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
        doc.Add(ascfield8);
        doc.Add(ascfield4);
        doc.Add(ascfield2);

        // Add a series of noDocs docs with increasing int values
        for (int l = 0; l < noDocs; l++) {
            int val = distance * l + startOffset;
            field8.SetIntValue(val);
            field4.SetIntValue(val);
            field2.SetIntValue(val);
            fieldNoTrie.SetIntValue(val);

            val = l - (noDocs / 2);
            ascfield8.SetIntValue(val);
            ascfield4.SetIntValue(val);
            ascfield2.SetIntValue(val);
            writer.AddDocument(doc, null);
        }

        writer.Optimize(null);
        writer.Close();
        searcher = new IndexSearcher(directory, true, null);
    }
    catch (System.Exception e) {
        throw new System.SystemException("", e);
    }
}
public virtual void TestHangOnClose() {
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
    writer.SetMaxBufferedDocs(5);
    writer.SetUseCompoundFile(false);
    writer.SetMergeFactor(100);

    Document doc = new Document();
    doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    for (int i = 0; i < 60; i++) {
        writer.AddDocument(doc);
    }
    writer.SetMaxBufferedDocs(200);

    Document doc2 = new Document();
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
    for (int i = 0; i < 10; i++) {
        writer.AddDocument(doc2);
    }
    writer.Close();

    Directory dir2 = new MockRAMDirectory();
    writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);
    lmp.SetMinMergeMB(0.0001);
    writer.SetMergePolicy(lmp);
    writer.SetMergeFactor(4);
    writer.SetUseCompoundFile(false);
    writer.SetMergeScheduler(new SerialMergeScheduler());
    writer.AddIndexesNoOptimize(new Directory[] { dir });
    writer.Close();

    dir.Close();
    dir2.Close();
}
public virtual void TestFlushExceptions() {
    MockRAMDirectory directory = new MockRAMDirectory();
    FailOnlyOnFlush failure = new FailOnlyOnFlush();
    directory.FailOn(failure);

    IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    writer.SetMergeScheduler(cms);
    writer.SetMaxBufferedDocs(2);
    Document doc = new Document();
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);
    for (int i = 0; i < 10; i++) {
        for (int j = 0; j < 20; j++) {
            idField.SetValue(System.Convert.ToString(i * 20 + j));
            writer.AddDocument(doc);
        }
        writer.AddDocument(doc);
        failure.SetDoFail();
        try {
            writer.Flush();
            Assert.Fail("failed to hit IOException");
        }
        catch (System.IO.IOException) {
            // expected: the injected flush failure fired; clear it and continue
            failure.ClearDoFail();
        }
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(directory);
    Assert.AreEqual(200, reader.NumDocs());
    reader.Close();
    directory.Close();
}