public string Search(string strQuery) { string result = string.Empty; Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"])); Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer()); Lucene.Net.Search.Query query = parser.Parse(strQuery); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader); Lucene.Net.Search.Hits hits = searcher.Search(query); Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query); Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>"); Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score); result += "<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Results found: " + hits.Length() + " </font></div>"; result += "<div style='padding: 10px 10px 10px 10px;'>"; for (int i = 0; i < hits.Length(); i++) { string id = hits.Doc(i).Get("ArticleId"); string title = hits.Doc(i).Get("ArticleTitle"); string detail = hits.Doc(i).Get("ArticleDetail"); Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail)); result += string.Format("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title); result += string.Format("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div><br/>", highlighter.GetBestFragment(ts, detail)); } result += "</div>"; reader.Close(); return(result); }
/// <summary>Note that the underlying IndexReader is not closed if the IndexSearcher was constructed with IndexSearcher(IndexReader r). If the IndexReader was supplied implicitly by specifying a directory, then the IndexReader gets closed.</summary> public override void Close() { if (closeReader) { reader.Close(); } }
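The contract in that summary is easy to trip over, so here is a minimal sketch of the two construction paths. It is not taken from the snippets on this page: the "index" path is a hypothetical placeholder, an existing index is assumed to live there, and the pre-4.0 Lucene.Net API used throughout these examples is assumed. Closing a searcher that wraps a caller-supplied IndexReader leaves that reader open; closing a searcher that opened its own reader from a Directory closes that reader as well.

public void CloseSemanticsSketch()
{
    // Assumed: an existing index at "index" (hypothetical path), pre-4.0 Lucene.Net API.
    Directory dir = FSDirectory.Open(new System.IO.FileInfo("index"));

    // Case 1: the caller supplies the IndexReader, so closeReader is false.
    IndexReader reader = IndexReader.Open(dir, true); // open read-only
    IndexSearcher wrapping = new IndexSearcher(reader);
    wrapping.Close();            // the wrapped reader stays open
    int docs = reader.NumDocs(); // still safe to use the reader here
    reader.Close();              // the caller closes it explicitly

    // Case 2: the searcher opens its own reader from the Directory, so closeReader is true.
    IndexSearcher owning = new IndexSearcher(dir, true);
    owning.Close();              // the internally opened reader is closed here as well

    dir.Close();
}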
public static void Main(System.String[] args) { try { Directory directory = FSDirectory.GetDirectory("demo index", false); IndexReader reader = IndexReader.Open(directory); // Term term = new Term("path", "pizza"); // int deleted = reader.delete(term); // System.out.println("deleted " + deleted + // " documents containing " + term); for (int i = 0; i < reader.MaxDoc(); i++) { reader.Delete(i); } reader.Close(); directory.Close(); } catch (System.Exception e) { System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message); } }
public virtual void TestVerifyIndex() { IndexReader reader = IndexReader.Open(mDirectory); Assert.AreEqual(8, reader.NumDocs()); reader.Close(); }
public static void Main(System.String[] args) { System.String usage = typeof(DeleteFiles) + " <unique_term>"; if (args.Length == 0) { System.Console.Error.WriteLine("Usage: " + usage); System.Environment.Exit(1); } try { Directory directory = FSDirectory.Open(new System.IO.FileInfo("index")); IndexReader reader = IndexReader.Open(directory, false); // we don't want read-only because we are about to delete Term term = new Term("path", args[0]); int deleted = reader.DeleteDocuments(term); System.Console.Out.WriteLine("deleted " + deleted + " documents containing " + term); // one can also delete documents by their internal id: /* * for (int i = 0; i < reader.maxDoc(); i++) { * System.out.println("Deleting document with id " + i); * reader.delete(i); * }*/ reader.Close(); directory.Close(); } catch (System.Exception e) { System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message); } }
public override void TearDown() { base.TearDown(); readerA.Close(); readerB.Close(); readerX.Close(); }
/* Walk directory hierarchy in uid order, while keeping uid iterator from existing index in sync. Mismatches indicate one of: (a) old documents to be deleted; (b) unchanged documents, to be left alone; or (c) new documents, to be indexed. */ private static void IndexDocs(System.IO.FileInfo file, System.IO.FileInfo index, bool create) { if (!create) { // incrementally update reader = IndexReader.Open(FSDirectory.Open(index), false); // open existing index uidIter = reader.Terms(new Term("uid", "")); // init uid iterator IndexDocs(file); if (deleting) { // delete rest of stale docs while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid") { System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text())); reader.DeleteDocuments(uidIter.Term()); uidIter.Next(); } deleting = false; } uidIter.Close(); // close uid iterator reader.Close(); // close existing index } // don't have existing else { IndexDocs(file); } }
public virtual void TestNPESpanQuery() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new System.Collections.Hashtable(0)), IndexWriter.MaxFieldLength.LIMITED); // Add documents AddDoc(writer, "1", "the big dogs went running to the market"); AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly"); // Commit writer.Close(); // Get searcher IndexReader reader = IndexReader.Open(dir); IndexSearcher searcher = new IndexSearcher(reader); // Control (make sure docs indexed) Assert.AreEqual(2, HitCount(searcher, "the")); Assert.AreEqual(1, HitCount(searcher, "cat")); Assert.AreEqual(1, HitCount(searcher, "dogs")); Assert.AreEqual(0, HitCount(searcher, "rabbit")); // This throws exception (it shouldn't) Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10).TotalHits); reader.Close(); dir.Close(); }
public virtual void TestCachingWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.Close(); IndexReader reader = IndexReader.Open(dir); MockFilter filter = new MockFilter(); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // first time, nested filter is called cacher.GetDocIdSet(reader); Assert.IsTrue(filter.WasCalled(), "first time"); // make sure no exception if cache is holding the wrong bitset cacher.Bits(reader); cacher.GetDocIdSet(reader); // second time, nested filter should not be called filter.Clear(); cacher.GetDocIdSet(reader); Assert.IsFalse(filter.WasCalled(), "second time"); reader.Close(); }
public virtual void TestCompressionTools() { Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES); Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES); Document doc = new Document(); doc.Add(binaryFldCompressed); doc.Add(stringFldCompressed); /** add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); writer.Close(); /** open a reader and fetch the document */ IndexReader reader = IndexReader.Open(dir); Document docFromReader = reader.Document(0); Assert.IsTrue(docFromReader != null); /** fetch the binary compressed field and compare its content with the original one */ System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed")))); Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed)); Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed")).Equals(binaryValCompressed)); reader.Close(); dir.Close(); }
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs) { RAMDirectory d = new RAMDirectory(); IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED); for (int i = 0; i < numDeletedDocs; i++) { w.AddDocument(new Document()); } w.Commit(); w.DeleteDocuments(new MatchAllDocsQuery()); w.Commit(); if (0 < numDeletedDocs) { Assert.IsTrue(w.HasDeletions(), "writer has no deletions"); } Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs"); Assert.AreEqual(0, w.NumDocs(), "writer has non-deleted docs"); w.Close(); IndexReader r = IndexReader.Open(d, true); Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs"); r.Close(); return(d); }
public virtual void TestRAMDirectoryString() { MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.FullName); // Check size Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes()); // open reader to test document count IndexReader reader = IndexReader.Open(ramDir); Assert.AreEqual(docsToAdd, reader.NumDocs()); // open searcher to check if all docs are there IndexSearcher searcher = new IndexSearcher(reader); // search for all documents for (int i = 0; i < docsToAdd; i++) { Document doc = searcher.Doc(i); Assert.IsTrue(doc.GetField("content") != null); } // cleanup reader.Close(); searcher.Close(); }
public virtual void Test() { IndexReader reader = null; try { reader = IndexReader.Open(directory); for (int i = 1; i <= numThreads; i++) { TestTermPositionVectors(reader, i); } } catch (System.IO.IOException ioe) { Assert.Fail(ioe.Message); } finally { if (reader != null) { try { /** close the opened reader */ reader.Close(); } catch (System.IO.IOException ioe) { System.Console.Error.WriteLine(ioe.StackTrace); } } } }
public string SearchAndPaging(string strQuery, string index) { string result = string.Empty; try { List <SearchArticle> searchArticleList = new List <SearchArticle>(); PSCPortal.CMS.ArticleCollection ArticleList = ArticleCollection.GetArticleCollectionPublish(); string nameSub = Libs.Ultility.GetSubDomain() == string.Empty ? "HomePage" : Libs.Ultility.GetSubDomain(); SubDomain subDomain = PSCPortal.Engine.SubDomain.GetSubByName(nameSub); PageCollection pagesBelongTo = subDomain.GetPagesBelongTo(); string strId = string.Empty; foreach (var page in pagesBelongTo) { foreach (var ar in ArticleList.Where(ar => ar.PageId == page.Id)) { strId += ar.Id + " OR "; } if (strId.Length > 0) { strId = strId.Remove(strId.Length - 3, 3); } } int pageIndex = Int32.Parse(index); string strSearch = " ArticleDetail:(" + strQuery + ") AND ArticleId:" + "( " + strId + " )"; Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"])); Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", new Lucene.Net.Analysis.Standard.StandardAnalyzer()); Lucene.Net.Search.Query query = parser.Parse(strSearch); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader); Lucene.Net.Search.Hits hits = searcher.Search(query); Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query); Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>"); Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score); result += hits.Length() + "_" + "<div class='blog_news'><div class='topic_news_title1'><div class='topic_news_title'><a href='#'>Results found: " + hits.Length() + "</a></div></div>"; result += "<div class='ct_topic_l'><div class='ct_topic_r1'>"; for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < hits.Length(); i++) { string detail = hits.Doc(i).Get("ArticleDetail"); Lucene.Net.Analysis.TokenStream ts = (new Lucene.Net.Analysis.Standard.StandardAnalyzer()).TokenStream("ArticleDetail", new System.IO.StringReader(detail)); SearchArticle searchArticle = new SearchArticle(); searchArticle.Id = hits.Doc(i).Get("ArticleId"); searchArticle.Title = hits.Doc(i).Get("ArticleTitle"); searchArticle.Highligth = highlighter.GetBestFragment(ts, detail); searchArticleList.Add(searchArticle); } reader.Close(); JavaScriptSerializer serializer = new JavaScriptSerializer(); Dictionary <string, object> resultDic = new Dictionary <string, object>(); resultDic["Count"] = hits.Length(); resultDic["Data"] = searchArticleList; result = serializer.Serialize(resultDic); } catch (Exception) { /* swallow errors and return the default (empty) result */ } return(result); }
private int Numdoc() { IndexReader rs = IndexReader.Open(spellindex); int num = rs.NumDocs(); Assert.IsTrue(num != 0); //System.out.println("num docs: " + num); rs.Close(); return(num); }
/// <summary>Note that the underlying IndexReader is not closed if the IndexSearcher was constructed with IndexSearcher(IndexReader r). If the IndexReader was supplied implicitly by specifying a directory, then the IndexReader gets closed.</summary> public override void Close() { if (closeReader) { FieldSortedHitQueue.Close(reader); Lucene.Net.Search.FieldCache_Fields.DEFAULT.Close(reader); reader.Close(); } }
public virtual void TestSetBufferSize() { System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize")); MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom()); try { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); for (int i = 0; i < 37; i++) { Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } writer.Close(); dir.allIndexInputs.Clear(); IndexReader reader = IndexReader.Open(dir); Term aaa = new Term("content", "aaa"); Term bbb = new Term("content", "bbb"); Term ccc = new Term("content", "ccc"); Assert.AreEqual(37, reader.DocFreq(ccc)); reader.DeleteDocument(0); Assert.AreEqual(37, reader.DocFreq(aaa)); dir.tweakBufferSizes(); reader.DeleteDocument(4); Assert.AreEqual(reader.DocFreq(bbb), 37); dir.tweakBufferSizes(); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); dir.tweakBufferSizes(); hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(1, hits.Length); hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); searcher.Close(); reader.Close(); } finally { _TestUtil.RmDir(indexDir); } }
public virtual void TestBinaryFieldInIndex() { IFieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES); IFieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); // binary fields with store off are not allowed Assert.Throws <ArgumentException>( () => new Field("fail", System.Text.Encoding.UTF8.GetBytes(binaryValStored), Field.Store.NO)); Document doc = new Document(); doc.Add(binaryFldStored); doc.Add(stringFldStored); /* test for field count */ Assert.AreEqual(2, doc.fields_ForNUnit.Count); /* add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); writer.Close(); /* open a reader and fetch the document */ IndexReader reader = IndexReader.Open(dir, false); Document docFromReader = reader.Document(0); Assert.IsTrue(docFromReader != null); /* fetch the binary stored field and compare its content with the original one */ System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored"))); Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored)); /* fetch the string field and compare its content with the original one */ System.String stringFldStoredTest = docFromReader.Get("stringStored"); Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored)); /* delete the document from index */ reader.DeleteDocument(0); Assert.AreEqual(0, reader.NumDocs()); reader.Close(); dir.Close(); }
public virtual void TestBasic() { HashSet <string> fileExtensions = new HashSet <string>(); fileExtensions.Add("fdt"); fileExtensions.Add("fdx"); Directory primaryDir = new MockRAMDirectory(); RAMDirectory secondaryDir = new MockRAMDirectory(); FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true); IndexWriter writer = new IndexWriter(fsd, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null); writer.UseCompoundFile = false; TestIndexWriterReader.CreateIndexNoClose(true, "ram", writer); IndexReader reader = writer.GetReader(null); Assert.AreEqual(100, reader.MaxDoc); writer.Commit(null); // we should see only fdx,fdt files here System.String[] files = primaryDir.ListAll(null); Assert.IsTrue(files.Length > 0); for (int x = 0; x < files.Length; x++) { System.String ext = FileSwitchDirectory.GetExtension(files[x]); Assert.IsTrue(fileExtensions.Contains(ext)); } files = secondaryDir.ListAll(null); Assert.IsTrue(files.Length > 0); // we should not see fdx,fdt files here for (int x = 0; x < files.Length; x++) { System.String ext = FileSwitchDirectory.GetExtension(files[x]); Assert.IsFalse(fileExtensions.Contains(ext)); } reader.Close(); writer.Close(); files = fsd.ListAll(null); for (int i = 0; i < files.Length; i++) { Assert.IsNotNull(files[i]); } fsd.Close(); }
public virtual void TestCaching() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); TokenStream stream = new AnonymousClassTokenStream(this); stream = new CachingTokenFilter(stream); doc.Add(new Field("preanalyzed", stream, TermVector.NO)); // 1) we consume all tokens twice before we add the doc to the index checkTokens(stream); stream.Reset(); checkTokens(stream); // 2) now add the document to the index and verify if all tokens are indexed // don't reset the stream here, the DocumentWriter should do that implicitly writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1")); Assert.IsTrue(termPositions.Next()); Assert.AreEqual(1, termPositions.Freq()); Assert.AreEqual(0, termPositions.NextPosition()); termPositions.Seek(new Term("preanalyzed", "term2")); Assert.IsTrue(termPositions.Next()); Assert.AreEqual(2, termPositions.Freq()); Assert.AreEqual(1, termPositions.NextPosition()); Assert.AreEqual(3, termPositions.NextPosition()); termPositions.Seek(new Term("preanalyzed", "term3")); Assert.IsTrue(termPositions.Next()); Assert.AreEqual(1, termPositions.Freq()); Assert.AreEqual(2, termPositions.NextPosition()); reader.Close(); // 3) reset stream and consume tokens again stream.Reset(); checkTokens(stream); }
public virtual void TestMissingTerms() { System.String fieldName = "field1"; MockRAMDirectory rd = new MockRAMDirectory(); IndexWriter w = new IndexWriter(rd, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED); for (int i = 0; i < 100; i++) { Document doc = new Document(); int term = i * 10; //terms are units of 10; doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED)); w.AddDocument(doc); } w.Close(); IndexReader reader = IndexReader.Open(rd, true); IndexSearcher searcher = new IndexSearcher(reader); int numDocs = reader.NumDocs(); ScoreDoc[] results; MatchAllDocsQuery q = new MatchAllDocsQuery(); System.Collections.ArrayList terms = new System.Collections.ArrayList(); terms.Add("5"); results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs).ScoreDocs; Assert.AreEqual(0, results.Length, "Must match nothing"); terms = new System.Collections.ArrayList(); terms.Add("10"); results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs).ScoreDocs; Assert.AreEqual(1, results.Length, "Must match 1"); terms = new System.Collections.ArrayList(); terms.Add("10"); terms.Add("20"); results = searcher.Search(q, new FieldCacheTermsFilter(fieldName, (System.String[])terms.ToArray(typeof(System.String))), numDocs).ScoreDocs; Assert.AreEqual(2, results.Length, "Must match 2"); reader.Close(); rd.Close(); }
public virtual void TestFilterWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 500; i++) { Document document = new Document(); document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(document); } writer.Close(); IndexReader reader = IndexReader.Open(dir, true); SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim())); SpanQueryFilter filter = new SpanQueryFilter(query); SpanFilterResult result = filter.BitSpans(reader); DocIdSet docIdSet = result.DocIdSet; Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be"); AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); var spans = result.Positions; Assert.IsTrue(spans != null, "spans is null and it shouldn't be"); int size = GetDocIdSetSize(docIdSet); Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size); for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();) { SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo)iterator.Current; Assert.IsTrue(info != null, "info is null and it shouldn't be"); //The doc should indicate the bit is on AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc); //There should be two positions in each Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2); } reader.Close(); }
public virtual void TestSetNorm_Renamed() { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); // add the same document four times IFieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.ANALYZED); Document d1 = new Document(); d1.Add(f1); writer.AddDocument(d1); writer.AddDocument(d1); writer.AddDocument(d1); writer.AddDocument(d1); writer.Close(); // reset the boost of each instance of this document IndexReader reader = IndexReader.Open(store, false); reader.SetNorm(0, "field", 1.0f); reader.SetNorm(1, "field", 2.0f); reader.SetNorm(2, "field", 4.0f); reader.SetNorm(3, "field", 16.0f); reader.Close(); // check that searches are ordered by this boost float[] scores = new float[4]; new IndexSearcher(store, true).Search(new TermQuery(new Term("field", "word")), new AnonymousClassCollector(scores, this)); float lastScore = 0.0f; for (int i = 0; i < 4; i++) { Assert.IsTrue(scores[i] > lastScore); lastScore = scores[i]; } }
public virtual void TestGiga() { StandardAnalyzer analyzer = new StandardAnalyzer(); Directory index = new MockRAMDirectory(); IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); AddDoc("Lucene in Action", w); AddDoc("Lucene for Dummies", w); // addDoc("Giga", w); AddDoc("Giga byte", w); AddDoc("ManagingGigabytesManagingGigabyte", w); AddDoc("ManagingGigabytesManagingGigabytes", w); AddDoc("The Art of Computer Science", w); AddDoc("J. K. Rowling", w); AddDoc("JK Rowling", w); AddDoc("Joanne K Roling", w); AddDoc("Bruce Willis", w); AddDoc("Willis bruce", w); AddDoc("Brute willis", w); AddDoc("B. willis", w); IndexReader r = w.GetReader(); w.Close(); Query q = new QueryParser("field", analyzer).Parse("giga~0.9"); // 3. search IndexSearcher searcher = new IndexSearcher(r); ScoreDoc[] hits = searcher.Search(q, 10).scoreDocs; Assert.AreEqual(1, hits.Length); Assert.AreEqual(searcher.Doc(hits[0].doc).Get("field"), "Giga byte"); r.Close(); }
public virtual void TestNullOrSubScorer() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED)); w.AddDocument(doc); IndexReader r = w.GetReader(); IndexSearcher s = new IndexSearcher(r); BooleanQuery q = new BooleanQuery(); q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); // PhraseQuery w/ no terms added returns a null scorer PhraseQuery pq = new PhraseQuery(); q.Add(pq, BooleanClause.Occur.SHOULD); Assert.AreEqual(1, s.Search(q, 10).totalHits); // A required clause which returns null scorer should return null scorer to // IndexSearcher. q = new BooleanQuery(); pq = new PhraseQuery(); q.Add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); q.Add(pq, BooleanClause.Occur.MUST); Assert.AreEqual(0, s.Search(q, 10).totalHits); DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); dmq.Add(new TermQuery(new Term("field", "a"))); dmq.Add(pq); Assert.AreEqual(1, s.Search(dmq, 10).totalHits); r.Close(); w.Close(); dir.Close(); }
public virtual void TestCachingWorks() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true); writer.Close(); IndexReader reader = IndexReader.Open(dir); MockFilter filter = new MockFilter(); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // first time, nested filter is called cacher.Bits(reader); Assert.IsTrue(filter.WasCalled(), "first time"); // second time, nested filter should not be called filter.Clear(); cacher.Bits(reader); Assert.IsFalse(filter.WasCalled(), "second time"); reader.Close(); }
/// <summary>Index a Dictionary</summary> /// <param name="dict">the dictionary to index</param> /// <throws>IOException</throws> public virtual void IndexDictionary(Dictionary dict) { IndexReader.Unlock(spellindex); IndexWriter writer = new IndexWriter(spellindex, new WhitespaceAnalyzer(), !IndexReader.IndexExists(spellindex)); writer.SetMergeFactor(300); writer.SetMaxBufferedDocs(150); System.Collections.IEnumerator iter = dict.GetWordsIterator(); while (iter.MoveNext()) { System.String word = (System.String)iter.Current; int len = word.Length; if (len < 3) { continue; // too short, we bail, but "too long" is fine... } if (this.Exist(word)) { // if the word already exists in the gram index continue; } // ok index the word Document doc = CreateDocument(word, GetMin(len), GetMax(len)); writer.AddDocument(doc); } // close writer writer.Optimize(); writer.Close(); // close reader reader.Close(); reader = null; }
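IndexDictionary is normally driven through the contrib spell-checker rather than called in isolation. The sketch below is only illustrative: it assumes the contrib SpellChecker and LuceneDictionary classes this method belongs to, an arbitrary "contents" field name, and member names that may differ slightly between Lucene.Net releases.

public void BuildSpellIndexSketch(Directory spellIndexDir, IndexReader contentReader)
{
    // Assumed contrib API: SpellChecker wraps the directory that will hold the n-gram index.
    SpellChecker spell = new SpellChecker(spellIndexDir);
    // Feed it every unique term of a field ("contents" is an assumed field name).
    spell.IndexDictionary(new LuceneDictionary(contentReader, "contents"));
    // Ask for up to five suggestions for a (deliberately) misspelled word.
    System.String[] suggestions = spell.SuggestSimilar("lucenne", 5);
}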
public virtual void TestBinaryFieldInIndex() { Fieldable binaryFldStored = new Field("binaryStored", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValStored), Field.Store.YES); Fieldable binaryFldCompressed = new Field("binaryCompressed", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed), Field.Store.COMPRESS); Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO); try { // binary fields with store off are not allowed new Field("fail", System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed), Field.Store.NO); Assert.Fail(); } catch (System.ArgumentException) { /* expected */ } Document doc = new Document(); doc.Add(binaryFldStored); doc.Add(binaryFldCompressed); doc.Add(stringFldStored); doc.Add(stringFldCompressed); /** test for field count */ Assert.AreEqual(4, doc.fields_ForNUnit.Count); /** add the doc to a ram index */ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); writer.Close(); /** open a reader and fetch the document */ IndexReader reader = IndexReader.Open(dir); Document docFromReader = reader.Document(0); Assert.IsTrue(docFromReader != null); /** fetch the binary stored field and compare its content with the original one */ System.String binaryFldStoredTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryStored"))); Assert.IsTrue(binaryFldStoredTest.Equals(binaryValStored)); /** fetch the binary compressed field and compare its content with the original one */ System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(docFromReader.GetBinaryValue("binaryCompressed"))); Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed)); /** fetch the string field and compare its content with the original one */ System.String stringFldStoredTest = docFromReader.Get("stringStored"); Assert.IsTrue(stringFldStoredTest.Equals(binaryValStored)); /** fetch the compressed string field and compare its content with the original one */ System.String stringFldCompressedTest = docFromReader.Get("stringCompressed"); Assert.IsTrue(stringFldCompressedTest.Equals(binaryValCompressed)); /** delete the document from index */ reader.DeleteDocument(0); Assert.AreEqual(0, reader.NumDocs()); reader.Close(); dir.Close(); }
public static void DeleteArticleIndexing(Guid articleId) { Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(HttpContext.Current.Server.MapPath(ConfigurationManager.AppSettings["IndexingArticle"])); reader.DeleteDocuments(new Lucene.Net.Index.Term("ArticleId", articleId.ToString())); reader.Close(); }
public virtual void TestEmptyIndex() { // creating two directories for indices Directory indexStoreA = new MockRAMDirectory(); Directory indexStoreB = new MockRAMDirectory(); // creating a document to store Document lDoc = new Document(); lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED)); lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED)); lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); // creating a document to store Document lDoc2 = new Document(); lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED)); lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED)); lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); // creating a document to store Document lDoc3 = new Document(); lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED)); lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED)); lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); // creating an index writer for the first index IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); // creating an index writer for the second index, but writing nothing IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); //-------------------------------------------------------------------- // scenario 1 //-------------------------------------------------------------------- // writing the documents to the first index writerA.AddDocument(lDoc); writerA.AddDocument(lDoc2); writerA.AddDocument(lDoc3); writerA.Optimize(); writerA.Close(); // closing the second index writerB.Close(); // creating the query QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT)); Query query = parser.Parse("handle:1"); // building the searchables Searcher[] searchers = new Searcher[2]; // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index searchers[0] = new IndexSearcher(indexStoreB, true); searchers[1] = new IndexSearcher(indexStoreA, true); // creating the multiSearcher Searcher mSearcher = GetMultiSearcherInstance(searchers); // performing the search ScoreDoc[] hits = mSearcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); // iterating over the hit documents for (int i = 0; i < hits.Length; i++) { mSearcher.Doc(hits[i].Doc); } mSearcher.Close(); //-------------------------------------------------------------------- // scenario 2 //-------------------------------------------------------------------- // adding one document to the empty index writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED); writerB.AddDocument(lDoc); writerB.Optimize(); writerB.Close(); // building the searchables Searcher[] searchers2 = new Searcher[2]; // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index searchers2[0] = new IndexSearcher(indexStoreB, true); searchers2[1] = new IndexSearcher(indexStoreA, true); // creating the multiSearcher MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2); // performing the same search ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(4, hits2.Length); // iterating over the hit documents for (int i = 0; i < hits2.Length; i++) { // no exception should happen at this point mSearcher2.Doc(hits2[i].Doc); } // test the subSearcher() method: Query subSearcherQuery = parser.Parse("id:doc1"); hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, hits2.Length); Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[0] Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].Doc)); // hit from searchers2[1] subSearcherQuery = parser.Parse("id:doc2"); hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs; Assert.AreEqual(1, hits2.Length); Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[1] mSearcher2.Close(); //-------------------------------------------------------------------- // scenario 3 //-------------------------------------------------------------------- // deleting the document just added, this will cause a different exception to take place Term term = new Term("id", "doc1"); IndexReader readerB = IndexReader.Open(indexStoreB, false); readerB.DeleteDocuments(term); readerB.Close(); // optimizing the index with the writer writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED); writerB.Optimize(); writerB.Close(); // building the searchables Searcher[] searchers3 = new Searcher[2]; searchers3[0] = new IndexSearcher(indexStoreB, true); searchers3[1] = new IndexSearcher(indexStoreA, true); // creating the multiSearcher Searcher mSearcher3 = GetMultiSearcherInstance(searchers3); // performing the same search ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits3.Length); // iterating over the hit documents for (int i = 0; i < hits3.Length; i++) { mSearcher3.Doc(hits3[i].Doc); } mSearcher3.Close(); indexStoreA.Close(); indexStoreB.Close(); }
/* Walk directory hierarchy in uid order, while keeping uid iterator from existing index in sync. Mismatches indicate one of: (a) old documents to be deleted; (b) unchanged documents, to be left alone; or (c) new documents, to be indexed. */ private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create) { if (!create) { // incrementally update reader = IndexReader.Open(FSDirectory.Open(index), false); // open existing index uidIter = reader.Terms(new Term("uid", "")); // init uid iterator IndexDocs(file); if (deleting) { // delete rest of stale docs while (uidIter.Term() != null && (System.Object) uidIter.Term().Field == (System.Object) "uid") { System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text)); reader.DeleteDocuments(uidIter.Term()); uidIter.Next(); } deleting = false; } uidIter.Close(); // close uid iterator reader.Close(); // close existing index } // don't have existing else IndexDocs(file); }