// Verifies that a MockRAMDirectory loaded from an on-disk index reports a
// consistent size, contains the expected number of documents, and that every
// document is retrievable with its "content" field intact.
public virtual void TestRAMDirectoryString()
{
    MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.FullName);
    // Check size
    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());
    // open reader to test document count
    IndexReader reader = IndexReader.Open(ramDir);
    Assert.AreEqual(docsToAdd, reader.NumDocs());
    // open searcher to check that all docs are there
    IndexSearcher searcher = new IndexSearcher(reader);
    // fetch every document by id
    for (int i = 0; i < docsToAdd; i++)
    {
        Document doc = searcher.Doc(i);
        Assert.IsTrue(doc.GetField("content") != null);
    }
    // cleanup
    reader.Close();
    searcher.Close();
}
// Interactive console search: reads a query string from stdin, parses it
// against the "TAGS" field, and prints title/summary/tags for every hit.
// (The Korean prompts/labels are user-facing output and are kept verbatim.)
public void Run()
{
    // read-only reader over the shared directory
    Lucene.Net.Index.IndexReader indexReader = Lucene.Net.Index.IndexReader.Open(_directory, true);
    Lucene.Net.Search.Searcher indexSearch = new Lucene.Net.Search.IndexSearcher(indexReader);
    var queryParser = new QueryParser(luceneVersion, "TAGS", _analyzer);
    Console.Write("검색어를 입력해주세요 :");
    string q = Console.ReadLine();
    var query = queryParser.Parse(q);
    Console.WriteLine("[검색어] {0}", q);
    // Using MaxDoc as the hit limit effectively retrieves every match.
    Lucene.Net.Search.TopDocs resultDocs = indexSearch.Search(query, indexReader.MaxDoc);
    var hits = resultDocs.ScoreDocs;
    int currentRow = 0;
    foreach (var hit in hits)
    {
        var documentFromSearch = indexSearch.Doc(hit.Doc);
        Console.WriteLine("* Result {0}", ++currentRow);
        Console.WriteLine("\t-제목 : {0}", documentFromSearch.Get("TITLE"));
        Console.WriteLine("\t-내용 : {0}", documentFromSearch.Get("SUMMARY"));
        Console.WriteLine("\t-태그 : {0}", documentFromSearch.Get("TAGS"));
    }
    Console.WriteLine();
}
/// <summary>
/// Search for files.
/// </summary>
/// <param name="queryText">The query text (Lucene syntax), run against the "body" field.</param>
/// <returns>The files that match the query text.</returns>
public SourceFile[] Search(string queryText)
{
    var parser = new Lucene.Net.QueryParsers.QueryParser(
        Lucene.Net.Util.Version.LUCENE_30, "body", _analyzer);
    Lucene.Net.Search.Query parsedQuery = parser.Parse(queryText);

    // Read-only searcher; disposed when the search completes.
    using (var searcher = new Lucene.Net.Search.IndexSearcher(_directory, true))
    {
        Lucene.Net.Search.TopDocs topDocs = searcher.Search(parsedQuery, int.MaxValue);

        var matches = new List<SourceFile>();
        foreach (Lucene.Net.Search.ScoreDoc scoreDoc in topDocs.ScoreDocs)
        {
            Lucene.Net.Documents.Document document = searcher.Doc(scoreDoc.Doc);
            matches.Add(new SourceFile(
                document.Get("id"),
                document.Get("type"),
                document.Get("name"),
                document.Get("fileName"),
                null));
        }

        return matches.ToArray();
    }
}
/// <summary>
/// Constructor. Opens the index directory and prepares either a writer
/// (write mode) or a read-only searcher (read mode).
/// </summary>
/// <param name="directory">The directory of the index.</param>
/// <param name="isWriteMode">True to open an IndexWriter; false to open a read-only IndexSearcher.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="directory"/> is null or whitespace.</exception>
public SearchIndex(string directory, bool isWriteMode)
{
    if (String.IsNullOrWhiteSpace(directory))
    {
        throw new ArgumentException("directory is null or whitespace.", "directory");
    }
    _directory = Lucene.Net.Store.FSDirectory.Open(directory);
    // Empty stop-word set: no terms are filtered out during analysis.
    _analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(
        Lucene.Net.Util.Version.LUCENE_30, new HashSet <string>());
    IsWriteMode = isWriteMode;
    if (IsWriteMode)
    {
        _writer = new Lucene.Net.Index.IndexWriter(
            _directory, _analyzer,
            new Lucene.Net.Index.IndexWriter.MaxFieldLength(1000000));
        _searcher = null;
    }
    else
    {
        _searcher = new Lucene.Net.Search.IndexSearcher(
            _directory, true);
    }
}
// Demo test for the pre-2.9 Lucene API: indexes one document into a
// RAMDirectory, then searches it via the deprecated Hits-based search and
// checks the stored field round-trips intact.
public virtual void TestDemo_Renamed_Method()
{
    Analyzer analyzer = new StandardAnalyzer();
    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead (note that the
    // parameter true will overwrite the index in that directory
    // if one exists):
    //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
    IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
    iwriter.SetMaxFieldLength(25000);
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
    iwriter.AddDocument(doc);
    iwriter.Close();
    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory);
    // Parse a simple query that searches for "text":
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    Hits hits = isearcher.Search(query);
    Assert.AreEqual(1, hits.Length());
    // Iterate through the results:
    for (int i = 0; i < hits.Length(); i++)
    {
        Document hitDoc = hits.Doc(i);
        Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname"));
    }
    isearcher.Close();
    directory.Close();
}
/// <summary>
/// Searches the article index for <paramref name="strQuery"/> and returns an
/// HTML fragment listing the hits with matching terms highlighted.
/// </summary>
/// <param name="strQuery">Query text in Lucene query syntax.</param>
/// <returns>An HTML string with the hit count header and one entry per hit.</returns>
public string Search(string strQuery)
{
    // Reuse a single analyzer for parsing and for the highlighter token
    // streams (the original constructed a new StandardAnalyzer per hit).
    Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
    Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", analyzer);
    Lucene.Net.Search.Query query = parser.Parse(strQuery);
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
    Lucene.Net.Search.Hits hits = searcher.Search(query);
    Lucene.Net.Highlight.QueryScorer score = new Lucene.Net.Highlight.QueryScorer(query);
    Lucene.Net.Highlight.SimpleHTMLFormatter formater = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
    Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);
    // Build the output with a StringBuilder instead of repeated string
    // concatenation (the original was O(n^2) in the number of hits).
    System.Text.StringBuilder result = new System.Text.StringBuilder();
    result.Append("<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Kết quả tìm thấy : " + hits.Length() + " </font></div>");
    result.Append("<div style='padding: 10px 10px 10px 10px;'>");
    for (int i = 0; i < hits.Length(); i++)
    {
        string id = hits.Doc(i).Get("ArticleId");
        string title = hits.Doc(i).Get("ArticleTitle");
        string detail = hits.Doc(i).Get("ArticleDetail");
        Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream("ArticleDetail", new System.IO.StringReader(detail));
        result.Append(string.Format("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title));
        result.Append(string.Format("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div></br>", highlighter.GetBestFragment(ts, detail)));
    }
    result.Append("</div>");
    // Release index resources (the original closed only the reader and
    // leaked the searcher).
    searcher.Close();
    reader.Close();
    return result.ToString();
}
// Publishes an IndexSearcher over .NET Remoting (TCP port 38087) and drives
// a client search against it; any exception captured on the client side is
// rethrown here once the client signals completion.
public void Test_Search_FieldDoc()
{
    try
    {
        LUCENENET_100_CreateIndex();
        System.Runtime.Remoting.Channels.ChannelServices.RegisterChannel(new System.Runtime.Remoting.Channels.Tcp.TcpChannel(38087));
        Lucene.Net.Search.IndexSearcher indexSearcher = new Lucene.Net.Search.IndexSearcher(LUCENENET_100_Dir);
        // Expose the searcher as a remoting endpoint named "Searcher".
        System.Runtime.Remoting.RemotingServices.Marshal(indexSearcher, "Searcher");
        LUCENENET_100_ClientSearch();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    // Busy-wait (with small sleeps) until the client flags completion.
    while (LUCENENET_100_testFinished == false)
    {
        System.Threading.Thread.Sleep(10);
    }
    // Propagate a failure observed on the client thread.
    if (LUCENENET_100_Exception != null)
    {
        throw LUCENENET_100_Exception;
    }
}
// Smoke test for MMapDirectory: writes 1000 random-field documents.
// NOTE(review): the searcher is opened from the path (not the MMapDirectory
// instance) before any document is added, and no query is ever run against
// it — it only exercises open/close alongside an active writer. Confirm
// that is the intent before restructuring.
public virtual void TestMmapIndex()
{
    FSDirectory storeDirectory;
    storeDirectory = new MMapDirectory(new System.IO.FileInfo(storePathname), null);
    // plan to add a set of useful stopwords, consider changing some of the
    // interior filters.
    StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable());
    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    IndexSearcher searcher = new IndexSearcher(storePathname);
    for (int dx = 0; dx < 1000; dx++)
    {
        System.String f = RandomField();
        Document doc = new Document();
        doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    searcher.Close();
    writer.Close();
    RmDir(new System.IO.FileInfo(storePathname));
}
// Verifies taxonomy facet counts collected for a TermQuery: per-child counts
// for CP_A and CP_B must match the precomputed termExpectedCounts table.
public virtual void TestDifferentNumResults()
{
    // test the collector w/ FacetRequests and different numResults
    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);
    FacetsCollector sfc = new FacetsCollector();
    TermQuery q = new TermQuery(A);
    searcher.Search(q, sfc);
    Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);
    FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
    // NOTE(review): -1 appears to denote "dimension total not aggregated"
    // for CP_A under this configuration — confirm against the facet config.
    Assert.AreEqual(-1, (int)result.Value);
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(termExpectedCounts[CP_A + "/" + labelValue.label], labelValue.value);
    }
    result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
    Assert.AreEqual(termExpectedCounts[CP_B], result.Value);
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(termExpectedCounts[CP_B + "/" + labelValue.label], labelValue.value);
    }
    IOUtils.Close(indexReader, taxoReader);
}
// Regression test: a nested SpanNearQuery (unordered outer clause wrapping an
// ordered inner span) used to throw; verifies it now returns the expected hit.
public virtual void TestNPESpanQuery()
{
    Directory dir = new MockRAMDirectory();
    // Empty stop-word table so all terms are indexed.
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new System.Collections.Hashtable(0)), IndexWriter.MaxFieldLength.LIMITED);
    // Add documents
    AddDoc(writer, "1", "the big dogs went running to the market");
    AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");
    // Commit
    writer.Close();
    // Get searcher
    IndexReader reader = IndexReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Control (make sure docs indexed)
    Assert.AreEqual(2, HitCount(searcher, "the"));
    Assert.AreEqual(1, HitCount(searcher, "cat"));
    Assert.AreEqual(1, HitCount(searcher, "dogs"));
    Assert.AreEqual(0, HitCount(searcher, "rabbit"));
    // This throws exception (it shouldn't)
    Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10).TotalHits);
    reader.Close();
    dir.Close();
}
// Builds the shared 1000-doc payload fixture and caches its reader.
public override void SetUp()
{
    base.SetUp();
    searcher = new PayloadHelper().SetUp(similarity, 1000);
    indexReader = searcher.GetIndexReader();
}
} // End Sub BuildIndex


// https://lucenenet.apache.org/
// https://www.codeproject.com/Articles/609980/Small-Lucene-NET-Demo-App
// https://stackoverflow.com/questions/12600196/lucene-how-to-index-file-names
// Searches the "file_name" field of the index at indexPath for the escaped
// phrase and prints score + "full_name" of the top 10 hits to the console.
private static void SearchPath(string phrase, string indexPath)
{
    Lucene.Net.Util.LuceneVersion version = Lucene.Net.Util.LuceneVersion.LUCENE_48;
    Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    Lucene.Net.Index.IndexReader r = Lucene.Net.Index.DirectoryReader.Open(luceneIndexDirectory);
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(r);
    Lucene.Net.Analysis.Analyzer analyzer = GetWrappedAnalyzer();
    Lucene.Net.QueryParsers.Classic.QueryParser parser = new Lucene.Net.QueryParsers.Classic.QueryParser(version, "file_name", analyzer);
    // https://stackoverflow.com/questions/15170097/how-to-search-across-all-the-fields
    // Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser parser = new Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser(version, GetFields(r), analyzer);
    // Escape the phrase so user input is treated literally, not as syntax.
    Lucene.Net.Search.Query query = parser.Parse(Lucene.Net.QueryParsers.Classic.QueryParser.Escape(phrase));
    Lucene.Net.Search.ScoreDoc[] hits = searcher.Search(query, 10).ScoreDocs;
    foreach (Lucene.Net.Search.ScoreDoc hit in hits)
    {
        Lucene.Net.Documents.Document foundDoc = searcher.Doc(hit.Doc);
        System.Console.WriteLine(hit.Score);
        string full_name = foundDoc.Get("full_name");
        System.Console.WriteLine(full_name);
        // string favoritePhrase = foundDoc.Get("favoritePhrase");
        // System.Console.WriteLine(favoritePhrase);
    } // Next hit
} // End Sub SearchPath
// MMapDirectory smoke test (currently ignored): writes 1000 random-field
// documents while a read-only searcher is open; no query is ever issued.
public virtual void TestMmapIndex()
{
    Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway");
    FSDirectory storeDirectory;
    storeDirectory = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null);
    // plan to add a set of useful stopwords, consider changing some of the
    // interior filters.
    StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>());
    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    IndexSearcher searcher = new IndexSearcher(storeDirectory, true);
    for (int dx = 0; dx < 1000; dx++)
    {
        System.String f = RandomField();
        Document doc = new Document();
        doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    searcher.Close();
    writer.Close();
    RmDir(new System.IO.FileInfo(storePathname));
}
// Test that queries based on reverse/ordFieldScore returns docs with expected score.
// Every doc must match; scores must descend N_DOCS, N_DOCS-1, ... and the doc
// ids must come back in (reverse) ordinal order depending on inOrder.
private void DoTestExactScore(System.String field, bool inOrder)
{
    IndexSearcher s = new IndexSearcher(dir, true);
    ValueSource vs;
    if (inOrder)
    {
        vs = new OrdFieldSource(field);
    }
    else
    {
        vs = new ReverseOrdFieldSource(field);
    }
    Query q = new ValueSourceQuery(vs);
    TopDocs td = s.Search(q, null, 1000);
    Assert.AreEqual(N_DOCS, td.TotalHits, "All docs should be matched!");
    ScoreDoc[] sd = td.ScoreDocs;
    for (int i = 0; i < sd.Length; i++)
    {
        float score = sd[i].Score;
        System.String id = s.IndexReader.Document(sd[i].Doc).Get(ID_FIELD);
        Log("-------- " + i + ". Explain doc " + id);
        Log(s.Explain(q, sd[i].Doc));
        // Scores are the field ordinals, so rank i must score N_DOCS - i.
        float expectedScore = N_DOCS - i;
        Assert.AreEqual(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA, "score of result " + i + " shuould be " + expectedScore + " != " + score);
        System.String expectedId = inOrder?Id2String(N_DOCS - i):Id2String(i + 1); // reverse ==> smaller values first
        Assert.IsTrue(expectedId.Equals(id), "id of result " + i + " shuould be " + expectedId + " != " + score);
    }
}
/// <summary>
/// Runs a full-text query against the "body" field of the on-disk index and
/// returns the numeric "id" field of every hit scoring above <paramref name="minScore"/>.
/// </summary>
/// <param name="search">Query text in Lucene query syntax.</param>
/// <param name="max">Maximum number of hits to retrieve.</param>
/// <param name="minScore">Exclusive lower bound on the hit score.</param>
/// <returns>The ids of the matching documents (possibly empty).</returns>
public IEnumerable <int> Get(string search, int max = 100, int minScore = 1)
{
    if (!built)
    {
        BuildIndexes();
        built = true;
    }

    var dir = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\lucene"));
    var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", analyzer);
    Lucene.Net.Search.Query query = parser.Parse(search);
    var searcher = new Lucene.Net.Search.IndexSearcher(dir);
    var hits = searcher.Search(query, max);
    // Materialize before returning: the original handed back a lazy LINQ
    // query that dereferenced the searcher after this method had returned,
    // and it also unconditionally read ScoreDocs[0] (into an unused local),
    // which threw IndexOutOfRangeException whenever there were no hits.
    // The original catch block was a bare rethrow and has been removed.
    var result = hits.ScoreDocs
                 .Where(s => s.Score > minScore)
                 .Select(h => int.Parse(searcher.Doc(h.Doc).GetField("id").StringValue))
                 .ToList();
    return result;
}
// Console demo: loops reading input, runs a FuzzyQuery against the "Data"
// field, and prints up to 10 matches. Typing "all" dumps every document;
// typing "exit" quits.
static void Main(string[] args)
{
    var directory = LuceneIndex();
    var luceneIndexSearcher = new Lucene.Net.Search.IndexSearcher(directory);
    string input;
    Console.WriteLine(luceneIndexSearcher.MaxDoc);
    Console.WriteLine(luceneIndexSearcher.Doc(0));
    do
    {
        input = Console.ReadLine();
        //build a query object
        var searchTerm = new Lucene.Net.Index.Term("Data", input);
        // 0.05f = very permissive minimum similarity for fuzzy matching.
        var query = new Lucene.Net.Search.FuzzyQuery(searchTerm, 0.05f);
        //execute the query
        var hits = luceneIndexSearcher.Search(query, 10);
        //iterate over the results.
        for (int i = 0; i < Math.Min(10, hits.TotalHits); i++)
        {
            var doc = hits.ScoreDocs[i];
            string contentValue = luceneIndexSearcher.Doc(doc.Doc).Get("Data");
            Console.WriteLine(contentValue);
        }
        if (input == "all")
        {
            for (int i = 0; i < luceneIndexSearcher.MaxDoc; i++)
            {
                Console.WriteLine(luceneIndexSearcher.Doc(i));
            }
        }
    }while (input != "exit");
    directory.Dispose();
}
// Round-trips a BooleanQuery through BinaryFormatter serialization and
// checks the deserialized query is equal and produces the same hit count.
// NOTE(review): BinaryFormatter is obsolete and insecure for untrusted data;
// acceptable here only because serialization itself is what is under test.
public void TestBooleanQuerySerialization()
{
    Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery();
    lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Lucene.Net.Search.BooleanClause.Occur.MUST);
    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
    System.IO.MemoryStream ms = new System.IO.MemoryStream();
    bf.Serialize(ms, lucQuery);
    ms.Seek(0, System.IO.SeekOrigin.Begin);
    Lucene.Net.Search.BooleanQuery lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms);
    ms.Close();
    Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization");
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
    int hitCount = searcher.Search(lucQuery, 20).totalHits;
    searcher.Close();
    searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
    int hitCount2 = searcher.Search(lucQuery2, 20).totalHits;
    Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts");
}
// Warms a freshly opened reader by running a representative query and
// sanity-checking its hit count.
public override void Warm(IndexReader r)
{
    IndexSearcher warmSearcher = new IndexSearcher(r);
    Lucene.Net.Search.TopDocs warmHits = warmSearcher.Search(new TermQuery(new Term("foo", "bar")), 10);
    Assert.AreEqual(20, warmHits.TotalHits);
}
// Indexes a multi-field document (legacy pre-2.9 API) and verifies that a
// keyword search finds it and that DoAssert holds on the retrieved copy.
public virtual void TestGetValuesForIndexedDocument()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
    writer.AddDocument(MakeDocumentWithFields());
    writer.Close();
    Searcher searcher = new IndexSearcher(dir);
    // search for something that does exists
    Query query = new TermQuery(new Term("keyword", "test1"));
    // ensure that queries return expected results without DateFilter first
    Hits hits = searcher.Search(query);
    Assert.AreEqual(1, hits.Length());
    try
    {
        DoAssert(hits.Doc(0), true);
    }
    catch (System.Exception e)
    {
        // Log and continue; the searcher is still closed via finally.
        System.Console.Error.WriteLine(e.StackTrace);
        System.Console.Error.Write("\n");
    }
    finally
    {
        searcher.Close();
    }
}
// Builds a three-level nested SpanNearQuery whose innermost first clause
// ("nopayload") carries no payload, and checks the resulting spans.
public virtual void TestFirstClauseWithoutPayload()
{
    Spans spans;
    IndexSearcher searcher = GetSearcher();
    // Inner span: nopayload qq ss, ordered, slop 6.
    SpanQuery[] clauses = new SpanQuery[3];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq"));
    clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss"));
    SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true);
    // Middle span wraps "pp" with the inner span, unordered.
    SpanQuery[] clauses2 = new SpanQuery[2];
    clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp"));
    clauses2[1] = spanNearQuery;
    SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false);
    // Outer span wraps "np" with the middle span, unordered.
    SpanQuery[] clauses3 = new SpanQuery[2];
    clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
    clauses3[1] = snq;
    SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
    spans = nestedSpanNearQuery.GetSpans(searcher.IndexReader);
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    CheckSpans(spans, 1, new int[] { 3 });
}
// MMapDirectory smoke test (ignored) for the IState-threaded API variant:
// the trailing null arguments are the optional state parameter.
public virtual void TestMmapIndex()
{
    Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway");
    FSDirectory storeDirectory;
    storeDirectory = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null);
    // plan to add a set of useful stopwords, consider changing some of the
    // interior filters.
    StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>());
    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    IndexSearcher searcher = new IndexSearcher(storeDirectory, true, null);
    for (int dx = 0; dx < 1000; dx++)
    {
        System.String f = RandomField();
        Document doc = new Document();
        doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc, null);
    }
    searcher.Close();
    writer.Close();
    RmDir(new System.IO.FileInfo(storePathname));
}
// Verifies facet counts for CP_A/CP_B over a MatchAllDocsQuery, including
// that each dimension's children arrive sorted by non-increasing count.
public virtual void TestAllCounts()
{
    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);
    FacetsCollector sfc = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), sfc);
    Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);
    FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_A, CP_A);
    // NOTE(review): -1 appears to denote "dimension total not aggregated"
    // for CP_A under this configuration — confirm against the facet config.
    Assert.AreEqual(-1, (int)result.Value);
    int prevValue = int.MaxValue;
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(allExpectedCounts[CP_A + "/" + labelValue.label], labelValue.value);
        Assert.True((int)labelValue.value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.value + " prevValue=" + prevValue);
        prevValue = (int)labelValue.value;
    }
    result = facets.GetTopChildren(NUM_CHILDREN_CP_B, CP_B);
    Assert.AreEqual(allExpectedCounts[CP_B], result.Value);
    prevValue = int.MaxValue;
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(allExpectedCounts[CP_B + "/" + labelValue.label], labelValue.value);
        Assert.True((int)labelValue.value <= prevValue, "wrong sort order of sub results: labelValue.value=" + labelValue.value + " prevValue=" + prevValue);
        prevValue = (int)labelValue.value;
    }
    IOUtils.Close(indexReader, taxoReader);
}
// Indexes a single payload-bearing document and exercises
// PayloadSpanUtil.GetPayloadsForQuery for the term "rr"; payload contents
// are only printed (when DEBUG), not asserted.
public virtual void TestPayloadSpanUtil()
{
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetSimilarity(similarity);
    Document doc = new Document();
    doc.Add(new Field(PayloadHelper.FIELD, "xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();
    IndexSearcher searcher = new IndexSearcher(directory, true);
    IndexReader reader = searcher.IndexReader;
    PayloadSpanUtil psu = new PayloadSpanUtil(reader);
    System.Collections.Generic.ICollection <byte[]> payloads = psu.GetPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
    if (DEBUG)
    {
        System.Console.Out.WriteLine("Num payloads:" + payloads.Count);
    }
    System.Collections.IEnumerator it = payloads.GetEnumerator();
    while (it.MoveNext())
    {
        byte[] bytes = (byte[])it.Current;
        if (DEBUG)
        {
            System.Console.Out.WriteLine(new System.String(System.Text.UTF8Encoding.UTF8.GetChars(bytes)));
        }
    }
}
// Builds an in-memory index over a fixed set of payload-bearing documents
// and returns a read-only searcher over it.
private IndexSearcher GetSearcher()
{
    RAMDirectory indexDir = new RAMDirectory();
    PayloadAnalyzer payloadAnalyzer = new PayloadAnalyzer(this);
    System.String[] docs = new System.String[] { "xx rr yy mm pp", "xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten" };
    IndexWriter indexWriter = new IndexWriter(indexDir, payloadAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    indexWriter.SetSimilarity(similarity);
    foreach (System.String docText in docs)
    {
        Document indexedDoc = new Document();
        indexedDoc.Add(new Field(PayloadHelper.FIELD, docText, Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.AddDocument(indexedDoc);
    }
    indexWriter.Close();
    return new IndexSearcher(indexDir, true);
}
// Searches the per-book Lucene 4.8 index over the Html field, returning one
// page of results with up to three highlighted fragments per hit.
// NOTE(review): the result is computed synchronously and wrapped with
// Task.FromResult; the async signature matches the ISearch interface.
public Task <IEnumerable <ISearchItem> > Search(string pattern, int page)
{
    using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
    // Index lives next to the book file, extension = numeric Lucene version.
    using (Lucene.Net.Store.Directory index = new SimpleFSDirectory(Path.ChangeExtension(_bookFile.FullName, Convert.ToInt32(LuceneVersion.LUCENE_48).ToString())))
    using (IndexReader reader = DirectoryReader.Open(index))
    {
        Lucene.Net.Search.Query query = new QueryParser(LuceneVersion.LUCENE_48, nameof(TabHtmlText.Html), analyzer).Parse(pattern);
        Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(512, true);
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(reader);
        searcher.Search(query, collector);
        // Page the collected hits: skip page*PageSize, take PageSize.
        Lucene.Net.Search.TopDocs docs = collector.GetTopDocs(page * PageSize, PageSize);
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer) // SpanGradientFormatter
        {
            TextFragmenter = new SimpleSpanFragmenter(scorer, 30)
        };
        IEnumerable <ISearchItem> items = docs.ScoreDocs.Select(scoreDoc =>
        {
            Document doc = searcher.Doc(scoreDoc.Doc);
            string html = doc.Get(nameof(TabHtmlText.Html));
            string[] fragments = highlighter.GetBestFragments(new HTMLStripCharAnalyzer(), nameof(TabHtmlText.Html), html, 3);
            return(new SearchItem(int.Parse(doc.Get(nameof(TabHtmlText.NumId))), string.Join("\n", fragments)));
        });
        // Materialize before the using blocks dispose the reader/analyzer.
        return Task.FromResult(items.ToList().AsEnumerable());
    }
}
// Builds a 1000-doc fixture with three fields per doc (a no-payload field,
// a single-term field, and a doubled "multiField"), optimizes the index,
// and opens a searcher configured with the payload similarity.
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        Field noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
        //noPayloadField.setBoost(0);
        doc.Add(noPayloadField);
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("multiField", English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory);
    searcher.SetSimilarity(similarity);
}
// Verifies facet counts for the no-parents dimensions CP_C and CP_D over a
// MatchAllDocsQuery.
public virtual void TestNoParents()
{
    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    var taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);
    var sfc = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), sfc);
    Facets facets = GetTaxonomyFacetCounts(taxoReader, Config, sfc);
    FacetResult result = facets.GetTopChildren(NUM_CHILDREN_CP_C, CP_C);
    Assert.AreEqual(allExpectedCounts[CP_C], result.Value);
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(allExpectedCounts[CP_C + "/" + labelValue.label], labelValue.value);
    }
    result = facets.GetTopChildren(NUM_CHILDREN_CP_D, CP_D);
    // NOTE(review): this asserts the CP_D result against allExpectedCounts[CP_C].
    // It looks like a copy-paste of the CP_C assert above (compare the
    // matching-key pattern in the sibling tests); it may only pass because
    // both dimensions happen to total the same. Confirm intent before changing.
    Assert.AreEqual(allExpectedCounts[CP_C], result.Value);
    foreach (LabelAndValue labelValue in result.LabelValues)
    {
        Assert.AreEqual(allExpectedCounts[CP_D + "/" + labelValue.label], labelValue.value);
    }
    IOUtils.Close(indexReader, taxoReader);
}
// Demo test for the 2.9-era API: indexes one document in a RAMDirectory and
// searches it with the TopDocs/ScoreDoc API via a read-only searcher.
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.AddDocument(doc);
    iwriter.Close();
    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    // Iterate through the results:
    for (int i = 0; i < hits.Length; i++)
    {
        Document hitDoc = isearcher.Doc(hits[i].Doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }
    isearcher.Close();
    directory.Close();
}
// Demo test, older port variant: note the lowercase `.scoreDocs` / `.doc`
// member access and the version-less QueryParser constructor, which differ
// from the newer port of this same test.
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    System.String text = "This is the text to be indexed.";
    doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.AddDocument(doc);
    iwriter.Close();
    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser("fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    // Iterate through the results:
    for (int i = 0; i < hits.Length; i++)
    {
        Document hitDoc = isearcher.Doc(hits[i].doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }
    isearcher.Close();
    directory.Close();
}
// Builds a deeply nested SpanNearQuery (ordered spans nested inside an
// unordered outer span) and checks that two matching spans end at position 8.
public virtual void TestHeavilyNestedSpanQuery()
{
    Spans spans;
    IndexSearcher searcher = GetSearcher();
    // Inner ordered span: one two three, slop 5.
    SpanQuery[] clauses = new SpanQuery[3];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "two"));
    clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
    SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 5, true);
    // Second level: wrap the inner span with five, six; ordered, slop 6.
    clauses = new SpanQuery[3];
    clauses[0] = spanNearQuery;
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "five"));
    clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "six"));
    SpanNearQuery spanNearQuery2 = new SpanNearQuery(clauses, 6, true);
    // Unordered pair: eleven ten, slop 2.
    SpanQuery[] clauses2 = new SpanQuery[2];
    clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "eleven"));
    clauses2[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ten"));
    SpanNearQuery spanNearQuery3 = new SpanNearQuery(clauses2, 2, false);
    // Outer unordered span combining nine + both nested spans.
    SpanQuery[] clauses3 = new SpanQuery[3];
    clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nine"));
    clauses3[1] = spanNearQuery2;
    clauses3[2] = spanNearQuery3;
    SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
    spans = nestedSpanNearQuery.GetSpans(searcher.IndexReader);
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    CheckSpans(spans, 2, new int[] { 8, 8 });
}
// Captures the state shared with the enclosing anonymous-class port; the
// assignments are independent of one another.
private void InitBlock(int[] lastDoc, Lucene.Net.Search.Query q, Lucene.Net.Search.IndexSearcher s, float maxDiff, IndexReader[] lastReader)
{
    this.lastReader = lastReader;
    this.maxDiff = maxDiff;
    this.s = s;
    this.q = q;
    this.lastDoc = lastDoc;
}
// Runs a TermQuery against the directory and returns the total hit count.
private int GetHitCount(Directory dir, Term term)
{
    IndexSearcher hitSearcher = new IndexSearcher(dir, true, null);
    TermQuery termQuery = new TermQuery(term);
    int totalHits = hitSearcher.Search(termQuery, null, 1000, null).TotalHits;
    hitSearcher.Close();
    return totalHits;
}
// Builds the shared 1000-doc payload fixture and caches its reader.
public override void SetUp()
{
    base.SetUp();
    searcher = new PayloadHelper().SetUp(similarity, 1000);
    indexReader = searcher.IndexReader;
}
/// <summary>
/// Searches the member index on the "SearchContent" field and maps each hit
/// to a <c>LuceneData</c> row. Best-effort: returns an empty list when the
/// search fails.
/// </summary>
/// <param name="searchTerm">Query text in Lucene query syntax.</param>
/// <returns>The matching members; empty on no hits or on error.</returns>
public List <LuceneData> MemberSearch(string searchTerm)
{
    var searchData = new List <LuceneData>();
    try
    {
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(_indexFileLocation);
        //create an analyzer to process the text
        Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
        //create the query parser, with the default search field set to "SearchContent"
        Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("SearchContent", analyzer);
        //parse the query string into a Query object
        Lucene.Net.Search.Query query = queryParser.Parse(searchTerm);
        //read-only searcher that will perform the search
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
        try
        {
            Lucene.Net.Search.Hits hits = searcher.Search(query);
            //iterate over the results
            for (int i = 0; i < hits.Length(); i++)
            {
                Lucene.Net.Documents.Document doc = hits.Doc(i);
                searchData.Add(new LuceneData
                {
                    MemberID = Convert.ToInt32(doc.Get("MemberID")),
                    FirstName = doc.Get("FirstName"),
                    LastName = doc.Get("LastName"),
                    CompanyName = doc.Get("CompanyName"),
                    City = doc.Get("City"),
                    State = doc.Get("State"),
                    PostalCode = doc.Get("PostalCode")
                });
            }
        }
        finally
        {
            // Release the searcher even on failure (the original leaked it).
            searcher.Close();
        }
    }
    catch (Exception)
    {
        // Deliberate swallow so callers get an empty result on failure.
        // NOTE(review): consider logging the exception instead of discarding it.
    }
    return searchData;
}
// Builds an in-memory index with one document per docFields entry,
// then opens a searcher over it.
// FIX: removed the unused local StringBuilder `buffer` (dead code).
public virtual void SetUp()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    foreach (var content in docFields)
    {
        Document doc = new Document();
        doc.Add(Field.Text(field, content));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public override void SetUp()
{
    base.SetUp();
    // One analyzed, unstored document per docFields entry in a RAM index.
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    foreach (var text in docFields)
    {
        var doc = new Document();
        doc.Add(new Field(FIELD, text, Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public override void SetUp()
{
    base.SetUp();
    // Single-document index: a keyword-style part number plus an analyzed
    // description field.
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    var doc = new Document();
    doc.Add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(doc);
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public override void SetUp()
{
    base.SetUp();
    // Index every docFields entry as a stored, tokenized document.
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    foreach (var content in docFields)
    {
        var doc = new Lucene.Net.Documents.Document();
        doc.Add(new Field(field, content, Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
    //System.out.println("set up " + getName());
}
public override void SetUp()
{
    base.SetUp();
    // Index the English spellings of 0..999 into "field", then open a
    // read-only searcher over the result.
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.infoStream = System.out;
    for (int docId = 0; docId < 1000; docId++)
    {
        var doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(docId), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true);
}
// Exercises the (deprecated) HitIterator API over a two-document index:
// iteration order, element access, and the out-of-range behavior of
// Current past the end.
// FIXES: close the searcher (it leaked), drop the unused exception local
// (CS0168 warning) and the no-op Assert.IsTrue(true).
public virtual void TestIterator()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("field", "iterator test doc 1", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    doc = new Document();
    doc.Add(new Field("field", "iterator test doc 2", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();
    _TestUtil.CheckIndex(directory);
    IndexSearcher searcher = new IndexSearcher(directory);
    Hits hits = searcher.Search(new TermQuery(new Term("field", "iterator")));
    HitIterator iterator = (HitIterator) hits.Iterator();
    Assert.AreEqual(2, iterator.Length());
    Assert.IsTrue(iterator.MoveNext());
    Hit hit = (Hit) iterator.Current;
    Assert.AreEqual("iterator test doc 1", hit.Get("field"));
    Assert.IsTrue(iterator.MoveNext());
    hit = (Hit) iterator.Current;
    Assert.AreEqual("iterator test doc 2", hit.GetDocument().Get("field"));
    Assert.IsFalse(iterator.MoveNext());
    // Reading Current past the end must throw.
    bool caughtException = false;
    try
    {
        System.Object generatedAux = iterator.Current;
    }
    catch (System.ArgumentOutOfRangeException)
    {
        caughtException = true;
    }
    Assert.IsTrue(caughtException);
    searcher.Close();
}
// Entry point: runs a fixed batch of conjunction queries against a
// prebuilt e-mail index and reports "done". Timing output is handled by
// the searchFor() helper defined elsewhere in this file.
public static void Main(System.String[] args)
{
    // Hard-coded location of the index under test.
    System.String index = @"c:\EmailTest\LuceneDB";
    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo(index)), true); // only searching, so read-only=true
    Searcher searcher = new IndexSearcher(reader);
    if (Stopwatch.IsHighResolution)
    {
        System.Console.WriteLine("We have a high resolution timer with an frequency of {0} ticks/ms", Stopwatch.Frequency/1000);
    }
    // Four representative AND queries.
    searchFor(searcher, "jeske AND neotonic");
    searchFor(searcher, "noticed AND problems");
    searchFor(searcher, "data AND returned");
    searchFor(searcher, "scott AND hassan");
    searcher.Close();
    reader.Close();
    System.Console.WriteLine("done");
}
/// <summary>
/// Returns true when a document with the given URL already exists in the
/// index. Best-effort: any search failure is treated as "not indexed".
/// </summary>
/// <param name="url">Exact URL term to look up in the "url" field.</param>
public static bool PreviouslyIndexed(string url)
{
    string indexFileLocation = indexDir;
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
    Lucene.Net.Search.Hits hits = null;
    try
    {
        // Exact-match lookup on the stored "url" term.
        Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", url));
        hits = searcher.Search(query);
    }
    catch
    {
        // Swallow and fall through: a failed search reports "not indexed".
    }
    finally
    {
        searcher.Close();
    }
    // BUG FIX: the original dereferenced `hits` unconditionally, throwing a
    // NullReferenceException whenever the search itself failed.
    return hits != null && hits.Length() > 0;
}
/// <summary>
/// Searches the index for projects matching <paramref name="s"/>: a term
/// query on "content" is combined (Query.Combine) with a term query on
/// "url" (using the class-level fromUrl) and one on "title". Best-effort:
/// on any failure the partial results collected so far are returned.
/// </summary>
/// <param name="s">Raw term text matched against content and title.</param>
/// <returns>De-duplicated matching items; possibly empty.</returns>
public static List<IndexedItem> SearchProjects(string s)
{
    List<IndexedItem> retVal = new List<IndexedItem>();
    string indexFileLocation = indexDir;
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
    try
    {
        Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("content", s));
        // Fold in the url and title clauses; Combine() merges the queries.
        query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", fromUrl)) });
        query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("title", s)) });
        //execute the query
        Lucene.Net.Search.Hits hits = searcher.Search(query);
        //iterate over the results.
        for (int i = 0; i < hits.Length(); i++)
        {
            Lucene.Net.Documents.Document doc = hits.Doc(i);
            string article = doc.Get("content");
            string title = doc.Get("title");
            string url = doc.Get("url");
            retVal.Add(new IndexedItem { Article = article, Href = url, Title = title });
        }
        // Debug trace of every href found.
        foreach (IndexedItem ind in retVal)
        {
            Console.WriteLine(ind.Href);
        }
        // Drop duplicates (relies on IndexedItem equality semantics).
        retVal = retVal.Distinct().ToList();
    }
    catch
    {
        // NOTE(review): deliberate swallow — failures return partial
        // results; consider logging the exception.
    }
    finally
    {
        searcher.Close();
    }
    return retVal;
}
public override Weight CreateWeight(IndexSearcher searcher)
{
    // Weight for the base query plus one entry (Filter or Weight) per
    // drill-down dimension.
    Weight baseWeight = baseQuery.CreateWeight(searcher);
    object[] drillDowns = new object[drillDownQueries.Length];
    for (int dim = 0; dim < drillDownQueries.Length; dim++)
    {
        Query query = drillDownQueries[dim];
        Filter filter = DrillDownQuery.GetFilter(query);
        if (filter == null)
        {
            // No filter form available; fall back to a rewritten weight.
            // TODO: would be nice if we could say "we will do no
            // scoring" here....
            drillDowns[dim] = searcher.Rewrite(query).CreateWeight(searcher);
        }
        else
        {
            drillDowns[dim] = filter;
        }
    }
    return new WeightAnonymousInnerClassHelper(this, baseWeight, drillDowns);
}
// Test that queries based on reverse/ordFieldScore scores correctly
private void DoTestRank(System.String field, bool inOrder)
{
    IndexSearcher searcher = new IndexSearcher(dir, true);
    // Ordinal source when ascending; reverse-ordinal source otherwise.
    ValueSource source = inOrder ? (ValueSource) new OrdFieldSource(field) : new ReverseOrdFieldSource(field);
    Query q = new ValueSourceQuery(source);
    Log("test: " + q);
    QueryUtils.Check(q, searcher);
    ScoreDoc[] results = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(N_DOCS, results.Length, "All docs should be matched!");
    // Sentinel id the first result is compared against ("ID0001", etc.).
    System.String prevID = inOrder ? "IE" : "IC";
    for (int i = 0; i < results.Length; i++)
    {
        System.String resID = searcher.Doc(results[i].Doc).Get(ID_FIELD);
        Log(i + ". score=" + results[i].Score + " - " + resID);
        Log(searcher.Explain(q, results[i].Doc));
        if (inOrder)
        {
            Assert.IsTrue(String.CompareOrdinal(resID, prevID) < 0, "res id " + resID + " should be < prev res id " + prevID);
        }
        else
        {
            Assert.IsTrue(String.CompareOrdinal(resID, prevID) > 0, "res id " + resID + " should be > prev res id " + prevID);
        }
        prevID = resID;
    }
}
// Smoke test for an mmap-backed FSDirectory: writes 1000 random-field
// documents, then removes the on-disk index.
public virtual void TestMmapIndex()
{
    FSDirectory storeDirectory;
    storeDirectory = FSDirectory.GetDirectory(storePathname);
    // plan to add a set of useful stopwords, consider changing some of the
    // interior filters.
    StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable());
    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true);
    // NOTE(review): the searcher is opened before any documents are added
    // and never runs a query — presumably it only exercises opening the
    // directory via mmap; confirm intent before changing.
    IndexSearcher searcher = new IndexSearcher(storePathname);
    for (int dx = 0; dx < 1000; dx++)
    {
        System.String f = RandomField();
        Document doc = new Document();
        doc.Add(new Field("data", f, Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);
    }
    searcher.Close();
    writer.Close();
    // Clean up the on-disk index files.
    RmDir(new System.IO.FileInfo(storePathname));
}
// Command-line driver for MoreLikeThis: builds a similarity query from a
// URL (-url) or a file (-f) against the index named by -i, then prints up
// to 25 matches with score/url/title/summary.
public static void Main(System.String[] a)
{
    // Defaults used when no -i / -f / -url flags are supplied.
    System.String indexName = "localhost_index";
    System.String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
    System.Uri url = null;
    // Minimal flag parsing; each option consumes the following argument.
    for (int i = 0; i < a.Length; i++)
    {
        if (a[i].Equals("-i"))
        {
            indexName = a[++i];
        }
        else if (a[i].Equals("-f"))
        {
            fn = a[++i];
        }
        else if (a[i].Equals("-url"))
        {
            url = new System.Uri(a[++i]);
        }
    }
    // Auto-flushing writer over stdout, honoring the console encoding.
    System.IO.StreamWriter temp_writer;
    temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
    temp_writer.AutoFlush = true;
    System.IO.StreamWriter o = temp_writer;
    IndexReader r = IndexReader.Open(indexName);
    o.WriteLine("Open index " + indexName + " which has " + r.NumDocs() + " docs");
    MoreLikeThis mlt = new MoreLikeThis(r);
    o.WriteLine("Query generation parameters:");
    o.WriteLine(mlt.DescribeParams());
    o.WriteLine();
    // URL input takes precedence over the file when both are present.
    Query query = null;
    if (url != null)
    {
        o.WriteLine("Parsing URL: " + url);
        query = mlt.Like(url);
    }
    else if (fn != null)
    {
        o.WriteLine("Parsing file: " + fn);
        query = mlt.Like(new System.IO.FileInfo(fn));
    }
    o.WriteLine("q: " + query);
    o.WriteLine();
    IndexSearcher searcher = new IndexSearcher(indexName);
    Hits hits = searcher.Search(query);
    int len = hits.Length();
    o.WriteLine("found: " + len + " documents matching");
    o.WriteLine();
    // Show at most the first 25 hits.
    for (int i = 0; i < System.Math.Min(25, len); i++)
    {
        Document d = hits.Doc(i);
        System.String summary = d.Get("summary");
        o.WriteLine("score : " + hits.Score(i));
        o.WriteLine("url : " + d.Get("url"));
        o.WriteLine("\ttitle : " + d.Get("title"));
        if (summary != null)
            o.WriteLine("\tsummary: " + d.Get("summary"));
        o.WriteLine();
    }
}
// Highlighting across a MultiSearcher: two single-document RAM indexes
// are searched with the prefix query "multi*"; the query is then rewritten
// against each reader and combined so the highlighter sees the expanded
// terms. Exactly one highlight per document (2 total) is expected.
public virtual void TestMultiSearcher()
{
    //setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(), true);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer1.AddDocument(d);
    writer1.Optimize();
    writer1.Close();
    IndexReader reader1 = IndexReader.Open(ramDir1);
    //setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(), true);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.TOKENIZED);
    d.Add(f);
    writer2.AddDocument(d);
    writer2.Optimize();
    writer2.Close();
    IndexReader reader2 = IndexReader.Open(ramDir2);
    IndexSearcher[] searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1);
    searchers[1] = new IndexSearcher(ramDir2);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    // Force boolean rewrite so the prefix query expands to scoring terms.
    parser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.Parse("multi*");
    System.Console.Out.WriteLine("Searching for: " + query.ToString(FIELD_NAME));
    //at this point the multisearcher calls combine(query[])
    hits = multiSearcher.Search(query);
    //query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer());
    // Mirror the MultiSearcher's internal rewrite+combine so the
    // highlighter's QueryScorer works on concrete terms.
    Query[] expandedQueries = new Query[2];
    expandedQueries[0] = query.Rewrite(reader1);
    expandedQueries[1] = query.Rewrite(reader2);
    query = query.Combine(expandedQueries);
    //create an instance of the highlighter with the tags used to surround highlighted text
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    for (int i = 0; i < hits.Length(); i++)
    {
        System.String text = hits.Doc(i).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlightedText = highlighter.GetBestFragment(tokenStream, text);
        System.Console.Out.WriteLine(highlightedText);
    }
    Assert.IsTrue(numHighlights == 2, "Failed to find correct number of highlights " + numHighlights + " found");
}
// Test that FieldScoreQuery returns docs with expected score.
private void DoTestExactScore(System.String field, FieldScoreQuery.Type tp)
{
    IndexSearcher searcher = new IndexSearcher(dir, true);
    Query q = new FieldScoreQuery(field, tp);
    TopDocs topDocs = searcher.Search(q, null, 1000);
    Assert.AreEqual(N_DOCS, topDocs.TotalHits, "All docs should be matched!");
    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
    {
        float score = scoreDoc.Score;
        Log(searcher.Explain(q, scoreDoc.Doc));
        // The stored id encodes the expected score, e.g. "ID7" --> 7.0
        System.String id = searcher.IndexReader.Document(scoreDoc.Doc).Get(ID_FIELD);
        float expectedScore = ExpectedFieldScore(id);
        Assert.AreEqual(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA, "score of " + id + " shuould be " + expectedScore + " != " + score);
    }
}
// Test that FieldScoreQuery returns docs in expected order.
private void DoTestRank(System.String field, FieldScoreQuery.Type tp)
{
    IndexSearcher searcher = new IndexSearcher(dir, true);
    Query q = new FieldScoreQuery(field, tp);
    Log("test: " + q);
    QueryUtils.Check(q, searcher);
    ScoreDoc[] results = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(N_DOCS, results.Length, "All docs should be matched!");
    // Sentinel id greater than all ids of docs in this test.
    System.String prevID = "ID" + (N_DOCS + 1);
    for (int i = 0; i < results.Length; i++)
    {
        System.String resID = searcher.Doc(results[i].Doc).Get(ID_FIELD);
        Log(i + ". score=" + results[i].Score + " - " + resID);
        Log(searcher.Explain(q, results[i].Doc));
        // Ids must strictly decrease as scores decrease.
        Assert.IsTrue(String.CompareOrdinal(resID, prevID) < 0, "res id " + resID + " should be < prev res id " + prevID);
        prevID = resID;
    }
}
// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
private void DoTestCaching(System.String field, FieldScoreQuery.Type tp)
{
    // prepare expected array types for comparison
    System.Collections.Hashtable expectedArrayTypes = new System.Collections.Hashtable();
    expectedArrayTypes[FieldScoreQuery.Type.BYTE] = new sbyte[0];
    expectedArrayTypes[FieldScoreQuery.Type.SHORT] = new short[0];
    expectedArrayTypes[FieldScoreQuery.Type.INT] = new int[0];
    expectedArrayTypes[FieldScoreQuery.Type.FLOAT] = new float[0];
    IndexSearcher s = new IndexSearcher(dir, true);
    // One cached-values slot per index segment.
    System.Object[] innerArray = new Object[s.IndexReader.GetSequentialSubReaders().Length];
    bool warned = false; // print warning once.
    // Run the same query 10 times: iteration 0 records each segment's value
    // array (and checks its element type); later iterations must get the
    // very same array instance back from the field cache.
    for (int i = 0; i < 10; i++)
    {
        FieldScoreQuery q = new FieldScoreQuery(field, tp);
        ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(N_DOCS, h.Length, "All docs should be matched!");
        IndexReader[] readers = s.IndexReader.GetSequentialSubReaders();
        for (int j = 0; j < readers.Length; j++)
        {
            IndexReader reader = readers[j];
            try
            {
                if (i == 0)
                {
                    innerArray[j] = q.valSrc.GetValues(reader).InnerArray;
                    Log(i + ". compare: " + innerArray[j].GetType() + " to " + expectedArrayTypes[tp].GetType());
                    Assert.AreEqual(innerArray[j].GetType(), expectedArrayTypes[tp].GetType(), "field values should be cached in the correct array type!");
                }
                else
                {
                    Log(i + ". compare: " + innerArray[j] + " to " + q.valSrc.GetValues(reader).InnerArray);
                    Assert.AreSame(innerArray[j], q.valSrc.GetValues(reader).InnerArray, "field values should be cached and reused!");
                }
            }
            catch (System.NotSupportedException)
            {
                // Some value sources cannot expose their inner array; warn
                // once and keep going.
                if (!warned)
                {
                    System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q);
                    warned = true;
                }
            }
        }
    }
    // verify new values are reloaded (not reused) for a new reader
    s = new IndexSearcher(dir, true);
    FieldScoreQuery q2 = new FieldScoreQuery(field, tp);
    ScoreDoc[] h2 = s.Search(q2, null, 1000).ScoreDocs;
    Assert.AreEqual(N_DOCS, h2.Length, "All docs should be matched!");
    IndexReader[] readers2 = s.IndexReader.GetSequentialSubReaders();
    for (int j = 0; j < readers2.Length; j++)
    {
        IndexReader reader = readers2[j];
        try
        {
            Log("compare: " + innerArray + " to " + q2.valSrc.GetValues(reader).InnerArray);
            Assert.AreNotSame(innerArray, q2.valSrc.GetValues(reader).InnerArray, "cached field values should not be reused if reader as changed!");
        }
        catch (System.NotSupportedException)
        {
            if (!warned)
            {
                System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q2);
                warned = true;
            }
        }
    }
}
// Verifies that varying MockFSDirectory buffer sizes between operations
// does not corrupt reads: doc-freq checks, deletions, and term queries
// must stay consistent while TweakBufferSizes() changes the buffers.
public virtual void TestSetBufferSize()
{
    System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize"));
    MockFSDirectory dir = new MockFSDirectory(indexDir);
    try
    {
        // 37 docs, compound file disabled so several index files are read.
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
        writer.SetUseCompoundFile(false);
        for (int i = 0; i < 37; i++)
        {
            Document doc = new Document();
            doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.TOKENIZED));
            writer.AddDocument(doc);
        }
        writer.Close();
        dir.allIndexInputs.Clear();
        IndexReader reader = IndexReader.Open(dir);
        Term aaa = new Term("content", "aaa");
        Term bbb = new Term("content", "bbb");
        Term ccc = new Term("content", "ccc");
        Assert.AreEqual(reader.DocFreq(ccc), 37);
        // DocFreq ignores deletions, so the counts stay at 37 below.
        reader.DeleteDocument(0);
        Assert.AreEqual(reader.DocFreq(aaa), 37);
        dir.TweakBufferSizes();
        reader.DeleteDocument(4);
        Assert.AreEqual(reader.DocFreq(bbb), 37);
        dir.TweakBufferSizes();
        // Searches DO honor deletions: 37 docs minus the 2 deleted = 35.
        IndexSearcher searcher = new IndexSearcher(reader);
        Hits hits = searcher.Search(new TermQuery(bbb));
        dir.TweakBufferSizes();
        Assert.AreEqual(35, hits.Length());
        dir.TweakBufferSizes();
        hits = searcher.Search(new TermQuery(new Term("id", "33")));
        dir.TweakBufferSizes();
        Assert.AreEqual(1, hits.Length());
        hits = searcher.Search(new TermQuery(aaa));
        dir.TweakBufferSizes();
        Assert.AreEqual(35, hits.Length());
        searcher.Close();
        reader.Close();
    }
    finally
    {
        // Always remove the on-disk scratch index.
        _TestUtil.RmDir(indexDir);
    }
}
// Test that queries based on reverse/ordFieldScore returns docs with expected score.
private void DoTestExactScore(System.String field, bool inOrder)
{
    IndexSearcher searcher = new IndexSearcher(dir, true);
    // Ordinal source when ascending; reverse-ordinal source otherwise.
    ValueSource source = inOrder ? (ValueSource) new OrdFieldSource(field) : new ReverseOrdFieldSource(field);
    Query q = new ValueSourceQuery(source);
    TopDocs td = searcher.Search(q, null, 1000);
    Assert.AreEqual(N_DOCS, td.TotalHits, "All docs should be matched!");
    ScoreDoc[] sd = td.ScoreDocs;
    for (int i = 0; i < sd.Length; i++)
    {
        float score = sd[i].Score;
        System.String id = searcher.IndexReader.Document(sd[i].Doc).Get(ID_FIELD);
        Log("-------- " + i + ". Explain doc " + id);
        Log(searcher.Explain(q, sd[i].Doc));
        // Scores descend linearly: N_DOCS, N_DOCS-1, ...
        float expectedScore = N_DOCS - i;
        Assert.AreEqual(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA, "score of result " + i + " shuould be " + expectedScore + " != " + score);
        // reverse ==> smaller values first
        System.String expectedId = inOrder ? Id2String(N_DOCS - i) : Id2String(i + 1);
        Assert.IsTrue(expectedId.Equals(id), "id of result " + i + " shuould be " + expectedId + " != " + score);
    }
}
/// <summary>
/// Interactive console search loop: reads one query per line from stdin,
/// prints matches (path, or url/title) in pages of 10, and exits at EOF.
/// BUG FIX: StreamReader.ReadLine() returns null at end of stream; the
/// original checked line.Length == -1 (impossible) and threw a
/// NullReferenceException at EOF instead of exiting cleanly. Same hazard
/// fixed at the "more (y/n)" prompt.
/// </summary>
public static void Main(System.String[] args)
{
    try
    {
        Searcher searcher = new IndexSearcher(@"index");
        Analyzer analyzer = new StandardAnalyzer();
        System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);
        while (true)
        {
            System.Console.Out.Write("Query: ");
            System.String line = in_Renamed.ReadLine();
            if (line == null)
                break;
            Query query = QueryParser.Parse(line, "contents", analyzer);
            System.Console.Out.WriteLine("Searching for: " + query.ToString("contents"));
            Hits hits = searcher.Search(query);
            System.Console.Out.WriteLine(hits.Length() + " total matching documents");
            int HITS_PER_PAGE = 10;
            for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
            {
                int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
                for (int i = start; i < end; i++)
                {
                    Document doc = hits.Doc(i);
                    System.String path = doc.Get("path");
                    if (path != null)
                    {
                        System.Console.Out.WriteLine(i + ". " + path);
                    }
                    else
                    {
                        System.String url = doc.Get("url");
                        if (url != null)
                        {
                            System.Console.Out.WriteLine(i + ". " + url);
                            System.Console.Out.WriteLine(" - " + doc.Get("title"));
                        }
                        else
                        {
                            System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document");
                        }
                    }
                }
                if (hits.Length() > end)
                {
                    System.Console.Out.Write("more (y/n) ? ");
                    line = in_Renamed.ReadLine();
                    // Treat EOF the same as answering "no".
                    if (line == null || line.Length == 0 || line[0] == 'n')
                        break;
                }
            }
        }
        searcher.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
public override void SetUp()
{
    base.SetUp();
    // 1000 docs, each carrying the same english number in a payload-free
    // field, a payloaded "field", and a doubled "multiField"; the custom
    // similarity is installed on both the writer and the searcher.
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int n = 0; n < 1000; n++)
    {
        System.String english = English.IntToEnglish(n);
        Document doc = new Document();
        //noPayloadField.setBoost(0);
        doc.Add(new Field(PayloadHelper.NO_PAYLOAD_FIELD, english, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("field", english, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("multiField", english + " " + english, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();
    searcher = new IndexSearcher(directory);
    searcher.SetSimilarity(similarity);
}
// Runs the query from QueryInputBox against the Canon index, highlights
// one fragment for each of the first 10 hits, and opens a result window.
// NOTE(review): assumes at least 10 matches exist — hits.Doc(i) will
// throw if fewer documents match; confirm against typical index content.
private void Search()
{
    try
    {
        SearchProgressBar.Maximum = 11;
        ProgressLabel.Text = "Progress: Initialize Search ...";
        Searcher searcher = new IndexSearcher(@"Canon\index");
        Analyzer analyzer = new StandardAnalyzer();
        ArrayList resultList = new ArrayList();
        // NOTE(review): this stdin reader is never used; likely left over
        // from the console version of this code.
        System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);
        String line = QueryInputBox.Text;
        // NOTE(review): Length can never be -1; this guard is dead code.
        if (line.Length == - 1)
            return;
        ProgressLabel.Text = "Progress: Parsing Query ...";
        Query query = QueryParser.Parse(line, "contents", analyzer);
        //int[] ix = qtm.GetTermFrequencies();
        Hits hits = searcher.Search(query);
        SearchProgressBar.Increment(1);
        ProgressLabel.Text = "Progress: Searched. Analyzing results ...";
        //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, new WhitespaceAnalyzer(), "<B>", "</B>");
        // Highlight with 80-character fragments, one fragment per hit.
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.SetTextFragmenter(new SimpleFragmenter(80));
        int maxNumFragmentsRequired = 1;
        //int HITS_PER_PAGE = 10;
        for (int i = 0; i < 10; i++)
        {
            SearchProgressBar.Increment(1);
            ProgressLabel.Text = "Progress: Analyzing hit " + (i+1).ToString();
            // get the document from index
            Document doc = hits.Doc(i);
            //SegmentReader ir = new SegmentReader();
            //Lucene.Net.Index.TermFreqVector tfv =
            //tfv.GetTermFrequencies
            string score = hits.Score(i).ToString();
            //Box += "Hit no. " + i + " scored: " + score + " occ: " + /*highlighter.tokenFrequency */ " best fragment: \n";
            ResultSet a = new ResultSet();
            a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\","");
            a.Score = hits.Score(i);
            a.numberOfHits = hits.Length();
            // get the document filename
            // we can't get the text from the index
            //because we didn't store it there
            //so get it from archive
            string path = doc.Get("path");
            string name = GetInternalName(path);
            PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
            path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
            string plainText = "";
            //load text from zip archive temporarily
            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
            {
                plainText = parseHtml(sr.ReadToEnd());
            }
            //-------------------------------Highlighter Code 1.4
            TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
            a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");
            // Remove the temporarily extracted file again.
            if(File.Exists(path))
                File.Delete(path);
            //-------------------------------
            resultList.Add(a);
        }
        SearchProgressBar.Value = 0;
        searcher.Close();
        // Hand the collected results to the result window and wire its
        // callbacks back into this form.
        ssr = new ShowSearchResults(/*Box*/resultList);
        //this.Hide();
        ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
        ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
        this.Hide();
        ssr.ShowDialog();
    }
    catch (System.Exception e)
    {
        MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
private void DoTestCaching(System.String field, bool inOrder)
{
    IndexSearcher s = new IndexSearcher(dir, true);
    System.Object innerArray = null;
    bool warned = false; // print warning once
    // Run the same ord/reverse-ord query 10 times: iteration 0 records the
    // cached value array; later iterations must get back the very same
    // array instance from the field cache.
    for (int i = 0; i < 10; i++)
    {
        ValueSource vs;
        if (inOrder)
        {
            vs = new OrdFieldSource(field);
        }
        else
        {
            vs = new ReverseOrdFieldSource(field);
        }
        ValueSourceQuery q = new ValueSourceQuery(vs);
        ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
        try
        {
            Assert.AreEqual(N_DOCS, h.Length, "All docs should be matched!");
            IndexReader[] readers = s.IndexReader.GetSequentialSubReaders();
            for (int j = 0; j < readers.Length; j++)
            {
                IndexReader reader = readers[j];
                if (i == 0)
                {
                    innerArray = q.valSrc.GetValues(reader).InnerArray;
                }
                else
                {
                    Log(i + ". compare: " + innerArray + " to " + q.valSrc.GetValues(reader).InnerArray);
                    Assert.AreSame(innerArray, q.valSrc.GetValues(reader).InnerArray, "field values should be cached and reused!");
                }
            }
        }
        catch (System.NotSupportedException)
        {
            // Some value sources cannot expose their inner array; warn once.
            if (!warned)
            {
                System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q);
                warned = true;
            }
        }
    }
    ValueSource vs2;
    ValueSourceQuery q2;
    ScoreDoc[] h2;
    // verify that different values are loaded for a different field
    System.String field2 = INT_FIELD;
    Assert.IsFalse(field.Equals(field2)); // otherwise this test is meaningless.
    if (inOrder)
    {
        vs2 = new OrdFieldSource(field2);
    }
    else
    {
        vs2 = new ReverseOrdFieldSource(field2);
    }
    q2 = new ValueSourceQuery(vs2);
    h2 = s.Search(q2, null, 1000).ScoreDocs;
    Assert.AreEqual(N_DOCS, h2.Length, "All docs should be matched!");
    IndexReader[] readers2 = s.IndexReader.GetSequentialSubReaders();
    for (int j = 0; j < readers2.Length; j++)
    {
        IndexReader reader = readers2[j];
        try
        {
            Log("compare (should differ): " + innerArray + " to " + q2.valSrc.GetValues(reader).InnerArray);
            Assert.AreNotSame(innerArray, q2.valSrc.GetValues(reader).InnerArray, "different values shuold be loaded for a different field!");
        }
        catch (System.NotSupportedException)
        {
            if (!warned)
            {
                System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q2);
                warned = true;
            }
        }
    }
    // verify new values are reloaded (not reused) for a new reader
    s = new IndexSearcher(dir, true);
    if (inOrder)
    {
        vs2 = new OrdFieldSource(field);
    }
    else
    {
        vs2 = new ReverseOrdFieldSource(field);
    }
    q2 = new ValueSourceQuery(vs2);
    h2 = s.Search(q2, null, 1000).ScoreDocs;
    Assert.AreEqual(N_DOCS, h2.Length, "All docs should be matched!");
    readers2 = s.IndexReader.GetSequentialSubReaders();
    for (int j = 0; j < readers2.Length; j++)
    {
        IndexReader reader = readers2[j];
        try
        {
            Log("compare (should differ): " + innerArray + " to " + q2.valSrc.GetValues(reader).InnerArray);
            Assert.AreNotSame(innerArray, q2.valSrc.GetValues(reader).InnerArray, "cached field values should not be reused if reader as changed!");
        }
        catch (System.NotSupportedException)
        {
            if (!warned)
            {
                System.Console.Error.WriteLine("WARNING: " + TestName() + " cannot fully test values of " + q2);
                warned = true;
            }
        }
    }
}
public virtual void TestNPESpanQuery()
{
    // Two-document index used to reproduce an NPE in nested span queries.
    Directory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new System.Collections.Hashtable(0)), IndexWriter.MaxFieldLength.LIMITED);
    // Add documents
    AddDoc(writer, "1", "the big dogs went running to the market");
    AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");
    // Commit
    writer.Close();
    // Get searcher
    IndexReader reader = IndexReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Control (make sure docs indexed)
    Assert.AreEqual(2, HitCount(searcher, "the"));
    Assert.AreEqual(1, HitCount(searcher, "cat"));
    Assert.AreEqual(1, HitCount(searcher, "dogs"));
    Assert.AreEqual(0, HitCount(searcher, "rabbit"));
    // This throws exception (it shouldn't)
    SpanQuery inner = CreateSpan(4, false, "chased", "cat");
    SpanQuery nested = CreateSpan(0, true, new SpanQuery[] { inner, CreateSpan("ate") });
    Assert.AreEqual(1, searcher.Search(nested, 10).totalHits);
    reader.Close();
    dir.Close();
}
public virtual void TestFieldSetValue()
{
    // One reusable Field instance, re-pointed at three different values so
    // the same Document object indexes three distinct ids.
    Field idField = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
    Document doc = new Document();
    doc.Add(idField);
    doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(doc);
    idField.SetValue("id2");
    writer.AddDocument(doc);
    idField.SetValue("id3");
    writer.AddDocument(doc);
    writer.Close();
    Searcher searcher = new IndexSearcher(dir);
    Query query = new TermQuery(new Term("keyword", "test"));
    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(3, hits.Length);
    // Collect each id as a bit; all three bits must end up set.
    int seen = 0;
    for (int i = 0; i < 3; i++)
    {
        Field f = searcher.Doc(hits[i].doc).GetField("id");
        if (f.StringValue().Equals("id1"))
            seen |= 1;
        else if (f.StringValue().Equals("id2"))
            seen |= 2;
        else if (f.StringValue().Equals("id3"))
            seen |= 4;
        else
            Assert.Fail("unexpected id field");
    }
    searcher.Close();
    dir.Close();
    Assert.AreEqual(7, seen, "did not see all IDs");
}
public virtual void TestGetValuesForIndexedDocument()
{
    // Round-trip a document through an index and verify its stored values.
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.AddDocument(MakeDocumentWithFields());
    writer.Close();
    Searcher searcher = new IndexSearcher(dir);
    // search for something that does exists
    Query query = new TermQuery(new Term("keyword", "test1"));
    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(1, hits.Length);
    DoAssert(searcher.Doc(hits[0].doc), true);
    searcher.Close();
}