private Analyzer GetAnalyzer(int type)
{
    Analyzer analyzer;
    string path = System.Configuration.ConfigurationSettings.AppSettings["Data"];
    switch (type)
    {
        case 0:
            analyzer = new StockFooAnalyzer(path);
            break;
        case 1:
            analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
            break;
        case 2:
            analyzer = new SimpleAnalyzer();
            break;
        case 3:
            analyzer = new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
            break;
        case 4:
            analyzer = new KeywordAnalyzer();
            break;
        case 5:
            analyzer = new WhitespaceAnalyzer();
            break;
        default:
            analyzer = new StockFooAnalyzer(path);
            break;
    }
    return analyzer;
}
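The switch above only matters because the analyzers tokenize differently. A minimal side-by-side sketch, assuming Lucene.Net 3.0.3 and its attribute-based token API (the input string is made up):

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

class AnalyzerComparison
{
    static void Main()
    {
        // SimpleAnalyzer splits on non-letters and lowercases: foo, bar, b, b
        PrintTokens(new SimpleAnalyzer(), "Foo-Bar B2B");
        // WhitespaceAnalyzer splits only on whitespace and keeps case: Foo-Bar, B2B
        PrintTokens(new WhitespaceAnalyzer(), "Foo-Bar B2B");
    }

    static void PrintTokens(Analyzer analyzer, string text)
    {
        TokenStream ts = analyzer.TokenStream("", new StringReader(text));
        ITermAttribute term = ts.GetAttribute<ITermAttribute>();
        while (ts.IncrementToken())
        {
            Console.WriteLine(term.Term);
        }
        ts.Close();
    }
}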
public void SpecifyAnalyzer()
{
    var analyzer = new SimpleAnalyzer();
    map.Property(x => x.Date).AnalyzeWith(analyzer);
    var mapper = GetMappingInfo("Date");
    Assert.That(mapper.Analyzer, Is.SameAs(analyzer));
}
public virtual void TestSimple()
{
    Analyzer a = new SimpleAnalyzer();
    AssertAnalyzesTo(a, "foo bar FOO BAR", new System.String[] { "foo", "bar", "foo", "bar" });
    AssertAnalyzesTo(a, "foo bar . FOO <> BAR", new System.String[] { "foo", "bar", "foo", "bar" });
    AssertAnalyzesTo(a, "foo.bar.FOO.BAR", new System.String[] { "foo", "bar", "foo", "bar" });
    AssertAnalyzesTo(a, "U.S.A.", new System.String[] { "u", "s", "a" });
    AssertAnalyzesTo(a, "C++", new System.String[] { "c" });
    AssertAnalyzesTo(a, "B2B", new System.String[] { "b", "b" });
    AssertAnalyzesTo(a, "2B", new System.String[] { "b" });
    AssertAnalyzesTo(a, "\"QUOTED\" word", new System.String[] { "quoted", "word" });
}
public void ReusableTokenStreamTest()
{
    string testwords = "我是中国人,I can speak chinese!";
    SimpleAnalyzer sanalyzer = new SimpleAnalyzer();
    TokenStream ts = sanalyzer.ReusableTokenStream("", new StringReader(testwords));
    Token token;
    while ((token = ts.Next()) != null)
    {
        Console.WriteLine(token.TermText());
    }
    ts.Close();
}
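ReusableTokenStream exists so an analyzer can hand back the same tokenizer instance for consecutive calls on one thread, saving per-document allocations. A minimal reuse sketch under that assumption, using the same pre-2.9 Token API as the test above:

using System;
using System.IO;
using Lucene.Net.Analysis;

class ReuseDemo
{
    static void Main()
    {
        SimpleAnalyzer analyzer = new SimpleAnalyzer();
        // Both iterations may be served by the same underlying tokenizer,
        // reset against a fresh reader each time.
        foreach (string text in new[] { "first input", "second input" })
        {
            TokenStream ts = analyzer.ReusableTokenStream("", new StringReader(text));
            Token token;
            while ((token = ts.Next()) != null)
            {
                Console.WriteLine(token.TermText());
            }
        }
    }
}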
public static void Main(System.String[] args)
{
    try
    {
        Directory directory = new RAMDirectory();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(directory, analyzer, true);

        int MAX_DOCS = 225;
        for (int j = 0; j < MAX_DOCS; j++)
        {
            Lucene.Net.Documents.Document d = new Lucene.Net.Documents.Document();
            d.Add(new Field(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.TOKENIZED));
            d.Add(new Field(ID_FIELD, System.Convert.ToString(j), Field.Store.YES, Field.Index.TOKENIZED));
            writer.AddDocument(d);
        }
        writer.Close();

        // try a search without OR
        Searcher searcher = new IndexSearcher(directory);
        Hits hits = null;
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
        Query query = parser.Parse(HIGH_PRIORITY);
        System.Console.Out.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
        hits = searcher.Search(query);
        PrintHits(hits);
        searcher.Close();

        // try a new search with OR
        searcher = new IndexSearcher(directory);
        hits = null;
        parser = new Lucene.Net.QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
        query = parser.Parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
        System.Console.Out.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
        hits = searcher.Search(query);
        PrintHits(hits);
        searcher.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
// This method takes a search term and a text as parameters, and displays the text
// with the search term in bold.
public static void RealHighlighter(string searchTerm, string text)
{
    TermQuery query = new TermQuery(new Term("mainText", searchTerm));
    Lucene.Net.Search.Highlight.IScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(scorer);

    System.IO.StringReader reader = new System.IO.StringReader(text);
    TokenStream tokenStream = new SimpleAnalyzer().TokenStream("mainText", reader);
    string[] toBePrinted = highlighter.GetBestFragments(tokenStream, text, 5); // 5 is the maximum number of fragments returned
    foreach (var word in toBePrinted)
    {
        Console.Write(word);
    }
}
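A hypothetical call, just to show the shape of the inputs (both strings are made up; SimpleAnalyzer lowercases tokens, so the lowercase query term matches either casing in the text):

// Prints the text with <B>...</B> around the matched term,
// using the Highlighter's default formatter.
RealHighlighter("hello", "Hello world, and hello again from Lucene.");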
public static void Main(System.String[] args)
{
    try
    {
        Directory directory = new RAMDirectory();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(directory, analyzer, true);

        System.String[] docs = new System.String[]
        {
            "a b c d e",
            "a b c d e a b c d e",
            "a b c d e f g h i j",
            "a c e",
            "e c a",
            "a c e a c e",
            "a c e a b c"
        };
        for (int j = 0; j < docs.Length; j++)
        {
            Document d = new Document();
            d.Add(Field.Text("contents", docs[j]));
            writer.AddDocument(d);
        }
        writer.Close();

        Searcher searcher = new IndexSearcher(directory);
        System.String[] queries = new System.String[] { "\"a c e\"" };
        Hits hits = null;
        QueryParsers.QueryParser parser = new QueryParsers.QueryParser("contents", analyzer);
        parser.SetPhraseSlop(4);
        for (int j = 0; j < queries.Length; j++)
        {
            Query query = parser.Parse(queries[j]);
            System.Console.Out.WriteLine("Query: " + query.ToString("contents"));

            //DateFilter filter =
            //  new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
            //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
            //System.out.println(filter);

            hits = searcher.Search(query);
            System.Console.Out.WriteLine(hits.Length() + " total results");
            for (int i = 0; i < hits.Length() && i < 10; i++)
            {
                Document d = hits.Doc(i);
                System.Console.Out.WriteLine(i + " " + hits.Score(i) + " " + d.Get("contents"));
            }
        }
        searcher.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
// TEST METHOD FOR HIGHLIGHTING.
public static void Highlighter()
{
    string textTest = "I am a man that follows hell.";
    TermQuery queryTest = new TermQuery(new Term("", "hell"));
    Lucene.Net.Search.Highlight.IScorer scorer = new QueryScorer(queryTest);
    Highlighter highlighter = new Highlighter(scorer);

    System.IO.StringReader reader = new System.IO.StringReader(textTest);
    TokenStream tokenStream = new SimpleAnalyzer().TokenStream("field", reader);
    string[] toBePrinted = highlighter.GetBestFragments(tokenStream, textTest, 1); // 1 is the maximum number of fragments returned
    foreach (var word in toBePrinted)
    {
        Console.WriteLine(word);
    }
}
public void Code()
{
    Analyzer _keywordanalyzer = new KeywordAnalyzer();
    Analyzer _simpleanalyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    Analyzer _stopanalyzer = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    Analyzer _whitespaceanalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    Analyzer _standardanalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    var _perfieldanalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(_standardanalyzer);
    _perfieldanalyzer.AddAnalyzer("firstname", _keywordanalyzer);
    _perfieldanalyzer.AddAnalyzer("lastname", _keywordanalyzer);

    IndexWriter _writer = new IndexWriter(_directory, _perfieldanalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    IndexReader _reader = _writer.GetReader();
    IndexSearcher _searcher = new IndexSearcher(_reader);

    //QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", _standardanalyzer);
    string[] fields = new[] { "text", "title", "author" };
    var boosts = new Dictionary<string, float>();
    boosts.Add("text", 2.0f);
    boosts.Add("title", 1.5f);
    QueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, fields, _standardanalyzer, boosts);
    Query query = parser.Parse("lucene is great");

    TopDocs hits = _searcher.Search(query, 1000);
    IEnumerable<Document> docs = hits.ScoreDocs.Select(hit => _searcher.Doc(hit.Doc));
    var books = docs.Select(doc => new Book()
    {
        Text = doc.Get("text"),
        Title = doc.Get("title"),
        Author = doc.Get("author"),
        Length = Int32.Parse(doc.Get("length"))
    });

    _writer.Optimize();
    _writer.Commit();
    _writer.DeleteAll();
}
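The boosts dictionary rescales the per-field clauses that MultiFieldQueryParser generates, which is easiest to see by parsing a single term and printing the result. A minimal sketch, assuming Lucene.Net 3.0.3 and the field names from the snippet above:

using System;
using System.Collections.Generic;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;

class BoostDemo
{
    static void Main()
    {
        var boosts = new Dictionary<string, float> { { "text", 2.0f }, { "title", 1.5f } };
        var parser = new MultiFieldQueryParser(
            Lucene.Net.Util.Version.LUCENE_30,
            new[] { "text", "title", "author" },
            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),
            boosts);

        // Prints something like: (text:lucene^2.0 title:lucene^1.5 author:lucene)
        Console.WriteLine(parser.Parse("lucene"));
    }
}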
private void DoTest(System.IO.StringWriter out_Renamed, bool useCompoundFiles)
{
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true);
    writer.SetUseCompoundFile(useCompoundFiles);

    int MAX_DOCS = 225;
    for (int j = 0; j < MAX_DOCS; j++)
    {
        Document d = new Document();
        d.Add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
        d.Add(Field.Text(ID_FIELD, System.Convert.ToString(j)));
        writer.AddDocument(d);
    }
    writer.Close();

    // try a search without OR
    Searcher searcher = new IndexSearcher(directory);
    Hits hits = null;
    QueryParsers.QueryParser parser = new QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
    Query query = parser.Parse(HIGH_PRIORITY);
    out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
    hits = searcher.Search(query);
    PrintHits(out_Renamed, hits);
    CheckHits(hits, MAX_DOCS);
    searcher.Close();

    // try a new search with OR
    searcher = new IndexSearcher(directory);
    hits = null;
    parser = new QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
    query = parser.Parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
    out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
    hits = searcher.Search(query);
    PrintHits(out_Renamed, hits);
    CheckHits(hits, MAX_DOCS);
    searcher.Close();
}
private void DoTestSearch(System.IO.StreamWriter out_Renamed, bool useCompoundFile)
{
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetUseCompoundFile(useCompoundFile);

    System.String[] docs = new System.String[]
    {
        "a b c d e",
        "a b c d e a b c d e",
        "a b c d e f g h i j",
        "a c e",
        "e c a",
        "a c e a c e",
        "a c e a b c"
    };
    for (int j = 0; j < docs.Length; j++)
    {
        Document d = new Document();
        d.Add(new Field("contents", docs[j], Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(d);
    }
    writer.Close();

    Searcher searcher = new IndexSearcher(directory);
    System.String[] queries = new System.String[] { "a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\"" };
    ScoreDoc[] hits = null;
    QueryParser parser = new QueryParser("contents", analyzer);
    parser.SetPhraseSlop(4);
    for (int j = 0; j < queries.Length; j++)
    {
        Query query = parser.Parse(queries[j]);
        out_Renamed.WriteLine("Query: " + query.ToString("contents"));

        //DateFilter filter =
        //  new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
        //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
        //System.out.println(filter);

        hits = searcher.Search(query, null, 1000).scoreDocs;
        out_Renamed.WriteLine(hits.Length + " total results");
        for (int i = 0; i < hits.Length && i < 10; i++)
        {
            Document d = searcher.Doc(hits[i].doc);
            out_Renamed.WriteLine(i + " " + hits[i].score + " " + d.Get("contents"));
        }
    }
    searcher.Close();
}
public void WithoutGettingErrors()
{
    using (var luceneCodecDirectory = new LuceneCodecDirectory(Path, Enumerable.Empty<AbstractIndexCodec>()))
    using (var simpleAnalyzer = new SimpleAnalyzer())
    {
        using (var w = new IndexWriter(luceneCodecDirectory, simpleAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var doc = new Lucene.Net.Documents.Document();
            doc.Add(new Field("test", "value", Field.Store.YES, Field.Index.ANALYZED));
            w.AddDocument(doc);
        }

        using (var s = new IndexSearcher(luceneCodecDirectory))
        {
            var termQuery = new TermQuery(new Term("test", "value"));
            var topDocs = s.Search(termQuery, 10);
            Assert.Equal(1, topDocs.TotalHits);
        }
    }
}
// This method prints out the message details given the index document.
// NOTE: The field "mainText" must be stored at indexing level. The same goes for any
// other field you want to search.
private static void DisplayMessage(Document d, string searchTerm)
{
    // THIS IS USED IN THE DATABASE INDEX
    //Console.WriteLine("id: " + d.Get("id") + "\n" + "messageBox: " + d.Get("messageBox") + "\n" + "incoming: " + d.Get("incoming") + "\n" + "date: " + d.Get("date") + "\n" + "mainText: " + d.Get("mainText"));

    // THIS IS USED IN MY TEST FILES
    //Console.WriteLine("id: " + d.Get("id") + "\n" + "mainText: " + d.Get("mainText"));

    string text = d.Get("mainText");
    TermQuery query = new TermQuery(new Term("mainText", searchTerm));
    Lucene.Net.Search.Highlight.IScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(scorer);

    System.IO.StringReader reader = new System.IO.StringReader(text);
    TokenStream tokenStream = new SimpleAnalyzer().TokenStream("mainText", reader);
    string[] toBePrinted = highlighter.GetBestFragments(tokenStream, text, 5); // 5 is the maximum number of fragments returned
    foreach (var word in toBePrinted)
    {
        Console.Write(word);
    }
    Console.WriteLine("=====================");
    Console.ReadKey();
}
private void InitLucene()
{
    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
}
/// <summary>
/// Returns an Analyzer for the given AnalyzerType
/// </summary>
/// <param name="oAnalyzerType">Enumeration value</param>
/// <returns>Analyzer</returns>
public static Analyzer GetAnalyzer(AnalyzerType oAnalyzerType)
{
    Analyzer oAnalyzer = null;
    switch (oAnalyzerType)
    {
        case AnalyzerType.SimpleAnalyzer:
            oAnalyzer = new SimpleAnalyzer();
            break;
        case AnalyzerType.StopAnalyzer:
            oAnalyzer = new StopAnalyzer();
            break;
        case AnalyzerType.WhitespaceAnalyzer:
            oAnalyzer = new WhitespaceAnalyzer();
            break;
        default:
        case AnalyzerType.StandardAnalyzer:
            oAnalyzer = new StandardAnalyzer();
            break;
    }
    return oAnalyzer;
}
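A hypothetical call site for this factory, assuming the AnalyzerType enum from the signature. Note the parameterless StopAnalyzer and StandardAnalyzer constructors date from Lucene.Net 2.x; the 3.x versions take a Version argument, as in the other snippets here, so the sketch below sticks to the 2.x QueryParser constructor for consistency:

// Pick an analyzer by enum value and hand it to a parser.
Analyzer analyzer = GetAnalyzer(AnalyzerType.SimpleAnalyzer);
var parser = new QueryParser("contents", analyzer);
Query query = parser.Parse("foo bar");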
private void DoTest(System.IO.StreamWriter out_Renamed, bool useCompoundFiles)
{
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.UseCompoundFile = useCompoundFiles;

    int MAX_DOCS = 225;
    for (int j = 0; j < MAX_DOCS; j++)
    {
        Document d = new Document();
        d.Add(new Field(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.ANALYZED));
        d.Add(new Field(ID_FIELD, System.Convert.ToString(j), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(d);
    }
    writer.Close();

    // try a search without OR
    Searcher searcher = new IndexSearcher(directory, true);
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);
    Query query = parser.Parse(HIGH_PRIORITY);
    out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
    ScoreDoc[] hits = searcher.Search(query, null, MAX_DOCS).ScoreDocs;
    PrintHits(out_Renamed, hits, searcher);
    CheckHits(hits, MAX_DOCS, searcher);
    searcher.Close();

    // try a new search with OR
    searcher = new IndexSearcher(directory, true);
    hits = null;
    parser = new QueryParser(Util.Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);
    query = parser.Parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
    out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
    hits = searcher.Search(query, null, MAX_DOCS).ScoreDocs;
    PrintHits(out_Renamed, hits, searcher);
    CheckHits(hits, MAX_DOCS, searcher);
    searcher.Close();
}
public static string[] SplitWords2(string content)
{
    List<string> strList = new List<string>();
    // SimpleAnalyzer splits on non-letters and lowercases; the original comment
    // here referred to the PanGu (盘古) Chinese analyzer, which could be swapped in
    // for proper Chinese word segmentation.
    Analyzer analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        tokenStream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        strList.Add(ita.Term);
    }
    return strList.ToArray();
}
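A quick usage sketch (the input is made up). Because SimpleAnalyzer keeps only letter runs and lowercases them, digits and punctuation disappear:

// Prints "hello" then "world"; "123" and the punctuation are dropped.
foreach (string w in SplitWords2("Hello, World 123!"))
{
    Console.WriteLine(w);
}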
private void queryRhino_Lucene()
{
    try
    {
        String indexRhinoFile = System.IO.Directory.GetCurrentDirectory() + @"\rhino-documents.json";
        String queryRhinoFile = System.IO.Directory.GetCurrentDirectory() + @"\rhino-queries.json";
        String indexLuceneFile = System.IO.Directory.GetCurrentDirectory() + @"\lucene-documents.json";
        String queryLuceneFile = System.IO.Directory.GetCurrentDirectory() + @"\lucene-queries.json";

        List<indexDocument> results = new List<indexDocument>();
        List<indexQuery> queries = new List<indexQuery>();
        String line;

        System.IO.StreamReader indexFile = new System.IO.StreamReader(indexLuceneFile);
        //System.IO.StreamReader indexFile = new System.IO.StreamReader(indexRhinoFile);
        while ((line = indexFile.ReadLine()) != null)
        {
            indexDocument result = JsonConvert.DeserializeObject<indexDocument>(line);
            results.Add(result);
        }
        indexFile.Close();

        line = "";
        System.IO.StreamReader queryFile = new System.IO.StreamReader(queryLuceneFile);
        //System.IO.StreamReader queryFile = new System.IO.StreamReader(queryRhinoFile);
        while ((line = queryFile.ReadLine()) != null)
        {
            indexQuery query = JsonConvert.DeserializeObject<indexQuery>(line);
            queries.Add(query);
        }
        queryFile.Close();

        Lucene.Net.Store.Directory indexDirectory = FSDirectory.Open(@"c:\TEMP_LUCENE2"); //System.IO.Directory.GetCurrentDirectory());
        //Lucene.Net.Store.Directory indexDirectory = FSDirectory.Open(@"c:\TEMP_RHINO2");

        Analyzer analyzer = new SimpleAnalyzer();
        //Analyzer analyzer = new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        //Analyzer analyzer = new WhitespaceAnalyzer();
        //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

        // CREATE INDEX
        IndexWriter writer = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

        // For each line in LUCENE-DOCUMENTS
        foreach (indexDocument d in results)
        {
            Document doc = d.ToDocument();
            //doc.Add(new Field("name", d.name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            //doc.Add(new Field("id", d.id, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            //doc.Add(new Field("file_name", d.file_name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            //doc.Add(new Field("tokens", d.tokens.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            writer.AddDocument(doc);
        }
        writer.Optimize();
        writer.Commit();
        writer.Dispose();

        //System.IO.StreamWriter resultsFile = new System.IO.StreamWriter(@"C:\JSON_RESULTS\lucene_results.json");
        //File.Create(@"C:\rhino_results.json");
        //System.IO.StreamWriter resultsFile = new System.IO.StreamWriter(@"C:\JSON_RESULTS\rhino_results.json", true);

        // SEARCH
        // For each query
        foreach (indexQuery q in queries)
        {
            Document query = q.ToDocument();
            //console.writeline("Query " + query.Get("id"));
            String jsonResult = "{\"query_id\": " + query.Get("id") + ", ";

            Field[] title_tokens = query.GetFields("title_tokens");
            Field[] description_tokens = query.GetFields("description_tokens");
            String title_query = "";
            String description_query = "";
            foreach (Field f in title_tokens)
            {
                title_query += " " + f.StringValue;
            }
            foreach (Field f in description_tokens)
            {
                description_query += " " + f.StringValue;
            }

            String query1 = title_query + " " + description_query;
            String query2 = title_query;
            String query3 = description_query;

            QueryParser queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "tokens", analyzer);
            Query query1Parsed = queryParser.Parse(query1);
            Query query2Parsed = queryParser.Parse(query2);
            Query query3Parsed = queryParser.Parse(query3);

            int hitsPerPage = 20;
            //Lucene.Net.Store.Directory directory = FSDirectory.Open(System.IO.Directory.GetCurrentDirectory());
            IndexReader reader = DirectoryReader.Open(indexDirectory, true);
            IndexSearcher searcher = new IndexSearcher(reader);

            //console.writeline("---------------");
            jsonResult += "\"title_and_description\": [";
            //console.writeline("Results for query #1 - Title and Description:");
            TopDocs docs = searcher.Search(query1Parsed, hitsPerPage);
            ScoreDoc[] hits = docs.ScoreDocs;
            for (int i = 0; i < hits.Length; i++)
            {
                int docId = hits[i].Doc;
                Document d = searcher.Doc(docId);
                //string file_name = d.Get("file_name");
                //console.writeline("DocId: " + docId.ToString());
                if (i == hits.Length - 1)
                    jsonResult += (docId + 1).ToString();
                else
                    jsonResult += (docId + 1).ToString() + ", ";
            }
            jsonResult += "], ";

            //console.writeline("---------------");
            jsonResult += "\"title_only\": [";
            //console.writeline("Results for query #2 - Title only:");
            docs = searcher.Search(query2Parsed, hitsPerPage);
            hits = docs.ScoreDocs;
            for (int i = 0; i < hits.Length; i++)
            {
                int docId = hits[i].Doc;
                Document d = searcher.Doc(docId);
                //string file_name = d.Get("file_name");
                //console.writeline("DocId: " + docId.ToString());
                if (i == hits.Length - 1)
                    jsonResult += (docId + 1).ToString();
                else
                    jsonResult += (docId + 1).ToString() + ", ";
            }
            jsonResult += "], ";

            //console.writeline("---------------");
            jsonResult += "\"description_only\": [";
            //console.writeline("Results for query #3 - Description only:");
            docs = searcher.Search(query3Parsed, hitsPerPage);
            hits = docs.ScoreDocs;
            for (int i = 0; i < hits.Length; i++)
            {
                int docId = hits[i].Doc;
                Document d = searcher.Doc(docId);
                //string file_name = d.Get("file_name");
                //console.writeline("DocId: " + docId.ToString());
                if (i == hits.Length - 1)
                    jsonResult += (docId + 1).ToString();
                else
                    jsonResult += (docId + 1).ToString() + ", ";
            }
            jsonResult += "]}";

            //console.writeline("---------------");
            //resultsFile.WriteLine(jsonResult);
            Console.WriteLine(jsonResult);
        }
        //resultsFile.Close();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.ToString());
    }
}
public virtual Lucene.Net.QueryParsers.QueryParser GetParser(Analyzer a)
{
    if (a == null)
        a = new SimpleAnalyzer();
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field", a);
    qp.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.OR_OPERATOR);
    return qp;
}
public virtual Query GetQueryDOA(System.String query, Analyzer a)
{
    if (a == null)
        a = new SimpleAnalyzer();
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field", a);
    qp.SetDefaultOperator(Lucene.Net.QueryParsers.QueryParser.AND_OPERATOR);
    return qp.Parse(query);
}
public void TestQueryScorerHits()
{
    Analyzer analyzer = new SimpleAnalyzer();
    QueryParser qp = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer);
    query = qp.Parse("\"very long\"");
    searcher = new IndexSearcher(ramDir, true);
    TopDocs hits = searcher.Search(query, 10);

    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);

    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        Document doc = searcher.Doc(hits.ScoreDocs[i].Doc);
        String storedField = doc.Get(FIELD_NAME);

        TokenStream stream = TokenSources.GetAnyTokenStream(searcher.IndexReader, hits.ScoreDocs[i].Doc, FIELD_NAME, doc, analyzer);
        IFragmenter fragmenter = new SimpleSpanFragmenter(scorer);
        highlighter.TextFragmenter = fragmenter;
        String fragment = highlighter.GetBestFragment(stream, storedField);
        Console.WriteLine(fragment);
    }
}
private void OpenIndexWriter()
{
    var dir = FSDirectory.Open(new DirectoryInfo(Config.IndexPath));
    // create an analyzer to process the text
    var analyzer = new SimpleAnalyzer();
    // create the index writer with the directory and analyzer defined
    _indexWriter = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    _indexWriter.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
}
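With auto-flush disabled as above, buffered documents are only written out when the code asks for it, so a typical follow-up is to batch additions and commit once. A sketch, assuming the _indexWriter field from the snippet and a hypothetical document collection:

// documents is a hypothetical IEnumerable<Document> built elsewhere.
foreach (Document doc in documents)
{
    _indexWriter.AddDocument(doc);
}
_indexWriter.Commit(); // flush the batch and make it visible to new readers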
public void addToDictionary()
{
    String derbyJson = AppDomain.CurrentDomain.BaseDirectory + @"jira\derby_issues_filtered.json";
    String ofbizJson = AppDomain.CurrentDomain.BaseDirectory + @"jira\ofbiz_issues_filtered.json";

    List<Issue> issuesDerby = new List<Issue>();
    List<Issue> issuesOfbiz = new List<Issue>();
    String line;

    System.IO.StreamReader indexFileDerby = new System.IO.StreamReader(derbyJson);
    System.IO.StreamReader indexFileOfbiz = new System.IO.StreamReader(ofbizJson);
    //System.IO.StreamReader indexFile = new System.IO.StreamReader(indexRhinoFile);
    while ((line = indexFileDerby.ReadLine()) != null)
    {
        Issue issue = JsonConvert.DeserializeObject<Issue>(line);
        issuesDerby.Add(issue);
    }
    indexFileDerby.Close();

    while ((line = indexFileOfbiz.ReadLine()) != null)
    {
        Issue issue = JsonConvert.DeserializeObject<Issue>(line);
        issuesOfbiz.Add(issue);
    }
    indexFileOfbiz.Close();

    Lucene.Net.Store.Directory indexDirectory = FSDirectory.Open(@"c:\temp_corpus"); //System.IO.Directory.GetCurrentDirectory());
    //Lucene.Net.Store.Directory indexDirectory = FSDirectory.Open(@"c:\TEMP_RHINO2");

    Analyzer analyzer = new SimpleAnalyzer();
    //Analyzer analyzer = new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    //Analyzer analyzer = new WhitespaceAnalyzer();
    //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // CREATE INDEX
    IndexWriter writer = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

    foreach (Issue d in issuesDerby)
    {
        Document doc = d.ToDocument();
        //doc.Add(new Field("name", d.name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        //doc.Add(new Field("id", d.id, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        //doc.Add(new Field("file_name", d.file_name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        //doc.Add(new Field("tokens", d.tokens.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        writer.AddDocument(doc);
    }

    foreach (Issue d in issuesOfbiz)
    {
        Document doc = d.ToDocument();
        writer.AddDocument(doc);
    }

    // optimize, commit, and dispose only once, after both corpora are indexed
    writer.Optimize();
    writer.Commit();
    writer.Dispose();
}