/// <summary>
/// Increments the hit counters for the searched words found in <paramref name="text"/>.
/// Only terms present in the <c>_segments</c> dictionary are counted.
/// </summary>
/// <param name="text">Raw search text; the method is a no-op when null or empty.</param>
/// <param name="parameters">Search parameters, forwarded to the extended statistics manager when available.</param>
private void IncrementCountWords(string text, IEnumerable<Parameter> parameters)
{
    if (string.IsNullOrEmpty(text))
    {
        return;
    }

    var analyzer = new StandardAnalyzer(Version.LUCENE_29, Stopwords.PORTUGUESE_SET);
    // Optional extended interface; null when the manager only implements the base contract.
    var statisticsManager2 = _statisticsManager as ISearchStatistics2;

    // BUGFIX: the reader and token stream were never released; dispose/close them deterministically.
    using (var stringReader = new System.IO.StringReader(text))
    {
        var tokenStream = analyzer.TokenStream("defaultFieldName", stringReader);
        try
        {
            var token = tokenStream.Next();
            while (token != null)
            {
                var termText = token.TermText();
                if (_segments.ContainsKey(termText))
                {
                    // Reuse termText instead of calling token.TermText() a second time.
                    _statisticsManager.IncrementCountWords(termText);
                    if (statisticsManager2 != null)
                    {
                        statisticsManager2.IncrementCountWords(text, parameters);
                    }
                }
                token = tokenStream.Next();
            }
        }
        finally
        {
            tokenStream.Close();
        }
    }
}
/// <summary>
/// Runs a Lucene search over the Canon index for the text in <c>QueryInputBox</c>,
/// highlights the best fragment of each hit, and shows the results dialog.
/// Progress is reported through <c>SearchProgressBar</c> / <c>ProgressLabel</c>.
/// </summary>
private void Search()
{
    try
    {
        SearchProgressBar.Maximum = 11;
        ProgressLabel.Text = "Progress: Initialize Search ...";

        Searcher searcher = new IndexSearcher(@"Canon\index");
        Analyzer analyzer = new StandardAnalyzer();
        ArrayList resultList = new ArrayList();

        String line = QueryInputBox.Text;
        // BUGFIX: String.Length can never be -1, so the old `== -1` guard was dead;
        // an empty query box is length 0.
        if (line.Length == 0)
            return;

        ProgressLabel.Text = "Progress: Parsing Query ...";
        Query query = QueryParser.Parse(line, "contents", analyzer);
        Hits hits = searcher.Search(query);
        SearchProgressBar.Increment(1);
        ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.SetTextFragmenter(new SimpleFragmenter(80));
        int maxNumFragmentsRequired = 1;

        // BUGFIX: the original always read 10 hits and threw when the search
        // returned fewer; clamp to the actual hit count.
        int hitCount = Math.Min(10, hits.Length());
        for (int i = 0; i < hitCount; i++)
        {
            SearchProgressBar.Increment(1);
            ProgressLabel.Text = "Progress: Analyzing hit " + (i + 1).ToString();

            Document doc = hits.Doc(i);
            ResultSet a = new ResultSet();
            a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\", "");
            a.Score = hits.Score(i);
            a.numberOfHits = hits.Length();

            // The document text is not stored in the index, so extract it
            // temporarily from the zip archive and parse the HTML.
            string path = doc.Get("path");
            string name = GetInternalName(path);
            PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
            path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";

            string plainText = "";
            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
            {
                plainText = parseHtml(sr.ReadToEnd());
            }

            // Lucene 1.4-style highlighter call: tokenize the plain text and
            // keep the single best fragment.
            TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
            a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");

            // Clean up the temporary extracted file.
            if (File.Exists(path))
                File.Delete(path);

            resultList.Add(a);
        }

        SearchProgressBar.Value = 0;
        searcher.Close();

        ssr = new ShowSearchResults(resultList);
        ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
        ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
        this.Hide();
        ssr.ShowDialog();
    }
    catch (System.Exception e)
    {
        MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
public virtual void TestUnRewrittenQuery()
{
    // Demonstrates that a primitive multi-term query (wildcard/prefix)
    // yields no highlights unless it is rewritten against the reader first.
    searcher = new IndexSearcher(ramDir);
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.Parse("JF? or Kenned*");
    System.Console.Out.WriteLine("Searching with primitive query");
    // Deliberately NOT calling query.Rewrite(reader) here — that omission
    // is exactly what this test exercises.
    Hits hits = searcher.Search(query);

    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(40));
    int maxNumFragmentsRequired = 3;

    int hitIndex = 0;
    while (hitIndex < hits.Length())
    {
        System.String text = hits.Doc(hitIndex).Get(FIELD_NAME);
        TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
        System.String highlightedText = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        System.Console.Out.WriteLine(highlightedText);
        hitIndex++;
    }

    // Zero highlights are expected because the query was never rewritten.
    Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
}
/// <summary>
/// Prepares the data for the search: tokenizes the accent-stripped text,
/// then builds the Lucene query, filter and sort fields as out parameters.
/// </summary>
/// <param name="text">Raw search text.</param>
/// <param name="filters">Filter parameters applied to the search.</param>
/// <param name="options">Search options; must not be null.</param>
/// <param name="query">Receives the query built from the text (or from the filters when no terms exist).</param>
/// <param name="filter">Receives the filter built from <paramref name="filters"/>, when applicable.</param>
/// <param name="sortFields">Receives the sort fields; only set on the segment-term path, otherwise null.</param>
private void PrepareSearch(string text, FilterParameter[] filters, SearchOptions options, out Query query, out Filter filter, out global::Lucene.Net.Search.SortField[] sortFields)
{
    if (options == null)
    {
        throw new ArgumentNullException("options");
    }
    ValidateOptions(options);

    var analyzer = new StandardAnalyzer(Version.LUCENE_29, Stopwords.PORTUGUESE_SET);
    var parser = new QueryParser(Version.LUCENE_29, "FullText", analyzer);

    // Tokenize the accent-stripped text into individual terms.
    var terms = new List<string>();
    using (var stringReader = new System.IO.StringReader(RemoveAcent(text)))
    {
        var tokenStream = analyzer.TokenStream("FullText", stringReader);
        var token = tokenStream.Next();
        while (token != null)
        {
            var term = token.TermText();
            terms.Add(term);
            token = tokenStream.Next();
        }
    }

    // Partition terms: first occurrence of a known segment term goes to
    // segmentsTerms; everything else (including duplicates of segment terms)
    // goes to segmentsSortTerms. NOTE(review): duplicates of segment terms
    // landing in the sort list looks intentional but is unverified here.
    var segmentsTerms = new List<string>();
    var segmentsSortTerms = new List<string>();
    foreach (var i in terms)
    {
        if (_segments.ContainsKey(i) && !segmentsTerms.Contains(i))
        {
            segmentsTerms.Add(i);
        }
        else
        {
            segmentsSortTerms.Add(i);
        }
    }

    // Path 1: at least one segment term — AND them together, build the filter,
    // and sort by the custom full-text comparator seeded with the non-segment terms.
    if (segmentsTerms.Count > 0)
    {
        var booleanQuery = new BooleanQuery();
        foreach (var s in segmentsTerms)
        {
            booleanQuery.Add(parser.Parse(s), BooleanClause.Occur.MUST);
        }
        query = booleanQuery;
        filter = GetFilter(filters, _structRepository, analyzer);
        sortFields = new global::Lucene.Net.Search.SortField[] { new global::Lucene.Net.Search.SortField("FullText", new FullTextFieldComparatorSource(analyzer, _dataRepository, segmentsSortTerms), true) };
        return;
    }

    // Path 2: no segment terms — parse each term and AND them; when the
    // analyzer produced no terms at all, fall back to parsing the raw text.
    var queries = new List<Query>();
    foreach (var i in terms)
    {
        queries.Add(parser.Parse(i));
    }
    if (queries.Count == 0)
    {
        query = parser.Parse(RemoveAcent(text));
    }
    else
    {
        var booleanQuery = new BooleanQuery();
        foreach (var q in queries)
        {
            booleanQuery.Add(q, BooleanClause.Occur.MUST);
        }
        query = booleanQuery;
    }

    filter = null;
    sortFields = null;
    // NOTE(review): query appears to be non-null on every path above, which
    // would make the else branch unreachable — confirm whether GetQuery(...)
    // was meant as a fallback for a parse failure before removing it.
    if (query != null)
    {
        filter = GetFilter(filters, _structRepository, analyzer);
    }
    else
    {
        query = GetQuery(filters, _structRepository, analyzer);
    }
}
/// <summary>
/// Builds the combined communications query: four SHOULD sub-queries, one per
/// indexed field (description, group, application, CI), each containing a
/// SHOULD TermQuery per analyzed token of the corresponding input string.
/// The CI sub-query is boosted (5x) and the application sub-query requires
/// at least one matching clause.
/// </summary>
/// <param name="changecommBody">Text matched against the "change_description" field.</param>
/// <param name="changecommGroup">Text matched against the "change_group" field.</param>
/// <param name="changecommApps">Text matched against the "application" field.</param>
/// <param name="changecommCI">Text matched against the "change_CI" field.</param>
/// <returns>The aggregated <see cref="BooleanQuery"/>.</returns>
public BooleanQuery GetCommQuery(string changecommBody, string changecommGroup, string changecommApps, string changecommCI)
{
    Lucene.Net.Analysis.Analyzer commsAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
    Lucene.Net.Search.BooleanQuery.MaxClauseCount = 25000;

    // The four per-field loops were copy-pasted; build each sub-query with one helper.
    Lucene.Net.Search.BooleanQuery query1 = BuildShouldTermQuery(commsAnalyzer, "change_description", changecommBody);
    Lucene.Net.Search.BooleanQuery query2 = BuildShouldTermQuery(commsAnalyzer, "change_group", changecommGroup);
    Lucene.Net.Search.BooleanQuery query3 = BuildShouldTermQuery(commsAnalyzer, "application", changecommApps);
    Lucene.Net.Search.BooleanQuery query4 = BuildShouldTermQuery(commsAnalyzer, "change_CI", changecommCI);

    Lucene.Net.Search.BooleanQuery comQuery = new Lucene.Net.Search.BooleanQuery();
    query4.Boost = 5;                       // CI matches weigh most
    query3.MinimumNumberShouldMatch = 1;    // require at least one application token
    comQuery.Add(query1, Lucene.Net.Search.Occur.SHOULD);
    comQuery.Add(query2, Lucene.Net.Search.Occur.SHOULD);
    comQuery.Add(query3, Lucene.Net.Search.Occur.SHOULD);
    comQuery.Add(query4, Lucene.Net.Search.Occur.SHOULD);
    return (comQuery);
}

/// <summary>
/// Tokenizes <paramref name="text"/> with <paramref name="analyzer"/> and returns a
/// BooleanQuery with one SHOULD TermQuery per token against <paramref name="fieldName"/>.
/// Tokenization failures are logged and yield a (possibly partial) query, matching
/// the original best-effort behavior.
/// </summary>
private static Lucene.Net.Search.BooleanQuery BuildShouldTermQuery(Lucene.Net.Analysis.Analyzer analyzer, string fieldName, string text)
{
    var fieldQuery = new Lucene.Net.Search.BooleanQuery();
    try
    {
        // BUGFIX: the readers were never disposed; also pass the real field name
        // to TokenStream (the original passed the text itself — harmless for
        // StandardAnalyzer, which has no per-field behavior, but misleading).
        using (TextReader reader = new StringReader(text))
        {
            Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream(fieldName, reader);
            try
            {
                stream.Reset();
                var termAttr = stream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                while (stream.IncrementToken())
                {
                    fieldQuery.Add(new Lucene.Net.Search.TermQuery(new Term(fieldName, termAttr.Term)), Lucene.Net.Search.Occur.SHOULD);
                }
            }
            finally
            {
                stream.Close();
            }
        }
    }
    catch (Exception ex)
    {
        Debug.WriteLine(ex);
    }
    return fieldQuery;
}