Code Example #1
        /// <summary>
        /// Increments the count for the searched words.
        /// </summary>
        /// <param name="text">Text whose tokens are counted.</param>
        /// <param name="parameters">Parameters used in the search.</param>
        private void IncrementCountWords(string text, IEnumerable<Parameter> parameters)
        {
            if (string.IsNullOrEmpty(text))
            {
                return;
            }
            var analyzer           = new StandardAnalyzer(Version.LUCENE_29, Stopwords.PORTUGUESE_SET);
            var stringReader       = new System.IO.StringReader(text);
            var tokenStream        = analyzer.TokenStream("defaultFieldName", stringReader);
            var statisticsManager2 = _statisticsManager as ISearchStatistics2;
            var token = tokenStream.Next();

            while (token != null)
            {
                var termText = token.TermText();
                if (_segments.ContainsKey(termText))
                {
                    _statisticsManager.IncrementCountWords(termText);
                    if (statisticsManager2 != null)
                    {
                        statisticsManager2.IncrementCountWords(text, parameters);
                    }
                }
                token = tokenStream.Next();
            }
        }
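
For reference, here is a minimal, self-contained sketch of the token-iteration pattern the method above relies on (Lucene.Net 2.9's TokenStream.Next() and Token.TermText()). The class name, field name and sample text are placeholders, and the project-specific Stopwords.PORTUGUESE_SET, _segments and _statisticsManager members are omitted.

using System;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Version = Lucene.Net.Util.Version;

internal static class TokenLoopSketch
{
    private static void Main()
    {
        // Analyzer setup mirrors the method above; the custom stop-word set is omitted here.
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);
        using (var reader = new System.IO.StringReader("palavras pesquisadas no texto"))
        {
            TokenStream tokenStream = analyzer.TokenStream("defaultFieldName", reader);
            Token token = tokenStream.Next();
            while (token != null)
            {
                Console.WriteLine(token.TermText()); // term text of the current token
                token = tokenStream.Next();
            }
            tokenStream.Close(); // pre-3.0 token streams are closed explicitly
        }
    }
}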
Code Example #2
        private void Search()
        {
            try
            {
                SearchProgressBar.Maximum = 11;
                ProgressLabel.Text = "Progress: Initialize Search ...";
                Searcher searcher = new IndexSearcher(@"Canon\index");
                Analyzer analyzer = new StandardAnalyzer();
                ArrayList resultList = new ArrayList();

                String line = QueryInputBox.Text;
                if (string.IsNullOrEmpty(line))
                    return;
                ProgressLabel.Text = "Progress: Parsing Query ...";
                Query query = QueryParser.Parse(line, "contents", analyzer);
                //int[] ix = qtm.GetTermFrequencies();

                Hits hits = searcher.Search(query);
                SearchProgressBar.Increment(1);
                ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

                //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, new WhitespaceAnalyzer(), "<B>", "</B>");
                Highlighter highlighter = new Highlighter(new QueryScorer(query));
                highlighter.SetTextFragmenter(new SimpleFragmenter(80));
                int maxNumFragmentsRequired = 1;

                //int HITS_PER_PAGE = 10;
                for (int i = 0; i < hits.Length() && i < 10; i++)
                {
                    SearchProgressBar.Increment(1);
                    ProgressLabel.Text = "Progress: Analyzing hit " + (i + 1).ToString();
                    // get the document from index
                    Document doc = hits.Doc(i);
                    //SegmentReader ir = new SegmentReader();
                    //Lucene.Net.Index.TermFreqVector tfv =
                    //tfv.GetTermFrequencies
                    string score = hits.Score(i).ToString();
                    //Box += "Hit no. " + i + " scored: " + score + " occ: " + /*highlighter.tokenFrequency */ " best fragment: \n";
                    ResultSet a = new ResultSet();
                    a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\", "");
                    a.Score = hits.Score(i);
                    a.numberOfHits = hits.Length();

                    // get the document filename;
                    // we can't get the text from the index
                    // because we didn't store it there,
                    // so get it from the archive
                    string path = doc.Get("path");
                    string name = GetInternalName(path);
                    PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
                    path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
                    string plainText = "";
                    // load the text from the zip archive temporarily
                    using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
                    {
                        plainText = parseHtml(sr.ReadToEnd());
                    }
                    //------------------------------- Highlighter Code 1.4
                    TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
                    a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");
                    if (File.Exists(path))
                        File.Delete(path);
                    //-------------------------------
                    resultList.Add(a);
                }
                SearchProgressBar.Value = 0;
                searcher.Close();
                ssr = new ShowSearchResults(/*Box*/resultList);
                //this.Hide();
                ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
                ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
                this.Hide();
                ssr.ShowDialog();

            }
            catch (System.Exception e)
            {
                MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
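
The highlighting in Search() is interleaved with index access and archive handling, so a stripped-down sketch of just the highlighting step may help. It uses only the calls that appear in these examples (the static QueryParser.Parse, QueryScorer, SimpleFragmenter and GetBestFragments); the using directives, field name and sample text are assumptions that may need adjusting to the exact Lucene.Net / Highlighter.Net version.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;   // assumed namespace for QueryParser in this version
using Lucene.Net.Search;
using Lucene.Net.Highlight;      // assumed namespace for Highlighter, QueryScorer, SimpleFragmenter

internal static class HighlightSketch
{
    private static void Main()
    {
        Analyzer analyzer = new StandardAnalyzer();
        string plainText = "the quick brown fox jumps over the lazy dog";

        // Parse the user query and pick the best 80-character fragment of the text.
        Query query = QueryParser.Parse("quick fox", "contents", analyzer);
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.SetTextFragmenter(new SimpleFragmenter(80));

        TokenStream tokenStream = analyzer.TokenStream("contents", new StringReader(plainText));
        string fragment = highlighter.GetBestFragments(tokenStream, plainText, 1, "...");
        Console.WriteLine(fragment);
    }
}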
Code Example #3
		public virtual void  TestUnRewrittenQuery()
		{
			//test to show how rewritten query can still be used
			searcher = new IndexSearcher(ramDir);
			Analyzer analyzer = new StandardAnalyzer();
			
			QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
			Query query = parser.Parse("JF? or Kenned*");
			System.Console.Out.WriteLine("Searching with primitive query");
			//forget to set this and...
			//query=query.rewrite(reader);
			Hits hits = searcher.Search(query);
			
			//create an instance of the highlighter with the tags used to surround highlighted text
			//		QueryHighlightExtractor highlighter = new QueryHighlightExtractor(this, query, new StandardAnalyzer());
			Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
			
			highlighter.SetTextFragmenter(new SimpleFragmenter(40));
			
			int maxNumFragmentsRequired = 3;
			
			for (int i = 0; i < hits.Length(); i++)
			{
				System.String text = hits.Doc(i).Get(FIELD_NAME);
				TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(text));
				
				System.String highlightedText = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
				System.Console.Out.WriteLine(highlightedText);
			}
			//We expect to have zero highlights if the query is multi-terms and is not rewritten!
			Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
		}
Code Example #4
        /// <summary>
        /// Prepares the data for the search.
        /// </summary>
        /// <param name="text">Text to search for.</param>
        /// <param name="filters">Filter parameters applied to the search.</param>
        /// <param name="options">Search options.</param>
        /// <param name="query">Resulting query.</param>
        /// <param name="filter">Resulting filter.</param>
        /// <param name="sortFields">Resulting sort fields.</param>
        private void PrepareSearch(string text, FilterParameter[] filters, SearchOptions options, out Query query, out Filter filter, out global::Lucene.Net.Search.SortField[] sortFields)
        {
            if (options == null)
            {
                throw new ArgumentNullException("options");
            }
            ValidateOptions(options);
            var analyzer = new StandardAnalyzer(Version.LUCENE_29, Stopwords.PORTUGUESE_SET);
            var parser   = new QueryParser(Version.LUCENE_29, "FullText", analyzer);
            var terms    = new List<string>();

            using (var stringReader = new System.IO.StringReader(RemoveAcent(text)))
            {
                var tokenStream = analyzer.TokenStream("FullText", stringReader);
                var token       = tokenStream.Next();
                while (token != null)
                {
                    var term = token.TermText();
                    terms.Add(term);
                    token = tokenStream.Next();
                }
            }
            var segmentsTerms     = new List<string>();
            var segmentsSortTerms = new List<string>();

            foreach (var i in terms)
            {
                if (_segments.ContainsKey(i) && !segmentsTerms.Contains(i))
                {
                    segmentsTerms.Add(i);
                }
                else
                {
                    segmentsSortTerms.Add(i);
                }
            }
            if (segmentsTerms.Count > 0)
            {
                var booleanQuery = new BooleanQuery();
                foreach (var s in segmentsTerms)
                {
                    booleanQuery.Add(parser.Parse(s), BooleanClause.Occur.MUST);
                }
                query      = booleanQuery;
                filter     = GetFilter(filters, _structRepository, analyzer);
                sortFields = new global::Lucene.Net.Search.SortField[] {
                    new global::Lucene.Net.Search.SortField("FullText", new FullTextFieldComparatorSource(analyzer, _dataRepository, segmentsSortTerms), true)
                };
                return;
            }
            var queries = new List <Query>();

            foreach (var i in terms)
            {
                queries.Add(parser.Parse(i));
            }
            if (queries.Count == 0)
            {
                query = parser.Parse(RemoveAcent(text));
            }
            else
            {
                var booleanQuery = new BooleanQuery();
                foreach (var q in queries)
                {
                    booleanQuery.Add(q, BooleanClause.Occur.MUST);
                }
                query = booleanQuery;
            }
            filter     = null;
            sortFields = null;
            if (query != null)
            {
                filter = GetFilter(filters, _structRepository, analyzer);
            }
            else
            {
                query = GetQuery(filters, _structRepository, analyzer);
            }
        }
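
The conjunctive query assembled above reduces to the following sketch, assuming the same Lucene.Net 2.9 API. The class name, field name and sample terms are placeholders, and the segment/repository lookups, filters and sort fields are omitted.

using System;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Version = Lucene.Net.Util.Version;

internal static class BooleanQuerySketch
{
    private static Query BuildConjunction(string[] terms)
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_29);
        var parser   = new QueryParser(Version.LUCENE_29, "FullText", analyzer);

        // Every parsed term must match, mirroring the MUST clauses above.
        var booleanQuery = new BooleanQuery();
        foreach (var term in terms)
        {
            booleanQuery.Add(parser.Parse(term), BooleanClause.Occur.MUST);
        }
        return booleanQuery;
    }

    private static void Main()
    {
        Console.WriteLine(BuildConjunction(new[] { "casa", "verde" }));
    }
}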
Code Example #5
        public BooleanQuery GetCommQuery(string changecommBody, string changecommGroup, string changecommApps, string changecommCI)
        {
            Lucene.Net.Analysis.Analyzer commsAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            Lucene.Net.Search.BooleanQuery.MaxClauseCount = 25000;


            TextReader textReadCommBody  = new StringReader(changecommBody);
            TextReader textReadCommGroup = new StringReader(changecommGroup);
            TextReader textReadCommApps  = new StringReader(changecommApps);
            TextReader textReadCommCI    = new StringReader(changecommCI);


            Lucene.Net.Analysis.TokenStream tokenizedCommBody  = commsAnalyzer.TokenStream(changecommBody, textReadCommBody);
            Lucene.Net.Analysis.TokenStream tokenizedCommGroup = commsAnalyzer.TokenStream(changecommGroup, textReadCommGroup);
            Lucene.Net.Analysis.TokenStream tokenizedCommApps  = commsAnalyzer.TokenStream(changecommApps, textReadCommApps);
            Lucene.Net.Analysis.TokenStream tokenizedCommCI    = commsAnalyzer.TokenStream(changecommCI, textReadCommCI);


            Lucene.Net.Search.BooleanQuery query1 = new Lucene.Net.Search.BooleanQuery();
            try
            {
                int tokenCount = 0;
                tokenizedCommBody.Reset();

                var termAttrText = tokenizedCommBody.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();

                while (tokenizedCommBody.IncrementToken())
                {
                    tokenCount++;

                    string term = termAttrText.Term;

                    query1.Add(new Lucene.Net.Search.TermQuery(new Term("change_description", term)), Lucene.Net.Search.Occur.SHOULD);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex);
            }

            Lucene.Net.Search.BooleanQuery query2 = new Lucene.Net.Search.BooleanQuery();

            try
            {
                tokenizedCommGroup.Reset();

                var termAttrTicker = tokenizedCommGroup.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();

                int tokenCount = 0;

                while (tokenizedCommGroup.IncrementToken())
                {
                    tokenCount++;

                    string term = termAttrTicker.Term;

                    query2.Add(new Lucene.Net.Search.TermQuery(new Term("change_group", term)), Lucene.Net.Search.Occur.SHOULD);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex);
            }

            Lucene.Net.Search.BooleanQuery query3 = new Lucene.Net.Search.BooleanQuery();

            try
            {
                tokenizedCommApps.Reset();

                var termAttrTicker = tokenizedCommApps.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();

                int tokenCount = 0;

                while (tokenizedCommApps.IncrementToken())
                {
                    tokenCount++;

                    string term = termAttrTicker.Term;

                    query3.Add(new Lucene.Net.Search.TermQuery(new Term("application", term)), Lucene.Net.Search.Occur.SHOULD);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex);
            }


            Lucene.Net.Search.BooleanQuery query4 = new Lucene.Net.Search.BooleanQuery();

            try
            {
                tokenizedCommCI.Reset();

                var termAttrTicker = tokenizedCommCI.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();

                int tokenCount = 0;

                while (tokenizedCommCI.IncrementToken())
                {
                    tokenCount++;

                    string term = termAttrTicker.Term;

                    query4.Add(new Lucene.Net.Search.TermQuery(new Term("change_CI", term)), Lucene.Net.Search.Occur.SHOULD);
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex);
            }

            Lucene.Net.Search.BooleanQuery comQuery = new Lucene.Net.Search.BooleanQuery();


            query4.Boost = 5;
            query3.MinimumNumberShouldMatch = 1;

            comQuery.Add(query1, Lucene.Net.Search.Occur.SHOULD);
            comQuery.Add(query2, Lucene.Net.Search.Occur.SHOULD);
            comQuery.Add(query3, Lucene.Net.Search.Occur.SHOULD);
            comQuery.Add(query4, Lucene.Net.Search.Occur.SHOULD);

            return comQuery;
        }
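
The four loops above all follow the same Lucene.Net 3.0 attribute-based pattern (Reset(), IncrementToken(), ITermAttribute). A minimal sketch of just that pattern, with a placeholder field name and sample text, might look like this.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Version = Lucene.Net.Util.Version;

internal static class TermAttributeSketch
{
    private static BooleanQuery BuildShouldQuery(string fieldName, string text)
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_30);
        TokenStream stream = analyzer.TokenStream(fieldName, new StringReader(text));

        // Attribute-based iteration: fetch the term attribute once, then advance the stream.
        var termAttr = stream.GetAttribute<ITermAttribute>();
        stream.Reset();

        var query = new BooleanQuery();
        while (stream.IncrementToken())
        {
            query.Add(new TermQuery(new Term(fieldName, termAttr.Term)), Occur.SHOULD);
        }
        return query;
    }

    private static void Main()
    {
        Console.WriteLine(BuildShouldQuery("change_description", "sample change description text"));
    }
}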