Example #1
0
        /// <summary>
        /// Runs <paramref name="strQuery"/> against the "ArticleDetail" field of the article index
        /// (folder taken from the "IndexingArticle" app setting) and renders all hits as HTML.
        /// </summary>
        /// <param name="strQuery">Query text; it is handed to the Lucene query parser unchanged.</param>
        /// <returns>
        /// HTML markup: a header with the number of hits, followed by one entry per hit made of
        /// a title link (<c>/?ArticleId=...</c>) and the best highlighted fragment of the article body.
        /// </returns>
        /// <remarks>
        /// Exceptions raised while parsing or searching propagate to the caller; the index reader
        /// is closed in every case.
        /// </remarks>
        public string Search(string strQuery)
        {
            System.Text.StringBuilder html = new System.Text.StringBuilder();

            // One analyzer serves both query parsing and per-hit tokenizing.
            Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            Lucene.Net.Index.IndexReader    reader   = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
            Lucene.Net.Search.IndexSearcher searcher = null;

            try
            {
                Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", analyzer);
                Lucene.Net.Search.Query             query  = parser.Parse(strQuery);
                searcher = new Lucene.Net.Search.IndexSearcher(reader);
                Lucene.Net.Search.Hits hits = searcher.Search(query);

                Lucene.Net.Highlight.QueryScorer         score       = new Lucene.Net.Highlight.QueryScorer(query);
                Lucene.Net.Highlight.SimpleHTMLFormatter formater    = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
                Lucene.Net.Highlight.Highlighter         highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);

                int total = hits.Length();
                html.Append("<div align='right' style='background-color:#F0F7F9; padding-right:15px' height='30px'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #005482; FONT-FAMILY: arial'>Kết quả tìm thấy : ");
                html.Append(total);
                html.Append("  </font></div>");
                html.Append("<div style='padding: 10px 10px 10px 10px;'>");

                for (int i = 0; i < total; i++)
                {
                    // Load the stored document once per hit instead of once per field.
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    string id    = doc.Get("ArticleId");
                    string title = doc.Get("ArticleTitle");
                    // A document without the field yields null, which StringReader rejects.
                    string detail = doc.Get("ArticleDetail") ?? string.Empty;

                    Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream("ArticleDetail", new System.IO.StringReader(detail));

                    // NOTE(review): id, title and the fragment are written into the markup without
                    // HTML encoding - confirm the indexed values are trusted/pre-encoded.
                    html.AppendFormat("<div align='left'><font style='FONT-WEIGHT: bold; FONT-SIZE: 10pt; COLOR: #5b5b5b; FONT-FAMILY: arial'><a href='/?ArticleId={0}'>{1}</a></font>", id, title);
                    html.AppendFormat("<div align='left'><font style='FONT-SIZE: 9pt' face='Arial' color='#005482'>...{0}...</font></div></div></br>", highlighter.GetBestFragment(ts, detail));
                }

                html.Append("</div>");
            }
            finally
            {
                // Release index resources even when parsing, searching or highlighting throws.
                if (searcher != null)
                {
                    searcher.Close();
                }
                reader.Close();
            }

            return html.ToString();
        }
Example #2
0
        /// <summary>
        /// Searches the article index for <paramref name="strQuery"/>, restricted to published
        /// articles that belong to pages of the current sub-domain ("HomePage" when the sub-domain
        /// name is empty), and returns one page (20 hits) of results as JSON.
        /// </summary>
        /// <param name="strQuery">Query text; inserted as-is into the "ArticleDetail" clause of the Lucene query.</param>
        /// <param name="index">1-based page number, as text.</param>
        /// <returns>
        /// A JSON object with "Count" (total number of hits) and "Data" (the <c>SearchArticle</c>
        /// items of the requested page). An empty string is returned when <paramref name="index"/>
        /// is not a positive integer, when no article qualifies for the sub-domain, or when the
        /// search fails.
        /// </returns>
        public string SearchAndPaging(string strQuery, string index)
        {
            const int pageSize = 20;

            try
            {
                int pageIndex;
                if (!Int32.TryParse(index, out pageIndex) || pageIndex < 1)
                {
                    // A missing or non-positive page number cannot address any hit.
                    return string.Empty;
                }

                PSCPortal.CMS.ArticleCollection ArticleList = ArticleCollection.GetArticleCollectionPublish();
                string nameSub = Libs.Ultility.GetSubDomain();
                if (nameSub == string.Empty)
                {
                    nameSub = "HomePage";
                }
                SubDomain      subDomain     = PSCPortal.Engine.SubDomain.GetSubByName(nameSub);
                PageCollection pagesBelongTo = subDomain.GetPagesBelongTo();

                // Collect ids first and join once; trimming a trailing " OR " after every page
                // would eat into earlier ids whenever a page contributes no articles.
                List <string> articleIds = new List <string>();
                foreach (var page in pagesBelongTo)
                {
                    foreach (var article in ArticleList.Where(a => a.PageId == page.Id))
                    {
                        articleIds.Add(article.Id.ToString());
                    }
                }
                if (articleIds.Count == 0)
                {
                    // An empty "ArticleId:( )" clause cannot be parsed; nothing can match anyway.
                    return string.Empty;
                }

                // NOTE(review): strQuery is spliced into the query text unescaped - a stray ')' or
                // other query syntax in user input can alter or break the query.
                string strSearch = " ArticleDetail:(" + strQuery + ") AND ArticleId:( " + string.Join(" OR ", articleIds.ToArray()) + " )";

                List <SearchArticle> searchArticleList = new List <SearchArticle>();
                int totalHits;

                Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
                Lucene.Net.Index.IndexReader    reader   = Lucene.Net.Index.IndexReader.Open(Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["IndexingArticle"]));
                Lucene.Net.Search.IndexSearcher searcher = null;
                try
                {
                    Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("ArticleDetail", analyzer);
                    Lucene.Net.Search.Query             query  = parser.Parse(strSearch);
                    searcher = new Lucene.Net.Search.IndexSearcher(reader);
                    Lucene.Net.Search.Hits hits = searcher.Search(query);

                    Lucene.Net.Highlight.QueryScorer         score       = new Lucene.Net.Highlight.QueryScorer(query);
                    Lucene.Net.Highlight.SimpleHTMLFormatter formater    = new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class='Highlight'>", "</span>");
                    Lucene.Net.Highlight.Highlighter         highlighter = new Lucene.Net.Highlight.Highlighter(formater, score);

                    totalHits = hits.Length();

                    // 64-bit offsets so that a very large page number cannot wrap to a negative index.
                    long firstHit = ((long)pageIndex - 1) * pageSize;
                    long lastHit  = Math.Min(firstHit + pageSize, (long)totalHits);
                    for (long i = firstHit; i < lastHit; i++)
                    {
                        Lucene.Net.Documents.Document doc = hits.Doc((int)i);
                        // A document without the field yields null, which StringReader rejects.
                        string detail = doc.Get("ArticleDetail") ?? string.Empty;
                        Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream("ArticleDetail", new System.IO.StringReader(detail));

                        SearchArticle searchArticle = new SearchArticle();
                        searchArticle.Id        = doc.Get("ArticleId");
                        searchArticle.Title     = doc.Get("ArticleTitle");
                        searchArticle.Highligth = highlighter.GetBestFragment(ts, detail);
                        searchArticleList.Add(searchArticle);
                    }
                }
                finally
                {
                    // Release index resources even when the search fails part-way.
                    if (searcher != null)
                    {
                        searcher.Close();
                    }
                    reader.Close();
                }

                JavaScriptSerializer        serializer = new JavaScriptSerializer();
                Dictionary <string, object> resultDic  = new Dictionary <string, object>();
                resultDic["Count"] = totalHits;
                resultDic["Data"]  = searchArticleList;
                return serializer.Serialize(resultDic);
            }
            catch (Exception ex)
            {
                // Callers still get an empty string on failure, but the cause is now recorded
                // instead of being silently discarded.
                System.Diagnostics.Trace.TraceError("SearchAndPaging failed: {0}", ex);
                return string.Empty;
            }
        }
		/// <summary>
		/// Checks that, with the highlighter limited to analyzing 100 bytes of a document, the
		/// best fragment returned for a long text stays shorter than that limit - both when the
		/// only matching token is at the start and when a second one sits far beyond the limit.
		/// </summary>
		public virtual void  TestMaxSizeHighlightTruncates()
		{
			string matchingToken = "goodtoken";
			string[] ignoredTokens = new string[]{"stoppedtoken"};
			
			// Highlighter scoring a single-term query; NullFragmenter is set as the text fragmenter.
			Highlighter highlighter = new Highlighter(
				new SimpleHTMLFormatter(),
				new QueryScorer(new TermQuery(new Term("data", matchingToken))));
			highlighter.SetTextFragmenter(new NullFragmenter());
			
			// Text under test: the matching token followed by 10000 stop words.
			System.Text.StringBuilder text = new System.Text.StringBuilder(matchingToken);
			for (int repeat = 0; repeat < 10000; repeat++)
			{
				text.Append(" ").Append(ignoredTokens[0]);
			}
			
			highlighter.SetMaxDocBytesToAnalyze(100);
			string fragment = highlighter.GetBestFragment(new StandardAnalyzer(ignoredTokens), "data", text.ToString());
			Assert.IsTrue(fragment.Length < highlighter.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");
			
			// Append one more matching token, placed well past the portion of the text that is
			// analyzed (behind the long run of stop words and whitespace), and check again.
			text.Append(" ").Append(matchingToken);
			fragment = highlighter.GetBestFragment(new StandardAnalyzer(ignoredTokens), "data", text.ToString());
			Assert.IsTrue(fragment.Length < highlighter.GetMaxDocBytesToAnalyze(), "Matched text should be no more than 100 chars in length ");
		}
        /// <summary>
        /// Searches the "content" field of the index for the words of <paramref name="keyword"/>
        /// as one phrase and returns the requested page of matching content items, each carrying
        /// a highlighted excerpt in <c>HighlightText</c>.
        /// </summary>
        /// <param name="keyword">Space-separated search words; they are lower-cased before matching.</param>
        /// <param name="page">1-based page number; values below 1 yield an empty page.</param>
        /// <param name="pageSize">Maximum number of items per page.</param>
        /// <param name="totals">Receives the total number of hits (0 for a null or blank keyword).</param>
        /// <returns>The content items of the requested page; never null.</returns>
        public IEnumerable<Content> Search(string keyword, int page, int pageSize, out int totals)
        {
            List<Content> result = new List<Content>();
            totals = 0;

            if (keyword == null || keyword.Trim().Length == 0)
            {
                // Nothing to search for; also avoids a null dereference on Split below.
                return result.AsEnumerable();
            }

            lock (locker)
            {
                IndexReader reader = IndexReader.Open(DBNLConfigurationManager.LuceneElement.IndexingFolder);
                IndexSearcher searcher = null;
                try
                {
                    searcher = new IndexSearcher(reader);

                    // Build the phrase from the non-empty words only: repeated spaces would
                    // otherwise add empty terms to the phrase.
                    PhraseQuery phrase = new PhraseQuery();
                    foreach (string word in keyword.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                    {
                        phrase.Add(new Term("content", word.ToLower()));
                    }

                    BooleanQuery myquery = new BooleanQuery();
                    myquery.Add(phrase, BooleanClause.Occur.MUST);

                    Hits hits = searcher.Search(myquery);
                    totals = hits.Length();

                    Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
                        "<span class=\"Highlight\">",
                        "</span>");
                    Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
                    Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(myquery);
                    Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
                    highlighter.SetTextFragmenter(fragmenter);

                    // Clamp the window so that page < 1 produces an empty page rather than a
                    // negative hit index.
                    int firstHit = Math.Max(0, (page - 1) * pageSize);
                    int lastHit = Math.Min(page * pageSize, totals);
                    for (int i = firstHit; i < lastHit; i++)
                    {
                        Document doc = hits.Doc(i);
                        // A document without the field yields null, which StringReader rejects.
                        string raw_text = doc.Get("content") ?? string.Empty;

                        Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(raw_text));
                        // NOTE(review): apostrophes are doubled as in SQL escaping although the text
                        // is only stored on the item here - kept as-is; confirm it is still wanted.
                        string highlighted_text = highlighter.GetBestFragments(stream, raw_text, 1, "...").Replace("'", "''");

                        if (highlighted_text == "") // sometimes the highlighter fails to emit text...
                        {
                            highlighted_text = raw_text.Replace("'", "''");
                        }
                        if (highlighted_text.Length > 500)
                        {
                            highlighted_text = highlighted_text.Substring(0, 500);
                        }

                        Content content = new ContentService().GetItem(int.Parse(doc.Get("id")));
                        if (content == null)
                        {
                            // Presumably the index references an item that no longer exists
                            // (TODO confirm); skip it rather than fail the whole search.
                            continue;
                        }
                        content.HighlightText = highlighted_text;
                        result.Add(content);
                    }
                }
                finally
                {
                    // Release index resources even when searching or highlighting throws.
                    if (searcher != null)
                    {
                        searcher.Close();
                    }
                    reader.Close();
                }
            }

            return result.AsEnumerable();
        }
        /// <summary>
        /// Parses <paramref name="keyword"/> as a Lucene query on the "text" field, runs it, and
        /// returns highlighted excerpts of the "raw_text" field for up to 100 distinct document ids.
        /// </summary>
        /// <param name="keyword">Query text in Lucene query-parser syntax.</param>
        /// <returns>
        /// The excerpts concatenated in hit order, each followed by <c>')</c> and a newline.
        /// An empty string is returned when <paramref name="keyword"/> is null or empty, when it
        /// cannot be parsed, or when the search itself fails.
        /// </returns>
        public string Query(string keyword)
        {
            if (string.IsNullOrEmpty(keyword))
            {
                return string.Empty;
            }

            Lucene.Net.Search.Query query;
            try
            {
                Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("text", analyzer);
                query = parser.Parse(keyword);
            }
            catch (Exception ex)
            {
                // An unparsable query yields no results; continuing with a null query would only
                // fail later with a NullReferenceException.
                System.Diagnostics.Trace.TraceError("Query: cannot parse '{0}': {1}", keyword, ex);
                return string.Empty;
            }

            lock (locker)
            {
                Lucene.Net.Search.Hits hits;
                try
                {
                    // The searcher is created on first use and then kept in the field.
                    if (searcher == null)
                    {
                        searcher = new Lucene.Net.Search.IndexSearcher(DBNLConfigurationManager.LuceneElement.IndexingFolder);
                    }

                    hits = searcher.Search(query);
                }
                catch (Exception ex)
                {
                    System.Diagnostics.Trace.TraceError("Query: search failed for '{0}': {1}", keyword, ex);
                    return string.Empty;
                }

                Lucene.Net.Highlight.Formatter formatter = new Lucene.Net.Highlight.SimpleHTMLFormatter(
                    "<span style=\"background:yellow;color:red;\">",
                    "</span>");

                Lucene.Net.Highlight.SimpleFragmenter fragmenter = new Lucene.Net.Highlight.SimpleFragmenter(400);
                Lucene.Net.Highlight.QueryScorer scorer = new Lucene.Net.Highlight.QueryScorer(query);
                Lucene.Net.Highlight.Highlighter highlighter = new Lucene.Net.Highlight.Highlighter(formatter, scorer);
                highlighter.SetTextFragmenter(fragmenter);

                StringBuilder sb = new StringBuilder();
                HashSet<string> seenIds = new HashSet<string>();

                // Emit one excerpt per distinct document id, stopping after 100 ids.
                for (int i = 0; i < hits.Length() && seenIds.Count < 100; i++)
                {
                    Lucene.Net.Documents.Document doc = hits.Doc(i);
                    if (!seenIds.Add(doc.Get("id")))
                    {
                        continue; // this id has already produced an excerpt
                    }

                    // A document without the field yields null, which StringReader rejects.
                    string raw_text = doc.Get("raw_text") ?? string.Empty;

                    Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(raw_text));
                    string highlighted_text = highlighter.GetBestFragments(stream, raw_text, 1, "...").Replace("'", "''");

                    if (highlighted_text == "") // sometimes the highlighter fails to emit text...
                    {
                        highlighted_text = raw_text.Replace("'", "''");
                    }
                    if (highlighted_text.Length > 3000)
                    {
                        highlighted_text = highlighted_text.Substring(0, 3000);
                    }

                    sb.Append(highlighted_text);
                    // NOTE(review): the doubled apostrophes and the trailing "')" look like
                    // leftovers of building a SQL statement - output kept unchanged for callers.
                    sb.Append("'");
                    sb.Append(")\n");
                }

                return sb.ToString();
            }
        }
Example #6
0
        /// <summary>
        /// Stems <paramref name="searchText"/>, runs it as a Lucene query and returns up to 100
        /// distinct hits, each with score, subject, entry type and a highlighted excerpt.
        /// </summary>
        /// <param name="searchText">Search phrase as entered by the user.</param>
        /// <returns>
        /// A <c>SearchModel</c>. When the phrase is empty (before or after stemming) or the search
        /// throws, only <c>Message</c> is set. Otherwise <c>List</c> and <c>SearchPhrase</c> are
        /// set, and <c>Message</c> is set as well when the list is empty.
        /// </returns>
        public SearchModel Search(string searchText)
        {
            const string emptyQueryMessage = "Įveskite paieškos užklausą.";
            var model = new SearchModel();

            if (string.IsNullOrEmpty(searchText))
            {
                model.Message = emptyQueryMessage;
                return model;
            }

            string stemmed = new LithuanianStemmer().Stem(searchText.Trim());
            if (string.IsNullOrEmpty(stemmed))
            {
                model.Message = emptyQueryMessage;
                return model;
            }

            Lucene.Net.Search.Hits hits = null;
            try
            {
                // A phrase ending in a letter gets a trailing wildcard.
                char lastChar = stemmed[stemmed.Length - 1];
                if (char.IsLetter(lastChar))
                {
                    stemmed = stemmed + "*";
                }

                query = parser.Parse(stemmed);

                // The searcher is created on first use and then kept in the field.
                if (searcher == null)
                {
                    searcher = new Lucene.Net.Search.IndexSearcher(CustomAppSettings.SearchIndexFolder);
                }

                hits = searcher.Search(query);
            }
            catch (Exception e)
            {
                model.Message = "Paieška nepavyko. Pataisykite užklausą. Klaidos pranešimas: " + e.Message;
                return model;
            }

            // Highlighter scoring against the rewritten query, with 100-character fragments.
            Lucene.Net.Highlight.Formatter htmlFormatter =
                new Lucene.Net.Highlight.SimpleHTMLFormatter("<span class=\"highlightResult\">", "</span>");
            var highlighter = new Lucene.Net.Highlight.Highlighter(
                htmlFormatter,
                new Lucene.Net.Highlight.QueryScorer(searcher.Rewrite(query)));
            highlighter.SetTextFragmenter(new Lucene.Net.Highlight.SimpleFragmenter(100));

            var seenIds = new Dictionary <string, int>();
            var entries = new List <SearchIndexModel>();

            // Walk the hits in order, keeping the first hit of each id, until 100 ids were seen.
            for (int hitIndex = 0; hitIndex < hits.Length() && seenIds.Count < 100; hitIndex++)
            {
                Lucene.Net.Documents.Document doc = hits.Doc(hitIndex);
                string id = doc.Get("id");
                if (seenIds.ContainsKey(id))
                {
                    continue;
                }
                seenIds[id] = 1;

                var entry = new SearchIndexModel
                {
                    Id      = id,
                    Score   = hits.Score(hitIndex),
                    Subject = doc.Get("subject"),
                    Type    = (EntryTypes)Enum.Parse(typeof(EntryTypes), doc.Get("type"))
                };

                // The stored text is HTML-encoded before it is tokenized and highlighted.
                string encodedText = HttpUtility.HtmlEncode(doc.Get("raw_text"));
                Lucene.Net.Analysis.TokenStream tokens =
                    analyzer.TokenStream("text", new System.IO.StringReader(encodedText));

                string excerpt = highlighter.GetBestFragments(tokens, encodedText, 3, "...").Replace("'", "''");
                if (excerpt == "")
                {
                    // The highlighter produced nothing; fall back to the full encoded text.
                    excerpt = encodedText.Replace("'", "''");
                }
                if (excerpt.Length > 3000)
                {
                    excerpt = excerpt.Substring(0, 3000);
                }

                entry.HighlightedText = excerpt;
                entries.Add(entry);
            }

            model.List         = entries;
            model.SearchPhrase = searchText;
            if (entries.Count == 0)
            {
                model.Message = string.Format("Įrašų pagal užklausą '{0}' nerasta. Patikslinkite paieškos duomenis.", searchText);
            }

            return model;
        }