예제 #1
0
        /// <summary>
        /// Appends the HTML markup for one search result to <paramref name="html"/>:
        /// a link showing the title, the abstract, and a green footer line with the
        /// URL host, the content length and the article date.
        /// </summary>
        /// <param name="news">The result to render; its Url, Title, Content, Abstract and Time are read.</param>
        /// <param name="html">The builder that receives the markup.</param>
        /// <exception cref="UriFormatException">
        /// Thrown by the <see cref="Uri"/> constructor when <c>news.Url</c> is not a valid absolute URI.
        /// The link markup has already been appended at that point.
        /// </exception>
        /// <remarks>
        /// The opening <c>&lt;div&gt;</c> is not closed inside this method.
        /// </remarks>
        private void GetLine(TNews news, StringBuilder html)
        {
            // NOTE(review): Url and Title are inserted into the markup without HTML
            // encoding - confirm they are trusted or already encoded upstream.
            html.Append("<div>");
            html.Append("<a id=dfs6 href='" + news.Url + "' target='_blank'>");
            html.Append("<font size=\"3\">" + news.Title + "</font>");
            html.Append("</a><br>");

            Uri uri  = new Uri(news.Url);
            int size = news.Content.Length;

            // Append, not AppendFormat: the abstract is data, not a composite format
            // string. Any '{' or '}' in the article text would otherwise raise a
            // FormatException (and "{{" would be collapsed to "{").
            html.Append(news.Abstract);
            html.Append("<br>");
            html.AppendFormat(@"<font color=#008000>{0}/ {1} {2}</font>", uri.Host, size, news.Time.ToString("yyyy-M-d"));
        }
예제 #2
0
파일: Index.cs 프로젝트: wj60387/hubble
        /// <summary>
        /// Runs a full-text query against the "News" table and returns one page of
        /// results, each with a highlighted title and a highlighted abstract.
        /// </summary>
        /// <param name="indexDir">Not used by this implementation.</param>
        /// <param name="searchType">
        /// "Precise" (uses <c>contains</c>), "Fuzzy" (uses <c>match</c>) or "Like" (uses <c>like</c>).
        /// </param>
        /// <param name="q">The query text. It is analyzed on the server for Precise/Fuzzy and wrapped in '*' for Like.</param>
        /// <param name="pageLen">Number of rows per page.</param>
        /// <param name="pageNo">Page number; page 1 maps to rows 0 .. pageLen - 1.</param>
        /// <param name="sortBy">Order-by expression; "score" is used when null or empty.</param>
        /// <param name="recCount">
        /// Receives <c>MinimumCapacity</c> of the first result table.
        /// NOTE(review): presumably Hubble reports the total match count there - confirm.
        /// </param>
        /// <param name="elapsedMilliseconds">Receives the time spent connecting, querying and highlighting.</param>
        /// <param name="sql">Receives the <c>Sql</c> property of the executed command.</param>
        /// <returns>The rows of the requested page converted to <c>TNews</c> objects.</returns>
        /// <exception cref="ArgumentException"><paramref name="searchType"/> is not one of the supported values.</exception>
        public static List <TNews> Search(String indexDir, string searchType, String q, int pageLen, int pageNo, string sortBy,
                                          out int recCount, out long elapsedMilliseconds, out string sql)
        {
            List <TNews> result = new List <TNews>();

            System.Configuration.ConnectionStringSettings connString =
                System.Web.Configuration.WebConfigurationManager.ConnectionStrings["News"];

            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();

            string connectString = connString.ConnectionString;

            System.Data.DataSet ds;

            sw.Start();

            using (HubbleAsyncConnection conn = new HubbleAsyncConnection(connectString))
            {
                conn.Open();

                // Called before the highlighters are built below, which read
                // _TitleAnalyzerName / _ContentAnalyzerName.
                GetAnalyzerName(conn, "News");

                if (string.IsNullOrEmpty(sortBy))
                {
                    sortBy = "score";
                }

                // Required by the out parameter of the call below; not used afterwards.
                string wordssplitbyspace;

                HubbleCommand matchCmd = new HubbleCommand(conn);

                string matchString = matchCmd.GetKeywordAnalyzerStringFromServer("News",
                                                                                 "Content", q, int.MaxValue, out wordssplitbyspace);

                HubbleCommand cmd = new HubbleCommand(BuildNewsSearchSql(searchType, sortBy), conn);

                // All four parameters are always supplied; each statement uses only
                // the ones it names.
                cmd.Parameters.Add("@begin", (pageNo - 1) * pageLen);
                cmd.Parameters.Add("@end", pageNo * pageLen - 1);
                cmd.Parameters.Add("@matchString", matchString);
                cmd.Parameters.Add("@likeString", "*" + q.Trim() + "*");

                cmd.CacheTimeout = CacheTimeout;

                sql = cmd.Sql;

                ds = cmd.Query(CacheTimeout);
            }

            recCount = ds.Tables[0].MinimumCapacity;

            foreach (System.Data.DataRow row in ds.Tables[0].Rows)
            {
                TNews news = new TNews();
                news.Title   = row["Title"].ToString();
                news.Content = row["Content"].ToString();
                news.Url     = row["Url"].ToString();
                news.Time    = (DateTime)row["Time"];

                // NOTE(review): formatter and highlighters are rebuilt for every row,
                // as before. If Highlighter is safe to reuse across calls they could
                // be created once before the loop - confirm before hoisting.
                SimpleHTMLFormatter simpleHTMLFormatter =
                    new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

                Highlighter titleHighlighter   = CreateNewsHighlighter(_TitleAnalyzerName, simpleHTMLFormatter);
                Highlighter contentHighlighter = CreateNewsHighlighter(_ContentAnalyzerName, simpleHTMLFormatter);

                news.Abstract         = contentHighlighter.GetBestFragment(q, news.Content);
                news.TitleHighLighter = titleHighlighter.GetBestFragment(q, news.Title);

                // Fall back to the plain title when the highlighter yields nothing.
                if (string.IsNullOrEmpty(news.TitleHighLighter))
                {
                    news.TitleHighLighter = news.Title;
                }

                result.Add(news);
            }

            sw.Stop();
            elapsedMilliseconds = sw.ElapsedMilliseconds;

            return(result);
        }

        // Builds the select statement for the given search type.
        // Throws ArgumentException when searchType is not "Precise", "Fuzzy" or "Like".
        private static string BuildNewsSearchSql(string searchType, string sortBy)
        {
            // NOTE(review): sortBy is concatenated into the statement text. If it can
            // come from user input it must be validated against the allowed order-by
            // expressions before reaching this point.
            switch (searchType)
            {
                case "Precise":
                    return "select between @begin to @end * from News where content contains @matchString or title^2 contains @matchString order by " + sortBy;

                case "Fuzzy":
                    return "select between @begin to @end * from News where content match @matchString or title^2 match @matchString order by " + sortBy;

                case "Like":
                    return "select between @begin to @end * from News where content like @likeString or title^2 like @likeString order by " + sortBy;

                default:
                    throw new ArgumentException(string.Format("Invalid search type: {0}", searchType));
            }
        }

        // Creates a highlighter (fragment size 50) backed by the analyzer that matches
        // analyzerName; unknown or null names fall back to SimpleAnalyzer.
        private static Highlighter CreateNewsHighlighter(string analyzerName, SimpleHTMLFormatter formatter)
        {
            Highlighter highlighter;

            // Analyzer names are identifiers, so compare ordinally: a culture-sensitive
            // comparison can fail to match "EnglishAnalyzer" under cultures with special
            // casing rules for 'i' (e.g. tr-TR).
            if (string.Equals(analyzerName, "PanGuSegment", StringComparison.OrdinalIgnoreCase))
            {
                highlighter = new Highlighter(formatter, new PanGuAnalyzer());
            }
            else if (string.Equals(analyzerName, "EnglishAnalyzer", StringComparison.OrdinalIgnoreCase))
            {
                highlighter = new Highlighter(formatter, new Hubble.Core.Analysis.EnglishAnalyzer());
            }
            else
            {
                highlighter = new Highlighter(formatter, new Hubble.Core.Analysis.SimpleAnalyzer());
            }

            highlighter.FragmentSize = 50;

            return highlighter;
        }