Example #1
        /// <summary>
        /// Searches for documents the reader might also like.
        /// </summary>
        /// <param name="title">title of the current document, used as the query text</param>
        /// <param name="pageNo">zero-based page number</param>
        /// <param name="pageLen">number of records per page</param>
        /// <param name="docId">DocumentId of the current document, excluded from the results</param>
        /// <returns>list of matching documents</returns>
        public ArrayList SearchMaybeLike(string title, int pageNo, int pageLen, int docId)
        {
            string connStr = Helper.ConfigHelper.HubbleConnStr;            
            DataSet ds;
            ArrayList rList = new ArrayList();
            using (HubbleConnection conn = new HubbleConnection(connStr))
            {
                #region hubble
                conn.Open();
                GetAnalyzerName(conn, TableName);
                string wordssplitbyspace;
                HubbleCommand matchCmd = new HubbleCommand(conn);

                string matchString = matchCmd.GetKeywordAnalyzerStringFromServer(
                    TableName, "Title", title, int.MaxValue, out wordssplitbyspace);

                HubbleDataAdapter da = new HubbleDataAdapter();
                da.SelectCommand = new HubbleCommand(
                    string.Format(
                        "select between @begin to @end * from {0} where IsAudit=1 and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) "
                        + " and DocumentId<>@docid order by score desc"
                        , TableName),
                        conn);
                da.SelectCommand.Parameters.Add("@begin", pageNo * pageLen);
                da.SelectCommand.Parameters.Add("@end", (pageNo + 1) * pageLen - 1);
                da.SelectCommand.Parameters.Add("@matchString", matchString);
                da.SelectCommand.Parameters.Add("@docid", docId);

                da.SelectCommand.CacheTimeout = 0;

                HubbleCommand cmd = da.SelectCommand;
                ds = cmd.Query(0); // execute the query (cache timeout 0)

                // Collect the internal DocId of every hit; docids is not used further in this method.
                long[] docids = new long[ds.Tables[0].Rows.Count];

                int i = 0;

                foreach (System.Data.DataRow row in ds.Tables[0].Rows)
                {
                    docids[i++] = (long)row["DocId"];
                }

                rList.AddRange(DataToList(ds, title, false));
                
                #endregion
            }
               
            
            return rList;
        }
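
The paging convention used here: Hubble's "select between @begin to @end" clause selects an inclusive, zero-based row range, so the zero-based pageNo in this method maps to rows pageNo*pageLen through (pageNo+1)*pageLen - 1. A minimal sketch of that mapping (PageToRange is a hypothetical helper introduced only for this illustration):

        // Illustrative only: maps a zero-based page number to the inclusive row range
        // expected by Hubble's "select between @begin to @end" paging syntax.
        static void PageToRange(int pageNo, int pageLen, out int begin, out int end)
        {
            begin = pageNo * pageLen;            // first row of the page
            end = (pageNo + 1) * pageLen - 1;    // last row of the page (inclusive)
        }

        // PageToRange(0, 10, out b, out e)  =>  b = 0,  e = 9
        // PageToRange(2, 10, out b, out e)  =>  b = 20, e = 29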
Example #2
        private string descAnalyzerName; // name of the analyzer for the Description field

        public ArrayList Search(string keyWords, int pageNo, int pageLen, out int recCount, out long elapsedMilliseconds)
        {
            string connStr = Helper.ConfigHelper.HubbleConnStr;
            System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
            watch.Start();
            DataSet ds;
            using (HubbleConnection conn = new HubbleConnection(connStr))
            {
                #region hubble
                conn.Open();
                GetAnalyzerName(conn, TableName);
                string wordssplitbyspace;
                HubbleCommand matchCmd = new HubbleCommand(conn);
                string matchString = matchCmd.GetKeywordAnalyzerStringFromServer(
                    TableName, "Title", keyWords, int.MaxValue, out wordssplitbyspace);
                HubbleDataAdapter da = new HubbleDataAdapter();
                string sqlTemplate = string.Empty;
                if (string.IsNullOrEmpty(this.DocType))
                    sqlTemplate = "select between @begin to @end * from {0} where IsAudit=1 and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) order by score desc";
                else
                    // Note: this.DocType is concatenated straight into the SQL text here;
                    // see the parameterized sketch after this method for a safer variant.
                    sqlTemplate = "select between @begin to @end * from {0} where IsAudit=1 and DocType='" + this.DocType + "' and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) order by score desc";
                
                da.SelectCommand = new HubbleCommand(string.Format(sqlTemplate, TableName), conn);
                da.SelectCommand.Parameters.Add("@begin", pageNo * pageLen);
                da.SelectCommand.Parameters.Add("@end", (pageNo + 1) * pageLen - 1);
                da.SelectCommand.Parameters.Add("@matchString", matchString);
                

                da.SelectCommand.CacheTimeout = 0;

                HubbleCommand cmd = da.SelectCommand;
                ds = cmd.Query(0); // execute the query (cache timeout 0)

                // docids is not used in this method; the DocumentId collection below is commented out.
                long[] docids = new long[ds.Tables[0].Rows.Count];

                int i = 0;

                //foreach (System.Data.DataRow row in ds.Tables[0].Rows)
                //{
                //    docids[i++] = (long)row["DocumentId"];
                //}
                #endregion
            }
            recCount = ds.Tables[0].MinimumCapacity; // Hubble reports the total hit count via MinimumCapacity
            ArrayList rList = DataToList(ds, keyWords, true);
            watch.Stop();
            elapsedMilliseconds = watch.ElapsedMilliseconds;
            return rList;
        }
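
A note on the DocType branch above: the value of this.DocType is concatenated directly into the SQL text. The sketch below shows the same filter written with a named parameter instead, assuming HubbleCommand parameters can be applied to an ordinary column filter the same way @matchString is applied above (@docType is a name chosen for this illustration):

        // Sketch only: the DocType filter expressed as a parameter rather than string concatenation.
        string sqlTemplate =
            "select between @begin to @end * from {0} where IsAudit=1 and DocType=@docType "
            + "and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) "
            + "order by score desc";

        da.SelectCommand = new HubbleCommand(string.Format(sqlTemplate, TableName), conn);
        da.SelectCommand.Parameters.Add("@begin", pageNo * pageLen);
        da.SelectCommand.Parameters.Add("@end", (pageNo + 1) * pageLen - 1);
        da.SelectCommand.Parameters.Add("@matchString", matchString);
        da.SelectCommand.Parameters.Add("@docType", this.DocType); // hypothetical parameter name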
Example #3
 /// <summary>
 /// Gets the keywords formatted for a match query.
 /// e.g. '要出发旅行网' --> '要出发^rank^0 旅行网^rank^1'
 /// where rank is the grade of the term's part of speech after segmentation and the trailing
 /// number is the term's position; both provide parameters for relevance scoring.
 /// </summary>
 /// <param name="keyWords">keywords to search for</param>
 /// <param name="tableName">name of the Hubble table (index) to search</param>
 /// <param name="fieldName">field to be searched (for a multi-field search, pass just one of the fields)</param>
 /// <returns>the analyzer-formatted match string</returns>
 public string GetKeywordAnalyzerStringFromat(string keyWords, string tableName, string fieldName)
 {
     HubbleCommand matchCmd = CreateHubbleCommand();
     string wordssplitbyspace;
     string matchString;
     try
     {
         matchString = matchCmd.GetKeywordAnalyzerStringFromServer(tableName,
               fieldName, keyWords, int.MaxValue, out wordssplitbyspace);
     }
     catch (Exception)
     {
         // rethrow unchanged so the original exception type and stack trace are preserved
         throw;
     }
     return matchString;
 }
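
For context, a sketch of how the string returned by GetKeywordAnalyzerStringFromat would typically be consumed, modeled on the Search methods above; the connection string, the "News" table and its field names are placeholders for this illustration:

     // Sketch: feed the analyzer-formatted keyword string into a match query.
     using (HubbleConnection conn = new HubbleConnection(connStr)) // connStr: your Hubble connection string
     {
         conn.Open();

         string matchString = GetKeywordAnalyzerStringFromat("要出发旅行网", "News", "Title");

         HubbleCommand cmd = new HubbleCommand(
             "select between @begin to @end * from News "
             + "where Title^2 match @matchString or Content match @matchString order by score desc",
             conn);
         cmd.Parameters.Add("@begin", 0);  // rows 0..9: the first 10 hits
         cmd.Parameters.Add("@end", 9);
         cmd.Parameters.Add("@matchString", matchString);

         DataSet ds = cmd.Query(0);        // cache timeout 0
     }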
Example #4
File: Index.cs  Project: wj60387/hubble
        public static List <TNews> Search(String indexDir, string searchType, String q, int pageLen, int pageNo, string sortBy,
                                          out int recCount, out long elapsedMilliseconds, out string sql)
        {
            List <TNews> result = new List <TNews>();

            string keywords = q;

            //string matchString = GetKeyWordsSplit(q, new PanGuAnalyzer(), out wordssplitbyspace);

            System.Configuration.ConnectionStringSettings connString =
                System.Web.Configuration.WebConfigurationManager.ConnectionStrings["News"];

            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();

            string connectString = connString.ConnectionString;

            System.Data.DataSet ds;
            //System.Data.DataTable titleWordsPositions;
            //System.Data.DataTable contentWordsPositions;

            sw.Start();

            using (HubbleAsyncConnection conn = new HubbleAsyncConnection(connectString))
            {
                conn.Open();

                GetAnalyzerName(conn, "News");

                if (string.IsNullOrEmpty(sortBy))
                {
                    sortBy = "score";
                }

                string wordssplitbyspace;

                HubbleCommand matchCmd = new HubbleCommand(conn);

                string matchString = matchCmd.GetKeywordAnalyzerStringFromServer("News",
                                                                                 "Content", keywords, int.MaxValue, out wordssplitbyspace);

                //HubbleCommand cmd = new HubbleCommand("select between {0} to {1} * from News where content match {2} or title^2 match {2} order by " + sortBy,
                //    conn, (pageNo - 1) * pageLen, pageNo * pageLen - 1, matchString);


                HubbleDataAdapter adapter = new HubbleDataAdapter();

                if (searchType == "Precise")
                {
                    adapter.SelectCommand = new HubbleCommand("select between @begin to @end * from News where content contains @matchString or title^2 contains @matchString order by " + sortBy,
                                                              conn);
                }
                else if (searchType == "Fuzzy")
                {
                    adapter.SelectCommand = new HubbleCommand("select between @begin to @end * from News where content match @matchString or title^2 match @matchString order by " + sortBy,
                                                              conn);
                }
                else if (searchType == "Like")
                {
                    adapter.SelectCommand = new HubbleCommand("select between @begin to @end * from News where content like @likeString or title^2 like @likeString order by " + sortBy,
                                                              conn);
                }
                else
                {
                    throw new ArgumentException(string.Format("Invalid search type: {0}", searchType));
                }


                adapter.SelectCommand.Parameters.Add("@begin", (pageNo - 1) * pageLen);
                adapter.SelectCommand.Parameters.Add("@end", pageNo * pageLen - 1);
                adapter.SelectCommand.Parameters.Add("@matchString", matchString);
                adapter.SelectCommand.Parameters.Add("@likeString", "*" + q.Trim() + "*");

                adapter.SelectCommand.CacheTimeout = CacheTimeout;

                sql = adapter.SelectCommand.Sql;

                HubbleCommand cmd = adapter.SelectCommand;

                // execute the select, passing the cache timeout configured above
                ds = cmd.Query(CacheTimeout);

                // Collect the DocId of every hit; only needed by the GetWordsPositions calls
                // that are commented out below.
                long[] docids = new long[ds.Tables[0].Rows.Count];

                int i = 0;

                foreach (System.Data.DataRow row in ds.Tables[0].Rows)
                {
                    docids[i++] = (long)row["DocId"];
                }

                //titleWordsPositions = cmd.GetWordsPositions(wordssplitbyspace, "News", "Title", docids, int.MaxValue);
                //contentWordsPositions = cmd.GetWordsPositions(wordssplitbyspace, "News", "Content", docids, int.MaxValue);
            }

            recCount = ds.Tables[0].MinimumCapacity; // Hubble reports the total hit count via MinimumCapacity

            foreach (System.Data.DataRow row in ds.Tables[0].Rows)
            {
                TNews news = new TNews();
                news.Title   = row["Title"].ToString();
                news.Content = row["Content"].ToString();
                news.Url     = row["Url"].ToString();
                news.Time    = (DateTime)row["Time"];

                SimpleHTMLFormatter simpleHTMLFormatter =
                    new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

                Highlighter titleHighlighter;
                Highlighter contentHighlighter;

                if (_TitleAnalyzerName.Equals("PanGuSegment", StringComparison.CurrentCultureIgnoreCase))
                {
                    titleHighlighter =
                        new Highlighter(simpleHTMLFormatter, new PanGuAnalyzer());
                }
                else if (_TitleAnalyzerName.Equals("EnglishAnalyzer", StringComparison.CurrentCultureIgnoreCase))
                {
                    titleHighlighter = new Highlighter(simpleHTMLFormatter, new Hubble.Core.Analysis.EnglishAnalyzer());
                }
                else
                {
                    titleHighlighter = new Highlighter(simpleHTMLFormatter, new Hubble.Core.Analysis.SimpleAnalyzer());
                }

                if (_ContentAnalyzerName.Equals("PanGuSegment", StringComparison.CurrentCultureIgnoreCase))
                {
                    contentHighlighter =
                        new Highlighter(simpleHTMLFormatter, new PanGuAnalyzer());
                }
                else if (_ContentAnalyzerName.Equals("EnglishAnalyzer", StringComparison.CurrentCultureIgnoreCase))
                {
                    contentHighlighter = new Highlighter(simpleHTMLFormatter, new Hubble.Core.Analysis.EnglishAnalyzer());
                }
                else
                {
                    contentHighlighter = new Highlighter(simpleHTMLFormatter, new Hubble.Core.Analysis.SimpleAnalyzer());
                }

                titleHighlighter.FragmentSize   = 50;
                contentHighlighter.FragmentSize = 50;

                //news.Abstract = highlighter.GetBestFragment(contentWordsPositions, news.Content, (long)row["DocId"]);
                //news.TitleHighLighter = highlighter.GetBestFragment(titleWordsPositions, news.Title, (long)row["DocId"]);

                news.Abstract         = contentHighlighter.GetBestFragment(keywords, news.Content);
                news.TitleHighLighter = titleHighlighter.GetBestFragment(keywords, news.Title);
                if (string.IsNullOrEmpty(news.TitleHighLighter))
                {
                    news.TitleHighLighter = news.Title;
                }

                result.Add(news);
            }

            sw.Stop();
            elapsedMilliseconds = sw.ElapsedMilliseconds;

            return result;


        }
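
A short usage sketch of the method above: searchType selects the operator in the where clause ("Precise" uses contains, "Fuzzy" uses match, "Like" wraps the keyword in * wildcards), and pageNo here is one-based. The keyword and variable names are placeholders, and indexDir is passed as null because it is not referenced in the body shown:

        int recCount;
        long elapsedMilliseconds;
        string sql;

        // "Fuzzy" search, page 1, 10 hits per page, sorted by relevance score.
        List<TNews> hits = Search(null, "Fuzzy", "奥运会", 10, 1, "score",
                                  out recCount, out elapsedMilliseconds, out sql);

        foreach (TNews news in hits)
        {
            Console.WriteLine("{0}  ({1})", news.TitleHighLighter, news.Time);
        }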