/// <summary>
/// Searches for documents the user may also like.
/// The given title is run through the server-side keyword analyzer of the "Title" field and the
/// resulting match string is searched in Description, title (^2) and tags (^2) of rows with
/// IsAudit=1, excluding the row whose DocumentId equals <paramref name="docId"/>.
/// </summary>
/// <param name="title">Text used as the search keywords.</param>
/// <param name="pageNo">Zero-based page index (the first record requested is pageNo * pageLen).</param>
/// <param name="pageLen">Number of records per page.</param>
/// <param name="docId">DocumentId to exclude from the results.</param>
/// <returns>The items produced by DataToList for the requested page, queried in descending score order.</returns>
public ArrayList SearchMaybeLike(string title, int pageNo, int pageLen, int docId)
{
    string connStr = Helper.ConfigHelper.HubbleConnStr;
    ArrayList rList = new ArrayList();

    using (HubbleConnection conn = new HubbleConnection(connStr))
    {
        conn.Open();
        GetAnalyzerName(conn, TableName);

        // Only needed to satisfy the out parameter; the space-separated words are not used here.
        string wordssplitbyspace;
        HubbleCommand matchCmd = new HubbleCommand(conn);
        string matchString = matchCmd.GetKeywordAnalyzerStringFromServer(
            TableName, "Title", title, int.MaxValue, out wordssplitbyspace);

        HubbleCommand cmd = new HubbleCommand(
            string.Format(
                "select between @begin to @end * from {0} where IsAudit=1 and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) " +
                " and DocumentId<>@docid order by score desc",
                TableName),
            conn);

        cmd.Parameters.Add("@begin", pageNo * pageLen);
        cmd.Parameters.Add("@end", (pageNo + 1) * pageLen - 1);
        cmd.Parameters.Add("@matchString", matchString);
        cmd.Parameters.Add("@docid", docId);
        cmd.CacheTimeout = 0;

        // The previous version also copied every row's DocId into an array that was never read;
        // that dead loop (and its unboxing cast) has been removed.
        DataSet ds = cmd.Query(0);

        rList.AddRange(DataToList(ds, title, false));
    }

    return rList;
}
private string descAnalyzerName; // Name of the description analyzer

/// <summary>
/// Runs a paged full-text search over Description, title (^2) and tags (^2) of rows with IsAudit=1,
/// optionally restricted to the current <c>DocType</c>, ordered by score descending.
/// </summary>
/// <param name="keyWords">Raw search text; it is analyzed on the server against the "Title" field.</param>
/// <param name="pageNo">Zero-based page index (the first record requested is pageNo * pageLen).</param>
/// <param name="pageLen">Number of records per page.</param>
/// <param name="recCount">
/// Receives <c>MinimumCapacity</c> of the first result table
/// (presumably the total hit count reported by Hubble; confirm against the client library).
/// </param>
/// <param name="elapsedMilliseconds">Receives the time spent in this method, including the conversion done by DataToList.</param>
/// <returns>The list produced by DataToList for the requested page.</returns>
public ArrayList Search(string keyWords, int pageNo, int pageLen, out int recCount, out long elapsedMilliseconds)
{
    string connStr = Helper.ConfigHelper.HubbleConnStr;
    System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
    watch.Start();

    DataSet ds;
    using (HubbleConnection conn = new HubbleConnection(connStr))
    {
        conn.Open();
        GetAnalyzerName(conn, TableName);

        // Only needed to satisfy the out parameter; the space-separated words are not used here.
        string wordssplitbyspace;
        HubbleCommand matchCmd = new HubbleCommand(conn);
        string matchString = matchCmd.GetKeywordAnalyzerStringFromServer(
            TableName, "Title", keyWords, int.MaxValue, out wordssplitbyspace);

        bool filterByDocType = !string.IsNullOrEmpty(this.DocType);

        // DocType is bound as a parameter instead of being concatenated into the statement:
        // a quote in the value can no longer alter the query, and braces in the value can no
        // longer break the string.Format call below.
        string sqlTemplate = filterByDocType
            ? "select between @begin to @end * from {0} where IsAudit=1 and DocType=@docType and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) order by score desc"
            : "select between @begin to @end * from {0} where IsAudit=1 and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) order by score desc";

        HubbleCommand cmd = new HubbleCommand(string.Format(sqlTemplate, TableName), conn);
        cmd.Parameters.Add("@begin", pageNo * pageLen);
        cmd.Parameters.Add("@end", (pageNo + 1) * pageLen - 1);
        cmd.Parameters.Add("@matchString", matchString);
        if (filterByDocType)
        {
            cmd.Parameters.Add("@docType", this.DocType);
        }
        cmd.CacheTimeout = 0;

        ds = cmd.Query(0);
    }

    recCount = ds.Tables[0].MinimumCapacity;
    ArrayList rList = DataToList(ds, keyWords, true);

    watch.Stop();
    elapsedMilliseconds = watch.ElapsedMilliseconds;
    return rList;
}
/// <summary>
/// Gets the formatted keyword string.
/// e.g. '要出发旅行网' --> '要出发^rank^0 旅行网^rank^1'
/// Here rank is the part-of-speech level of a word after segmentation and position is the
/// position of the word; both supply parameters for scoring.
/// </summary>
/// <param name="keyWords">The keywords to search for.</param>
/// <param name="tableName">The Hubble table (index) to search.</param>
/// <param name="fieldName">The field being searched (for a multi-field search, pass just one of the fields).</param>
/// <returns>The match string returned by the server-side keyword analyzer.</returns>
/// <exception cref="Exception">
/// The server call failed. The message is the original message and the original exception is
/// available through <see cref="Exception.InnerException"/>.
/// </exception>
public string GetKeywordAnalyzerStringFromat(string keyWords, string tableName, string fieldName)
{
    HubbleCommand matchCmd = CreateHubbleCommand();

    // Only needed to satisfy the out parameter; the space-separated words are not used here.
    string wordssplitbyspace;

    try
    {
        return matchCmd.GetKeywordAnalyzerStringFromServer(
            tableName, fieldName, keyWords, int.MaxValue, out wordssplitbyspace);
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but the original exception is now kept as
        // InnerException so its type and stack trace are not lost.
        throw new Exception(ex.Message, ex);
    }
}
/// <summary>
/// Runs a paged full-text search over Description, title (^2) and tags (^2) of rows with IsAudit=1,
/// optionally restricted to the current <c>DocType</c>, ordered by score descending.
/// </summary>
/// <param name="keyWords">Raw search text; it is analyzed on the server against the "Title" field.</param>
/// <param name="pageNo">Zero-based page index (the first record requested is pageNo * pageLen).</param>
/// <param name="pageLen">Number of records per page.</param>
/// <param name="recCount">
/// Receives <c>MinimumCapacity</c> of the first result table
/// (presumably the total hit count reported by Hubble; confirm against the client library).
/// </param>
/// <param name="elapsedMilliseconds">Receives the time spent in this method, including the conversion done by DataToList.</param>
/// <returns>The list produced by DataToList for the requested page.</returns>
public ArrayList Search(string keyWords, int pageNo, int pageLen, out int recCount, out long elapsedMilliseconds)
{
    string connStr = Helper.ConfigHelper.HubbleConnStr;
    System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
    watch.Start();

    DataSet ds;
    using (HubbleConnection conn = new HubbleConnection(connStr))
    {
        conn.Open();
        GetAnalyzerName(conn, TableName);

        // Only needed to satisfy the out parameter; the space-separated words are not used here.
        string wordssplitbyspace;
        HubbleCommand matchCmd = new HubbleCommand(conn);
        string matchString = matchCmd.GetKeywordAnalyzerStringFromServer(
            TableName, "Title", keyWords, int.MaxValue, out wordssplitbyspace);

        // The DocType filter is bound as the @docType parameter rather than spliced into the
        // statement text, so quotes or braces in the value cannot change the query or break
        // the string.Format call below.
        string sqlTemplate;
        if (string.IsNullOrEmpty(this.DocType))
        {
            sqlTemplate = "select between @begin to @end * from {0} where IsAudit=1 and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) order by score desc";
        }
        else
        {
            sqlTemplate = "select between @begin to @end * from {0} where IsAudit=1 and DocType=@docType and ( Description match @matchString or title^2 match @matchString or tags^2 match @matchString ) order by score desc";
        }

        HubbleCommand cmd = new HubbleCommand(string.Format(sqlTemplate, TableName), conn);
        cmd.Parameters.Add("@begin", pageNo * pageLen);
        cmd.Parameters.Add("@end", (pageNo + 1) * pageLen - 1);
        cmd.Parameters.Add("@matchString", matchString);
        if (!string.IsNullOrEmpty(this.DocType))
        {
            cmd.Parameters.Add("@docType", this.DocType);
        }
        cmd.CacheTimeout = 0;

        ds = cmd.Query(0);
    }

    recCount = ds.Tables[0].MinimumCapacity;
    ArrayList rList = DataToList(ds, keyWords, true);

    watch.Stop();
    elapsedMilliseconds = watch.ElapsedMilliseconds;
    return rList;
}
/// <summary>
/// Searches the "News" table and returns one page of results, each with a highlighted title
/// and a highlighted abstract taken from the content.
/// </summary>
/// <param name="indexDir">Not used by this implementation.</param>
/// <param name="searchType">
/// "Precise" (uses <c>contains</c>), "Fuzzy" (uses <c>match</c>) or "Like" (uses <c>like</c>).
/// </param>
/// <param name="q">Raw query text.</param>
/// <param name="pageLen">Number of records per page.</param>
/// <param name="pageNo">One-based page number (the first record requested is (pageNo - 1) * pageLen).</param>
/// <param name="sortBy">
/// Order-by expression; "score" is used when null or empty. Must be a comma-separated list of
/// column names, each optionally followed by <c>asc</c> or <c>desc</c>.
/// </param>
/// <param name="recCount">
/// Receives <c>MinimumCapacity</c> of the first result table
/// (presumably the total hit count reported by Hubble; confirm against the client library).
/// </param>
/// <param name="elapsedMilliseconds">Receives the time spent querying and highlighting.</param>
/// <param name="sql">Receives the <c>Sql</c> property of the command, read after the parameters were added.</param>
/// <returns>The news items of the requested page.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="searchType"/> is not one of the supported values, or
/// <paramref name="sortBy"/> is not a plain column list.
/// </exception>
public static List<TNews> Search(String indexDir, string searchType, String q, int pageLen, int pageNo, string sortBy,
    out int recCount, out long elapsedMilliseconds, out string sql)
{
    List<TNews> result = new List<TNews>();
    string keywords = q;

    if (string.IsNullOrEmpty(sortBy))
    {
        sortBy = "score";
    }
    else if (!IsSafeOrderBy(sortBy))
    {
        // sortBy is concatenated into the statement text (an order-by clause cannot be bound as
        // a parameter), so anything that is not a plain column list is rejected up front.
        throw new ArgumentException(string.Format("Invalid sort expression: {0}", sortBy));
    }

    System.Configuration.ConnectionStringSettings connString =
        System.Web.Configuration.WebConfigurationManager.ConnectionStrings["News"];
    System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
    string connectString = connString.ConnectionString;
    System.Data.DataSet ds;

    sw.Start();

    using (HubbleAsyncConnection conn = new HubbleAsyncConnection(connectString))
    {
        conn.Open();
        GetAnalyzerName(conn, "News");

        // Only needed to satisfy the out parameter; the space-separated words are not used here.
        string wordssplitbyspace;
        HubbleCommand matchCmd = new HubbleCommand(conn);
        string matchString = matchCmd.GetKeywordAnalyzerStringFromServer(
            "News", "Content", keywords, int.MaxValue, out wordssplitbyspace);

        string statement;
        switch (searchType)
        {
            case "Precise":
                statement = "select between @begin to @end * from News where content contains @matchString or title^2 contains @matchString order by " + sortBy;
                break;
            case "Fuzzy":
                statement = "select between @begin to @end * from News where content match @matchString or title^2 match @matchString order by " + sortBy;
                break;
            case "Like":
                statement = "select between @begin to @end * from News where content like @likeString or title^2 like @likeString order by " + sortBy;
                break;
            default:
                throw new ArgumentException(string.Format("Invalid search type: {0}", searchType));
        }

        HubbleCommand cmd = new HubbleCommand(statement, conn);
        cmd.Parameters.Add("@begin", (pageNo - 1) * pageLen);
        cmd.Parameters.Add("@end", pageNo * pageLen - 1);
        cmd.Parameters.Add("@matchString", matchString);
        cmd.Parameters.Add("@likeString", "*" + q.Trim() + "*");
        cmd.CacheTimeout = CacheTimeout;

        sql = cmd.Sql;

        // The previous version also copied every row's DocId into an array that was never read;
        // that dead loop (and its unboxing cast) has been removed.
        ds = cmd.Query(CacheTimeout);
    }

    recCount = ds.Tables[0].MinimumCapacity;

    foreach (System.Data.DataRow row in ds.Tables[0].Rows)
    {
        TNews news = new TNews();
        news.Title = row["Title"].ToString();
        news.Content = row["Content"].ToString();
        news.Url = row["Url"].ToString();
        news.Time = (DateTime)row["Time"];

        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter titleHighlighter = CreateHighlighter(_TitleAnalyzerName, simpleHTMLFormatter);
        Highlighter contentHighlighter = CreateHighlighter(_ContentAnalyzerName, simpleHTMLFormatter);

        titleHighlighter.FragmentSize = 50;
        contentHighlighter.FragmentSize = 50;

        news.Abstract = contentHighlighter.GetBestFragment(keywords, news.Content);
        news.TitleHighLighter = titleHighlighter.GetBestFragment(keywords, news.Title);

        // Fall back to the plain title when the highlighter produced no fragment.
        if (string.IsNullOrEmpty(news.TitleHighLighter))
        {
            news.TitleHighLighter = news.Title;
        }

        result.Add(news);
    }

    sw.Stop();
    elapsedMilliseconds = sw.ElapsedMilliseconds;

    return result;
}

/// <summary>
/// Returns true when <paramref name="sortBy"/> is a comma-separated list of column names
/// (optionally bracketed), each optionally followed by asc or desc.
/// </summary>
private static bool IsSafeOrderBy(string sortBy)
{
    return System.Text.RegularExpressions.Regex.IsMatch(
        sortBy,
        @"^\s*\[?\w+\]?(\s+(asc|desc))?(\s*,\s*\[?\w+\]?(\s+(asc|desc))?)*\s*$",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase |
        System.Text.RegularExpressions.RegexOptions.CultureInvariant);
}

/// <summary>
/// Creates a highlighter that uses the analyzer matching <paramref name="analyzerName"/>:
/// "PanGuSegment" or "EnglishAnalyzer" (case-insensitive); any other value, including null,
/// selects the simple analyzer.
/// </summary>
private static Highlighter CreateHighlighter(string analyzerName, SimpleHTMLFormatter formatter)
{
    // Analyzer names are identifiers, so they are compared ordinally rather than with
    // culture-sensitive rules.
    if (string.Equals(analyzerName, "PanGuSegment", StringComparison.OrdinalIgnoreCase))
    {
        return new Highlighter(formatter, new PanGuAnalyzer());
    }

    if (string.Equals(analyzerName, "EnglishAnalyzer", StringComparison.OrdinalIgnoreCase))
    {
        return new Highlighter(formatter, new Hubble.Core.Analysis.EnglishAnalyzer());
    }

    return new Highlighter(formatter, new Hubble.Core.Analysis.SimpleAnalyzer());
}