/// <summary>
/// Runs a search for <paramref name="query"/> and returns one page (at most 10 entries)
/// of results, the size of the match set and the time spent in this call.
/// </summary>
/// <param name="query">Search text; backslashes and forward slashes are stripped before use.</param>
/// <param name="page">1-based page number; values below 1 yield the first page.</param>
/// <param name="filter">Filter value passed through to the underlying search call
/// (described by the original author as a source/level filter).</param>
/// <param name="filetype">Type filter passed through to the underlying search call. The literal
/// "1980/01/01" routes the request to <c>searchforpush</c> with the unsegmented query.</param>
/// <param name="num">Size of the returned match set, or 0 when the search returned null.</param>
/// <param name="XapResList">Results for the requested page; empty (never null) when there are none.</param>
/// <param name="ts">Elapsed time of this call.</param>
public void SearchReturn(string query, int page, int filter, string filetype, out uint num, out List<SearchResult> XapResList, out TimeSpan ts)
{
    // Stopwatch is monotonic; DateTime.Now differences are distorted by clock adjustments.
    // Fully qualified so that no additional using directive is required.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    query = query.Replace("\\", "");
    query = query.Replace("/", "");

    // Word segmentation of the cleaned query.
    string querystr = _cs.JiebaSegnotSearch(query);

    // Create the directory named by localdb when it does not exist yet.
    if (!Directory.Exists(localdb))
    {
        Directory.CreateDirectory(localdb);
    }

    Xapian.MSet xm;
    if (filetype == "1980/01/01")
    {
        // This sentinel value selects the push search, which takes the raw (unsegmented) query.
        xm = searchforpush(query, filter, filetype);
    }
    else
    {
        xm = search(querystr, filter, filetype);
    }

    if (xm != null)
    {
        num = xm.Size();
        XapResList = CollectPageResults(xm, querystr, page);
    }
    else
    {
        num = 0;
        XapResList = new List<SearchResult>();
    }

    ts = timer.Elapsed;
}

/// <summary>
/// Builds the answer for an already executed search: segments the query text taken from
/// <paramref name="Searchwords"/> and converts the requested page of <paramref name="xm"/>
/// into highlighted results.
/// </summary>
/// <param name="xm">Match set to read from; may be null.</param>
/// <param name="Searchwords">Supplies the query text (<c>SearchString</c>) and the 1-based page number (<c>page</c>).</param>
/// <param name="ai">Receives the match-set size and the page of results. When <paramref name="xm"/>
/// is null, <c>totalnum</c> is 0 and <c>retinfo</c> is null.</param>
private void SearchReturn(Xapian.MSet xm, SearchInfo Searchwords, out AnsInfo ai)
{
    ai = new AnsInfo();

    string query = Searchwords.SearchString;
    query = query.Replace("\\", "");
    query = query.Replace("/", "");
    int page = Searchwords.page;

    // Word segmentation: tokens joined by single spaces.
    var segmenter = new JiebaSegmenter();
    var segments = segmenter.Cut(query);
    string querystr = string.Join(" ", segments);

    if (xm == null)
    {
        ai.totalnum = 0;
        ai.retinfo = null; // callers may test for null, so this is kept as-is
        return;
    }

    ai.totalnum = xm.Size();
    ai.retinfo = CollectPageResults(xm, querystr, page);
}

/// <summary>
/// Walks the match set and converts the entries belonging to the requested page
/// (10 entries per page) into <see cref="SearchResult"/> objects.
/// </summary>
private List<SearchResult> CollectPageResults(Xapian.MSet xm, string querystr, int page)
{
    const int pageSize = 10;

    // The term classification depends only on the query, so it is done once per call
    // instead of once per document.
    string[] allTerms;
    string[] plainTerms;
    string tagFragments;
    ClassifyQueryTerms(querystr, out allTerms, out plainTerms, out tagFragments);

    List<SearchResult> pageResults = new List<SearchResult>();
    int firstRankOnPage = (page - 1) * pageSize;
    int rank = 0;

    for (Xapian.MSetIterator iter = xm.Begin(); iter != xm.End(); ++iter)
    {
        ++rank;
        if (rank <= firstRankOnPage)
        {
            continue; // entry belongs to an earlier page
        }

        if (pageResults.Count >= pageSize)
        {
            break; // page is full
        }

        pageResults.Add(BuildSearchResult(iter.GetDocument(), allTerms, plainTerms, tagFragments));
    }

    return pageResults;
}

/// <summary>
/// Splits the segmented query on spaces and separates terms that are fragments of the
/// <c>&lt;em&gt;</c> highlight tag from ordinary terms.
/// </summary>
/// <param name="querystr">Space separated query terms; null is treated as empty.</param>
/// <param name="allTerms">Every term in query order, duplicates included.</param>
/// <param name="plainTerms">Ordinary terms with duplicates removed (may contain blank entries).</param>
/// <param name="tagFragments">Tag-fragment terms joined with '|' in query order, or "" when there are none.</param>
private static void ClassifyQueryTerms(string querystr, out string[] allTerms, out string[] plainTerms, out string tagFragments)
{
    allTerms = (querystr ?? string.Empty).Split(' ');
    tagFragments = "";

    List<string> ordinary = new List<string>();
    foreach (string term in allTerms)
    {
        if (IsEmTagFragment(term))
        {
            tagFragments = tagFragments != "" ? tagFragments + "|" + term : term;
        }
        else
        {
            ordinary.Add(term);
        }
    }

    // De-duplicate the ordinary terms.
    HashSet<string> distinct = new HashSet<string>(ordinary);
    plainTerms = new string[distinct.Count];
    distinct.CopyTo(plainTerms);
}

/// <summary>
/// Returns true for terms that would match inside an inserted <c>&lt;em&gt;</c> / <c>&lt;/em&gt;</c>
/// tag: "&lt;", "&gt;" and any casing of "e", "m" or "em".
/// </summary>
private static bool IsEmTagFragment(string term)
{
    // Case-insensitive on purpose: the original literal list repeated "Em" and missed "eM".
    return term == "<"
        || term == ">"
        || string.Equals(term, "e", StringComparison.OrdinalIgnoreCase)
        || string.Equals(term, "m", StringComparison.OrdinalIgnoreCase)
        || string.Equals(term, "em", StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Converts one matched document into a <see cref="SearchResult"/>: copies the stored values,
/// picks a snippet around the first query term found in the body (or the head of the body when
/// a term is found only in the title) and marks the query terms in title and snippet.
/// The snippet stays empty when no term is found in either place.
/// </summary>
private SearchResult BuildSearchResult(Xapian.Document doc, string[] allTerms, string[] plainTerms, string tagFragments)
{
    const int headLength = 200;

    string content = doc.GetData();   // document body
    string title = doc.GetValue(3);   // title
    string href = doc.GetValue(1);    // link
    string source = doc.GetValue(0);
    string link = doc.GetValue(2);

    SearchResult result = new SearchResult();
    if (source == "4")
    {
        result.allcontent = content;
    }

    // Documents with source "2" carry an FTP link (per the original comment) that must be URL-encoded.
    string encodedHref = source == "2" ? UrlEncode(href) : href;

    bool hasTagFragments = tagFragments != "";
    // When the query consists solely of tag fragments they are the only terms available to locate a snippet.
    bool tagFragmentsOnly = hasTagFragments && plainTerms.Length == 0;
    string[] probes = tagFragmentsOnly ? allTerms : plainTerms;

    CompareInfo comparer = CultureInfo.InvariantCulture.CompareInfo;
    string snippet = "";
    int misses = 0;

    foreach (string term in probes)
    {
        if (term == " " || term == "")
        {
            continue;
        }

        string window;
        int hit = comparer.IndexOf(content, term, CompareOptions.IgnoreCase);
        if (hit != -1)
        {
            window = CutSnippetWindow(content, hit);
        }
        else if (comparer.IndexOf(title, term, CompareOptions.IgnoreCase) != -1)
        {
            // Term occurs only in the title: show the beginning of the body.
            window = content.Length > headLength ? content.Substring(0, headLength) : content;
        }
        else
        {
            ++misses;
            continue;
        }

        // Tag fragments are marked first by the dedicated helper so that the ordinary
        // highlighting below cannot match inside the tags it inserts.
        if (hasTagFragments)
        {
            title = ReplaceCntent(tagFragments, title);
            window = ReplaceCntent(tagFragments, window);
        }

        if (!tagFragmentsOnly)
        {
            // The first `misses` entries are all blank or known to be absent, so they are skipped.
            for (int i = misses; i < plainTerms.Length; i++)
            {
                if (plainTerms[i] == " " || plainTerms[i] == "")
                {
                    continue;
                }

                title = EmphasizeTerm(title, plainTerms[i]);
                window = EmphasizeTerm(window, plainTerms[i]);
            }
        }

        snippet = "..." + window + "...";
        break; // the first term that matches decides the snippet
    }

    result.ahref = href;
    result.ahrefencode = encodedHref;
    result.link = link;
    result.title = title;
    result.snippet = snippet;
    return result;
}

/// <summary>
/// Cuts a window of up to 200 characters out of <paramref name="content"/>: 50 characters before
/// the hit position and 150 from it onwards, clamped to the start and end of the text.
/// </summary>
private static string CutSnippetWindow(string content, int hit)
{
    const int lead = 50;
    const int tail = 150;

    bool roomBefore = hit > lead;
    bool roomAfter = content.Length - hit > tail;

    if (roomBefore && roomAfter)
    {
        return content.Substring(hit - lead, lead + tail);
    }

    if (roomBefore)
    {
        return content.Substring(hit - lead); // runs to the end of the text
    }

    if (roomAfter)
    {
        return content.Substring(0, hit + tail);
    }

    return content; // short text: use all of it
}

/// <summary>
/// Wraps every case-insensitive occurrence of <paramref name="term"/> in <c>&lt;em&gt;</c> tags.
/// </summary>
private static string EmphasizeTerm(string text, string term)
{
    // A MatchEvaluator keeps the document's own casing and prevents '$' sequences in the
    // term from being interpreted as regex substitutions.
    return Regex.Replace(
        text,
        Regex.Escape(term),
        match => "<em>" + match.Value + "</em>",
        RegexOptions.IgnoreCase);
}