/// <summary>
/// Full-text search indexing: builds an <see cref="NpgsqlTsVector"/> from the given content.
/// </summary>
/// <param name="content">
/// Text to index. It is passed through <c>HtmlToTextHelper.Convert</c> before being segmented.
/// </param>
/// <returns>
/// The vector parsed from the space-joined tokens, or <c>null</c> when
/// <paramref name="content"/> is null/whitespace or when any step throws.
/// </returns>
public NpgsqlTsVector GetNpgsqlTsVector(string content)
{
    // Check the input first so blank content does not construct the segmenter and HTML helper for nothing.
    if (string.IsNullOrWhiteSpace(content))
    {
        return null;
    }

    try
    {
        var segmenter = new JiebaSegmenter();
        var htmlToTextHelper = new HtmlToTextHelper();

        string plainText = htmlToTextHelper.Convert(content);

        var tokens = new List<string>();
        foreach (var word in segmenter.CutForSearch(plainText, hmm: true))
        {
            // Single-character segments are dropped; the rest are upper-cased,
            // which matches what GetSerachNpgsqlTsQuery does on the query side.
            if (word.Length > 1)
            {
                tokens.Add(word.ToUpper());
            }
        }

        return NpgsqlTsVector.Parse(string.Join(" ", tokens));
    }
    catch (Exception)
    {
        // Best-effort by design: any conversion, segmentation or parse failure yields null.
        return null;
    }
}
/// <summary>
/// Full-text search query: builds an <see cref="NpgsqlTsQuery"/> from a search keyword.
/// </summary>
/// <param name="keyword">
/// Search text. If it contains <c>&amp;</c> it is split on that character and handed to
/// <c>GetSerachNpgsqlTsQuery_And</c>; otherwise, if it contains <c>|</c>, it is split and handed to
/// <c>GetSerachNpgsqlTsQuery_Or</c>. Otherwise it is segmented and the tokens are joined with
/// <c>" &amp; "</c>.
/// </param>
/// <returns>
/// The parsed query, or <c>null</c> when <paramref name="keyword"/> is null/whitespace or when
/// segmentation/parsing throws. Results of the And/Or helpers are returned unchanged.
/// </returns>
public NpgsqlTsQuery GetSerachNpgsqlTsQuery(string keyword)
{
    // The guard must run before any member access on keyword: previously Contains() was
    // called first, so a null keyword threw NullReferenceException instead of returning null.
    if (string.IsNullOrWhiteSpace(keyword))
    {
        return null;
    }

    if (keyword.Contains('&'))
    {
        string[] keys = keyword.Split('&');
        return GetSerachNpgsqlTsQuery_And(keys);
    }

    if (keyword.Contains("|"))
    {
        string[] keys = keyword.Split('|');
        return GetSerachNpgsqlTsQuery_Or(keys);
    }

    try
    {
        var segmenter = new JiebaSegmenter();
        var htmlToTextHelper = new HtmlToTextHelper();

        string plainText = htmlToTextHelper.Convert(keyword);

        var tokens = new List<string>();
        foreach (var word in segmenter.Cut(plainText, hmm: true))
        {
            // Single-character segments are dropped; the rest are upper-cased,
            // which matches what GetNpgsqlTsVector does on the indexing side.
            if (word.Length > 1)
            {
                tokens.Add(word.ToUpper());
            }
        }

        // Every remaining token must match: tokens are AND-ed together.
        return NpgsqlTsQuery.Parse(string.Join(" & ", tokens));
    }
    catch (Exception)
    {
        // Best-effort by design: any conversion, segmentation or parse failure yields null.
        return null;
    }
}
/// <summary>
/// Manual smoke check: reads "html.txt", runs its contents through the CommonMark converter,
/// converts the resulting HTML with <c>HtmlToTextHelper.Convert</c>, and writes the text to the console.
/// </summary>
public void ConvertTest()
{
    var htmlToText = new HtmlToTextHelper();
    string sourceText = File.ReadAllText(@"html.txt");

    string renderedHtml = "";
    using (var input = new StringReader(sourceText))
    using (var output = new StringWriter())
    {
        CommonMark.CommonMarkConverter.Convert(input, output);
        // output.ToString() is the converted HTML.
        renderedHtml = output.ToString();
    }

    string plainText = htmlToText.Convert(renderedHtml);
    Console.WriteLine(plainText);
}