// Session key under which the crawled link list is cached.
private readonly string SESSION_NAME = "ListResultHrefs";

/// <summary>
/// On the first (non-postback) request: crawls links, tokenizes every link title into a
/// de-duplicated keyword list, writes the frequent words to "tieba.json" and stores the
/// links in the session.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    if (IsPostBack)
    {
        return;
    }

    Session.RemoveAll(); // clear any existing cache to avoid conflicts
    List<ExampleMyHref> resultHrefs = HrefCrawler(15); // crawl the first 15 pages

    // Tokenize each title; words with a frequency of at least 20 are written to JSON below.
    var segmenter = new JiebaSegmenter();
    var allTitles = new System.Text.StringBuilder();
    foreach (ExampleMyHref href in resultHrefs)
    {
        allTitles.Append(href.HrefTitle).Append(';');
        // Distinct keeps the first occurrence of each token, in order.
        href.KeywordList = segmenter.CutForSearch(href.HrefTitle).Distinct().ToList();
    }

    // Materialize once: the sequence is enumerated again for every frequency count.
    IEnumerable<string> allWords = segmenter.CutForSearch(allTitles.ToString()).ToList();

    var wordsInts = new Dictionary<string, int>();
    var json = new System.Text.StringBuilder("[");
    foreach (string item in allWords.Distinct())
    {
        if (item.Length < 2 || item.Length > 5)
        {
            continue;
        }

        // The dictionary is keyed by the trimmed word, so the duplicate check must use
        // the trimmed word as well; otherwise Add throws for tokens that differ only
        // in surrounding whitespace.
        string word = item.Trim();
        if (wordsInts.ContainsKey(word))
        {
            continue;
        }

        int f = GetFrequency(allWords, item); // count occurrences of the token
        wordsInts.Add(word, f);
        if (f < 20)
        {
            continue;
        }

        if (json.Length > 1)
        {
            json.Append(',');
        }
        json.Append("{\"name\":\"").Append(word).Append("\",\"value\":").Append(f).Append('}');
    }
    json.Append(']');

    WriteData("tieba.json", json.ToString());
    Session[SESSION_NAME] = resultHrefs; // cache the crawled data in the session
}
/// <summary>
/// Console demo: loads the stop-word file, then prints the segmentation of several
/// sample sentences in the different jieba modes and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // NOTE(review): stopWords is filled but never read afterwards in this method.
    List<string> stopWords = new List<string>();
    string stopUrl = "./stopwords.txt";

    // Dispose the reader so the file handle is released even if reading fails.
    using (StreamReader rd = File.OpenText(stopUrl))
    {
        string ss;
        while ((ss = rd.ReadLine()) != null)
        {
            stopWords.Add(ss);
        }
    }

    var segmenter = new JiebaSegmenter();

    var segments = segmenter.Cut("我来到北京清华大学", cutAll: true);
    Console.WriteLine("【全模式】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("我来到北京清华大学"); // precise mode is the default
    Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("他来到了网易杭研大厦"); // precise mode, HMM model also enabled by default
    Console.WriteLine("【新词识别】:{0}", string.Join("/ ", segments));

    segments = segmenter.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"); // search-engine mode
    Console.WriteLine("【搜索引擎模式】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("结过婚的和尚未结过婚的");
    Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));

    Console.ReadKey();
}
/// <summary>
/// Prints the segmentation of a series of sample sentences to the console, then
/// deletes and re-adds the word "湖南" before segmenting a final address sample.
/// </summary>
public void CutDemo()
{
    var jieba = new JiebaSegmenter();
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学", cutAll: true));
    Console.WriteLine("【全模式】:{0}", joined);

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学"));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Precise mode; the HMM model is also used by default.
    joined = string.Join("/ ", jieba.Cut("他来到了网易杭研大厦"));
    Console.WriteLine("【新词识别】:{0}", joined);

    // Search-engine mode.
    joined = string.Join("/ ", jieba.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));
    Console.WriteLine("【搜索引擎模式】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("结过婚的和尚未结过婚的"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("北京大学生喝进口红酒"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("在北京大学生活区喝进口红酒"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("腾讯视频致力于打造中国最大的在线视频媒体平台,以丰富的内容、极致的观看体验"));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Remove and re-register the word before the last sample.
    jieba.DeleteWord("湖南");
    jieba.AddWord("湖南");

    joined = string.Join("/ ", jieba.Cut("湖南长沙市天心区"));
    Console.WriteLine("【精确模式】:{0}", joined);
}
/// <summary>
/// Replaces every entry of <paramref name="text"/> in place with its search-mode
/// segmentation, joined by single spaces, after dropping every token whose text equals
/// the string form of an element of the external <c>list</c> collection.
/// </summary>
/// <param name="text">
/// Entries to segment. Spaces are removed from each entry before segmentation.
/// The reference itself is never reassigned; <c>ref</c> is kept for caller compatibility.
/// </param>
static void ApartWords(ref ArrayList text)
{
    // One segmenter serves all entries; building it per entry was loop-invariant work.
    var segmenter = new JiebaSegmenter();

    for (int i = 0; i < text.Count; i++)
    {
        // Replace removes every space, including leading and trailing ones.
        string str = text[i].ToString().Replace(" ", "");

        var kept = new List<string>();
        foreach (string word in segmenter.CutForSearch(str))
        {
            bool excluded = false;
            foreach (var k in list)
            {
                if (word == k.ToString())
                {
                    excluded = true;
                    break;
                }
            }

            if (!excluded)
            {
                kept.Add(word);
            }
        }

        text[i] = string.Join(" ", kept);
    }
}
/// <summary>
/// Builds a full-text search vector from HTML content: converts the HTML to text,
/// segments it in search mode, keeps tokens longer than one character (upper-cased)
/// and parses the space-joined tokens into an <c>NpgsqlTsVector</c>.
/// </summary>
/// <param name="content">Source content to index.</param>
/// <returns>
/// The parsed vector, or <c>null</c> when <paramref name="content"/> is null/blank or
/// any step throws.
/// </returns>
public NpgsqlTsVector GetNpgsqlTsVector(string content)
{
    try
    {
        var jieba = new JiebaSegmenter();
        var htmlConverter = new HtmlToTextHelper();

        if (string.IsNullOrWhiteSpace(content))
        {
            return null;
        }

        string plainText = htmlConverter.Convert(content);

        var tokens = new List<string>();
        foreach (var token in jieba.CutForSearch(plainText, hmm: true))
        {
            if (token.Length <= 1)
            {
                continue; // single-character tokens are not indexed
            }
            tokens.Add(token.ToUpper());
        }

        return NpgsqlTsVector.Parse(string.Join(" ", tokens));
    }
    catch (Exception)
    {
        // Any failure yields "no vector" rather than an error.
        return null;
    }
}
/// <summary>
/// Splits a keyword into candidate search terms. Candidates are the keyword itself,
/// every regex match and capture group of the pattern below, and the search-mode
/// segmentation of the keyword. Terms shorter than two characters or starting with
/// punctuation are dropped. The result is cached for one hour.
/// </summary>
/// <param name="keyword">Keyword to split; also used as the cache key.</param>
/// <returns>Distinct candidate terms ordered by descending length.</returns>
public List<string> CutKeywords(string keyword)
{
    if (_memoryCache.TryGetValue(keyword, out List<string> cached))
    {
        return cached;
    }

    var candidates = new HashSet<string> { keyword };

    MatchCollection matches = Regex.Matches(keyword, @"(([A-Z]*[a-z]*)[\d]*)([\u4E00-\u9FA5]+)*((?!\p{P}).)*");
    foreach (Match match in matches)
    {
        candidates.Add(match.Value);
        foreach (Group grp in match.Groups)
        {
            candidates.Add(grp.Value);
        }
    }

    var jieba = new JiebaSegmenter();
    candidates.UnionWith(jieba.CutForSearch(keyword));

    candidates.RemoveWhere(term => term.Length < 2 || Regex.IsMatch(term, @"^\p{P}.*"));

    List<string> ordered = candidates.OrderByDescending(term => term.Length).ToList();
    _memoryCache.Set(keyword, ordered, TimeSpan.FromHours(1));
    return ordered;
}
/// <summary>
/// Prints the segmentation of several sample sentences to the console, one line per
/// sample, covering full mode, precise mode, search-engine mode and ambiguous phrases.
/// </summary>
public void CutDemo()
{
    var jieba = new JiebaSegmenter();
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学", cutAll: true));
    Console.WriteLine("【全模式】:{0}", joined);

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学"));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Precise mode; the HMM model is also used by default.
    joined = string.Join("/ ", jieba.Cut("他来到了网易杭研大厦"));
    Console.WriteLine("【新词识别】:{0}", joined);

    // Search-engine mode.
    joined = string.Join("/ ", jieba.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));
    Console.WriteLine("【搜索引擎模式】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("结过婚的和尚未结过婚的"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("北京大学生喝进口红酒"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("在北京大学生活区喝进口红酒"));
    Console.WriteLine("【歧义消除】:{0}", joined);
}
/// <summary>
/// Segments <paramref name="text"/> in search mode, counts each token longer than one
/// character, adds those counts to <c>CouponNameCount</c> of keywords already stored,
/// inserts the remaining tokens as new keywords and saves the changes.
/// </summary>
/// <param name="text">Text whose tokens are counted.</param>
public static void Add(string text)
{
    var jieba = new JiebaSegmenter();
    var counted = jieba.CutForSearch(text)
        .GroupBy(token => token)
        .Where(grp => grp.Key.Length > 1)
        .Select(grp => new { Key = grp.Key, Count = grp.Count() })
        .ToList();

    // NOTE(review): "db" is not declared in this method; a local
    // "using (ApplicationDbContext db = ...)" scope was previously commented out here.
    var candidateWords = counted.Select(entry => entry.Key).ToList();
    var existingRows = db.Keywords.Where(row => candidateWords.Contains(row.Word)).ToList();
    var existingWords = existingRows.Select(row => row.Word).ToList();

    // Increase the counter of keywords that are already stored.
    foreach (var row in existingRows)
    {
        var hit = counted.FirstOrDefault(entry => entry.Key == row.Word);
        row.CouponNameCount += hit != null ? hit.Count : 0;
    }

    // Everything not stored yet becomes a new keyword row.
    var newRows = counted
        .Where(entry => !existingWords.Contains(entry.Key))
        .Select(entry => new Keyword { CouponNameCount = entry.Count, Word = entry.Key })
        .ToList();

    db.Keywords.AddRange(newRows);
    db.SaveChanges();
}
/// <summary>
/// Splits <paramref name="strWords"/> using the mode selected in
/// <c>comboBoxCutMode</c> and removes entries found in <c>stopwordsList</c>.
/// </summary>
/// <param name="strWords">Text to split.</param>
/// <returns>
/// The resulting pieces. Index 0 uses <c>Cut</c>, 1 uses <c>CutForSearch</c>, 2 uses
/// TF-IDF tag extraction and any other index uses TextRank tag extraction (both limited
/// to 20 tags with the noun-and-verb filter). The segments are joined with spaces and
/// split on spaces again before filtering.
/// </returns>
private List<string> WordSplitResult(string strWords)
{
    IEnumerable<string> segments;
    int mode = comboBoxCutMode.SelectedIndex;

    if (mode == 0)
    {
        segments = segmenter.Cut(strWords);
    }
    else if (mode == 1)
    {
        segments = segmenter.CutForSearch(strWords);
    }
    else if (mode == 2)
    {
        var tfidf = new TfidfExtractor();
        segments = tfidf.ExtractTags(strWords, 20, Constants.NounAndVerbPos);
    }
    else
    {
        var ranker = new TextRankExtractor();
        segments = ranker.ExtractTags(strWords, 20, Constants.NounAndVerbPos);
    }

    string[] pieces = string.Join(" ", segments).Split(' ');

    var kept = new List<string>();
    foreach (string piece in pieces)
    {
        if (stopwordsList.Contains(piece))
        {
            continue;
        }
        kept.Add(piece);
    }
    return kept;
}
/// <summary>
/// Segments the rich text box content in search-engine mode and passes the formatted
/// result to <c>MsgDtlHandle</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    var segmenter = new JiebaSegmenter();
    string joined = string.Join("/ ", segmenter.CutForSearch(this.richTextBox1.Text));
    string message = string.Format("【搜索引擎模式】:{0}", joined);
    MsgDtlHandle(message);
}
/// <summary>
/// Console demo: loads "userdict.txt" as a user dictionary, prints the segmentation of
/// several sample sentences and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var jieba = new JiebaSegmenter();
    jieba.LoadUserDict("userdict.txt");
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学", cutAll: true));
    Console.WriteLine("【全模式】:{0}", joined);

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学"));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Precise mode; the HMM model is also used by default.
    joined = string.Join("/ ", jieba.Cut("他来到了网易杭研大厦"));
    Console.WriteLine("【新词识别】:{0}", joined);

    // Search-engine mode.
    joined = string.Join("/ ", jieba.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));
    Console.WriteLine("【搜索引擎模式】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("结过婚的和尚未结过婚的"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    // Sample printed under the "user dictionary" label.
    joined = string.Join("/ ", jieba.Cut("linezerodemo机器学习学习机器"));
    Console.WriteLine("【用户字典】:{0}", joined);

    Console.ReadKey();
}
/// <summary>
/// Segments a string with jieba.NET in search mode (finer-grained, intended for
/// retrieval).
/// </summary>
/// <param name="str">Input string.</param>
/// <returns>The segments joined by single spaces.</returns>
public string JiebaSeg(string str)
{
    var jieba = new JiebaSegmenter();
    return string.Join(" ", jieba.CutForSearch(str));
}
/// <summary>
/// Console demo: loads "userdict.txt", prints the segmentation of several sample
/// sentences, prints the five most common tokens of a sample paragraph and waits for a
/// key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var jieba = new JiebaSegmenter();
    jieba.LoadUserDict("userdict.txt");
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学", cutAll: true));
    Console.WriteLine("【全模式】:{0}", joined);

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学"));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Precise mode; the HMM model is also used by default.
    joined = string.Join("/ ", jieba.Cut("他来到了网易杭研大厦"));
    Console.WriteLine("【新词识别】:{0}", joined);

    // Search-engine mode.
    joined = string.Join("/ ", jieba.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));
    Console.WriteLine("【搜索引擎模式】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("结过婚的和尚未结过婚的"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    // Sample printed under the "user dictionary" label.
    joined = string.Join("/ ", jieba.Cut("linezerodemo机器学习学习机器"));
    Console.WriteLine("【用户字典】:{0}", joined);

    // Word-frequency statistics.
    var paragraph = "此领域探讨如何处理及运用自然语言。自然语言生成系统把计算机数据转化为自然语言。自然语言理解系统把自然语言转化为计算机程序更易于处理的形式。";
    var counter = new Counter<string>(jieba.Cut(paragraph));
    foreach (var entry in counter.MostCommon(5))
    {
        Console.WriteLine($"{entry.Key}: {entry.Value}");
    }

    Console.ReadKey();
}
/// <summary>
/// Segments <paramref name="keyword"/> in search mode with the HMM model enabled.
/// </summary>
/// <param name="keyword">Text to segment.</param>
/// <returns>The segments joined by single spaces.</returns>
public string GetSearchKeyWorlds(string keyword)
{
    var jieba = new JiebaSegmenter();
    return string.Join(" ", jieba.CutForSearch(keyword, hmm: true));
}
/// <summary>
/// Reads "./Resources/{TextBox1}.txt", segments it in search mode, counts every distinct
/// token of 2 to 4 characters, passes the frequent ones as a JSON array to
/// <c>GetJson</c> under the name from TextBox2, and writes the frequent tokens plus the
/// elapsed time to the response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    // NOTE(review): both text boxes are user input that flows into a file path / output
    // name without validation; consider restricting them to plain file names.
    string text = TextBox1.Text;
    var segmenter = new JiebaSegmenter();
    string aimFile = string.Format(@"./Resources/{0}.txt", text);
    string content = GetContent(aimFile);

    Stopwatch sw = new Stopwatch();
    sw.Start();

    // Materialize once: the sequence is enumerated again for every frequency count.
    IEnumerable<string> wordsforSearch = segmenter.CutForSearch(content).ToList();

    var persons = new Dictionary<string, int>();
    var json = new System.Text.StringBuilder("[");
    foreach (string item in wordsforSearch.Distinct())
    {
        if (item.Length < 2 || item.Length > 4)
        {
            continue;
        }

        // The dictionary is keyed by the trimmed token, so the duplicate check must use
        // the trimmed token too; otherwise Add throws for tokens that differ only in
        // surrounding whitespace.
        string word = item.Trim();
        if (persons.ContainsKey(word))
        {
            continue;
        }

        int f = GetFrequence(wordsforSearch, item);
        persons.Add(word, f);

        // NOTE(review): 2406 is an unexplained excluded frequency; confirm its meaning.
        if (f < 20 || f == 2406)
        {
            continue;
        }

        if (json.Length > 1)
        {
            json.Append(',');
        }
        json.Append("{\"name\":\"").Append(word).Append("\",\"value\":").Append(f).Append('}');
    }
    json.Append(']');

    string name = TextBox2.Text;
    GetJson(name, json.ToString());

    // Iterate the sorted sequence directly; Dictionary enumeration order is not guaranteed.
    var result = new System.Text.StringBuilder();
    foreach (var person in persons.OrderByDescending(pair => pair.Value))
    {
        if (person.Value >= 20)
        {
            result.Append("<br>")
                  .Append(System.Web.HttpUtility.HtmlEncode(person.Key))
                  .Append('-')
                  .Append(person.Value);
        }
    }
    Response.Write(result.ToString());

    sw.Stop();
    TimeSpan ts2 = sw.Elapsed;
    Response.Write(string.Format("</br>Stopwatch 总共花费{0}ms.", ts2.TotalMilliseconds.ToString()));

    if (content != "")
    {
        // The name is user input echoed into HTML, so it is encoded.
        Response.Write(string.Format("</br>" + "结果已输出至{0}.json" + "</br>", System.Web.HttpUtility.HtmlEncode(name)));
    }
}
/// <summary>
/// Reads "./Resources/三国演义.txt", segments it in search-engine mode and writes the
/// segments, joined by "/ ", to the response.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void btnSearch_Click(object sender, EventArgs e)
{
    var segmenter = new JiebaSegmenter();
    string aimFile = @"./Resources/三国演义.txt";
    string content = ReadData(aimFile);
    var wordsforSearch = segmenter.CutForSearch(content);

    // The "{0}" placeholder must go through string.Format; plain concatenation printed
    // the placeholder literally in front of the result.
    Response.Write(string.Format("</br>【搜索引擎模式】:{0}", string.Join("/ ", wordsforSearch)));
}
/// <summary>
/// Segments a string in jieba search mode.
/// </summary>
/// <param name="str">Text to segment.</param>
/// <returns>
/// The segments, or an empty array when <paramref name="str"/> is null, empty or
/// whitespace only.
/// </returns>
public static IEnumerable<string> CutForSearch(this string str)
{
    if (!string.IsNullOrWhiteSpace(str))
    {
        var jieba = new JiebaSegmenter();
        return jieba.CutForSearch(str);
    }

    return new string[0];
}
/// <summary>
/// Passes <paramref name="content"/> through <c>RemoveBadSymbols</c>, segments the
/// result in search mode and drops tokens that are empty or a single space.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>The remaining tokens in segmentation order.</returns>
public static List<string> GetWords(string content)
{
    var jieba = new JiebaSegmenter();
    string cleaned = RemoveBadSymbols(content);

    List<string> tokens = jieba.CutForSearch(cleaned).ToList();
    tokens.RemoveAll(token => token.Equals("") || token.Equals(" "));
    return tokens;
}
/// <summary>
/// Segments <paramref name="key"/> in search mode after loading the user dictionary at
/// <c>GlobalConstants.MusicDictionaryPath</c>, drops blank tokens and removes every
/// token contained in the stop dictionary loaded from
/// <c>GlobalConstants.MusicStopDictionaryPath</c>.
/// </summary>
/// <param name="key">Text to segment.</param>
/// <returns>The distinct remaining tokens (set difference against the stop dictionary).</returns>
public static string[] ToSeparateByJieba(this string key)
{
    var jieba = new JiebaSegmenter();
    jieba.LoadUserDict(GlobalConstants.MusicDictionaryPath);
    var stopWords = LoadStopDict(GlobalConstants.MusicStopDictionaryPath);

    return jieba.CutForSearch(key)
        .Where(token => !string.IsNullOrWhiteSpace(token))
        .ToList()
        .Except(stopWords)
        .ToArray();
}
/// <summary>
/// Prints the segmentation of one sample sentence in full mode, default mode and
/// search mode, each joined by '/'.
/// </summary>
public void Get_Word6_Test()
{
    string sample = "今年累计发货多少车";
    var jieba = new JiebaSegmenter();

    // Full mode.
    string fullMode = string.Join('/', jieba.Cut(sample, cutAll: true));
    Console.WriteLine(fullMode);

    // Default mode.
    string defaultMode = string.Join('/', jieba.Cut(sample));
    Console.WriteLine(defaultMode);

    // Search mode.
    string searchMode = string.Join('/', jieba.CutForSearch(sample));
    Console.WriteLine(searchMode);
}
/// <summary>
/// Writes the segmentation of several sample sentences to the response, one labelled
/// line per sample.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    var jieba = new JiebaSegmenter();
    string joined;

    // Full mode.
    joined = string.Join("/", jieba.Cut("我来自华东师范大学", cutAll: true));
    Response.Write(string.Format("[全模式]:{0}</br>", joined));

    // Precise mode is the default; the HMM model is used as well.
    joined = string.Join("/", jieba.Cut("我来自华东师范大学"));
    Response.Write(string.Format("[精确模式]:{0}</br>", joined));

    joined = string.Join("/", jieba.Cut("他来到了华东师范大学群贤堂"));
    Response.Write(string.Format("[新词识别]:{0}</br>", joined));

    // Search-engine mode.
    joined = string.Join("/", jieba.CutForSearch("李白硕士毕业于东方大学计算所,后在日本京都大学深造"));
    Response.Write(string.Format("[搜索引擎模式]:{0}</br>", joined));

    joined = string.Join("/", jieba.Cut("结过婚的和尚结过婚的"));
    Response.Write(string.Format("[歧义消除]:{0}</br>", joined));
}
/// <summary>
/// Writes the segmentation of several sample sentences to the response, one labelled
/// line per sample.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    var jieba = new JiebaSegmenter();
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut("我来自华东师范大学", cutAll: true));
    Response.Write(string.Format("【全模式】:{0}</br>", joined));

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut("我来自华东师范大学"));
    Response.Write(string.Format("【精确模式】:{0}</br>", joined));

    // Precise mode; the HMM model is also used by default.
    joined = string.Join("/ ", jieba.Cut("他来到了华东师范大学群贤堂"));
    Response.Write(string.Format("【新词识别】:{0}</br>", joined));

    // Search-engine mode.
    joined = string.Join("/ ", jieba.CutForSearch("李白硕士毕业于东方大学计算所,后在日本京都大学深造"));
    Response.Write(string.Format("【搜索引擎模式】:{0}</br>", joined));

    joined = string.Join("/ ", jieba.Cut("结过婚的和尚未结过婚的"));
    Response.Write(string.Format("【歧义消除】:{0}</br>", joined));
}
/// <summary>
/// Splits a keyword into candidate search terms. Candidates are the keyword itself,
/// every regex match and capture group of the pattern below, the keyword list returned
/// by the remote keyword endpoint (only for keywords of six or more characters, best
/// effort) and the search-mode segmentation of the keyword. Terms shorter than two
/// characters or starting with punctuation are dropped. The result is cached for one hour.
/// </summary>
/// <param name="keyword">Keyword to split; also used as the cache key.</param>
/// <returns>Distinct candidate terms ordered by descending length.</returns>
public List<string> CutKeywords(string keyword)
{
    if (_memoryCache.TryGetValue(keyword, out List<string> list))
    {
        return list;
    }

    var set = new HashSet<string> { keyword };

    var mc = Regex.Matches(keyword, @"(([A-Z]*[a-z]*)[\d]*)([\u4E00-\u9FA5]+)*((?!\p{P}).)*");
    foreach (Match m in mc)
    {
        set.Add(m.Value);
        foreach (Group g in m.Groups)
        {
            set.Add(g.Value);
        }
    }

    if (keyword.Length >= 6)
    {
        try
        {
            // Escape the keyword: characters such as '&', '#', '+' or spaces would
            // otherwise corrupt the query string.
            string requestUri = "/api/customsearch/keywords?title=" + Uri.EscapeDataString(keyword);

            // NOTE(review): .Result blocks on async I/O; kept because the method is
            // synchronous and its signature must not change.
            using (var res = HttpClient.GetAsync(requestUri).Result)
            {
                if (res.StatusCode == HttpStatusCode.OK)
                {
                    string body = res.Content.ReadAsStringAsync().Result;
                    BaiduAnalysisModel model = JsonConvert.DeserializeObject<BaiduAnalysisModel>(body);
                    model.Result.Res.KeywordList?.ForEach(s =>
                    {
                        // A null entry would crash the length filter below, outside this try block.
                        if (!string.IsNullOrEmpty(s))
                        {
                            set.Add(s);
                        }
                    });
                }
            }
        }
        catch
        {
            // Deliberately ignored: the remote keyword service is an optional enrichment.
        }
    }

    var segmenter = new JiebaSegmenter();
    foreach (string word in segmenter.CutForSearch(keyword))
    {
        set.Add(word);
    }

    set.RemoveWhere(s => s.Length < 2 || Regex.IsMatch(s, @"^\p{P}.*"));
    list = set.OrderByDescending(s => s.Length).ToList();
    _memoryCache.Set(keyword, list, TimeSpan.FromHours(1));
    return list;
}
/// <summary>
/// Loads a user dictionary from disk and from an embedded resource, then compares the
/// segmentation of several sample sentences with the expected token lists via
/// <c>Compared</c>.
/// </summary>
public void TestCut()
{
    var jieba = new JiebaSegmenter();
    jieba.LoadUserDict(@"D:\lucene\dict.txt");
    jieba.LoadUserDictForEmbedded(Assembly.GetCallingAssembly(), "dict.txt");

    // Full mode.
    var actual = jieba.Cut("我来到北京清华大学", cutAll: true);
    var expected = new List<string> { "我", "来到", "北京", "清华", "清华大学", "华大", "大学" };
    Compared(actual, expected);

    // Default (precise) mode.
    actual = jieba.Cut("我来到北京清华大学");
    expected = new List<string> { "我", "来到", "北京", "清华大学" };
    Compared(actual, expected);

    // Precise mode; the HMM model is also used by default.
    actual = jieba.Cut("他来到了网易杭研大厦");
    expected = new List<string> { "他", "来到", "了", "网易", "杭研", "大厦" };
    Compared(actual, expected);

    // Search-engine mode.
    actual = jieba.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造");
    expected = new List<string>
    {
        "小明", "硕士", "毕业", "于", "中国", "科学", "学院", "科学院", "中国科学院", "计算",
        "计算所", ",", "后", "在", "日本", "京都", "大学", "日本京都大学", "深造"
    };
    Compared(actual, expected);

    actual = jieba.Cut("结过婚的和尚未结过婚的");
    expected = new List<string> { "结过婚", "的", "和", "尚未", "结过婚", "的" };
    Compared(actual, expected);

    // Second and third arguments both false.
    actual = jieba.Cut("快奔三", false, false);
    expected = new List<string> { "快", "奔三" };
    Compared(actual, expected);
}
/// <summary>
/// Prints the segmentation of several sample sentences to the console, one labelled
/// line per sample.
/// </summary>
public void CutDemo()
{
    var jieba = new JiebaSegmenter();
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学", cutAll: true));
    Console.WriteLine("【全模式】:{0}", joined);

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut("我来到北京清华大学"));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Precise mode; the HMM model is also used by default.
    joined = string.Join("/ ", jieba.Cut("他来到了网易杭研大厦"));
    Console.WriteLine("【新词识别】:{0}", joined);

    // Search-engine mode.
    joined = string.Join("/ ", jieba.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"));
    Console.WriteLine("【搜索引擎模式】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("结过婚的和尚未结过婚的"));
    Console.WriteLine("【歧义消除】:{0}", joined);
}
/// <summary>
/// Segments a string with the jieba mode selected by <paramref name="type"/>.
/// </summary>
/// <param name="objStr">Text to segment.</param>
/// <param name="type">
/// <c>Default</c>: precise mode with HMM; <c>CutAll</c>: full mode;
/// <c>CutForSearch</c>: search-engine mode; any other value: precise mode without HMM.
/// </param>
/// <returns>The segments produced by the selected mode.</returns>
public static IEnumerable<string> GetSplitWords(string objStr, JiebaTypeEnum type = JiebaTypeEnum.Default)
{
    var segmenter = new JiebaSegmenter();

    if (type == JiebaTypeEnum.Default)
    {
        return segmenter.Cut(objStr); // precise mode, with HMM
    }

    if (type == JiebaTypeEnum.CutAll)
    {
        return segmenter.Cut(objStr, cutAll: true); // full mode
    }

    if (type == JiebaTypeEnum.CutForSearch)
    {
        return segmenter.CutForSearch(objStr); // search-engine mode
    }

    return segmenter.Cut(objStr, false, false); // precise mode, without HMM
}
/// <summary>
/// Segments <paramref name="text"/> in search mode, counts each token longer than one
/// character and adds those counts to <c>SearchCount</c> of the keywords that already
/// exist in the database. Tokens without a stored keyword are ignored.
/// </summary>
/// <param name="text">Search text whose tokens are counted.</param>
public static void UpdateSearchCount(string text)
{
    var segmenter = new JiebaSegmenter();
    var result = segmenter.CutForSearch(text)
        .GroupBy(s => s)
        .Where(s => s.Key.Length > 1)
        .Select(s => new { Key = s.Key, Count = s.Count() })
        .ToList();

    using (ApplicationDbContext db = new ApplicationDbContext())
    {
        var temp = result.Select(s => s.Key).ToList();
        var keys = db.Keywords.Where(s => temp.Contains(s.Word)).ToList();

        foreach (var item in keys)
        {
            // The database may match Word more loosely than the ordinal comparison used
            // here (for example under a case-insensitive collation), so a returned row
            // can have no in-memory counterpart. Skip it instead of dereferencing null.
            var match = result.FirstOrDefault(s => s.Key == item.Word);
            if (match != null)
            {
                item.SearchCount += match.Count;
            }
        }

        db.SaveChanges();
    }
}
/// <summary>
/// Prints the segmentation of the sample word "多线程" in full mode, default mode,
/// default mode with HMM passed explicitly, search-engine mode and default mode again.
/// </summary>
public void JiebaSegmenterTest()
{
    var jieba = new JiebaSegmenter();
    string sample = "多线程";
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut(sample, cutAll: true));
    Console.WriteLine("【全模式】:{0}", joined);

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut(sample));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Precise mode with the HMM model requested explicitly.
    joined = string.Join("/ ", jieba.Cut(sample, hmm: true));
    Console.WriteLine("【新词识别】:{0}", joined);

    // Search-engine mode.
    joined = string.Join("/ ", jieba.CutForSearch(sample));
    Console.WriteLine("【搜索引擎模式】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut(sample));
    Console.WriteLine("【歧义消除】:{0}", joined);
}
/// <summary>
/// Console demo: prints the segmentation of "WebApi 教程" / "webapi 教程" in several
/// modes and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var jieba = new JiebaSegmenter();
    string joined;

    // Full mode.
    joined = string.Join("/ ", jieba.Cut("WebApi 教程", cutAll: true));
    Console.WriteLine("【全模式】:{0}", joined);

    // Precise mode (the default).
    joined = string.Join("/ ", jieba.Cut("WebApi 教程"));
    Console.WriteLine("【精确模式】:{0}", joined);

    // Precise mode; the HMM model is also used by default.
    joined = string.Join("/ ", jieba.Cut("WebApi 教程"));
    Console.WriteLine("【新词识别】:{0}", joined);

    // Search-engine mode (lower-case sample).
    joined = string.Join("/ ", jieba.CutForSearch("webapi 教程"));
    Console.WriteLine("【搜索引擎模式】:{0}", joined);

    joined = string.Join("/ ", jieba.Cut("webapi 教程"));
    Console.WriteLine("【歧义消除】:{0}", joined);

    Console.ReadKey();
}
/// <summary>
/// Segmentation test: segments five fixed sample sentences in the different jieba modes
/// and returns all labelled results, one per line.
/// </summary>
/// <param name="keyword">
/// Not used by the current implementation; the samples are fixed.
/// NOTE(review): confirm whether the keyword was meant to be segmented as well.
/// </param>
/// <returns>The five labelled segmentation lines joined by the platform newline.</returns>
public static string Token(string keyword)
{
    var segmenter = new JiebaSegmenter();

    // Every result is kept: assigning each one to the same variable discarded all but
    // the last line.
    string[] lines =
    {
        // full mode
        string.Format("【全模式】:{0}", string.Join("/ ", segmenter.Cut("我来到北京清华大学", cutAll: true))),
        // precise mode (the default)
        string.Format("【精确模式】:{0}", string.Join("/ ", segmenter.Cut("我来到北京清华大学"))),
        // precise mode; the HMM model is also used by default
        string.Format("【新词识别】:{0}", string.Join("/ ", segmenter.Cut("他来到了网易杭研大厦"))),
        // search-engine mode
        string.Format("【搜索引擎模式】:{0}", string.Join("/ ", segmenter.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"))),
        string.Format("【歧义消除】:{0}", string.Join("/ ", segmenter.Cut("结过婚的和尚未结过婚的")))
    };

    return string.Join(System.Environment.NewLine, lines);
}
/// <summary>
/// Searches the links cached in the session: the search text is segmented in search
/// mode and every cached link that shares at least one keyword with it is written to
/// the response as an anchor (one result per distinct URL). Writes a "nothing found"
/// message when there are no matches or no cached links.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void BtnSearch_OnClick(object sender, EventArgs e)
{
    string strSearch = TxtKeyword.Text;
    var segmenter = new JiebaSegmenter();

    // The search text is split into keywords too, to match as much as possible.
    // A set gives constant-time membership tests in the loop below.
    var searchKeywords = new HashSet<string>(segmenter.CutForSearch(strSearch));

    var searchResults = new List<ExampleMyHref>();

    // The cache entry is absent when the session expired or the page was never loaded.
    var resultHrefs = Session[SESSION_NAME] as List<ExampleMyHref>;
    if (resultHrefs != null)
    {
        var seenUrls = new HashSet<string>(); // used for de-duplication by URL
        foreach (var href in resultHrefs)
        {
            if (seenUrls.Contains(href.HrefSrc))
            {
                continue;
            }

            foreach (var hrefKeyword in href.KeywordList)
            {
                if (searchKeywords.Contains(hrefKeyword))
                {
                    seenUrls.Add(href.HrefSrc);
                    searchResults.Add(href);
                    break;
                }
            }
        }
    }

    if (searchResults.Count > 0)
    {
        foreach (var item in searchResults)
        {
            // Cached link data is encoded before being written into HTML.
            // NOTE(review): if the stored titles are already HTML-encoded this double-encodes; confirm.
            string resulttag = "<a href=\"" + System.Web.HttpUtility.HtmlAttributeEncode(item.HrefSrc) + "\">"
                + System.Web.HttpUtility.HtmlEncode(item.HrefTitle) + "</a><br />";
            Response.Write(resulttag);
        }
    }
    else
    {
        Response.Write("没有找到相关内容<br />");
    }
}