/// <summary>
/// Runs an exact-phrase query on the <c>oldName</c> field and raises the score of
/// every card the library returns.
/// </summary>
/// <param name="keyword">Text to look for; it is embedded verbatim in the query string.</param>
/// <param name="score">Base score of a hit before the length penalty is subtracted.</param>
/// <returns>The number of cards returned by <c>cardLibrary.Search</c>.</returns>
private int DoSearchOldName(string keyword, int score)
{
    CardDescription[] hits = cardLibrary.Search(string.Format("oldName:\"{0}\"", keyword));

    foreach (CardDescription hit in hits)
    {
        DictSearcherNode target = GetSearcherNodeByID(hit.ID);

        // The penalty is the length of the shortest comma-separated alias that
        // contains the keyword; it falls back to the length of the whole
        // oldName string when no shorter alias matches.
        string[] aliases = hit.oldName.Split(',');
        int penalty = hit.oldName.Length;
        foreach (string alias in aliases)
        {
            if (alias.IndexOf(keyword) >= 0 && alias.Length < penalty)
            {
                penalty = alias.Length;
            }
        }

        // Scores only ever go up: keep the better of the old and the new value.
        int candidate = score - penalty;
        if (candidate > target.Score)
        {
            target.Score = candidate;
        }
    }

    return hits.Length;
}
/// <summary>
/// Compares two <c>DictSearcherNode</c> objects so that the one with the higher
/// <c>Score</c> sorts first (descending order).
/// </summary>
/// <param name="x">First node; must be castable to <c>DictSearcherNode</c>.</param>
/// <param name="y">Second node; must be castable to <c>DictSearcherNode</c>.</param>
/// <returns>
/// A negative value when <paramref name="x"/> has the higher score, a positive value
/// when <paramref name="y"/> has the higher score, and 0 when the scores are equal.
/// </returns>
public int Compare(object x, object y)
{
    DictSearcherNode nx = (DictSearcherNode)x;
    DictSearcherNode ny = (DictSearcherNode)y;

    // Use relational comparisons instead of "ny.Score - nx.Score": the subtraction
    // can overflow for scores of large magnitude and opposite sign, which would
    // report the wrong ordering. Only the sign of the result matters to callers.
    if (ny.Score > nx.Score)
    {
        return 1;
    }
    if (ny.Score < nx.Score)
    {
        return -1;
    }
    return 0;
}
/// <summary>
/// Creates the searcher and fills <c>Result</c> with one <c>DictSearcherNode</c> per
/// index in the range reported by <c>cardLibrary.GetCount()</c>.
/// </summary>
public DictSearcher()
{
    Result = new ArrayList(cardLibrary.GetCount());

    int index = 0;
    while (index < cardLibrary.GetCount())
    {
        // Each node starts out pointing at the card with the same index.
        DictSearcherNode entry = new DictSearcherNode();
        entry.CardIndex = index;
        Result.Add(entry);
        index++;
    }
}
/// <summary>
/// Runs a ready-made query and lifts the score of every returned card to at
/// least <paramref name="score"/>.
/// </summary>
/// <param name="querystring">Complete query string, passed to <c>cardLibrary.Search</c> unchanged.</param>
/// <param name="score">Score assigned to a hit unless the card already has a higher one.</param>
/// <returns>The number of cards returned by <c>cardLibrary.Search</c>.</returns>
private int DoSearchPYOldName(string querystring, int score)
{
    CardDescription[] hits = cardLibrary.Search(querystring);

    foreach (CardDescription hit in hits)
    {
        DictSearcherNode target = GetSearcherNodeByID(hit.ID);

        // Never lower an existing score.
        if (target.Score < score)
        {
            target.Score = score;
        }
    }

    return hits.Length;
}
/// <summary>
/// Runs an exact-phrase query on the <c>enName</c> field and raises the score of
/// every card the library returns.
/// </summary>
/// <param name="keyword">Text to look for; it is embedded verbatim in the query string.</param>
/// <param name="score">Base score of a hit; the length of the card's <c>enName</c> is subtracted from it.</param>
/// <returns>The number of cards returned by <c>cardLibrary.Search</c>.</returns>
private int DoSearchEnName(string keyword, int score)
{
    CardDescription[] hits = cardLibrary.Search(string.Format("enName:\"{0}\"", keyword));

    foreach (CardDescription hit in hits)
    {
        DictSearcherNode target = GetSearcherNodeByID(hit.ID);

        // Longer names get a larger penalty, so shorter names end up with higher scores.
        int candidate = score - hit.enName.Length;
        if (candidate > target.Score)
        {
            target.Score = candidate;
        }
    }

    return hits.Length;
}
/// <summary>
/// Ordinary search: normalizes the sentence, scores the cards against it and returns
/// the best-scoring cards.
/// </summary>
/// <param name="SentenceString">Raw text to search for. A null value yields an empty result.</param>
/// <param name="Top">
/// Maximum number of cards to return. 0 means "up to <c>cardLibrary.GetCount()</c>".
/// Values larger than the number of result nodes are clamped.
/// </param>
/// <returns>
/// The cards whose score is greater than 0, in the order produced by sorting with
/// <c>DictSearcherCompare</c>; an empty array when the normalized sentence is empty.
/// </returns>
public CardDescription[] NormalSearch(string SentenceString, int Top)
{
    if (SentenceString == null)
    {
        return new CardDescription[0];
    }

    // Conversion to Simplified Chinese (CharacterSet.BIG5ToGB) is disabled.
    // Per the original author's note, this step is the full-width to half-width conversion.
    string simplesen = CharacterSet.SBCToDBC(SentenceString);

    // Filter out stop words.
    simplesen = stopwrods.Replace(simplesen, " ");

    // Filter out punctuation: every character that is not a letter or digit becomes a space.
    StringBuilder sb = new StringBuilder(simplesen.Length);
    for (int i = 0; i < simplesen.Length; i++)
    {
        char c = simplesen[i];
        sb.Append(Char.IsLetterOrDigit(c) ? c : ' ');
    }
    simplesen = sb.ToString();

    if (simplesen.Length == 0)
    {
        return new CardDescription[0];
    }

    // Reset every node: slot i points at card i again and has no score.
    for (int i = 0; i < Result.Count; i++)
    {
        DictSearcherNode node = (DictSearcherNode)Result[i];
        node.CardIndex = i;
        node.Score = 0;
    }

    DoSearch(simplesen);

    DictSearcherCompare dsc = new DictSearcherCompare();
    Result.Sort(dsc);

    int limit = (Top == 0) ? cardLibrary.GetCount() : Top;

    // Never read past the end of Result; the unclamped loop threw
    // ArgumentOutOfRangeException when the limit exceeded the node count
    // and every node had a positive score.
    if (limit > Result.Count)
    {
        limit = Result.Count;
    }

    ArrayList al = new ArrayList();
    for (int i = 0; i < limit; i++)
    {
        DictSearcherNode node = (DictSearcherNode)Result[i];

        // Stop at the first node without a positive score.
        if (node.Score <= 0)
        {
            break;
        }
        al.Add(cardLibrary.GetCardByIndex(node.CardIndex));
    }

    return (CardDescription[])al.ToArray(typeof(CardDescription));
}
/// <summary>
/// Smart expanding search: starting at the mouse position, automatically picks the
/// effective span of the sentence and returns the best-scoring cards for it.
/// </summary>
/// <param name="SentenceString">Raw sentence under the mouse. A null value yields an empty result.</param>
/// <param name="lLoc">
/// Character index of the mouse position. When it lies outside the normalized or the
/// raw sentence, an empty result is returned.
/// </param>
/// <param name="top">
/// Number of cards to return. Negative values are treated as 0 and values larger than
/// the number of result nodes are clamped.
/// </param>
/// <returns>
/// A <c>SearchResult</c> whose <c>KeyWord</c> is the matched span taken from
/// <paramref name="SentenceString"/> and whose <c>Cards</c> are the first nodes after
/// sorting with <c>DictSearcherCompare</c> (nodes are not filtered by score here).
/// For an empty sentence, a null sentence or an out-of-range <paramref name="lLoc"/>,
/// the freshly constructed <c>SearchResult</c> is returned without either being assigned.
/// </returns>
public SearchResult TopSearch(string SentenceString, int lLoc, int top)
{
    SearchResult sr = new SearchResult();
    if (SentenceString == null)
    {
        return sr;
    }

    // Conversion to Simplified Chinese (CharacterSet.BIG5ToGB) is disabled.
    // Per the original author's note, this step is the full-width to half-width conversion.
    string simplesen = CharacterSet.SBCToDBC(SentenceString);

    // Filter out stop words.
    simplesen = stopwrods.Replace(simplesen, " ");

    // Filter out punctuation: every character that is not a letter or digit becomes a space.
    StringBuilder sb = new StringBuilder(simplesen.Length);
    for (int i = 0; i < simplesen.Length; i++)
    {
        char c = simplesen[i];
        sb.Append(Char.IsLetterOrDigit(c) ? c : ' ');
    }
    simplesen = sb.ToString();

    if (simplesen.Length == 0)
    {
        return sr;
    }

    // The Substring calls below index both strings with lLoc; an out-of-range
    // position used to throw ArgumentOutOfRangeException.
    // NOTE(review): offsets computed on simplesen are applied to SentenceString
    // unchanged. That is only exact if normalization keeps the length, which a
    // multi-character stop word replaced by a single space would break - confirm.
    if (lLoc < 0 || lLoc >= simplesen.Length || lLoc >= SentenceString.Length)
    {
        return sr;
    }

    // Reset every node: slot i points at card i again and has no score.
    for (int i = 0; i < Result.Count; i++)
    {
        DictSearcherNode node = (DictSearcherNode)Result[i];
        node.CardIndex = i;
        node.Score = 0;
    }

    // Tokenized keywords already searched, so that each one is scored only once.
    Hashtable SearchedKeyword = new Hashtable();
    string lMainWord = "";
    int lMainWordLenth = 0;
    string rMainWord = "";
    int rMainWordLenth = 0;

    // Grow the window leftwards from lLoc; stop at the first new keyword without hits.
    for (int i = lLoc; i >= 0; i--)
    {
        string s = simplesen.Substring(i, lLoc - i + 1);
        string trims = GetTokenizerText(s);
        if (trims.Length == 0 || SearchedKeyword.ContainsKey(trims))
        {
            continue;
        }

        SearchedKeyword.Add(trims, 0);
        if (DoSearch(s) <= 0)
        {
            break;
        }

        if (trims.Length > lMainWordLenth)
        {
            // Skip leading blanks so the keyword starts on a real character.
            // The scan is bounded so an all-blank window cannot run off the end.
            int blank = 0;
            while (blank < s.Length && s[blank] == ' ')
            {
                blank++;
            }
            lMainWord = SentenceString.Substring(i + blank, lLoc - i + 1 - blank);
            lMainWordLenth = trims.Length;
        }
    }

    // Grow the window rightwards; it also starts at lLoc, so it shares that
    // character with the left window.
    for (int i = lLoc + 1; i < simplesen.Length; i++)
    {
        string s = simplesen.Substring(lLoc, i - lLoc + 1);
        string trims = GetTokenizerText(s);
        if (trims.Length == 0 || SearchedKeyword.ContainsKey(trims))
        {
            continue;
        }

        SearchedKeyword.Add(trims, 0);
        if (DoSearch(s) <= 0)
        {
            break;
        }

        if (trims.Length > rMainWordLenth)
        {
            int blank = 0;
            while (blank < s.Length && s[blank] == ' ')
            {
                blank++;
            }
            rMainWord = SentenceString.Substring(lLoc + blank, i - lLoc + 1 - blank);
            rMainWordLenth = trims.Length;
        }
    }

    DictSearcherCompare dsc = new DictSearcherCompare();
    Result.Sort(dsc);

    if (rMainWord.Length > 1)
    {
        if (GetTokenizerText(SentenceString.Substring(lLoc, 1)).Length > 0)
        {
            // The character at lLoc yields a token; drop the first character of
            // the right part, presumably because the left part already ends with it.
            sr.KeyWord = lMainWord + rMainWord.Substring(1);
        }
        else
        {
            sr.KeyWord = lMainWord + rMainWord;
        }
    }
    else
    {
        sr.KeyWord = lMainWord;
    }

    // Clamp the requested count to what is available; a negative count or one
    // beyond Result.Count used to throw.
    int count = top;
    if (count > Result.Count)
    {
        count = Result.Count;
    }
    if (count < 0)
    {
        count = 0;
    }

    sr.Cards = new CardDescription[count];
    for (int i = 0; i < count; i++)
    {
        DictSearcherNode node = (DictSearcherNode)Result[i];
        sr.Cards[i] = cardLibrary.GetCardByIndex(node.CardIndex);
    }

    return sr;
}
/// <summary>
/// Scores the cards against one keyword and adds the outcome to the scores already
/// stored in <c>Result</c>.
/// </summary>
/// <param name="keyword">Keyword to search for in the name fields and, when short enough, the pinyin fields.</param>
/// <returns>The sum of the hit counts returned by the individual field searches.</returns>
private int DoSearch(string keyword)
{
    // Move the scores accumulated so far into Result2 and zero them in Result,
    // so the field searches below start from a clean slate for this keyword.
    Result2 = new ArrayList(cardLibrary.GetCount());
    int slot = 0;
    while (slot < cardLibrary.GetCount())
    {
        DictSearcherNode live = (DictSearcherNode)Result[slot];
        DictSearcherNode saved = new DictSearcherNode();
        saved.CardIndex = live.CardIndex;
        saved.Score = live.Score;
        live.Score = 0;
        Result2.Add(saved);
        slot++;
    }

    // The score multipliers grow exponentially with the tokenizer length, capped at 5.
    int tokenLength = GetTokenizerLength(keyword);
    tokenLength = (tokenLength > 5) ? 5 : tokenLength;
    int factor = (int)Math.Pow(5, tokenLength);

    int hits = DoSearchShortName(keyword, 4000 * factor);
    hits += DoSearchName(keyword, 2000 * factor);
    hits += DoSearchJapName(keyword, 1500 * factor);
    hits += DoSearchEnName(keyword, 1500 * factor);
    hits += DoSearchOldName(keyword, 1000 * factor);

    // Pinyin lookups use a smaller base (3) and run only when
    // GetPingyin.GetChineseLength reports at most 8 for the keyword.
    if (GetPingyin.GetChineseLength(keyword) <= 8)
    {
        int pyfactor = (int)Math.Pow(3, tokenLength);
        string[] readings = GetPingyin.converts(keyword);

        hits += DoSearchPYName(BuildPinyinQuery("pyname:\"{0}\"", readings), 1000 * pyfactor);
        hits += DoSearchPYShortName(BuildPinyinQuery("pyshortName:\"{0}\"", readings), 1000 * pyfactor);
        hits += DoSearchPYOldName(BuildPinyinQuery("pyoldName:\"{0}\"", readings), 500 * pyfactor);
    }

    // Add the scores parked in Result2 back on top of this keyword's scores.
    for (int i = 0; i < cardLibrary.GetCount(); i++)
    {
        DictSearcherNode current = (DictSearcherNode)Result[i];
        DictSearcherNode previous = (DictSearcherNode)Result2[i];
        current.Score = current.Score + previous.Score;
    }

    return hits;
}

// Concatenates one formatted clause per reading, with no separator, into a single query string.
private static string BuildPinyinQuery(string clauseFormat, string[] readings)
{
    string query = "";
    foreach (string reading in readings)
    {
        query += string.Format(clauseFormat, reading);
    }
    return query;
}