/// <summary>
/// Builds an order-independent key string for a message.
/// </summary>
/// <remarks>
/// The message is lower-cased, trimmed and split on every character that is
/// not an ASCII letter. Each piece is passed to <c>Lingvo.getKey</c>; pieces
/// for which it returns null or an empty string are dropped. The remaining
/// keys are sorted and concatenated (each one trimmed) with no separator, so
/// two messages containing the same keyed words in a different order yield
/// the same result.
/// </remarks>
/// <param name="msg">Message text. Must not be null.</param>
/// <returns>
/// The concatenated sorted keys, or an empty string when no word of the
/// message produces a key.
/// </returns>
public static string getHash(string msg)
{
    // Lower-case with the invariant culture: the culture-sensitive ToLower()
    // maps 'I' to dotless 'ı' under tr/az cultures, and the ASCII-only split
    // pattern below would then cut words at that character, making the hash
    // depend on the current thread culture.
    string normalized = msg.ToLowerInvariant().Trim();

    List<string> keys = new List<string>();
    foreach (string word in Regex.Split(normalized, "[^a-zA-Z]"))
    {
        string key = Lingvo.getKey(word);
        if (!String.IsNullOrEmpty(key))
        {
            keys.Add(key);
        }
    }

    // NOTE(review): List<string>.Sort() uses the default (culture-sensitive)
    // string comparer. It is kept as-is so the key order of existing hashes
    // does not change; confirm whether an ordinal sort is wanted.
    keys.Sort();

    // Keys are sorted untrimmed and trimmed only while joining, as before.
    // One Concat call replaces repeated string += inside a loop.
    return string.Concat(keys.ConvertAll(k => k.Trim()).ToArray());
}
/// <summary>
/// Extracts word sequences from <paramref name="text"/> and records them in
/// the static <c>freq</c>, <c>source</c> and <c>dicNames</c> tables.
/// </summary>
/// <remarks>
/// The text is cut into sentences at every character that is not an ASCII
/// letter, a digit or a space; each sentence is cut into words at every
/// character that is not an ASCII letter or digit. A word is accepted when
/// <c>Lingvo.getKey</c> returns a non-empty key for it and
/// <c>Lingvo.isNotNoun</c> returns false; every other word is skipped and
/// does not break a sequence. For each accepted word, every sequence made of
/// the last 1..5 accepted words of the sentence and ending at that word is
/// recorded under a key formed from the sequence's word keys, sorted and
/// joined with single spaces:
/// <list type="bullet">
/// <item><description><c>freq[key]</c> is incremented;</description></item>
/// <item><description><paramref name="num"/> is added to <c>source[key]</c>
/// if not already present;</description></item>
/// <item><description>the sequence's original words, in text order, are added
/// to <c>dicNames[key]</c> if not already present.</description></item>
/// </list>
/// </remarks>
/// <param name="text">Text to scan. Must not be null.</param>
/// <param name="num">
/// Value stored in <c>source</c> for every key found in this text
/// (presumably the identifier of the text being indexed; confirm with callers).
/// </param>
private static void prepareData(string text, int num)
{
    // Largest number of consecutive accepted words combined into one sequence.
    const int maxPhraseWords = 5;

    foreach (string sentence in Regex.Split(text, "[^a-zA-Z0-9 ]"))
    {
        if (string.IsNullOrEmpty(sentence))
        {
            continue;
        }

        // Sliding windows over the most recent accepted words of this
        // sentence: their keys and their original spellings. Both are always
        // the same length, so index i from the end refers to the same word.
        List<string> keyWindow = new List<string>();
        List<string> wordWindow = new List<string>();

        foreach (string word in Regex.Split(sentence, "[^a-zA-Z0-9]"))
        {
            string key = Lingvo.getKey(word);
            if (String.IsNullOrEmpty(key) || Lingvo.isNotNoun(word))
            {
                continue;
            }

            // Make room so that the windows hold at most maxPhraseWords
            // entries once the current word has been appended.
            while (keyWindow.Count >= maxPhraseWords)
            {
                keyWindow.RemoveAt(0);
                wordWindow.RemoveAt(0);
            }
            keyWindow.Add(key);
            wordWindow.Add(word);

            // Grow the sequence backwards from the current word, one word at
            // a time, and register every intermediate length.
            string phraseKeys = "";
            string phraseWords = "";
            for (int i = 1; i <= keyWindow.Count; i++)
            {
                phraseKeys = (keyWindow[keyWindow.Count - i] + " " + phraseKeys).Trim();

                // Sorting the parts makes the key independent of word order.
                string complexKey = string.Join(
                    " ",
                    phraseKeys.Split(' ').OrderBy(t => t).ToArray()).Trim();

                // freq and source are declared outside this method; their
                // original access pattern is kept unchanged.
                if (freq.ContainsKey(complexKey))
                {
                    freq[complexKey]++;
                }
                else
                {
                    freq.Add(complexKey, 1);
                }

                if (!source.ContainsKey(complexKey))
                {
                    source.Add(complexKey, new List<int>());
                }
                if (!source[complexKey].Contains(num))
                {
                    source[complexKey].Add(num);
                }

                phraseWords = (wordWindow[wordWindow.Count - i] + " " + phraseWords).Trim();

                // Allocate a list only when the key is new.
                List<string> names;
                if (!dicNames.TryGetValue(complexKey, out names))
                {
                    names = new List<string>();
                    dicNames.Add(complexKey, names);
                }
                if (!names.Contains(phraseWords))
                {
                    names.Add(phraseWords);
                }
            }
        }
    }
}