/// <summary>
/// Computes the cosine similarity between the term-frequency vector of an item name
/// and a weighted term vector supplied as JSON.
/// </summary>
/// <param name="itemName">
/// Item (product) name. It is segmented via <c>SplitWordHelper.SplitWords</c> and
/// <c>SplitWordHelper.RemoveSplitWord</c>; the result is split on spaces into terms.
/// </param>
/// <param name="Corpus">
/// JSON text deserialized into <c>TFIDFJson</c>; its <c>Data</c> list supplies the
/// (term, weight) pairs of the corpus vector.
/// </param>
/// <returns>
/// dot(item, corpus) / (|item| * |corpus|). Returns 0 when either vector has zero
/// length (no terms, no corpus entries, or only zero weights) instead of NaN.
/// </returns>
public static double CosSimilarity(string itemName, string Corpus)
{
    string splitword = SplitWordHelper.RemoveSplitWord(SplitWordHelper.SplitWords(itemName));
    string[] terms = splitword.Split(' ').Where(s => !string.IsNullOrEmpty(s)).ToArray();

    TFIDFJson tfidf = JsonConvert.DeserializeObject<TFIDFJson>(Corpus);
    List<KeyValuePair<string, double>> corpus = (tfidf == null) ? null : tfidf.Data;

    // A zero-length vector on either side has no direction; report "no similarity"
    // rather than dividing 0 by 0.
    if (terms.Length == 0 || corpus == null || corpus.Count == 0)
    {
        return 0;
    }

    // Term frequency = occurrences of the term's text in the space-stripped token
    // string, divided by the number of tokens.
    // NOTE(review): this is substring counting, so a short term is also counted
    // inside longer terms that contain it - confirm this is intended.
    string joined = splitword.Replace(" ", "");
    Dictionary<string, double> item = new Dictionary<string, double>();
    foreach (string term in terms)
    {
        if (item.ContainsKey(term))
        {
            continue;
        }

        // Escape the term so regex metacharacters in it are matched literally.
        int occurrences = Regex.Matches(joined, Regex.Escape(term)).Count;
        item.Add(term, (double)occurrences / terms.Length);
    }

    double itemSquares = 0;
    foreach (double tf in item.Values)
    {
        itemSquares += tf * tf;
    }

    // Single pass over the corpus: accumulate its squared norm and, for every
    // entry whose key is also an item term, the dot-product contribution.
    double corpusSquares = 0;
    double dot = 0;
    foreach (KeyValuePair<string, double> entry in corpus)
    {
        corpusSquares += entry.Value * entry.Value;

        double tf;
        if (entry.Key != null && item.TryGetValue(entry.Key, out tf))
        {
            dot += tf * entry.Value;
        }
    }

    double denominator = Math.Sqrt(itemSquares) * Math.Sqrt(corpusSquares);
    if (denominator == 0)
    {
        return 0;
    }

    return dot / denominator;
}
/// <summary>
/// Computes the Euclidean distance between the scaled term-frequency vector of an
/// item name and a weighted term vector supplied as JSON. The value is a distance:
/// smaller means more similar, and 0 means the vectors are identical.
/// </summary>
/// <param name="itemName">
/// Item (product) name. It is segmented via <c>SplitWordHelper.SplitWords</c> and
/// <c>SplitWordHelper.RemoveSplitWord</c>; the result is split on spaces into terms.
/// </param>
/// <param name="Corpus">
/// JSON text deserialized into <c>TFIDFJson</c>; its <c>Data</c> list supplies the
/// (term, weight) pairs of the corpus vector. A missing list is treated as empty.
/// </param>
/// <returns>
/// sqrt of the sum of squared per-term differences, where a term present on only
/// one side contributes its own squared weight.
/// </returns>
public static double Euclidean(string itemName, string Corpus)
{
    // Term frequencies are multiplied by this factor and truncated to an integer.
    // NOTE(review): presumably chosen to match the scale of the corpus weights - confirm.
    const int TfScale = 100000;

    string splitword = SplitWordHelper.RemoveSplitWord(SplitWordHelper.SplitWords(itemName));
    string[] terms = splitword.Split(' ').Where(s => !string.IsNullOrEmpty(s)).ToArray();

    TFIDFJson tfidf = JsonConvert.DeserializeObject<TFIDFJson>(Corpus);
    List<KeyValuePair<string, double>> corpus = (tfidf == null) ? null : tfidf.Data;
    if (corpus == null)
    {
        corpus = new List<KeyValuePair<string, double>>();
    }

    // Term frequency = occurrences of the term's text in the space-stripped token
    // string, divided by the number of tokens, then scaled and truncated.
    // NOTE(review): this is substring counting, so a short term is also counted
    // inside longer terms that contain it - confirm this is intended.
    string joined = splitword.Replace(" ", "");
    Dictionary<string, double> item = new Dictionary<string, double>();
    foreach (string term in terms)
    {
        if (item.ContainsKey(term))
        {
            continue;
        }

        // Escape the term so regex metacharacters in it are matched literally.
        int occurrences = Regex.Matches(joined, Regex.Escape(term)).Count;
        double tf = (double)occurrences / terms.Length * TfScale;
        item.Add(term, (int)tf);
    }

    // Accumulate squared differences directly. This never goes negative, and the
    // corpus-only terms are counted even when the item name produced no terms.
    // If the corpus repeats a key, each repeated entry contributes its own
    // squared difference against the item weight.
    double sum = 0;
    HashSet<string> matched = new HashSet<string>();
    foreach (KeyValuePair<string, double> entry in corpus)
    {
        double tf;
        if (entry.Key != null && item.TryGetValue(entry.Key, out tf))
        {
            double diff = tf - entry.Value;
            sum += diff * diff;
            matched.Add(entry.Key);
        }
        else
        {
            // Term only in the corpus: the item weight is implicitly 0.
            sum += entry.Value * entry.Value;
        }
    }

    foreach (KeyValuePair<string, double> pair in item)
    {
        // Term only in the item: the corpus weight is implicitly 0.
        if (!matched.Contains(pair.Key))
        {
            sum += pair.Value * pair.Value;
        }
    }

    return Math.Sqrt(sum);
}