/// <summary>
/// Scores how similar two words are, based on their positions in a taxonomy.
/// </summary>
/// <param name="word1">First word to compare.</param>
/// <param name="word2">Second word to compare.</param>
/// <param name="strategy">1 selects the path-length measure, 2 selects the Wu &amp; Palmer measure.</param>
/// <returns>
/// 0 when the parts of speech differ, the part of speech is Unknown, or no common ancestor
/// was found; 1 when both words are equal or (for strategies 1 and 2) the path length is 0;
/// otherwise the chosen measure rounded to two decimals. Any other strategy value yields 0
/// once the early checks have passed.
/// </returns>
float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2, int strategy)
{
    // Words are only comparable within one known part of speech.
    if (word1.WordInfo.Pos == PartsOfSpeech.Unknown || word1.WordInfo.Pos != word2.WordInfo.Pos)
    {
        return 0.0F;
    }

    if (word1.WordInfo.Word == word2.WordInfo.Word)
    {
        return 1.0F;
    }

    int pathLength, lcaDepth, depth_1, depth_2;
    HierarchicalWordData[] pair = new HierarchicalWordData[2] { word1, word2 };
    FindLeastCommonAncestor(pair, out pathLength, out lcaDepth, out depth_1, out depth_2);

    // int.MaxValue is the "nothing shared" marker left by FindLeastCommonAncestor.
    if (pathLength == int.MaxValue)
    {
        return 0.0F;
    }

    float similarity;
    switch (strategy)
    {
        case 1: // Path Length
            if (pathLength == 0)
            {
                return 1.0F;
            }
            similarity = 1.0F / pathLength;
            break;

        case 2: // Wu & Palmer
            if (pathLength == 0)
            {
                return 1.0F;
            }
            // NOTE(review): a zero depth sum would produce NaN/Infinity here - confirm it cannot occur.
            similarity = (float)lcaDepth / (depth_1 + depth_2);
            break;

        default:
            similarity = 0.0F;
            break;
    }

    return (float)Math.Round(similarity, 2);
}
// Legacy implementation, kept commented out for reference.
//MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
//private void MyInitOld()
//{
//    _myWordsInfo1 = Disambiguate(_source);
//    _myWordsInfo2 = Disambiguate(_target);
//    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
//    _similarity =new float[m, n] ;
//    for (int i=0; i < m; i++)
//    {
//        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;
//        string word1 = _source[i];
//        for (int j=0; j < n; j++)
//        {
//            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;
//            string word2=_target[j];
//            WordDistance distance = new WordDistance();
//            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);
//            _similarity[i, j]=weight;
//        }
//    }
//}

/// <summary>
/// Builds a matrix whose entry [i][j] scores string1[i] against string2[j].
/// </summary>
/// <remarks>
/// For every pair, the score is the largest value - taken over the part-of-speech enum
/// values from index 1 upward - of the greater of the edit-distance similarity and the
/// WordSimilarity score. Word data is created on first use per word and part of speech
/// and reused for later pairs. The array lengths are also stored in <c>m</c> and <c>n</c>.
/// </remarks>
/// <param name="string1">Words that index the matrix rows.</param>
/// <param name="string2">Words that index the matrix columns.</param>
/// <returns>A jagged array with string1.Length rows and string2.Length columns.</returns>
float[][] GetSimilarityMatrix(string[] string1, string[] string2)
{
    m = string1.Length;
    n = string2.Length;

    Wnlib.PartsOfSpeech[] posValues = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    int posCount = posValues.Length;

    float[][] scores = new float[m][];
    HierarchicalWordData[][] rowWordCache = new HierarchicalWordData[m][];
    HierarchicalWordData[][] columnWordCache = new HierarchicalWordData[n][];

    for (int row = 0; row < m; row++)
    {
        scores[row] = new float[n];
        rowWordCache[row] = new HierarchicalWordData[posCount];
    }

    for (int col = 0; col < n; col++)
    {
        columnWordCache[col] = new HierarchicalWordData[posCount];
    }

    for (int row = 0; row < m; row++)
    {
        float[] scoreRow = scores[row];
        HierarchicalWordData[] rowWordByPos = rowWordCache[row];

        for (int col = 0; col < n; col++)
        {
            HierarchicalWordData[] columnWordByPos = columnWordCache[col];
            float editSimilarity = AcronymChecker.GetEditDistanceSimilarity(string1[row], string2[col]);

            // Index 0 of the enum values is deliberately skipped.
            for (int pos = 1; pos < posCount; pos++)
            {
                if (rowWordByPos[pos] == null)
                {
                    rowWordByPos[pos] = new HierarchicalWordData(new MyWordInfo(string1[row], posValues[pos]));
                }

                if (columnWordByPos[pos] == null)
                {
                    columnWordByPos[pos] = new HierarchicalWordData(new MyWordInfo(string2[col], posValues[pos]));
                }

                WordSimilarity scorer = new WordSimilarity();
                float taxonomySimilarity = scorer.GetSimilarity(rowWordByPos[pos], columnWordByPos[pos]);
                float candidate = Math.Max(editSimilarity, taxonomySimilarity);

                // Keep the best score seen so far for this pair.
                if (scoreRow[col] < candidate)
                {
                    scoreRow[col] = candidate;
                }
            }
        }
    }

    return scores;
}
/// <summary>
/// Scores two words by comparing them under each part of speech and keeping the lowest score.
/// </summary>
/// <param name="word1">First word.</param>
/// <param name="word2">Second word.</param>
/// <returns>
/// The smallest score obtained over the part-of-speech enum values from index 1 upward,
/// or float.MaxValue when there are no such values.
/// </returns>
public float GetSimilarity(string word1, string word2)
{
    Wnlib.PartsOfSpeech[] posValues = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

    // NOTE(review): taking the minimum means a single part of speech scoring 0 forces a 0 result - confirm this is intended.
    float lowest = float.MaxValue;

    // Index 0 of the enum values is deliberately skipped.
    for (int pos = 1; pos < posValues.Length; pos++)
    {
        Wnlib.PartsOfSpeech partOfSpeech = posValues[pos];
        HierarchicalWordData first = new HierarchicalWordData(new MyWordInfo(word1, partOfSpeech));
        HierarchicalWordData second = new HierarchicalWordData(new MyWordInfo(word2, partOfSpeech));

        float score = GetSimilarity(first, second);
        if (score < lowest)
        {
            lowest = score;
        }
    }

    return lowest;
}
/// <summary>
/// Scores two words using strategy 2 of the three-argument overload.
/// </summary>
/// <param name="word1">First word.</param>
/// <param name="word2">Second word.</param>
/// <returns>The score produced by the three-argument overload.</returns>
public float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2)
{
    const int defaultStrategy = 2;
    float score = GetSimilarity(word1, word2, defaultStrategy);
    return score;
}
/// <summary>
/// Scores how similar two words are, based on their positions in a taxonomy.
/// </summary>
/// <param name="word1">First word to compare.</param>
/// <param name="word2">Second word to compare.</param>
/// <param name="strategy">1 selects the path-length measure, 2 selects the Wu &amp; Palmer measure.</param>
/// <returns>
/// 0 when the parts of speech differ, the part of speech is Unknown, or no common ancestor
/// was found; 1 when both words are equal or (for strategies 1 and 2) the path length is 0;
/// otherwise the chosen measure rounded to two decimals. Any other strategy value yields 0
/// once the early checks have passed.
/// </returns>
float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2, int strategy)
{
    // Words are only comparable within one known part of speech.
    if (word1.WordInfo.Pos == PartsOfSpeech.Unknown || word1.WordInfo.Pos != word2.WordInfo.Pos)
    {
        return 0.0F;
    }

    if (word1.WordInfo.Word == word2.WordInfo.Word)
    {
        return 1.0F;
    }

    int pathLength, lcaDepth, depth_1, depth_2;
    HierarchicalWordData[] pair = new HierarchicalWordData[2] { word1, word2 };
    FindLeastCommonAncestor(pair, out pathLength, out lcaDepth, out depth_1, out depth_2);

    // int.MaxValue is the "nothing shared" marker left by FindLeastCommonAncestor.
    if (pathLength == int.MaxValue)
    {
        return 0.0F;
    }

    float similarity;
    switch (strategy)
    {
        case 1: // Path Length
            if (pathLength == 0)
            {
                return 1.0F;
            }
            similarity = 1.0F / pathLength;
            break;

        case 2: // Wu & Palmer
            if (pathLength == 0)
            {
                return 1.0F;
            }
            // NOTE(review): a zero depth sum would produce NaN/Infinity here - confirm it cannot occur.
            similarity = (float)lcaDepth / (depth_1 + depth_2);
            break;

        default:
            similarity = 0.0F;
            break;
    }

    return (float)Math.Round(similarity, 2);
}
/// <summary>
/// Scores two words using strategy 2 of the three-argument overload.
/// </summary>
/// <param name="word1">First word.</param>
/// <param name="word2">Second word.</param>
/// <returns>The score produced by the three-argument overload.</returns>
public float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2)
{
    const int defaultStrategy = 2;
    float score = GetSimilarity(word1, word2, defaultStrategy);
    return score;
}
/// <summary>
/// Scores two words by comparing them under each part of speech and keeping the lowest score.
/// </summary>
/// <param name="word1">First word.</param>
/// <param name="word2">Second word.</param>
/// <returns>
/// The smallest score obtained over the part-of-speech enum values from index 1 upward,
/// or float.MaxValue when there are no such values.
/// </returns>
public float GetSimilarity(string word1, string word2)
{
    Wnlib.PartsOfSpeech[] posValues = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

    // NOTE(review): taking the minimum means a single part of speech scoring 0 forces a 0 result - confirm this is intended.
    float lowest = float.MaxValue;

    // Index 0 of the enum values is deliberately skipped.
    for (int pos = 1; pos < posValues.Length; pos++)
    {
        Wnlib.PartsOfSpeech partOfSpeech = posValues[pos];
        HierarchicalWordData first = new HierarchicalWordData(new MyWordInfo(word1, partOfSpeech));
        HierarchicalWordData second = new HierarchicalWordData(new MyWordInfo(word2, partOfSpeech));

        float score = GetSimilarity(first, second);
        if (score < lowest)
        {
            lowest = score;
        }
    }

    return lowest;
}
/// <summary>
/// Searches the ancestors shared by two words for the one giving the shortest connecting path.
/// No unique "join root node" is at present in use.
/// </summary>
/// <param name="words">The two words to compare; only elements 0 and 1 are read.</param>
/// <param name="distance">
/// Shortest path found (both words' distances to the ancestor, summed, minus 1),
/// or int.MaxValue when no ancestor is shared.
/// </param>
/// <param name="lcaDepth">Sum of the depths both words report for the chosen ancestor, or -1 when none is shared.</param>
/// <param name="depth1">For words[0]: distance to the chosen ancestor plus its depth, minus 1; -1 when none is shared.</param>
/// <param name="depth2">For words[1]: distance to the chosen ancestor plus its depth, minus 1; -1 when none is shared.</param>
/// <returns>The key of the chosen ancestor, or -1 when the words share none.</returns>
public long FindLeastCommonAncestor(HierarchicalWordData[] words, out int distance, out int lcaDepth, out int depth1, out int depth2)
{
    long bestAncestor = -1;
    distance = int.MaxValue;
    lcaDepth = -1;
    depth1 = -1;
    depth2 = -1;

    HierarchicalWordData first = words[0];
    HierarchicalWordData second = words[1];

    // Walk the second word's ancestors and keep those the first word also has.
    IDictionaryEnumerator candidates = second.Distance.GetEnumerator();
    if (candidates == null)
    {
        return -1;
    }

    while (candidates.MoveNext())
    {
        int ancestor = (int)candidates.Key;
        if (!first.Distance.ContainsKey(ancestor))
        {
            continue;
        }

        int firstDistance = first.GetDistance(ancestor);
        int secondDistance = second.GetDistance(ancestor);
        int pathLength = firstDistance + secondDistance - 1;

        // Only a strictly shorter path replaces the current best.
        if (pathLength >= distance)
        {
            continue;
        }

        int firstAncestorDepth = first.GetDepth(ancestor);
        int secondAncestorDepth = second.GetDepth(ancestor);

        lcaDepth = firstAncestorDepth + secondAncestorDepth;
        depth1 = firstDistance + firstAncestorDepth - 1;
        depth2 = secondDistance + secondAncestorDepth - 1;
        distance = pathLength;
        bestAncestor = ancestor;
    }

    return bestAncestor;
}
/// <summary>
/// Chooses a synonym to stand in for the word at <paramref name="index"/>.
/// </summary>
/// <remarks>
/// Every word of every synset returned for the original word is tried in its place. A
/// candidate is kept when the sentence containing it scores at least <c>THRESHOLD</c>
/// against the original sentence. With one surviving candidate that candidate is returned;
/// with several, the one whose word similarity to the original is highest (and above 0)
/// is returned.
/// </remarks>
/// <param name="words">Words of the sentence. The array is not modified.</param>
/// <param name="index">Position in <paramref name="words"/> of the word to replace.</param>
/// <returns>
/// The selected replacement, or the original word when no part of speech is found for it
/// or no candidate qualifies.
/// </returns>
private string Replace(string[] words, int index)
{
    string word = words[index];
    WordNetEngine.POS wnepos = StaticHelper.GetWordNetEnginePOS(word);
    if (wnepos == WordNetEngine.POS.None)
    {
        return word;
    }

    PartsOfSpeech wnlibpos = StaticHelper.GetWnlibPOSFromWordNetEnginePOS(wnepos);
    LAIR.ResourceAPIs.WordNet.SynSet[] synsets = wne.GetSynSets(word, wnepos).ToArray();
    double threshold = THRESHOLD;
    List<string> possibleWords = new List<string>();
    string sentence = StaticHelper.composer.ComposeSentence(words);

    // Substitute candidates into a private copy. Writing into the caller's array would
    // expose half-edited sentences and leave it corrupted if scoring throws.
    string[] tempWords = (string[])words.Clone();

    foreach (LAIR.ResourceAPIs.WordNet.SynSet synset in synsets)
    {
        foreach (string possibleWord in synset.Words)
        {
            // The word itself is not a useful replacement.
            if (word.Equals(possibleWord, StringComparison.CurrentCultureIgnoreCase))
            {
                continue;
            }

            string candidate = StaticHelper.WithoutUnderScore(possibleWord);
            tempWords[index] = candidate;
            string newSentence = StaticHelper.composer.ComposeSentence(tempWords);

            SentenceSimilarity ss = new SentenceSimilarity();
            double similarity = ss.GetScore(sentence, newSentence);
            if (similarity >= threshold)
            {
                possibleWords.Add(candidate);
            }
        }
    }

    if (possibleWords.Count == 0)
    {
        return word;
    }

    if (possibleWords.Count == 1)
    {
        return possibleWords[0];
    }

    // Several candidates survived: pick the one closest to the original word.
    WordSimilarity ws = new WordSimilarity();
    HierarchicalWordData defaultWord = new HierarchicalWordData(new MyWordInfo(word, StaticHelper.GetWnlibPOS(word)));
    double bestSimilarity = 0;
    string replacement = word;
    foreach (string possibleWord in possibleWords)
    {
        HierarchicalWordData newWord = new HierarchicalWordData(new MyWordInfo(possibleWord, wnlibpos));
        double similarity = ws.GetSimilarity(defaultWord, newWord);
        if (similarity > bestSimilarity)
        {
            replacement = possibleWord;
            bestSimilarity = similarity;
        }
    }

    return replacement;
}
// Legacy implementation, kept commented out for reference.
//MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
//private void MyInitOld()
//{
//    _myWordsInfo1 = Disambiguate(_source);
//    _myWordsInfo2 = Disambiguate(_target);
//    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
//    _similarity =new float[m, n] ;
//    for (int i=0; i < m; i++)
//    {
//        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;
//        string word1 = _source[i];
//        for (int j=0; j < n; j++)
//        {
//            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;
//            string word2=_target[j];
//            WordDistance distance = new WordDistance();
//            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);
//            _similarity[i, j]=weight;
//        }
//    }
//}

/// <summary>
/// Builds a matrix whose entry [i][j] scores string1[i] against string2[j].
/// </summary>
/// <remarks>
/// For every pair, the score is the largest value - taken over the part-of-speech enum
/// values from index 1 upward - of the greater of the edit-distance similarity and the
/// WordSimilarity score. Word data is created on first use per word and part of speech
/// and reused for later pairs. The array lengths are also stored in <c>m</c> and <c>n</c>.
/// </remarks>
/// <param name="string1">Words that index the matrix rows.</param>
/// <param name="string2">Words that index the matrix columns.</param>
/// <returns>A jagged array with string1.Length rows and string2.Length columns.</returns>
float[][] GetSimilarityMatrix(string[] string1, string[] string2)
{
    m = string1.Length;
    n = string2.Length;

    Wnlib.PartsOfSpeech[] posValues = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    int posCount = posValues.Length;

    float[][] scores = new float[m][];
    HierarchicalWordData[][] rowWordCache = new HierarchicalWordData[m][];
    HierarchicalWordData[][] columnWordCache = new HierarchicalWordData[n][];

    for (int row = 0; row < m; row++)
    {
        scores[row] = new float[n];
        rowWordCache[row] = new HierarchicalWordData[posCount];
    }

    for (int col = 0; col < n; col++)
    {
        columnWordCache[col] = new HierarchicalWordData[posCount];
    }

    for (int row = 0; row < m; row++)
    {
        float[] scoreRow = scores[row];
        HierarchicalWordData[] rowWordByPos = rowWordCache[row];

        for (int col = 0; col < n; col++)
        {
            HierarchicalWordData[] columnWordByPos = columnWordCache[col];
            float editSimilarity = AcronymChecker.GetEditDistanceSimilarity(string1[row], string2[col]);

            // Index 0 of the enum values is deliberately skipped.
            for (int pos = 1; pos < posCount; pos++)
            {
                if (rowWordByPos[pos] == null)
                {
                    rowWordByPos[pos] = new HierarchicalWordData(new MyWordInfo(string1[row], posValues[pos]));
                }

                if (columnWordByPos[pos] == null)
                {
                    columnWordByPos[pos] = new HierarchicalWordData(new MyWordInfo(string2[col], posValues[pos]));
                }

                WordSimilarity scorer = new WordSimilarity();
                float taxonomySimilarity = scorer.GetSimilarity(rowWordByPos[pos], columnWordByPos[pos]);
                float candidate = Math.Max(editSimilarity, taxonomySimilarity);

                // Keep the best score seen so far for this pair.
                if (scoreRow[col] < candidate)
                {
                    scoreRow[col] = candidate;
                }
            }
        }
    }

    return scores;
}