// Old, commented-out initializer (MyInitOld); it is not compiled.
//
//MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
//private void MyInitOld()
//{
//    _myWordsInfo1 = Disambiguate(_source);
//    _myWordsInfo2 = Disambiguate(_target);
//    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
//    _similarity = new float[m, n];
//    for (int i = 0; i < m; i++)
//    {
//        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;
//        string word1 = _source[i];
//        for (int j = 0; j < n; j++)
//        {
//            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;
//            string word2 = _target[j];
//            WordDistance distance = new WordDistance();
//            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);
//            _similarity[i, j] = weight;
//        }
//    }
//}

/// <summary>
/// Builds a jagged matrix of pairwise similarity weights between the words of two lists.
/// </summary>
/// <remarks>
/// For each pair (string1[i], string2[j]) and each <c>Wnlib.PartsOfSpeech</c> value except the
/// first enum entry, the weight is the larger of the edit-distance similarity and the
/// <c>WordSimilarity</c> score. A cell starts at 0 and is raised whenever a weight exceeds it.
/// The lengths of the two inputs are also stored in the <c>m</c> and <c>n</c> members.
/// </remarks>
/// <param name="string1">Words that index the rows of the result.</param>
/// <param name="string2">Words that index the columns of the result.</param>
/// <returns>A matrix with <c>string1.Length</c> rows of <c>string2.Length</c> entries each.</returns>
float[][] GetSimilarityMatrix(string[] string1, string[] string2)
{
    m = string1.Length;
    n = string2.Length;

    float[][] scores = new float[m][];
    Wnlib.PartsOfSpeech[] posValues = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    int posCount = posValues.Length;

    // Per-word caches, one slot per part of speech; slots are filled on first use below.
    HierarchicalWordData[][] rowWordData = new HierarchicalWordData[m][];
    HierarchicalWordData[][] columnWordData = new HierarchicalWordData[n][];

    for (int row = 0; row < m; row++)
    {
        scores[row] = new float[n];
        rowWordData[row] = new HierarchicalWordData[posCount];
    }

    for (int col = 0; col < n; col++)
    {
        columnWordData[col] = new HierarchicalWordData[posCount];
    }

    for (int row = 0; row < m; row++)
    {
        string rowWord = string1[row];
        float[] scoreRow = scores[row];
        HierarchicalWordData[] rowSlots = rowWordData[row];

        for (int col = 0; col < n; col++)
        {
            string columnWord = string2[col];
            HierarchicalWordData[] columnSlots = columnWordData[col];

            // The edit-distance score does not depend on the part of speech.
            float editSimilarity = AcronymChecker.GetEditDistanceSimilarity(rowWord, columnWord);

            // Index 0 of the enum values is deliberately skipped.
            for (int pos = 1; pos < posCount; pos++)
            {
                if (rowSlots[pos] == null)
                {
                    rowSlots[pos] = new HierarchicalWordData(new MyWordInfo(rowWord, posValues[pos]));
                }

                if (columnSlots[pos] == null)
                {
                    columnSlots[pos] = new HierarchicalWordData(new MyWordInfo(columnWord, posValues[pos]));
                }

                float semanticSimilarity = new WordSimilarity().GetSimilarity(rowSlots[pos], columnSlots[pos]);
                float candidate = Math.Max(editSimilarity, semanticSimilarity);

                if (candidate > scoreRow[col])
                {
                    scoreRow[col] = candidate;
                }
            }
        }
    }

    return scores;
}
/// <summary>
/// Chooses a WordNet synonym to stand in for the word at <paramref name="index"/>,
/// or returns that word unchanged when no suitable synonym is found.
/// </summary>
/// <remarks>
/// Each word of each synset returned for the original word is substituted into the sentence,
/// and the resulting sentence is scored against the original with <c>SentenceSimilarity</c>.
/// Substitutions scoring at least <c>THRESHOLD</c> become candidates. With exactly one
/// candidate, that candidate is returned. With several, the one with the highest
/// <c>WordSimilarity</c> score against the original word is returned; if none scores above
/// zero, the original word is returned.
/// The <paramref name="words"/> array is never modified.
/// </remarks>
/// <param name="words">The words of the sentence.</param>
/// <param name="index">Position in <paramref name="words"/> of the word to replace.</param>
/// <returns>The chosen replacement, or the original word.</returns>
private string Replace(string[] words, int index)
{
    string word = words[index];

    WordNetEngine.POS wnepos = StaticHelper.GetWordNetEnginePOS(word);
    if (wnepos == WordNetEngine.POS.None)
    {
        return word;
    }

    PartsOfSpeech wnlibpos = StaticHelper.GetWnlibPOSFromWordNetEnginePOS(wnepos);
    LAIR.ResourceAPIs.WordNet.SynSet[] synsets = wne.GetSynSets(word, wnepos).ToArray();

    List<string> possibleWords = new List<string>();
    string sentence = StaticHelper.composer.ComposeSentence(words);

    // Substitute into a private copy. Writing candidates into the caller's array would leave
    // it holding a candidate word if any call below threw before the slot was restored.
    string[] tempWords = (string[])words.Clone();

    foreach (LAIR.ResourceAPIs.WordNet.SynSet synset in synsets)
    {
        foreach (string possibleWord in synset.Words)
        {
            if (word.Equals(possibleWord, StringComparison.CurrentCultureIgnoreCase))
            {
                continue;
            }

            string candidate = StaticHelper.WithoutUnderScore(possibleWord);
            tempWords[index] = candidate;
            string newSentence = StaticHelper.composer.ComposeSentence(tempWords);

            SentenceSimilarity ss = new SentenceSimilarity();
            double similarity = ss.GetScore(sentence, newSentence);
            if (similarity >= THRESHOLD)
            {
                possibleWords.Add(candidate);
            }
        }
    }

    if (possibleWords.Count == 0)
    {
        return word;
    }

    if (possibleWords.Count == 1)
    {
        return possibleWords[0];
    }

    // Several candidates: keep the one closest to the original word.
    WordSimilarity ws = new WordSimilarity();
    HierarchicalWordData defaultWord = new HierarchicalWordData(new MyWordInfo(word, StaticHelper.GetWnlibPOS(word)));

    double bestSimilarity = 0;
    string replacement = word;
    foreach (string possibleWord in possibleWords)
    {
        HierarchicalWordData newWord = new HierarchicalWordData(new MyWordInfo(possibleWord, wnlibpos));
        double similarity = ws.GetSimilarity(defaultWord, newWord);

        // Strict comparison: ties keep the earlier candidate, and a zero score never wins.
        if (similarity > bestSimilarity)
        {
            replacement = possibleWord;
            bestSimilarity = similarity;
        }
    }

    return replacement;
}
// Old, commented-out initializer (MyInitOld); it is not compiled.
//
//MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
//private void MyInitOld()
//{
//    _myWordsInfo1 = Disambiguate(_source);
//    _myWordsInfo2 = Disambiguate(_target);
//    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
//    _similarity = new float[m, n];
//    for (int i = 0; i < m; i++)
//    {
//        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;
//        string word1 = _source[i];
//        for (int j = 0; j < n; j++)
//        {
//            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;
//            string word2 = _target[j];
//            WordDistance distance = new WordDistance();
//            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);
//            _similarity[i, j] = weight;
//        }
//    }
//}

/// <summary>
/// Computes, for every pair of words taken from the two lists, the best similarity weight
/// found across parts of speech.
/// </summary>
/// <remarks>
/// The weight for one part of speech is the larger of the edit-distance similarity of the two
/// strings and the <c>WordSimilarity</c> score of their <c>HierarchicalWordData</c>. The first
/// entry of the <c>Wnlib.PartsOfSpeech</c> values is not evaluated. A cell keeps its initial 0
/// unless a larger weight is found. As a side effect the <c>m</c> and <c>n</c> members are set
/// to the input lengths.
/// </remarks>
/// <param name="string1">Words that index the rows of the result.</param>
/// <param name="string2">Words that index the columns of the result.</param>
/// <returns>A jagged array with one row per word of <paramref name="string1"/>.</returns>
float[][] GetSimilarityMatrix(string[] string1, string[] string2)
{
    m = string1.Length;
    n = string2.Length;

    float[][] result = new float[m][];
    Wnlib.PartsOfSpeech[] partsOfSpeech = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    int partCount = partsOfSpeech.Length;

    // Word data is created at most once per (word, part of speech) and reused across pairs.
    HierarchicalWordData[][] firstCache = new HierarchicalWordData[m][];
    HierarchicalWordData[][] secondCache = new HierarchicalWordData[n][];

    for (int r = 0; r < m; r++)
    {
        result[r] = new float[n];
        firstCache[r] = new HierarchicalWordData[partCount];
    }

    for (int c = 0; c < n; c++)
    {
        secondCache[c] = new HierarchicalWordData[partCount];
    }

    for (int r = 0; r < m; r++)
    {
        string first = string1[r];
        float[] resultRow = result[r];
        HierarchicalWordData[] firstByPart = firstCache[r];

        for (int c = 0; c < n; c++)
        {
            string second = string2[c];
            HierarchicalWordData[] secondByPart = secondCache[c];

            // Computed once per pair; it is the same for every part of speech.
            float syntactic = AcronymChecker.GetEditDistanceSimilarity(first, second);

            // Starts at 1: the first enum value is skipped.
            for (int p = 1; p < partCount; p++)
            {
                if (firstByPart[p] == null)
                {
                    MyWordInfo firstInfo = new MyWordInfo(first, partsOfSpeech[p]);
                    firstByPart[p] = new HierarchicalWordData(firstInfo);
                }

                if (secondByPart[p] == null)
                {
                    MyWordInfo secondInfo = new MyWordInfo(second, partsOfSpeech[p]);
                    secondByPart[p] = new HierarchicalWordData(secondInfo);
                }

                WordSimilarity comparer = new WordSimilarity();
                float semantic = comparer.GetSimilarity(firstByPart[p], secondByPart[p]);
                float best = Math.Max(syntactic, semantic);

                if (best > resultRow[c])
                {
                    resultRow[c] = best;
                }
            }
        }
    }

    return result;
}