Exemple #1
0
        /// <summary>
        /// Scores how similar two words are, based on their least common ancestor
        /// as reported by <c>FindLeastCommonAncestor</c>.
        /// </summary>
        /// <param name="word1">First word, with its hierarchy data.</param>
        /// <param name="word2">Second word, with its hierarchy data.</param>
        /// <param name="strategy">
        /// 1 = path length (1 / path length); 2 = Wu and Palmer
        /// (LCA depth / sum of both word depths). Any other value scores 0.
        /// </param>
        /// <returns>
        /// 0 when the parts of speech differ, the part of speech is Unknown, or no
        /// common ancestor is found; 1 when both words have the same text or the
        /// path length is 0 under strategy 1 or 2; otherwise the strategy's score
        /// rounded to two decimals.
        /// </returns>
        float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2, int strategy)
        {
            // Only words sharing a known part of speech are compared.
            if (word1.WordInfo.Pos != word2.WordInfo.Pos || word1.WordInfo.Pos == PartsOfSpeech.Unknown)
            {
                return 0.0F;
            }

            // Identical text short-circuits the hierarchy lookup.
            if (word1.WordInfo.Word == word2.WordInfo.Word)
            {
                return 1.0F;
            }

            int distance, ancestorDepth, firstDepth, secondDepth;
            HierarchicalWordData[] pair = new HierarchicalWordData[2] { word1, word2 };
            FindLeastCommonAncestor(pair, out distance, out ancestorDepth, out firstDepth, out secondDepth);

            // int.MaxValue is the "no common ancestor" marker left by the lookup.
            if (distance == int.MaxValue)
            {
                return 0.0F;
            }

            float score;
            switch (strategy)
            {
                case 1: // Path Length
                    if (distance == 0)
                    {
                        return 1.0F;
                    }
                    score = 1.0F / (float)distance;
                    break;

                case 2: // Wu & Palmer
                    if (distance == 0)
                    {
                        return 1.0F;
                    }
                    score = (float)ancestorDepth / (float)(firstDepth + secondDepth);
                    break;

                default:
                    // Unrecognised strategies fall through with a zero score.
                    score = 0.0F;
                    break;
            }

            return (float)Math.Round(score, 2);
        }
Exemple #2
0
        //MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
        //private void MyInitOld()
        //{
        //    _myWordsInfo1 = Disambiguate(_source);
        //    _myWordsInfo2 = Disambiguate(_target);

        //    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
        //    _similarity =new float[m, n] ;

        //    for (int i=0; i < m; i++)
        //    {
        //        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;

        //        string word1 = _source[i];
        //        for (int j=0; j < n; j++)
        //        {
        //            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;

        //            string word2=_target[j];
        //            WordDistance distance = new WordDistance();
        //            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);

        //            _similarity[i, j]=weight;
        //        }
        //    }
        //}



        /// <summary>
        /// Builds a similarity matrix between every word of <paramref name="string1"/>
        /// and every word of <paramref name="string2"/>.
        /// </summary>
        /// <param name="string1">Words that index the rows of the matrix.</param>
        /// <param name="string2">Words that index the columns of the matrix.</param>
        /// <returns>
        /// A jagged array where element [i][j] is the largest value, over the
        /// evaluated parts of speech, of max(edit-distance similarity, semantic
        /// similarity) for string1[i] and string2[j].
        /// </returns>
        float[][] GetSimilarityMatrix(string[] string1, string[] string2)
        {
            // m and n are not declared in this method (presumably fields of the
            // enclosing type); they are updated here as a side effect.
            m = string1.Length; n = string2.Length;

            Wnlib.PartsOfSpeech[] posValues = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            int posCount = posValues.Length;

            float[][] simMatrix = new float[m][];
            HierarchicalWordData[][] wordData_1 = new HierarchicalWordData[m][];
            for (int i = 0; i < m; i++)
            {
                simMatrix[i] = new float[n];
                wordData_1[i] = new HierarchicalWordData[posCount];
            }

            HierarchicalWordData[][] wordData_2 = new HierarchicalWordData[n][];
            for (int j = 0; j < n; j++)
            {
                wordData_2[j] = new HierarchicalWordData[posCount];
            }

            // A single scorer serves the whole matrix; creating one per
            // (row, column, part of speech) triple only adds allocations.
            WordSimilarity wordDistance = new WordSimilarity();

            for (int i = 0; i < m; i++)
            {
                for (int j = 0; j < n; j++)
                {
                    float synDist = AcronymChecker.GetEditDistanceSimilarity(string1[i], string2[j]);

                    // Index 0 of the enum values is never evaluated
                    // (presumably the Unknown part of speech - TODO confirm).
                    for (int partOfSpeech = 1; partOfSpeech < posCount; partOfSpeech++)
                    {
                        // Hierarchy data is built lazily and cached per word and part of speech.
                        if (wordData_1[i][partOfSpeech] == null)
                        {
                            MyWordInfo myWordsInfo_i = new MyWordInfo(string1[i], posValues[partOfSpeech]);
                            wordData_1[i][partOfSpeech] = new HierarchicalWordData(myWordsInfo_i);
                        }
                        if (wordData_2[j][partOfSpeech] == null)
                        {
                            MyWordInfo myWordsInfo_j = new MyWordInfo(string2[j], posValues[partOfSpeech]);
                            wordData_2[j][partOfSpeech] = new HierarchicalWordData(myWordsInfo_j);
                        }

                        float semDist = wordDistance.GetSimilarity(wordData_1[i][partOfSpeech], wordData_2[j][partOfSpeech]);
                        float weight = Math.Max(synDist, semDist);
                        if (simMatrix[i][j] < weight)
                        {
                            simMatrix[i][j] = weight;
                        }
                    }
                }
            }

            return simMatrix;
        }
Exemple #3
0
        /// <summary>
        /// Compares two words under each evaluated part of speech and keeps the
        /// lowest score.
        /// </summary>
        /// <param name="word1">First word.</param>
        /// <param name="word2">Second word.</param>
        /// <returns>
        /// The smallest per-part-of-speech similarity, or <c>float.MaxValue</c>
        /// when no part of speech is evaluated.
        /// </returns>
        public float GetSimilarity(string word1, string word2)
        {
            Wnlib.PartsOfSpeech[] allPos = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

            // NOTE(review): the result is a minimum across parts of speech, so a
            // single part of speech scoring 0 makes the result 0 - confirm intended.
            float lowest = float.MaxValue;

            // The enum value at index 0 is never evaluated
            // (presumably Unknown - TODO confirm).
            for (int k = 1; k < allPos.Length; k++)
            {
                Wnlib.PartsOfSpeech pos = allPos[k];
                HierarchicalWordData first = new HierarchicalWordData(new MyWordInfo(word1, pos));
                HierarchicalWordData second = new HierarchicalWordData(new MyWordInfo(word2, pos));

                float current = GetSimilarity(first, second);
                if (current < lowest)
                {
                    lowest = current;
                }
            }

            return lowest;
        }
Exemple #4
0
 /// <summary>
 /// Scores two words using strategy 2 (Wu and Palmer) of the three-argument overload.
 /// </summary>
 /// <param name="word1">First word, with its hierarchy data.</param>
 /// <param name="word2">Second word, with its hierarchy data.</param>
 /// <returns>The similarity returned by the three-argument overload.</returns>
 public float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2)
 {
     const int wuPalmerStrategy = 2;
     return GetSimilarity(word1, word2, wuPalmerStrategy);
 }
        /// <summary>
        /// Scores how similar two words are, based on their least common ancestor
        /// as reported by <c>FindLeastCommonAncestor</c>.
        /// </summary>
        /// <param name="word1">First word, with its hierarchy data.</param>
        /// <param name="word2">Second word, with its hierarchy data.</param>
        /// <param name="strategy">
        /// 1 = path length (1 / path length); 2 = Wu and Palmer
        /// (LCA depth / sum of both word depths). Any other value scores 0.
        /// </param>
        /// <returns>
        /// 0 when the parts of speech differ, the part of speech is Unknown, or no
        /// common ancestor is found; 1 when both words have the same text or the
        /// path length is 0 under strategy 1 or 2; otherwise the strategy's score
        /// rounded to two decimals.
        /// </returns>
        float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2, int strategy)
        {
            // Only words sharing a known part of speech are compared.
            if (word1.WordInfo.Pos != word2.WordInfo.Pos || word1.WordInfo.Pos == PartsOfSpeech.Unknown)
            {
                return 0.0F;
            }

            // Identical text short-circuits the hierarchy lookup.
            if (word1.WordInfo.Word == word2.WordInfo.Word)
            {
                return 1.0F;
            }

            int distance, ancestorDepth, firstDepth, secondDepth;
            HierarchicalWordData[] pair = new HierarchicalWordData[2] { word1, word2 };
            FindLeastCommonAncestor(pair, out distance, out ancestorDepth, out firstDepth, out secondDepth);

            // int.MaxValue is the "no common ancestor" marker left by the lookup.
            if (distance == int.MaxValue)
            {
                return 0.0F;
            }

            float score;
            switch (strategy)
            {
                case 1: // Path Length
                    if (distance == 0)
                    {
                        return 1.0F;
                    }
                    score = 1.0F / (float)distance;
                    break;

                case 2: // Wu & Palmer
                    if (distance == 0)
                    {
                        return 1.0F;
                    }
                    score = (float)ancestorDepth / (float)(firstDepth + secondDepth);
                    break;

                default:
                    // Unrecognised strategies fall through with a zero score.
                    score = 0.0F;
                    break;
            }

            return (float)Math.Round(score, 2);
        }
 /// <summary>
 /// Scores two words using strategy 2 (Wu and Palmer) of the three-argument overload.
 /// </summary>
 /// <param name="word1">First word, with its hierarchy data.</param>
 /// <param name="word2">Second word, with its hierarchy data.</param>
 /// <returns>The similarity returned by the three-argument overload.</returns>
 public float GetSimilarity(HierarchicalWordData word1, HierarchicalWordData word2)
 {
     const int wuPalmerStrategy = 2;
     return GetSimilarity(word1, word2, wuPalmerStrategy);
 }
        /// <summary>
        /// Compares two words under each evaluated part of speech and keeps the
        /// lowest score.
        /// </summary>
        /// <param name="word1">First word.</param>
        /// <param name="word2">Second word.</param>
        /// <returns>
        /// The smallest per-part-of-speech similarity, or <c>float.MaxValue</c>
        /// when no part of speech is evaluated.
        /// </returns>
        public float GetSimilarity(string word1, string word2)
        {
            Wnlib.PartsOfSpeech[] allPos = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

            // NOTE(review): the result is a minimum across parts of speech, so a
            // single part of speech scoring 0 makes the result 0 - confirm intended.
            float lowest = float.MaxValue;

            // The enum value at index 0 is never evaluated
            // (presumably Unknown - TODO confirm).
            for (int k = 1; k < allPos.Length; k++)
            {
                Wnlib.PartsOfSpeech pos = allPos[k];
                HierarchicalWordData first = new HierarchicalWordData(new MyWordInfo(word1, pos));
                HierarchicalWordData second = new HierarchicalWordData(new MyWordInfo(word2, pos));

                float current = GetSimilarity(first, second);
                if (current < lowest)
                {
                    lowest = current;
                }
            }

            return lowest;
        }
        /// <summary>
        /// Finds the ancestor shared by two words that gives the shortest
        /// combined path between them. No unique "join root node" is in use.
        /// </summary>
        /// <param name="words">The two words to compare; elements 0 and 1 are read.</param>
        /// <param name="distance">
        /// Receives the combined distance of both words to the chosen ancestor,
        /// minus one; stays <c>int.MaxValue</c> when no shared ancestor exists.
        /// </param>
        /// <param name="lcaDepth">
        /// Receives the sum of the ancestor's depth as reported by each word,
        /// or -1 when no shared ancestor exists.
        /// </param>
        /// <param name="depth1">
        /// Receives the first word's distance plus its ancestor depth, minus one,
        /// or -1 when no shared ancestor exists.
        /// </param>
        /// <param name="depth2">
        /// Receives the second word's distance plus its ancestor depth, minus one,
        /// or -1 when no shared ancestor exists.
        /// </param>
        /// <returns>The key of the chosen ancestor, or -1 when none is found.</returns>
        public long FindLeastCommonAncestor(HierarchicalWordData[] words, out int distance, out int lcaDepth, out int depth1, out int depth2)
        {
            distance = int.MaxValue;
            lcaDepth = -1;
            depth1 = -1;
            depth2 = -1;
            long bestAncestor = -1;

            HierarchicalWordData first = words[0];
            HierarchicalWordData second = words[1];

            // Candidates are the second word's ancestors; each one is kept only
            // if the first word also lists it.
            IDictionaryEnumerator candidates = second.Distance.GetEnumerator();
            if (candidates == null)
            {
                return -1;
            }

            while (candidates.MoveNext())
            {
                int ancestor = (int)candidates.Key;
                if (!first.Distance.ContainsKey(ancestor))
                {
                    continue;
                }

                int firstDistance = first.GetDistance(ancestor);
                int secondDistance = second.GetDistance(ancestor);
                int pathLength = firstDistance + secondDistance - 1;

                // Strict improvement only: on a tie the earlier candidate wins.
                if (pathLength >= distance)
                {
                    continue;
                }

                int firstAncestorDepth = first.GetDepth(ancestor);
                int secondAncestorDepth = second.GetDepth(ancestor);

                lcaDepth = firstAncestorDepth + secondAncestorDepth;
                depth1 = firstDistance + firstAncestorDepth - 1;
                depth2 = secondDistance + secondAncestorDepth - 1;
                distance = pathLength;
                bestAncestor = ancestor;
            }

            return bestAncestor;
        }
        /// <summary>
        /// Chooses a synonym to stand in for the word at <paramref name="index"/>,
        /// or returns that word unchanged when no candidate qualifies.
        /// </summary>
        /// <param name="words">The words of the sentence. This array is not modified.</param>
        /// <param name="index">Position in <paramref name="words"/> of the word to replace.</param>
        /// <returns>
        /// The original word when its WordNet part of speech is None or no
        /// candidate reaches THRESHOLD; the sole candidate when exactly one
        /// qualifies; otherwise the qualifying candidate with the highest word
        /// similarity to the original, or the original word if none scores above 0.
        /// </returns>
        private string Replace(string[] words, int index)
        {
            string word = words[index];

            WordNetEngine.POS wnepos = StaticHelper.GetWordNetEnginePOS(word);
            if (wnepos == WordNetEngine.POS.None)
            {
                return word;
            }

            PartsOfSpeech wnlibpos = StaticHelper.GetWnlibPOSFromWordNetEnginePOS(wnepos);
            LAIR.ResourceAPIs.WordNet.SynSet[] synsets = wne.GetSynSets(word, wnepos).ToArray();

            double threshold = THRESHOLD;
            List<string> possibleWords = new List<string>();

            string sentence = StaticHelper.composer.ComposeSentence(words);

            // Candidates are tried on a private copy. Assigning the array reference
            // would alias the caller's array and leave it altered if anything below throws.
            string[] tempWords = (string[])words.Clone();

            foreach (LAIR.ResourceAPIs.WordNet.SynSet synset in synsets)
            {
                foreach (string possibleWord in synset.Words)
                {
                    // The word itself is not a replacement candidate.
                    if (word.Equals(possibleWord, StringComparison.CurrentCultureIgnoreCase))
                    {
                        continue;
                    }

                    string candidate = StaticHelper.WithoutUnderScore(possibleWord);
                    tempWords[index] = candidate;
                    string newSentence = StaticHelper.composer.ComposeSentence(tempWords);

                    SentenceSimilarity ss = new SentenceSimilarity();
                    double similarity = ss.GetScore(sentence, newSentence);

                    // Keep candidates whose sentence score reaches the threshold.
                    if (similarity >= threshold)
                    {
                        possibleWords.Add(candidate);
                    }
                }
            }

            if (possibleWords.Count == 0)
            {
                return word;
            }

            if (possibleWords.Count == 1)
            {
                return possibleWords[0];
            }

            // Several candidates qualified: rank them by word-level similarity
            // to the original word.
            WordSimilarity ws = new WordSimilarity();
            HierarchicalWordData defaultWord = new HierarchicalWordData(new MyWordInfo(word, StaticHelper.GetWnlibPOS(word)));

            double bestSimilarity = 0;
            string replacement = word;

            foreach (string possibleWord in possibleWords)
            {
                HierarchicalWordData newWord = new HierarchicalWordData(new MyWordInfo(possibleWord, wnlibpos));

                double similarity = ws.GetSimilarity(defaultWord, newWord);

                // Strictly greater: the first of equally scored candidates wins,
                // and a candidate scoring 0 never displaces the original word.
                if (similarity > bestSimilarity)
                {
                    replacement = possibleWord;
                    bestSimilarity = similarity;
                }
            }

            return replacement;
        }
        //MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;        
        //private void MyInitOld()
        //{
        //    _myWordsInfo1 = Disambiguate(_source);
        //    _myWordsInfo2 = Disambiguate(_target);

        //    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
        //    _similarity =new float[m, n] ;

        //    for (int i=0; i < m; i++)
        //    {
        //        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;                

        //        string word1 = _source[i];
        //        for (int j=0; j < n; j++)
        //        {
        //            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;					

        //            string word2=_target[j];
        //            WordDistance distance = new WordDistance();
        //            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);					

        //            _similarity[i, j]=weight;					
        //        }
        //    }
        //}



        /// <summary>
        /// Builds a similarity matrix between every word of <paramref name="string1"/>
        /// and every word of <paramref name="string2"/>.
        /// </summary>
        /// <param name="string1">Words that index the rows of the matrix.</param>
        /// <param name="string2">Words that index the columns of the matrix.</param>
        /// <returns>
        /// A jagged array where element [i][j] is the largest value, over the
        /// evaluated parts of speech, of max(edit-distance similarity, semantic
        /// similarity) for string1[i] and string2[j].
        /// </returns>
        float[][] GetSimilarityMatrix(string[] string1, string[] string2)
        {
            // m and n are not declared in this method (presumably fields of the
            // enclosing type); they are updated here as a side effect.
            m = string1.Length; n = string2.Length;

            Wnlib.PartsOfSpeech[] posValues = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            int posCount = posValues.Length;

            float[][] simMatrix = new float[m][];
            HierarchicalWordData[][] wordData_1 = new HierarchicalWordData[m][];
            for (int i = 0; i < m; i++)
            {
                simMatrix[i] = new float[n];
                wordData_1[i] = new HierarchicalWordData[posCount];
            }

            HierarchicalWordData[][] wordData_2 = new HierarchicalWordData[n][];
            for (int j = 0; j < n; j++)
            {
                wordData_2[j] = new HierarchicalWordData[posCount];
            }

            // A single scorer serves the whole matrix; creating one per
            // (row, column, part of speech) triple only adds allocations.
            WordSimilarity wordDistance = new WordSimilarity();

            for (int i = 0; i < m; i++)
            {
                for (int j = 0; j < n; j++)
                {
                    float synDist = AcronymChecker.GetEditDistanceSimilarity(string1[i], string2[j]);

                    // Index 0 of the enum values is never evaluated
                    // (presumably the Unknown part of speech - TODO confirm).
                    for (int partOfSpeech = 1; partOfSpeech < posCount; partOfSpeech++)
                    {
                        // Hierarchy data is built lazily and cached per word and part of speech.
                        if (wordData_1[i][partOfSpeech] == null)
                        {
                            MyWordInfo myWordsInfo_i = new MyWordInfo(string1[i], posValues[partOfSpeech]);
                            wordData_1[i][partOfSpeech] = new HierarchicalWordData(myWordsInfo_i);
                        }
                        if (wordData_2[j][partOfSpeech] == null)
                        {
                            MyWordInfo myWordsInfo_j = new MyWordInfo(string2[j], posValues[partOfSpeech]);
                            wordData_2[j][partOfSpeech] = new HierarchicalWordData(myWordsInfo_j);
                        }

                        float semDist = wordDistance.GetSimilarity(wordData_1[i][partOfSpeech], wordData_2[j][partOfSpeech]);
                        float weight = Math.Max(synDist, semDist);
                        if (simMatrix[i][j] < weight)
                        {
                            simMatrix[i][j] = weight;
                        }
                    }
                }
            }

            return simMatrix;
        }