Ejemplo n.º 1
0
		/// <summary>
		/// Looks up the WordNet index entry for the given word and returns its synonym candidates.
		/// </summary>
		/// <param name="pos">
		/// Word and part of speech to look up. Its <c>Word</c> is lower-cased in place; when the
		/// entry is only found through a morphological variant, <c>Word</c> is replaced by that variant.
		/// </param>
		/// <param name="includeMorphs">
		/// When true, morphological variants are tried if the word itself has no index entry.
		/// </param>
		/// <returns>
		/// The result of <c>LookupCandidates</c> for the index entry found, or null when no entry exists.
		/// </returns>
		public MyWordInfo[] FindSynonyms(ref MyWordInfo pos, bool includeMorphs)
		{
			pos.Word = pos.Word.ToLower();
			Wnlib.Index index = Wnlib.Index.lookup( pos.Word, Wnlib.PartOfSpeech.of( pos.Pos ) );

			if( index == null )
			{
				if( !includeMorphs )
					return null;

				Wnlib.MorphStr morphs = new Wnlib.MorphStr( pos.Word, Wnlib.PartOfSpeech.of( pos.Pos ) );
				string morph;
				while( ( morph = morphs.next() ) != null )
				{
					index = Wnlib.Index.lookup( morph, Wnlib.PartOfSpeech.of( pos.Pos ) );
					if( index != null )
					{
						// Only commit the morph to the caller's ref argument once it is known to be
						// indexed; a failed search must not leave a rejected variant behind.
						pos.Word = morph;
						break;
					}
				}
			}

			if( index == null )
				return null;

			return LookupCandidates( index, pos );
		}
        /// <summary>
        /// Tags each word with the first part of speech for which the lexicon reports at least one
        /// sense, then runs the word-sense disambiguator over the tagged words.
        /// </summary>
        /// <param name="words">
        /// Words to tag. Entries are overwritten in place with the lexicon's text for the word
        /// whenever the lexicon returns a non-empty text.
        /// </param>
        /// <returns>
        /// The array returned by <c>WordSenseDisambiguator.Disambiguate</c>, or null when
        /// <paramref name="words"/> is null or empty.
        /// </returns>
        private MyWordInfo[] Disambiguate(string[] words)
        {
            if (words == null || words.Length == 0) return null;

            // Enum.GetValues builds a new array on every call and the set of values never changes,
            // so it is fetched once instead of once per word.
            Wnlib.PartsOfSpeech[] posEnum = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

            MyWordInfo[] wordInfos = new MyWordInfo[words.Length];

            for (int i = 0; i < words.Length; i++)
            {
                WnLexicon.WordInfo wordInfo = WnLexicon.Lexicon.FindWordInfo(words[i], true);

                // Words the lexicon does not know keep a null slot in wordInfos.
                if (wordInfo.partOfSpeech == Wnlib.PartsOfSpeech.Unknown)
                    continue;

                if (wordInfo.text != string.Empty)
                    words[i] = wordInfo.text;

                // senseCounts is read with the same index as the enum values array.
                for (int j = 0; j < posEnum.Length; j++)
                {
                    if (wordInfo.senseCounts[j] > 0) // get the first part of speech
                    {
                        wordInfos[i] = new MyWordInfo(words[i], posEnum[j]);
                        break;
                    }
                }
            }

            WordSenseDisambiguator wsd = new WordSenseDisambiguator();
            wordInfos = wsd.Disambiguate(wordInfos);

            return wordInfos;
        }
Ejemplo n.º 3
0
        //MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
        //private void MyInitOld()
        //{
        //    _myWordsInfo1 = Disambiguate(_source);
        //    _myWordsInfo2 = Disambiguate(_target);

        //    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
        //    _similarity =new float[m, n] ;

        //    for (int i=0; i < m; i++)
        //    {
        //        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;

        //        string word1 = _source[i];
        //        for (int j=0; j < n; j++)
        //        {
        //            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;

        //            string word2=_target[j];
        //            WordDistance distance = new WordDistance();
        //            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);

        //            _similarity[i, j]=weight;
        //        }
        //    }
        //}



        /// <summary>
        /// Builds an m-by-n matrix whose cell [i][j] is the highest similarity found between
        /// <c>string1[i]</c> and <c>string2[j]</c>, taking for every part of speech (enum index 1
        /// and up) the larger of the edit-distance similarity and the WordNet similarity.
        /// </summary>
        /// <param name="string1">Words for the matrix rows; its length is stored in <c>m</c>.</param>
        /// <param name="string2">Words for the matrix columns; its length is stored in <c>n</c>.</param>
        /// <returns>A jagged array with <c>m</c> rows of <c>n</c> scores each.</returns>
        float[][] GetSimilarityMatrix(string[] string1, string[] string2)
        {
            m = string1.Length;
            n = string2.Length;

            float[][] scores = new float[m][];

            Wnlib.PartsOfSpeech[] partsOfSpeech = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            int posCount = partsOfSpeech.Length;

            // Per-word caches of hierarchical data, one slot per part of speech, filled on first use.
            HierarchicalWordData[][] rowData = new HierarchicalWordData[m][];
            HierarchicalWordData[][] columnData = new HierarchicalWordData[n][];

            for (int row = 0; row < m; row++)
            {
                scores[row] = new float[n];
                rowData[row] = new HierarchicalWordData[posCount];
            }

            for (int col = 0; col < n; col++)
            {
                columnData[col] = new HierarchicalWordData[posCount];
            }

            for (int row = 0; row < m; row++)
            {
                for (int col = 0; col < n; col++)
                {
                    float editSimilarity = AcronymChecker.GetEditDistanceSimilarity(string1[row], string2[col]);

                    // Index 0 of the enum values is skipped.
                    for (int p = 1; p < posCount; p++)
                    {
                        HierarchicalWordData left = rowData[row][p];
                        if (left == null)
                        {
                            MyWordInfo leftInfo = new MyWordInfo(string1[row], partsOfSpeech[p]);
                            left = new HierarchicalWordData(leftInfo);
                            rowData[row][p] = left;
                        }

                        HierarchicalWordData right = columnData[col][p];
                        if (right == null)
                        {
                            MyWordInfo rightInfo = new MyWordInfo(string2[col], partsOfSpeech[p]);
                            right = new HierarchicalWordData(rightInfo);
                            columnData[col][p] = right;
                        }

                        WordSimilarity similarity = new WordSimilarity();
                        float semanticSimilarity = similarity.GetSimilarity(left, right);
                        float candidate = Math.Max(editSimilarity, semanticSimilarity);

                        // Keep the best score seen so far for this pair of words.
                        if (candidate > scores[row][col])
                        {
                            scores[row][col] = candidate;
                        }
                    }
                }
            }

            return scores;
        }
Ejemplo n.º 4
0
		// Four-level jagged score table; the index order is given by the trailing note on the declaration.
		private int[][][][] _scores;//[i][alter_i][j][alter_j]

		/// <summary>
		/// Stores the tagged words and the original sentence, runs <c>MyInit</c> followed by
		/// <c>Generate</c>, and finally points <c>list</c> at <c>k_best_sentence</c>.
		/// </summary>
		/// <param name="pos">Word information kept in <c>_myPos</c>.</param>
		/// <param name="originalSentence">Sentence text kept in <c>_originalSentence</c>.</param>
		public SimilarGenerator(MyWordInfo[] pos, string originalSentence)
		{
			_originalSentence = originalSentence;
			_myPos = pos;

			// All work happens at construction time: initialise, then generate.
			MyInit();
			Generate();

			list = k_best_sentence;
		}
Ejemplo n.º 5
0
        /// <summary>
        /// Assigns every word the first part of speech that has a positive sense count in the
        /// lexicon and passes the result through the word-sense disambiguator.
        /// </summary>
        /// <param name="words">
        /// Input words. An entry is replaced in place by the lexicon's text when that text is not empty.
        /// </param>
        /// <returns>
        /// The disambiguator's output, or null when <paramref name="words"/> is null or has no elements.
        /// </returns>
        private MyWordInfo[] Disambiguate(string[] words)
        {
            if (words == null || words.Length == 0)
            {
                return(null);
            }

            MyWordInfo[] wordInfos = new MyWordInfo [words.Length];

            // The enum value list is the same for every word; Enum.GetValues allocates a new
            // array per call, so it is computed once up front rather than inside the loop.
            Wnlib.PartsOfSpeech[] posEnum = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

            for (int i = 0; i < words.Length; i++)
            {
                MyWnLexicon.WSDWordInfo wordInfo = MyWnLexicon.Lexicon.FindWordInfo(words[i], true);

                if (wordInfo.partOfSpeech == Wnlib.PartsOfSpeech.Unknown)
                {
                    // Unknown to the lexicon: the slot in wordInfos stays null.
                    continue;
                }

                if (wordInfo.text != string.Empty)
                {
                    words[i] = wordInfo.text;
                }

                for (int j = 0; j < posEnum.Length; j++)
                {
                    if (wordInfo.senseCounts[j] > 0) // get the first part of speech
                    {
                        wordInfos[i] = new MyWordInfo(words[i], posEnum[j]);
                        break;
                    }
                }
            }

            WordSenseDisambiguator wsd = new WordSenseDisambiguator();

            wordInfos = wsd.Disambiguate(wordInfos);

            return(wordInfos);
        }
Ejemplo n.º 6
0
		/// <summary>
		/// Builds the sentence for the current word selection and records it together with its
		/// score: updates the single best sentence and maintains the bounded list of best sentences.
		/// </summary>
		/// <param name="score">Score of the current selection.</param>
		private void Add_Sentence(int score)
		{
			// Upper bound on the number of sentences kept in k_best_sentence.
			const int maxKept = 500;

			++_numItems;

			// For each position use the original word when nothing is selected (-1),
			// otherwise the selected alternative.
			string[] words = new string[_numWord];
			for (int i = 0; i < _numWord; i++)
			{
				if (_selected[i] == -1)
				{
					words[i] = _myPos[i].Word;
				}
				else
				{
					words[i] = _alterWord[i][_selected[i]].Word;
				}
			}

			// Entries are stored as "<sentence> <score>".
			string entry = string.Format(_originalSentence, words) + " " + score;

			if (score > bestScore)
			{
				bestSentence = entry;
				bestScore = score;
			}

			// Never store the same sentence/score pair twice.
			if (k_best_sentence.Contains(entry))
			{
				return;
			}

			if (k_best_sentence.Count < maxKept)
			{
				k_best_sentence.Add(entry);
				k_best_score.Add(score);
				return;
			}

			// The list is full: find the lowest-scored entry (the first one on ties).
			// Seeding from element 0 avoids a magic sentinel that large scores could exceed.
			int worstIndex = 0;
			int worstScore = (int)k_best_score[0];
			for (int j = 1; j < k_best_sentence.Count; j++)
			{
				int current = (int)k_best_score[j];
				if (current < worstScore)
				{
					worstScore = current;
					worstIndex = j;
				}
			}

			// Evict only when the newcomer is actually better; otherwise the list of best
			// sentences would get worse by swapping in a lower-scored one.
			if (score > worstScore)
			{
				k_best_sentence.RemoveAt(worstIndex);
				k_best_score.RemoveAt(worstIndex);

				k_best_sentence.Add(entry);
				k_best_score.Add(score);
			}
		}
Ejemplo n.º 7
0
		/// <summary>
		/// Picks, for every word that has alternatives, the first alternative whose sense
		/// information can be read, and sums the scores of those picks.
		/// </summary>
		/// <param name="current">
		/// Receives a new array with one slot per entry of <c>_myPos</c>; this method allocates
		/// the array but does not assign any of its elements.
		/// </param>
		/// <returns>The sum of <c>ComputeScore</c> over the words for which an alternative was accepted.</returns>
		private int InitialSentence(out MyWordInfo[] current)
		{
			current = new MyWordInfo[_myPos.Length];
			int total = 0;

			for (int wordIdx = 0; wordIdx < current.Length; wordIdx++)
			{
				// Words without any alternative contribute nothing.
				if (_alterWord[wordIdx] == null || _alterWord[wordIdx].Length <= 0)
				{
					continue;
				}

				for (int alt = 0; alt < _alterWord[wordIdx].Length; alt++)
				{
					// The selection is recorded before it is validated, so the last
					// alternative tried stays selected even if none is accepted.
					_selected[wordIdx] = alt;
					_contextWords[wordIdx] = _alterWord[wordIdx][alt];

					if (!Read_WordSenseInfo(wordIdx))
					{
						continue;
					}

					total += ComputeScore(wordIdx);
					break;
				}
			}

			return total;
		}
Ejemplo n.º 8
0
		/// <summary>
		/// Produces a neighbouring word selection by replacing one randomly chosen word with one
		/// of its randomly chosen alternatives, and scores the result.
		/// </summary>
		/// <param name="current">The selection to start from; it is cloned, not modified.</param>
		/// <param name="trial">
		/// Receives the clone of <paramref name="current"/>, with the chosen word replaced when
		/// the move succeeds.
		/// </param>
		/// <returns>
		/// The sum of <c>ComputeScore</c> over all positions, or -1 when the chosen word has no
		/// alternatives or the sense information of the chosen alternative cannot be read.
		/// </returns>
		private int GetNeighbour(MyWordInfo[] current, out MyWordInfo[] trial)
		{
			trial = (MyWordInfo[])current.Clone();

			int wordIndex = random.Next(current.Length);

			// An empty alternative list has to be rejected as well as a null one:
			// random.Next(0) returns 0 and indexing element 0 of an empty array would throw.
			if (_alterWord[wordIndex] == null || _alterWord[wordIndex].Length == 0)
			{
				return -1;
			}

			int candIndex = random.Next(_alterWord[wordIndex].Length);

			// The shared selection state is updated before validation, as Read_WordSenseInfo
			// is called with only the word index.
			_selected[wordIndex] = candIndex;
			_contextWords[wordIndex] = _alterWord[wordIndex][candIndex];

			if (!Read_WordSenseInfo(wordIndex))
			{
				return -1;
			}

			trial[wordIndex] = _alterWord[wordIndex][candIndex];

			int overall = 0;
			for (int i = 0; i < trial.Length; i++)
			{
				overall += ComputeScore(i);
			}

			return overall;
		}
Ejemplo n.º 9
0
		/// <summary>
		/// Collects the words of the synsets referenced by a WordNet index entry and turns them
		/// into a sorted array of candidate <c>MyWordInfo</c> objects.
		/// </summary>
		/// <param name="index">Index entry whose sense offsets (<c>offs</c>) are read.</param>
		/// <param name="pos">
		/// The word being looked up. A negative <c>Sense</c> is overwritten with 1. The object
		/// itself is appended to the candidates when <c>index.wd</c> is not among the kept words.
		/// </param>
		/// <returns>The candidates, sorted with <c>CompareLexeme</c> when there is more than one.</returns>
		private MyWordInfo[] LookupCandidates(Wnlib.Index index, MyWordInfo pos )
		{						
			// Sense is used as a 1-based position into index.offs below.
			// NOTE(review): a Sense of 0, or one larger than offs.Length, would index out of range — confirm callers never pass it.
			if (pos.Sense < 0) pos.Sense=1;						
			SynSet synset=new Wnlib.SynSet( index.offs[pos.Sense - 1 ], index.pos , index.wd, null , pos.Sense - 1);					
						
			// lexemes and synIndex are parallel lists: synIndex[k] is the offset of the synset lexemes[k] came from.
			ArrayList lexemes=new ArrayList() ;
			ArrayList synIndex=new ArrayList() ;

			foreach (Lexeme obj in synset.words)
			{
				lexemes.Add(obj) ;
				synIndex.Add(index.offs[pos.Sense - 1 ]);
			}
			
			if (index.offs.Length > 1)
			{
				if (lexemes.Count <= 1)
				{
					// The selected sense gave at most one word: add the words of every sense.
					for(int i=0; i < index.offs.Length; i++ )
					{				
						synset=new Wnlib.SynSet( index.offs[i], index.pos, index.wd, null, i );

						foreach (Lexeme obj in synset.words)
						{
							synIndex.Add(index.offs[i]);
							lexemes.Add(obj) ;
						}
					}
				}
				else
				{
					// Otherwise add up to five words from the first sense only.
					synset=new Wnlib.SynSet( index.offs[0], index.pos, index.wd, null, 0 );
					int count=0; // number of words taken from the first sense so far
					foreach (Lexeme obj in synset.words)
					{
						lexemes.Add(obj) ;
						synIndex.Add(index.offs[0]);
						++count;
						if (count > 4) break;
					}

				}
			}
			
			// sortedSet keeps the lexemes that pass the filter; trace records the words already kept.
			ArrayList sortedSet=new ArrayList() ;
			Hashtable trace=new Hashtable() ;
			// Number of lexemes seen so far that received a SemCor object.
			int hasSem=0;
			for (int i = 0; i < lexemes.Count; i++)
			{				
				Lexeme word=(Lexeme)lexemes[i];				
				word.word=word.word.ToLower() ;

				int senIndex=(int)synIndex[i];
				if (senIndex != -1  && word.wnsns > 0)
				{
					word.semcor=new Wnlib.SemCor(word, senIndex);
					lexemes[i]=word;					
					++hasSem;
				}

				// Keep only the first occurrence of each word, and only if it has a positive
				// semcor value or fewer than four lexemes have received a SemCor object so far.
				if (!trace.ContainsKey(word.word) )					
				{					
					if ((word.semcor != null &&  word.semcor.semcor  > 0 ) || (hasSem < 4))
					{
						trace[word.word]=1;
						sortedSet.Add(word) ;
					}
				}
				// The empty block below appears to be left over from a commented-out catch; it has no effect.
				{}
			}
			
			Lexeme[] words=(Lexeme[])sortedSet.ToArray( typeof(Lexeme) );						

			ArrayList candidates=new ArrayList();

			for( int i=0; i < words.Length; i++ )
			{
				string word=words[i].word.Replace("_", " " );				
				// Skip entries whose first character sorts at or below 'Z' (digits, upper-case letters, most punctuation).
				// NOTE(review): an empty word would throw here — confirm lexemes are never empty.
				if( word[0] <= 'Z' ) continue;

				MyWordInfo newpos=new MyWordInfo(word, pos.Pos) ;
				newpos.Sense=words[i].wnsns;
				if (words[i].semcor != null)
					newpos.Frequency=words[i].semcor.semcor;
				else
					newpos.Frequency=0;

				candidates.Add( newpos);								
			}

			// Make sure the looked-up word itself is represented among the candidates.
			if (!trace.ContainsKey (index.wd))
				candidates.Add(pos) ;

			if (candidates.Count > 1)
			{
				CompareLexeme comparer=new CompareLexeme();
				candidates.Sort(comparer);
			}
			

			return (MyWordInfo[])candidates.ToArray( typeof(MyWordInfo) );
		}
		/// <summary>
		/// Runs the disambiguation over the given context words and writes the chosen sense
		/// into each of them.
		/// </summary>
		/// <param name="words">Context words; stored in <c>_contextWords</c> before processing.</param>
		/// <returns><c>_contextWords</c> after each element's <c>Sense</c> has been set from <c>_bestSenses</c>.</returns>
		public MyWordInfo[] Disambiguate(MyWordInfo[] words)
		{
			_contextWords = words;

			MyInit();
			Scoring_Overlaps();

			// Copy the winning sense of every position back onto its word.
			int position = 0;
			while (position < _contextWords.Length)
			{
				_contextWords[position].Sense = _bestSenses[position];
				position++;
			}

			return _contextWords;
		}
Ejemplo n.º 11
0
 /// <summary>
 /// Creates the word data for <paramref name="wordInfo"/> and builds it immediately
 /// by calling <c>Build_WordData</c>.
 /// </summary>
 /// <param name="wordInfo">Word information stored in <c>WordInfo</c>.</param>
 public HierarchicalWordData(MyWordInfo wordInfo)
 {
     WordInfo = wordInfo;

     Build_WordData();
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Stores the supplied word information and then runs <c>Build_WordData</c>.
 /// </summary>
 /// <param name="wordInfo">Value assigned to <c>WordInfo</c> before the build step.</param>
 public HierarchicalWordData(MyWordInfo wordInfo)
 {
     // WordInfo is set first, ahead of the Build_WordData call.
     WordInfo = wordInfo;
     Build_WordData();
 }
Ejemplo n.º 13
0
        //MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;        
        //private void MyInitOld()
        //{
        //    _myWordsInfo1 = Disambiguate(_source);
        //    _myWordsInfo2 = Disambiguate(_target);

        //    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
        //    _similarity =new float[m, n] ;

        //    for (int i=0; i < m; i++)
        //    {
        //        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;                

        //        string word1 = _source[i];
        //        for (int j=0; j < n; j++)
        //        {
        //            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;					

        //            string word2=_target[j];
        //            WordDistance distance = new WordDistance();
        //            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);					

        //            _similarity[i, j]=weight;					
        //        }
        //    }
        //}



        /// <summary>
        /// Computes, for every pair (<c>string1[i]</c>, <c>string2[j]</c>), the best similarity
        /// over all parts of speech from enum index 1 upward, where each per-part-of-speech score
        /// is the maximum of the edit-distance similarity and the WordNet similarity.
        /// </summary>
        /// <param name="string1">Row words; <c>m</c> is set to its length.</param>
        /// <param name="string2">Column words; <c>n</c> is set to its length.</param>
        /// <returns>An <c>m</c>-row jagged matrix with <c>n</c> scores per row.</returns>
        float[][] GetSimilarityMatrix(string[] string1, string[] string2)
        {
            m = string1.Length;
            n = string2.Length;
            float[][] result = new float[m][];

            Wnlib.PartsOfSpeech[] allPos = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

            // Hierarchical data is cached per word and part of speech, and created on first use.
            HierarchicalWordData[][] sourceData = new HierarchicalWordData[m][];
            HierarchicalWordData[][] targetData = new HierarchicalWordData[n][];

            for (int r = 0; r < m; r++)
            {
                result[r] = new float[n];
                sourceData[r] = new HierarchicalWordData[allPos.Length];
            }
            for (int c = 0; c < n; c++)
            {
                targetData[c] = new HierarchicalWordData[allPos.Length];
            }

            for (int r = 0; r < m; r++)
            {
                for (int c = 0; c < n; c++)
                {
                    float lexical = AcronymChecker.GetEditDistanceSimilarity(string1[r], string2[c]);

                    // The first enum value (index 0) is not evaluated.
                    for (int pos = 1; pos < allPos.Length; pos++)
                    {
                        if (sourceData[r][pos] == null)
                        {
                            sourceData[r][pos] = new HierarchicalWordData(new MyWordInfo(string1[r], allPos[pos]));
                        }
                        if (targetData[c][pos] == null)
                        {
                            targetData[c][pos] = new HierarchicalWordData(new MyWordInfo(string2[c], allPos[pos]));
                        }

                        WordSimilarity measure = new WordSimilarity();
                        float semantic = measure.GetSimilarity(sourceData[r][pos], targetData[c][pos]);
                        float score = Math.Max(lexical, semantic);

                        // Only ever raise the stored value.
                        if (score > result[r][c])
                        {
                            result[r][c] = score;
                        }
                    }
                }
            }

            return result;
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Scores two word sequences by building their similarity matrix with
 /// <c>SentenceSim.GetSimilarityMatrix</c> and reducing it with <c>GetSimScore</c>.
 /// </summary>
 /// <param name="words1">First word sequence.</param>
 /// <param name="words2">Second word sequence.</param>
 /// <param name="key">Not used by this method.</param>
 /// <returns>The value returned by <c>GetSimScore</c> for the similarity matrix.</returns>
 public float GetWordNetSimScore(MyWordInfo[] words1, MyWordInfo[] words2, string key)
 {
     return GetSimScore(SentenceSim.GetSimilarityMatrix(words1, words2));
 }