/// <summary>
/// Looks up <paramref name="pos"/> through <c>Wnlib.Index.lookup</c> and returns the
/// candidates produced by <c>LookupCandidates</c> for the index entry that was found.
/// </summary>
/// <param name="pos">
/// Word to look up. Its <c>Word</c> is lower-cased in place; when the match is only found
/// through a morphological form, <c>Word</c> is replaced by that form.
/// </param>
/// <param name="includeMorphs">
/// When true and the direct lookup fails, the forms yielded by <c>Wnlib.MorphStr</c> are tried in order.
/// </param>
/// <returns>The candidate array, or null when no index entry could be found.</returns>
public MyWordInfo[] FindSynonyms(ref MyWordInfo pos, bool includeMorphs)
{
    // Invariant casing: the lookup key must not depend on the current UI culture
    // (e.g. the Turkish dotless i would otherwise change the key).
    pos.Word = pos.Word.ToLowerInvariant();

    Wnlib.Index index = Wnlib.Index.lookup(pos.Word, PartOfSpeech.of(pos.Pos));
    if (index == null)
    {
        if (!includeMorphs)
        {
            return null;
        }

        Wnlib.MorphStr morphs = new Wnlib.MorphStr(pos.Word, Wnlib.PartOfSpeech.of(pos.Pos));
        string morph;
        while ((morph = morphs.next()) != null)
        {
            index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos.Pos));
            if (index != null)
            {
                // Only commit the morph to the caller's word once it is known to resolve;
                // a failed attempt must not clobber the ref argument.
                pos.Word = morph;
                break;
            }
        }
    }

    if (index == null)
    {
        return null;
    }

    return LookupCandidates(index, pos);
}
/// <summary>
/// Builds a <c>MyWordInfo</c> for each input word, using the first part of speech that has a
/// positive sense count, and passes the result to <c>WordSenseDisambiguator.Disambiguate</c>.
/// </summary>
/// <param name="words">
/// Words to process. Entries are overwritten in place with the text reported by
/// <c>WnLexicon.Lexicon.FindWordInfo</c> when that text is non-empty.
/// </param>
/// <returns>
/// The array returned by the disambiguator, or null when <paramref name="words"/> is null or empty.
/// </returns>
private MyWordInfo[] Disambiguate(string[] words)
{
    if (words == null || words.Length == 0)
    {
        return null;
    }

    // Loop-invariant: the enum values do not change between words.
    Wnlib.PartsOfSpeech[] posEnum = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

    MyWordInfo[] wordInfos = new MyWordInfo[words.Length];
    for (int i = 0; i < words.Length; i++)
    {
        WnLexicon.WordInfo wordInfo = WnLexicon.Lexicon.FindWordInfo(words[i], true);
        if (wordInfo.partOfSpeech == Wnlib.PartsOfSpeech.Unknown)
        {
            // Unknown words keep a default (unset) slot in wordInfos.
            continue;
        }

        // Never replace the caller's word with a null/empty text.
        if (!string.IsNullOrEmpty(wordInfo.text))
        {
            words[i] = wordInfo.text;
        }

        for (int j = 0; j < posEnum.Length; j++)
        {
            if (wordInfo.senseCounts[j] > 0) // get the first part of speech
            {
                wordInfos[i] = new MyWordInfo(words[i], posEnum[j]);
                break;
            }
        }
    }

    WordSenseDisambiguator wsd = new WordSenseDisambiguator();
    wordInfos = wsd.Disambiguate(wordInfos);
    return wordInfos;
}
//MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
//private void MyInitOld()
//{
//    _myWordsInfo1 = Disambiguate(_source);
//    _myWordsInfo2 = Disambiguate(_target);
//    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
//    _similarity =new float[m, n] ;
//    for (int i=0; i < m; i++)
//    {
//        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;
//        string word1 = _source[i];
//        for (int j=0; j < n; j++)
//        {
//            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;
//            string word2=_target[j];
//            WordDistance distance = new WordDistance();
//            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);
//            _similarity[i, j]=weight;
//        }
//    }
//}

/// <summary>
/// Computes, for every pair (string1[i], string2[j]), the largest value of
/// max(edit-distance similarity, <c>WordSimilarity.GetSimilarity</c>) over the
/// parts of speech at enum positions 1 and above.
/// </summary>
/// <param name="string1">Words indexing the rows of the result.</param>
/// <param name="string2">Words indexing the columns of the result.</param>
/// <returns>A jagged matrix with one row per entry of <paramref name="string1"/>.</returns>
/// <remarks>Stores the two input lengths in the <c>m</c> and <c>n</c> members as a side effect.</remarks>
float[][] GetSimilarityMatrix(string[] string1, string[] string2)
{
    m = string1.Length;
    n = string2.Length;

    Wnlib.PartsOfSpeech[] POSEnum = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    int posCount = POSEnum.Length;

    // Result rows and the per-word caches of hierarchical data (one slot per part of speech).
    float[][] simMatrix = new float[m][];
    HierarchicalWordData[][] wordData_1 = new HierarchicalWordData[m][];
    for (int row = 0; row < m; row++)
    {
        simMatrix[row] = new float[n];
        wordData_1[row] = new HierarchicalWordData[posCount];
    }

    HierarchicalWordData[][] wordData_2 = new HierarchicalWordData[n][];
    for (int col = 0; col < n; col++)
    {
        wordData_2[col] = new HierarchicalWordData[posCount];
    }

    for (int i = 0; i < m; i++)
    {
        HierarchicalWordData[] sourceData = wordData_1[i];
        float[] simRow = simMatrix[i];

        for (int j = 0; j < n; j++)
        {
            HierarchicalWordData[] targetData = wordData_2[j];
            float synDist = AcronymChecker.GetEditDistanceSimilarity(string1[i], string2[j]);

            // Position 0 of the enum is deliberately skipped.
            for (int p = 1; p < posCount; p++)
            {
                // Word data is built lazily, the first time a (word, part of speech) pair is needed.
                if (sourceData[p] == null)
                {
                    sourceData[p] = new HierarchicalWordData(new MyWordInfo(string1[i], POSEnum[p]));
                }

                if (targetData[p] == null)
                {
                    targetData[p] = new HierarchicalWordData(new MyWordInfo(string2[j], POSEnum[p]));
                }

                WordSimilarity wordDistance = new WordSimilarity();
                float semDist = wordDistance.GetSimilarity(sourceData[p], targetData[p]);
                float weight = Math.Max(synDist, semDist);

                if (weight > simRow[j])
                {
                    simRow[j] = weight;
                }
            }
        }
    }

    return simMatrix;
}
// Index order: [i][alter_i][j][alter_j]
private int[][][][] _scores;

/// <summary>
/// Stores the inputs, then runs <c>MyInit</c> followed by <c>Generate</c> and exposes the
/// resulting <c>k_best_sentence</c> collection through <c>list</c>.
/// </summary>
/// <param name="pos">Word information kept in <c>_myPos</c>.</param>
/// <param name="originalSentence">Sentence text kept in <c>_originalSentence</c>.</param>
public SimilarGenerator(MyWordInfo[] pos, string originalSentence)
{
    _originalSentence = originalSentence;
    _myPos = pos;

    MyInit();
    Generate();

    list = k_best_sentence;
}
/// <summary>
/// Builds a <c>MyWordInfo</c> for each input word, using the first part of speech that has a
/// positive sense count, and passes the result to <c>WordSenseDisambiguator.Disambiguate</c>.
/// </summary>
/// <param name="words">
/// Words to process. Entries are overwritten in place with the text reported by
/// <c>MyWnLexicon.Lexicon.FindWordInfo</c> when that text is non-empty.
/// </param>
/// <returns>
/// The array returned by the disambiguator, or null when <paramref name="words"/> is null or empty.
/// </returns>
private MyWordInfo[] Disambiguate(string[] words)
{
    if (words == null || words.Length == 0)
    {
        return(null);
    }

    // Loop-invariant: the enum values do not change between words.
    Wnlib.PartsOfSpeech[] posEnum = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

    MyWordInfo[] wordInfos = new MyWordInfo[words.Length];
    for (int i = 0; i < words.Length; i++)
    {
        MyWnLexicon.WSDWordInfo wordInfo = MyWnLexicon.Lexicon.FindWordInfo(words[i], true);
        if (wordInfo.partOfSpeech == Wnlib.PartsOfSpeech.Unknown)
        {
            // Unknown words keep a default (unset) slot in wordInfos.
            continue;
        }

        // Never replace the caller's word with a null/empty text.
        if (!string.IsNullOrEmpty(wordInfo.text))
        {
            words[i] = wordInfo.text;
        }

        for (int j = 0; j < posEnum.Length; j++)
        {
            if (wordInfo.senseCounts[j] > 0) // get the first part of speech
            {
                wordInfos[i] = new MyWordInfo(words[i], posEnum[j]);
                break;
            }
        }
    }

    WordSenseDisambiguator wsd = new WordSenseDisambiguator();
    wordInfos = wsd.Disambiguate(wordInfos);
    return(wordInfos);
}
/// <summary>
/// Formats the currently selected words into <c>_originalSentence</c>, tracks the best-scoring
/// sentence, and records the sentence (with its score appended) in the bounded k-best lists.
/// </summary>
/// <param name="score">Score of the currently selected word combination.</param>
/// <remarks>
/// <c>k_best_sentence</c> and <c>k_best_score</c> are kept in step and hold at most 500 entries.
/// When full, the lowest-scored entry is replaced only if the new score is strictly higher,
/// so the lists never get worse.
/// </remarks>
private void Add_Sentence(int score)
{
    const int MaxKept = 500;

    ++_numItems;

    // A selection of -1 means "keep the original word" for that position.
    string[] words = new string[_numWord];
    for (int i = 0; i < _numWord; i++)
    {
        if (_selected[i] == -1)
        {
            words[i] = _myPos[i].Word;
        }
        else
        {
            words[i] = _alterWord[i][_selected[i]].Word;
        }
    }

    string newsen = string.Format(_originalSentence, words);
    string entry = newsen + " " + score;

    if (score > bestScore)
    {
        bestSentence = entry;
        bestScore = score;
    }

    // Duplicates are never stored, whether or not the list is full.
    if (k_best_sentence.Contains(entry))
    {
        return;
    }

    if (k_best_sentence.Count < MaxKept)
    {
        k_best_sentence.Add(entry);
        k_best_score.Add(score);
        return;
    }

    // List is full: locate the first entry carrying the lowest score.
    int min = int.MaxValue;
    int rem = -1;
    for (int j = 0; j < k_best_sentence.Count; j++)
    {
        int candidate = (int)k_best_score[j];
        if (candidate < min)
        {
            min = candidate;
            rem = j;
        }
    }

    // Evict only when the newcomer actually beats the current worst entry.
    if (rem != -1 && score > min)
    {
        k_best_sentence.RemoveAt(rem);
        k_best_score.RemoveAt(rem);
        k_best_sentence.Add(entry);
        k_best_score.Add(score);
    }
}
/// <summary>
/// For each word position that has alternatives, selects the first alternative for which
/// <c>Read_WordSenseInfo</c> succeeds and sums the <c>ComputeScore</c> values of those positions.
/// </summary>
/// <param name="current">
/// Receives a new array sized like <c>_myPos</c>; this method does not fill in its elements.
/// </param>
/// <returns>The accumulated score over all positions where an alternative was accepted.</returns>
private int InitialSentence(out MyWordInfo[] current)
{
    current = new MyWordInfo[_myPos.Length];

    int total = 0;
    for (int wordIndex = 0; wordIndex < current.Length; wordIndex++)
    {
        // Positions without alternatives contribute nothing.
        if (_alterWord[wordIndex] == null || _alterWord[wordIndex].Length == 0)
        {
            continue;
        }

        for (int cand = 0; cand < _alterWord[wordIndex].Length; cand++)
        {
            _selected[wordIndex] = cand;
            _contextWords[wordIndex] = _alterWord[wordIndex][cand];

            if (!Read_WordSenseInfo(wordIndex))
            {
                continue;
            }

            // First usable alternative wins for this position.
            total += ComputeScore(wordIndex);
            break;
        }
    }

    return total;
}
/// <summary>
/// Produces a neighbouring sentence by swapping one randomly chosen word position for one of
/// its randomly chosen alternatives, and returns the summed <c>ComputeScore</c> of all positions.
/// </summary>
/// <param name="current">The sentence to start from; it is cloned, not modified.</param>
/// <param name="trial">Receives the clone of <paramref name="current"/>, with the chosen position replaced on success.</param>
/// <returns>
/// The overall score of the trial sentence, or -1 when there is no position to pick, the chosen
/// position has no alternatives (null or empty), or <c>Read_WordSenseInfo</c> fails for it.
/// </returns>
/// <remarks>
/// <c>_selected</c> and <c>_contextWords</c> are updated before <c>Read_WordSenseInfo</c> is called
/// and are not restored when it fails.
/// </remarks>
private int GetNeighbour(MyWordInfo[] current, out MyWordInfo[] trial)
{
    trial = (MyWordInfo[])current.Clone();

    // Nothing to pick from: Random.Next(0) would yield index 0 and read out of range.
    if (current.Length == 0)
    {
        return -1;
    }

    int wordIndex = random.Next(current.Length);

    // An empty alternatives array is treated like a missing one; otherwise
    // Random.Next(0) returns 0 and the element access below would throw.
    if (_alterWord[wordIndex] == null || _alterWord[wordIndex].Length == 0)
    {
        return -1;
    }

    int candIndex = random.Next(_alterWord[wordIndex].Length);
    _selected[wordIndex] = candIndex;
    _contextWords[wordIndex] = _alterWord[wordIndex][candIndex];

    if (!Read_WordSenseInfo(wordIndex))
    {
        return -1;
    }

    trial[wordIndex] = _alterWord[wordIndex][candIndex];

    int overall = 0;
    for (int i = 0; i < trial.Length; i++)
    {
        overall += ComputeScore(i);
    }

    return overall;
}
/// <summary>
/// Collects the lexemes of the synsets reachable from <paramref name="index"/> and turns them
/// into a de-duplicated array of <c>MyWordInfo</c> candidates, sorted with <c>CompareLexeme</c>
/// when there is more than one.
/// </summary>
/// <param name="index">Index entry whose <c>offs</c>, <c>pos</c> and <c>wd</c> members select the synsets.</param>
/// <param name="pos">
/// The word being looked up. Its <c>Sense</c> is treated as 1-based and is reset to 1 when it
/// does not address an entry of <c>index.offs</c>. The object itself is appended to the result
/// when <c>index.wd</c> was not among the accepted lexemes.
/// </param>
/// <returns>The candidate array (possibly empty).</returns>
private MyWordInfo[] LookupCandidates(Wnlib.Index index, MyWordInfo pos)
{
    // Sense is used as a 1-based position into index.offs; anything outside the valid
    // range (including 0) would index out of bounds, so fall back to the first sense.
    if (pos.Sense < 1 || pos.Sense > index.offs.Length)
    {
        pos.Sense = 1;
    }

    SynSet synset = new Wnlib.SynSet(index.offs[pos.Sense - 1], index.pos, index.wd, null, pos.Sense - 1);

    // lexemes[k] and synIndex[k] are parallel: the lexeme and the offset it came from.
    ArrayList lexemes = new ArrayList();
    ArrayList synIndex = new ArrayList();
    foreach (Lexeme obj in synset.words)
    {
        lexemes.Add(obj);
        synIndex.Add(index.offs[pos.Sense - 1]);
    }

    if (index.offs.Length > 1)
    {
        if (lexemes.Count <= 1)
        {
            // The selected synset gave at most one lexeme: widen to every offset of the index.
            for (int i = 0; i < index.offs.Length; i++)
            {
                synset = new Wnlib.SynSet(index.offs[i], index.pos, index.wd, null, i);
                foreach (Lexeme obj in synset.words)
                {
                    synIndex.Add(index.offs[i]);
                    lexemes.Add(obj);
                }
            }
        }
        else
        {
            synset = new Wnlib.SynSet(index.offs[0], index.pos, index.wd, null, 0);
            int count = 0;
            //get top most frequency word senses
            foreach (Lexeme obj in synset.words)
            {
                lexemes.Add(obj);
                synIndex.Add(index.offs[0]);
                ++count;
                if (count > 4)
                {
                    // At most five lexemes are taken from the first offset.
                    break;
                }
            }
        }
    }

    ArrayList sortedSet = new ArrayList();
    Hashtable trace = new Hashtable(); // lower-cased lexeme text already accepted
    int hasSem = 0;                    // number of lexemes that received a SemCor entry so far
    for (int i = 0; i < lexemes.Count; i++)
    {
        Lexeme word = (Lexeme)lexemes[i];
        // Invariant casing keeps the de-duplication key independent of the current culture.
        word.word = word.word.ToLowerInvariant();
        int senIndex = (int)synIndex[i];
        if (senIndex != -1 && word.wnsns > 0)
        {
            word.semcor = new Wnlib.SemCor(word, senIndex);
            lexemes[i] = word;
            ++hasSem;
        }

        if (!trace.ContainsKey(word.word))
        {
            // Keep lexemes with a positive SemCor count; others are still accepted
            // while fewer than four SemCor entries have been attached.
            if ((word.semcor != null && word.semcor.semcor > 0) || (hasSem < 4))
            {
                trace[word.word] = 1;
                sortedSet.Add(word);
            }
        }
    }

    Lexeme[] words = (Lexeme[])sortedSet.ToArray(typeof(Lexeme));
    ArrayList candidates = new ArrayList();
    for (int i = 0; i < words.Length; i++)
    {
        string word = words[i].word.Replace("_", " ");

        // Skip empty text (word[0] would throw) and text whose first character sorts at or
        // below 'Z' (upper-case letters, digits and most ASCII punctuation).
        if (word.Length == 0 || word[0] <= 'Z')
        {
            continue;
        }

        MyWordInfo newpos = new MyWordInfo(word, pos.Pos);
        newpos.Sense = words[i].wnsns;
        if (words[i].semcor != null)
        {
            newpos.Frequency = words[i].semcor.semcor;
        }
        else
        {
            newpos.Frequency = 0;
        }

        candidates.Add(newpos);
    }

    if (!trace.ContainsKey(index.wd))
    {
        candidates.Add(pos);
    }

    if (candidates.Count > 1)
    {
        CompareLexeme comparer = new CompareLexeme();
        candidates.Sort(comparer);
    }

    return (MyWordInfo[])candidates.ToArray(typeof(MyWordInfo));
}
/// <summary>
/// Stores <paramref name="words"/> as the context, runs <c>MyInit</c> and <c>Scoring_Overlaps</c>,
/// then copies each entry of <c>_bestSenses</c> into the <c>Sense</c> of the matching context word.
/// </summary>
/// <param name="words">Context words; they are updated in place.</param>
/// <returns>The context word array held in <c>_contextWords</c> after the update.</returns>
public MyWordInfo[] Disambiguate(MyWordInfo[] words)
{
    _contextWords = words;

    MyInit();
    Scoring_Overlaps();

    int position = 0;
    while (position < _contextWords.Length)
    {
        _contextWords[position].Sense = _bestSenses[position];
        position++;
    }

    return _contextWords;
}
/// <summary>
/// Stores <paramref name="wordInfo"/> in <c>WordInfo</c> and then calls <c>Build_WordData</c>.
/// </summary>
/// <param name="wordInfo">The word this instance is built for.</param>
public HierarchicalWordData(MyWordInfo wordInfo)
{
    WordInfo = wordInfo;

    Build_WordData();
}
//MyWordInfo[] _myWordsInfo_i, _myWordsInfo_j;
//private void MyInitOld()
//{
//    _myWordsInfo1 = Disambiguate(_source);
//    _myWordsInfo2 = Disambiguate(_target);
//    m = _myWordsInfo1.Length; n = _myWordsInfo2.Length;
//    _similarity =new float[m, n] ;
//    for (int i=0; i < m; i++)
//    {
//        _myWordsInfo1[i].Sense = _myWordsInfo1[i].Sense < 0 ? 0 : _myWordsInfo1[i].Sense;
//        string word1 = _source[i];
//        for (int j=0; j < n; j++)
//        {
//            _myWordsInfo2[i].Sense = _myWordsInfo2[i].Sense < 0 ? 0 : _myWordsInfo2[i].Sense;
//            string word2=_target[j];
//            WordDistance distance = new WordDistance();
//            float weight = distance.GetSimilarity(_myWordsInfo1[i], _myWordsInfo2[j]);
//            _similarity[i, j]=weight;
//        }
//    }
//}

/// <summary>
/// Builds the word-to-word similarity matrix for two word lists. Each cell holds the highest
/// value of max(edit-distance similarity, <c>WordSimilarity.GetSimilarity</c>) seen across the
/// parts of speech at enum positions 1 and above.
/// </summary>
/// <param name="string1">Words that index the rows.</param>
/// <param name="string2">Words that index the columns.</param>
/// <returns>A jagged array with <c>string1.Length</c> rows of <c>string2.Length</c> values.</returns>
/// <remarks>The <c>m</c> and <c>n</c> members are overwritten with the two input lengths.</remarks>
float[][] GetSimilarityMatrix(string[] string1, string[] string2)
{
    m = string1.Length;
    n = string2.Length;

    Wnlib.PartsOfSpeech[] POSEnum = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    int slots = POSEnum.Length;

    float[][] simMatrix = new float[m][];
    HierarchicalWordData[][] wordData_1 = new HierarchicalWordData[m][];
    HierarchicalWordData[][] wordData_2 = new HierarchicalWordData[n][];

    // One result row and one cache row per source word.
    for (int r = 0; r < m; r++)
    {
        simMatrix[r] = new float[n];
        wordData_1[r] = new HierarchicalWordData[slots];
    }

    // One cache row per target word.
    for (int c = 0; c < n; c++)
    {
        wordData_2[c] = new HierarchicalWordData[slots];
    }

    for (int i = 0; i < m; i++)
    {
        HierarchicalWordData[] leftCache = wordData_1[i];
        float[] resultRow = simMatrix[i];

        for (int j = 0; j < n; j++)
        {
            HierarchicalWordData[] rightCache = wordData_2[j];
            float synDist = AcronymChecker.GetEditDistanceSimilarity(string1[i], string2[j]);

            // Enum position 0 is not evaluated.
            for (int partOfSpeech = 1; partOfSpeech < slots; partOfSpeech++)
            {
                // Cache entries are created on first use only.
                if (leftCache[partOfSpeech] == null)
                {
                    MyWordInfo leftInfo = new MyWordInfo(string1[i], POSEnum[partOfSpeech]);
                    leftCache[partOfSpeech] = new HierarchicalWordData(leftInfo);
                }

                if (rightCache[partOfSpeech] == null)
                {
                    MyWordInfo rightInfo = new MyWordInfo(string2[j], POSEnum[partOfSpeech]);
                    rightCache[partOfSpeech] = new HierarchicalWordData(rightInfo);
                }

                WordSimilarity wordDistance = new WordSimilarity();
                float semDist = wordDistance.GetSimilarity(leftCache[partOfSpeech], rightCache[partOfSpeech]);
                float weight = Math.Max(synDist, semDist);

                if (weight > resultRow[j])
                {
                    resultRow[j] = weight;
                }
            }
        }
    }

    return simMatrix;
}
/// <summary>
/// Scores two word lists by building their similarity matrix with
/// <c>SentenceSim.GetSimilarityMatrix</c> and reducing it with <c>GetSimScore</c>.
/// </summary>
/// <param name="words1">First word list.</param>
/// <param name="words2">Second word list.</param>
/// <param name="key">Not used by this method.</param>
/// <returns>The value returned by <c>GetSimScore</c> for the matrix.</returns>
public float GetWordNetSimScore(MyWordInfo[] words1, MyWordInfo[] words2, string key)
{
    float[][] pairwiseScores = SentenceSim.GetSimilarityMatrix(words1, words2);

    return GetSimScore(pairwiseScores);
}