/* From search.c:
 * Find word in index file and return parsed entry in data structure.
 * Input word must be exact match of string in database. */
// From the WordNet Manual (http://wordnet.princeton.edu/man/wnsearch.3WN.html)
// index_lookup() finds searchstr in the index file for pos and returns a pointer
// to the parsed entry in an Index data structure. searchstr must exactly match the
// form of the word (lower case only, hyphens and underscores in the same places) in
// the index file. NULL is returned if a match is not found.
public static Index lookup(string word, PartOfSpeech pos)
{
    // Guard against null as well as empty: the previous check (word == "")
    // let a null word fall through to word[0] and throw NullReferenceException.
    if (string.IsNullOrEmpty(word))
        return null;

    // TDMS 14 Aug 2005 - changed to allow for numbers as well
    // because the database contains searches that can start with numerals.
    if (!char.IsLetter(word[0]) && !char.IsNumber(word[0]))
        return null;

    // Binary search of the on-disk index file; null means no exact match.
    string line = WNDB.binSearch(word, pos);
    if (line == null)
        return null;

    // Parse the whitespace-separated index line in the order the fields
    // appear: lemma, pos, sense count, pointer-type count + types,
    // offset count, tagged-sense count, then the synset offsets.
    Index idx = new Index();
    StrTok st = new StrTok(line);
    idx.Wd = st.next();                              /* the word */
    idx.PartOfSpeech = PartOfSpeech.of(st.next());   /* the part of speech */
    idx.SenseCnt = int.Parse(st.next());             /* collins count */
    int ptruse_cnt = int.Parse(st.next());           /* number of pointer types */
    idx.Ptruse = new PointerType[ptruse_cnt];
    for (int j = 0; j < ptruse_cnt; j++)
        idx.Ptruse[j] = PointerType.of(st.next());
    int off_cnt = int.Parse(st.next());              /* number of synset offsets */
    idx.SynsetOffsets = new int[off_cnt];
    idx.TaggedSensesCount = int.Parse(st.next());
    for (int j = 0; j < off_cnt; j++)
        idx.SynsetOffsets[j] = int.Parse(st.next());
    return idx;
}
/// <summary>
/// Finds synonym candidates for the given word/part-of-speech pair.
/// If the word itself has no index entry and <paramref name="includeMorphs"/>
/// is set, its morphological base forms are tried in turn; <paramref name="pos"/>.Word
/// is updated to each base form as it is tried, so the caller can see which
/// form (or the last attempted form) was used.
/// </summary>
/// <param name="pos">word plus part of speech; Word is lower-cased and may be rewritten</param>
/// <param name="includeMorphs">whether to fall back to morphological base forms</param>
/// <returns>candidate synonyms, or null when no index entry could be found</returns>
public MyWordInfo[] FindSynonyms(ref MyWordInfo pos, bool includeMorphs)
{
    // Index entries are stored lower-case only.
    pos.Word = pos.Word.ToLower();
    Wnlib.Index index = Wnlib.Index.lookup(pos.Word, PartOfSpeech.of(pos.Pos));

    if (index == null)
    {
        if (!includeMorphs)
        {
            return null;
        }

        // Fall back to the word's morphological base forms.
        Wnlib.MorphStr morphology = new Wnlib.MorphStr(pos.Word, Wnlib.PartOfSpeech.of(pos.Pos));
        for (string baseForm = morphology.next(); baseForm != null; baseForm = morphology.next())
        {
            index = Wnlib.Index.lookup(baseForm, Wnlib.PartOfSpeech.of(pos.Pos));
            pos.Word = baseForm; // caller sees the attempted form even on failure
            if (index != null)
            {
                break;
            }
        }
    }

    return index == null ? null : LookupCandidates(index, pos);
}
/// <summary>
/// Checks whether <paramref name="word"/> (or one of its morphological base
/// forms) has a WordNet index entry for the given part of speech.
/// </summary>
/// <param name="word">word to look up (lower-cased internally)</param>
/// <param name="pos">part of speech to search under</param>
/// <returns>0 when an index entry exists, -1 otherwise</returns>
public static int GetSynsetIndex(string word, PartsOfSpeech pos)
{
    word = word.ToLower();
    //word=RemoveBadChars (word);
    Wnlib.Index index = Wnlib.Index.lookup(word, PartOfSpeech.of(pos));

    if (index == null)
    {
        // Not found directly: try each morphological base form in turn.
        Wnlib.MorphStr morphology = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
        for (string baseForm = morphology.next(); baseForm != null; baseForm = morphology.next())
        {
            index = Wnlib.Index.lookup(baseForm, Wnlib.PartOfSpeech.of(pos));
            if (index != null)
            {
                break;
            }
        }
    }

    return index == null ? -1 : 0;
}
/// <summary>
/// Runs word-sense disambiguation over SParseTrees, then collects the textual
/// definition of each chosen sense into NodesSenses/SensesNos/DisambRes,
/// attaches the sense text to the tree nodes, and publishes the updated trees
/// back into SParseTrees.
/// </summary>
public void beginDisambiguate()
{
    Disambiguate(SParseTrees);
    ///////////////////////////get the text of senses ///////////////////////
    for (int i = 0; i < NewParseTreeSenses.Count; i++)
    {
        MyWordInfo[] mwiArr = (MyWordInfo[])NewParseTreeSenses[i];
        ParseTree pt;
        // NOTE(review): pt is assigned but never read — presumably left over
        // from an earlier version; confirm before removing.
        pt = (ParseTree)NewSParseTrees[i];
        // NOTE(review): AddArrStems is called with the WHOLE collection on
        // every iteration of the outer loop — confirm this is intended.
        AddArrStems(NewSParseTrees);
        for (int j = 0; j < mwiArr.Length; j++)
        {
            Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)mwiArr[j].Pos);
            try
            {
                //i need the stems here to get index
                Wnlib.Index index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                SynSet sense = new SynSet(index, mwiArr[j].Sense, null);
                NodesSenses.Add(sense.defn);
                SensesNos.Add(mwiArr[j].Sense);
                string s = mwiArr[j].Word.ToLower() + " : " + sense.defn;
                DisambRes.Add(s);
            }
            catch
            {
                // Lookup on the surface form failed (e.g. null index): retry
                // with the precomputed stem for this position.
                try
                {
                    Wnlib.Index index = Wnlib.Index.lookup(Stems[j], p);
                    SynSet sense = new SynSet(index, mwiArr[j].Sense, null);
                    NodesSenses.Add(sense.defn);
                    SensesNos.Add(mwiArr[j].Sense);
                    string s = Stems[j].ToLower() + " : " + sense.defn;
                    DisambRes.Add(s);
                }
                catch { }; // best-effort: words that fail both lookups are skipped
            };
        }
        Senses = NodesSenses;
    }
    //////////////////////////add sense text & sense no to the nodes//////////////////////////////
    AddNodesSenses(NewSParseTrees);
    //////////////////////////put the output parsetrees in SparseTree again//////////////////////////////
    SParseTrees = NewSParseTrees;
}
/// <summary>
/// Use if sense of word is not known.
/// Builds one OVERVIEW Search per part of speech under which
/// <paramref name="word"/> has an entry in the WordNet index.
/// </summary>
/// <param name="word">word to lookup</param>
/// <returns>List of Wnlib.Search, one per matching part of speech (possibly empty)</returns>
public static List<Search> GetWordNetInfo(string word)
{
    List<Search> SearchList = new List<Search>();

    // For each part of speech (skipping the Unknown placeholder)...
    // (Removed: an unused WnLexicon.WordInfo local and dead commented-out
    // sense-count bookkeeping that was never read.)
    foreach (Wnlib.PartsOfSpeech pos in (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech)))
    {
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        // Get an index to a synset collection; null means the word has no
        // entry under this part of speech.
        Wnlib.Index index = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(pos));
        if (index == null)
        {
            continue;
        }

        // Record an overview search for this part of speech.
        SearchType ser = new SearchType(false, "OVERVIEW");
        SearchList.Add(new Search(word, true, Wnlib.PartOfSpeech.of(pos), ser, 0));
    }

    return SearchList;
}
/// <summary>
/// Emits every sense of <paramref name="idx"/>, grouped by the relative
/// (verb-group) lists in <paramref name="rellist"/>: for each group, all of
/// its not-yet-emitted senses are traced (HYPERPTR) and added to
/// <c>senses</c>, followed by a dashed separator in <c>buf</c>. Senses that
/// belong to no group are appended individually afterwards.
/// </summary>
/// <param name="idx">index entry whose senses (offsets) are traversed</param>
/// <param name="rellist">linked list of sense groups; may be null</param>
void doRelList(Index idx, RelList rellist)
{
    int i;
    bool flag;
    SynSet synptr;
    // Tracks which sense numbers have already been emitted.
    // NOTE(review): fixed capacity of 300 — assumes no entry has more senses; confirm.
    BitSet outsenses = new BitSet(300);
    prflag = true;
    for (RelList rel = rellist; rel != null; rel = rel.next)
    {
        flag = false;
        for (i = 0; i < idx.offs.Length; i++)
            if (rel.senses[i] && !outsenses[i])
            {
                flag = true;
                synptr = new SynSet(idx.offs[i], pos, "", this, i);
                synptr.strsns(i + 1);
                synptr.tracePtrs(PointerType.of("HYPERPTR"), pos, 0);
                synptr.frames.Clear(); // TDMS 03 Jul 2006 - frames get added in wordnet.cs after filtering
                // TDMS 11 Oct 2005 - build hierarchical results
                senses.Add(synptr);
                outsenses[i] = true;
            }
        // Separator only when the group actually contributed a new sense.
        if (flag)
            buf += "--------------\n";
    }
    // Emit all senses that were not part of any relative group.
    for (i = 0; i < idx.offs.Length; i++)
        if (!outsenses[i])
        {
            synptr = new SynSet(idx.offs[i], pos, "", this, i);
            synptr.strsns(i + 1);
            synptr.tracePtrs(PointerType.of("HYPERPTR"), pos, 0);
            synptr.frames.Clear(); // TDMS 03 Jul 2006 - frames get added in wordnet.cs after filtering
            // TDMS 11 Oct 2005 - build hierarchical results
            senses.Add(synptr);
            // NOTE(review): this separator has 15 dashes vs 14 above —
            // possibly unintentional; preserved as-is.
            buf += "---------------\n";
        }
}
/// <summary>
/// Records that senses <paramref name="rel1"/> and <paramref name="rel2"/>
/// are relatives. If either is already in a relative group, the other is
/// added to that group (transitivity) and any other group containing either
/// sense is merged in. Otherwise a new group holding both senses is appended.
/// </summary>
/// <param name="idx">index entry the sense numbers refer to (unused here, kept for the caller's signature)</param>
/// <param name="rel1">first sense number</param>
/// <param name="rel2">second sense number</param>
/// <param name="rellist">head of the existing group list; may be null</param>
/// <returns>the (possibly new) head of the group list</returns>
RelList addRelatives(Index idx, int rel1, int rel2, RelList rellist)
{
    RelList rel, last = null;
    for (rel = rellist; rel != null; rel = rel.next)
    {
        if (rel.senses[rel1] || rel.senses[rel2])
        {
            rel.senses[rel1] = rel.senses[rel2] = true;
            /* If part of another relative group, merge the groups */
            // FIX: parenthesized — the original `r != rel && r.senses[rel1] || r.senses[rel2]`
            // parsed as `(r != rel && r.senses[rel1]) || r.senses[rel2]`, which always
            // matched rel itself (a wasted self-merge). Net behavior is unchanged.
            // NOTE(review): merged groups are not removed from the list, so the
            // list may keep overlapping groups; downstream code tolerates this.
            for (RelList r = rellist; r != null; r = r.next)
                if (r != rel && (r.senses[rel1] || r.senses[rel2]))
                    rel.senses = rel.senses.Or(r.senses);
            return rellist;
        }
        last = rel;
    }
    // Neither sense was in any group: start a new one at the tail.
    rel = new RelList();
    rel.senses[rel1] = rel.senses[rel2] = true;
    if (rellist == null)
        return rel;
    last.next = rel;
    return rellist;
}
/// <summary>
/// Scans every sense of <paramref name="idx"/> for VERBGROUP pointers and
/// groups related senses via <see cref="addRelatives"/>.
/// </summary>
/// <param name="idx">index entry whose senses are scanned</param>
/// <param name="rellist">existing group list (may be null)</param>
/// <returns>the updated group list head</returns>
RelList findVerbGroups(Index idx, RelList rellist)
{
    /* Read all senses */
    for (int senseIdx = 0; senseIdx < idx.offs.Length; senseIdx++)
    {
        SynSet synset = new SynSet(idx.offs[senseIdx], pos, idx.wd, this, senseIdx);

        /* Look for VERBGROUP ptr(s) for this sense. If found, create
           group for senses, or add to existing group. */
        foreach (Pointer ptr in synset.ptrs)
        {
            if (ptr.ptp.mnemonic != "VERBGROUP")
            {
                continue;
            }
            /* Need to find sense number for ptr offset */
            for (int target = 0; target < idx.offs.Length; target++)
            {
                if (ptr.off == idx.offs[target])
                {
                    rellist = addRelatives(idx, senseIdx, target, rellist);
                    break;
                }
            }
        }
    }
    return rellist;
}
// TDMS - relatives - synonyms of verb - grouped by similarity of meaning
/// <summary>
/// Builds and emits relative (verb-group) sense groupings for the entry.
/// Only the verb part of speech has relative processing; all other parts
/// of speech are a no-op.
/// </summary>
void relatives(Index idx)
{
    if (pos.name == "verb")
    {
        RelList groups = findVerbGroups(idx, null);
        doRelList(idx, groups);
    }
}
/// <summary>
/// Convenience constructor: builds the synset for sense number
/// <paramref name="sens"/> of index entry <paramref name="idx"/>
/// (offsets in <c>idx.offs</c> are parallel to sense numbers).
/// </summary>
public SynSet(Index idx, int sens, Search sch)
    : this(idx.offs[sens], idx.pos, idx.wd, sch, sens)
{
}
/// <summary>
/// Runs word-sense disambiguation over SParseTrees, then collects the textual
/// definition of each chosen sense into NodesSenses/SensesNos/DisambRes.
/// Verbs are first normalised through SentenceParser.GetINFOfVerb (presumably
/// the infinitive form — TODO confirm) before the index lookup. Finally the
/// sense text is attached to the tree nodes and the trees published back into
/// SParseTrees.
/// </summary>
public void beginDisambiguate()
{
    Disambiguate(SParseTrees);
    ///////////////////////////get the text of senses ///////////////////////
    for (int i = 0; i < NewParseTreeSenses.Count; i++)
    {
        MyWordInfo[] mwiArr = (MyWordInfo[])NewParseTreeSenses[i];
        ParseTree pt;
        // NOTE(review): pt is assigned but never read — confirm before removing.
        pt = (ParseTree)NewSParseTrees[i];
        // NOTE(review): called with the whole collection on every iteration —
        // confirm this is intended.
        AddArrStems(NewSParseTrees);
        for (int j = 0; j < mwiArr.Length; j++)
        {
            Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)mwiArr[j].Pos);
            try
            {
                ArrayList results = new ArrayList();
                Wnlib.Index index;
                //i need the stems here to get index
                if (mwiArr[j].Pos == PartsOfSpeech.Verb)
                {
                    // Verbs: look up the normalised form when one is available.
                    SentenceParser dummysp = new SentenceParser();
                    results = dummysp.GetINFOfVerb(mwiArr[j].Word.ToLower());
                    if (results.Count > 0)
                    {
                        index = Wnlib.Index.lookup((string)results[0], p);
                    }
                    else
                    {
                        index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                    }
                }
                else
                {
                    index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                }
                SynSet sense = new SynSet(index, mwiArr[j].Sense, null);
                NodesSenses.Add(sense.defn);
                SensesNos.Add(mwiArr[j].Sense);
                // Display string uses the same form that was looked up.
                string s;
                if (results.Count > 0)
                {
                    s = (string)results[0] + " : " + sense.defn;
                }
                else
                {
                    s = mwiArr[j].Word.ToLower() + " : " + sense.defn;
                }
                DisambRes.Add(s);
            }
            catch
            {
                // Surface-form lookup failed: retry with the precomputed stem.
                try
                {
                    Wnlib.Index index = Wnlib.Index.lookup(Stems[j], p);
                    SynSet sense = new SynSet(index, mwiArr[j].Sense, null);
                    NodesSenses.Add(sense.defn);
                    SensesNos.Add(mwiArr[j].Sense);
                    string s = Stems[j].ToLower() + " : " + sense.defn;
                    DisambRes.Add(s);
                }
                catch { }; // best-effort: words that fail both lookups are skipped
            };
        }
        Senses = NodesSenses;
    }
    //////////////////////////add sense text & sense no to the nodes//////////////////////////////
    AddNodesSenses(NewSParseTrees);
    //////////////////////////put the output parsetrees in SparseTree again//////////////////////////////
    SParseTrees = NewSParseTrees;
}
/// <summary>
/// Collects synonym candidates for the sense selected in <paramref name="pos"/>:
/// gathers lexemes from the chosen synset (plus either all senses, or the top
/// few of the first sense, depending on how many were found), scores them via
/// SemCor frequency data, filters duplicates and capitalised entries, and
/// returns them sorted by CompareLexeme.
/// </summary>
/// <param name="index">index entry for the word being expanded</param>
/// <param name="pos">selected word/POS/sense; Sense &lt; 0 is coerced to 1</param>
/// <returns>sorted candidate array (always includes <paramref name="pos"/> itself unless already traced)</returns>
private MyWordInfo[] LookupCandidates(Wnlib.Index index, MyWordInfo pos)
{
    if (pos.Sense < 0)
    {
        pos.Sense = 1;
    }
    // NOTE(review): no upper-bounds check — pos.Sense - 1 can exceed
    // index.offs.Length and throw; confirm callers guarantee the range.
    SynSet synset = new Wnlib.SynSet(index.offs[pos.Sense - 1], index.pos, index.wd, null, pos.Sense - 1);
    ArrayList lexemes = new ArrayList();
    ArrayList synIndex = new ArrayList();   // synset offset parallel to each lexeme
    foreach (Lexeme obj in synset.words)
    {
        lexemes.Add(obj);
        synIndex.Add(index.offs[pos.Sense - 1]);
    }
    if (index.offs.Length > 1)
    {
        if (lexemes.Count <= 1)
        {
            // Selected sense yielded (almost) nothing: widen to every sense.
            // NOTE(review): this re-adds the selected sense's words a second time.
            for (int i = 0; i < index.offs.Length; i++)
            {
                synset = new Wnlib.SynSet(index.offs[i], index.pos, index.wd, null, i);
                foreach (Lexeme obj in synset.words)
                {
                    synIndex.Add(index.offs[i]);
                    lexemes.Add(obj);
                }
            }
        }
        else
        {
            synset = new Wnlib.SynSet(index.offs[0], index.pos, index.wd, null, 0);
            int count = 0;
            //get top most frequency word senses
            foreach (Lexeme obj in synset.words)
            {
                lexemes.Add(obj);
                synIndex.Add(index.offs[0]);
                ++count;
                if (count > 4)   // cap at 5 lexemes from the first sense
                {
                    break;
                }
            }
        }
    }
    ArrayList sortedSet = new ArrayList();
    Hashtable trace = new Hashtable();   // words already accepted (dedup)
    int hasSem = 0;                      // how many lexemes have SemCor data so far
    for (int i = 0; i < lexemes.Count; i++)
    {
        Lexeme word = (Lexeme)lexemes[i];
        word.word = word.word.ToLower();
        int senIndex = (int)synIndex[i];
        if (senIndex != -1 && word.wnsns > 0)
        {
            // Attach SemCor frequency data for scoring.
            word.semcor = new Wnlib.SemCor(word, senIndex);
            lexemes[i] = word;
            ++hasSem;
        }
        if (!trace.ContainsKey(word.word))
        {
            // Keep lexemes with positive SemCor frequency, or the first few
            // seen before SemCor data accumulates.
            if ((word.semcor != null && word.semcor.semcor > 0) || (hasSem < 4))
            {
                trace[word.word] = 1;
                sortedSet.Add(word);
            }
        }
        //catch {}
    }
    Lexeme[] words = (Lexeme[])sortedSet.ToArray(typeof(Lexeme));
    ArrayList candidates = new ArrayList();
    for (int i = 0; i < words.Length; i++)
    {
        string word = words[i].word.Replace("_", " ");
        // NOTE(review): skips any word starting with a char <= 'Z' (capitals,
        // digits, punctuation) — presumably to drop proper nouns; confirm.
        if (word[0] <= 'Z')
        {
            continue;
        }
        MyWordInfo newpos = new MyWordInfo(word, pos.Pos);
        newpos.Sense = words[i].wnsns;
        if (words[i].semcor != null)
        {
            newpos.Frequency = words[i].semcor.semcor;
        }
        else
        {
            newpos.Frequency = 0;
        }
        candidates.Add(newpos);
    }
    // Make sure the original word itself is among the candidates.
    if (!trace.ContainsKey(index.wd))
    {
        candidates.Add(pos);
    }
    if (candidates.Count > 1)
    {
        CompareLexeme comparer = new CompareLexeme();
        candidates.Sort(comparer);
    }
    return ((MyWordInfo[])candidates.ToArray(typeof(MyWordInfo)));
}
// Builds the word-to-ontology ("wordology") mapping for every concept listed
// in AllConcepts.txt and serialises the resulting ArrayList to wordology.txt.
//
// Concepts carrying an ENGLISH1 map-lex property have each filler (strings
// like "some-word-n1", split on '-'/'_') parsed into a MyWordInfo, sense-
// disambiguated, and the winning WordNet sense recorded. Concepts without a
// map-lex property are looked up directly in WordNet under each part of
// speech, and every sense found is recorded.
//
// NOTE(review): a single WordOlogy instance (WO) is reused for every entry
// added to wordologyArr; if WordOlogy is a reference type all stored entries
// alias one object and end up holding the LAST mapping's values — confirm
// whether WordOlogy is a struct before trusting the serialised output.
// SECURITY: BinaryFormatter is insecure and deprecated — never deserialise
// wordology.txt from an untrusted source (consider System.Text.Json).
public void ConstructMapping()
{
    string concept = "";
    int ID = -1;                                // running entry id (increment order differs per branch; preserved)
    WordOlogy WO = new WordOlogy();
    ArrayList wordologyArr = new ArrayList();   // all WordOlogy entries, serialised at the end
    int conceptcounter = 0;                     // concepts with at least one mapped sense

    LoadOntology();

    FileStream allConceptsFile = new FileStream(_ontologyDirectoryPath + @"\AllConcepts.txt", FileMode.Open);
    StreamReader allConceptsFileReader = new StreamReader(allConceptsFile);
    string _wordologyDirectoryPath = @"..\..\..\wordology\";
    BinaryFormatter bf = new BinaryFormatter();
    FileStream fs = new FileStream(_wordologyDirectoryPath + "\\wordology.txt", FileMode.Create);

    int indxWatcherconceptCounter = 0;          // progress counter (debug-watch only)
    int NoMapLexConcepts = 0;                   // concepts lacking an ENGLISH1 property
    int CannotGetSenseExeption = 0;             // fillers processed before the first successful sense
    int AllSensesMapped = 0;                    // total senses successfully mapped

    while ((concept = allConceptsFileReader.ReadLine()) != null)
    {
        indxWatcherconceptCounter++;
        Concept C = (Concept)Onto[concept];
        Property maplexProperty = C.FullProperties["ENGLISH1"];
        List<MyWordInfo> maplexsenses = new List<MyWordInfo>();
        MyWordInfo mwi = new MyWordInfo();
        int NoOfSensesSucceeded = 0;

        if (maplexProperty != null)
        {
            // Parse each filler ("word-parts-pos") into lemma text + POS.
            for (int i = 0; i < maplexProperty.Fillers.Count; i++)
            {
                string tmp = maplexProperty.Fillers[i].ScalarFiller;
                char[] charr = new char[] { '-', '_' };
                string[] splt = tmp.Split(charr);
                // Single-token fillers (no POS suffix, e.g. "a-bomb" style oddities) are skipped.
                if (splt.Length > 1)
                {
                    mwi = new MyWordInfo();
                    // All but the final token form the (possibly multi-word) lemma.
                    for (int k = 0; k < splt.Length - 2; k++)
                    {
                        mwi.Word += splt[k] + " ";
                    }
                    mwi.Word += splt[splt.Length - 2];
                    if (splt[splt.Length - 1].Length == 2)
                    {
                        // Two-char suffix: first char encodes the POS (v/n/a/r).
                        if (splt[splt.Length - 1][0] == 'v') { mwi.Pos = Wnlib.PartsOfSpeech.Verb; }
                        else if (splt[splt.Length - 1][0] == 'n') { mwi.Pos = Wnlib.PartsOfSpeech.Noun; }
                        else if (splt[splt.Length - 1][0] == 'a') { mwi.Pos = Wnlib.PartsOfSpeech.Adj; }
                        else if (splt[splt.Length - 1][0] == 'r') { mwi.Pos = Wnlib.PartsOfSpeech.Adv; }
                        else { mwi.Pos = Wnlib.PartsOfSpeech.Unknown; }
                    }
                    else
                    {
                        // No POS suffix: the last token is part of the word itself.
                        mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                        mwi.Word += " " + splt[splt.Length - 1];
                    }
                    // Skip immediate duplicates (same word and POS as the previous filler).
                    if (i == 0 || (maplexsenses.Count > 0 && (mwi.Word != maplexsenses[maplexsenses.Count - 1].Word || mwi.Pos != maplexsenses[maplexsenses.Count - 1].Pos)))
                    {
                        maplexsenses.Add(mwi);
                    }
                }
            }

            if (maplexsenses.Count > 0)
            {
                MyWordInfo[] maplexArray = maplexsenses.ToArray();
                WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                MyWordInfo[] res = wsd.Disambiguate(maplexArray);
                int i = 0;
                foreach (MyWordInfo wi in res)
                {
                    // Recover the raw POS suffix for the SenseNo field.
                    string tmp = maplexProperty.Fillers[i].ScalarFiller;
                    char[] charr = new char[] { '-', '_' };
                    string[] splt = tmp.Split(charr);
                    if (splt.Length > 1 && splt[splt.Length - 1].Length == 2)
                    {
                        WO.SenseNo = splt[splt.Length - 1];
                    }
                    // else: filler has no POS suffix; SenseNo keeps its previous value.

                    Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);
                    try
                    {
                        Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                        SynSet sense = new SynSet(index, res[i].Sense, null);
                        WO.Sense = sense.defn;
                        AllSensesMapped++;
                        NoOfSensesSucceeded++;
                        try { WO.Pos = p.name; }
                        catch { WO.Pos = wi.Pos.ToString(); }
                        ID++;
                        // FIX: a stray trailing "WO.Word = word;" used to clobber
                        // the word with "" (word was never assigned); the sibling
                        // MapConceptsWithMapLex keeps wi.Word, so this now does too.
                        WO.Word = wi.Word;
                        WO.ID = ID;
                        WO.Concept = concept;
                    }
                    catch { } // best-effort: lookup/sense failures leave WO unchanged

                    if (NoOfSensesSucceeded == 0)
                    {
                        CannotGetSenseExeption++;
                    }
                    i++;
                    // NOTE(review): entries are added even when the lookup above
                    // failed, so stale/duplicate data can be stored — preserved as-is.
                    wordologyArr.Add(WO);
                }
                conceptcounter++;
            }
        }
        else
        {
            // No map-lex property: fall back to direct WordNet lookups of the
            // concept name under every part of speech, recording each sense.
            NoMapLexConcepts++;
            Wnlib.Index index;
            Search se;
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Noun));
                if (index != null)
                {
                    WO.Pos = "noun";
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("noun"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.Sense = sense.defn;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Verb));
                if (index != null)
                {
                    WO.Pos = "verb";
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Verb);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("verb"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Sense = sense.defn;
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adj));
                if (index != null)
                {
                    WO.Pos = "adj";
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Adj);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("adj"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Sense = sense.defn;
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adv));
                if (index != null)
                {
                    WO.Pos = "adv";
                    // FIX: was GetRelatedness(PartsOfSpeech.Noun) — a copy-paste
                    // slip; every other branch queries its own part of speech.
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Adv);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("adv"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Sense = sense.defn;
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            if (NoOfSensesSucceeded != 0)
            {
                conceptcounter++;
            }
        }
    } //end while

    allConceptsFileReader.Close();
    allConceptsFile.Close();
    bf.Serialize(fs, wordologyArr);
    fs.Close();
    MessageBox.Show("no map-lex concepts number = " + NoMapLexConcepts.ToString());
    MessageBox.Show("can't getsense pos number = " + CannotGetSenseExeption.ToString());
    MessageBox.Show(conceptcounter.ToString());
}
/// <summary>
/// Maps one ontology concept to WordNet senses via its map-lex (ENGLISH1)
/// property: each filler ("word-parts-pos", split on '-'/'_') is parsed into a
/// MyWordInfo, the set is sense-disambiguated, and each winning sense is
/// recorded into the instance-level wordologyArr.
/// NOTE(review): the single WordOlogy instance (WO) is reused for every entry
/// added — if WordOlogy is a reference type, stored entries alias one object;
/// confirm it is a struct before trusting the accumulated results.
/// </summary>
/// <param name="concept">ontology concept name being mapped</param>
/// <param name="maplexProperty">the concept's ENGLISH1 property (must be non-null)</param>
private void MapConceptsWithMapLex(string concept, Property maplexProperty)
{
    MyWordInfo mwi;
    WordOlogy WO = new WordOlogy();
    List<MyWordInfo> maplexsenses = new List<MyWordInfo>();
    int NoOfSensesSucceeded = 0;
    // Parse each filler into lemma text + part of speech.
    for (int i = 0; i < maplexProperty.Fillers.Count; i++)
    {
        string tmp = maplexProperty.Fillers[i].ScalarFiller;
        char[] charr = new char[] { '-', '_' };
        string[] splt = tmp.Split(charr);
        //there r fillers with no type & a-bomb masalan
        // (single-token fillers carry no POS suffix and are skipped)
        if (splt.Length > 1)
        {
            mwi = new MyWordInfo();
            // All but the final token form the (possibly multi-word) lemma.
            for (int k = 0; k < splt.Length - 2; k++)
            {
                mwi.Word += splt[k] + " ";
            }
            mwi.Word += splt[splt.Length - 2];
            if (splt[splt.Length - 1].Length == 2)
            {
                // Two-char suffix: first char encodes the POS (v/n/a/r).
                if (splt[splt.Length - 1][0] == 'v') { mwi.Pos = Wnlib.PartsOfSpeech.Verb; }
                else if (splt[splt.Length - 1][0] == 'n') { mwi.Pos = Wnlib.PartsOfSpeech.Noun; }
                else if (splt[splt.Length - 1][0] == 'a') { mwi.Pos = Wnlib.PartsOfSpeech.Adj; }
                else if (splt[splt.Length - 1][0] == 'r') { mwi.Pos = Wnlib.PartsOfSpeech.Adv; }
                else { mwi.Pos = Wnlib.PartsOfSpeech.Unknown; }
            }
            else
            {
                // No POS suffix: the last token is part of the word itself.
                mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                mwi.Word += " " + splt[splt.Length - 1];
            }
            // Skip immediate duplicates (same word and POS as the previous filler).
            if (i == 0 || (maplexsenses.Count > 0 && (mwi.Word != maplexsenses[maplexsenses.Count - 1].Word || mwi.Pos != maplexsenses[maplexsenses.Count - 1].Pos)))
            {
                maplexsenses.Add(mwi);
            }
        }
        //ne loop 3al ontology kolaha
    }
    if (maplexsenses.Count > 0)
    {
        MyWordInfo[] maplexArray = new MyWordInfo[maplexsenses.Count];
        for (int j = 0; j < maplexsenses.Count; j++)
        {
            maplexArray[j] = maplexsenses[j];
        }
        WordSenseDisambiguator wsd = new WordSenseDisambiguator();
        MyWordInfo[] res = new MyWordInfo[maplexArray.Length];
        res = wsd.Disambiguate(maplexArray);
        int i = 0;
        foreach (MyWordInfo wi in res)
        {
            // Recover the raw POS suffix for the SenseNo field.
            string tmp = maplexProperty.Fillers[i].ScalarFiller;
            char[] charr = new char[] { '-', '_' };
            string[] splt = tmp.Split(charr);
            if (splt.Length > 1 && splt[splt.Length - 1].Length == 2)
            {
                WO.SenseNo = splt[splt.Length - 1];
            }
            else
            {
                // "sense doesn't have POS"; SenseNo keeps its previous value.
            }
            Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);
            try
            {
                Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                SynSet sense = new SynSet(index, res[i].Sense, null);
                WO.Sense = sense.defn;
                // AllSensesMapped++;
                NoOfSensesSucceeded++;
                try { WO.Pos = p.name; }
                catch { WO.Pos = wi.Pos.ToString(); }
                ID++;
                WO.Word = wi.Word;
                WO.ID = ID;
                WO.Concept = concept;
                wordologyArr.Add(WO);
            }
            catch { }; // best-effort: lookup/sense failures record nothing
            if (NoOfSensesSucceeded == 0)
            {
                CannotGetSenseExeption++;
            }
            i++;
            //bf.Serialize(fs, "\n" + WO);
        }
        conceptcounter++;
    }
}
/// <summary>
/// Convenience constructor: builds the synset for sense number
/// <paramref name="sens"/> of index entry <paramref name="idx"/>
/// (entries in <c>idx.SynsetOffsets</c> are parallel to sense numbers).
/// </summary>
public SynSet(Index idx, int sens, Search sch)
    : this(idx.SynsetOffsets[sens], idx.PartOfSpeech, idx.Wd, sch, sens)
{
}
/// <summary>
/// Collects synonym candidates for the sense selected in <paramref name="pos"/>:
/// gathers lexemes from the chosen synset (plus either all senses, or the top
/// few of the first sense, depending on how many were found), scores them via
/// SemCor frequency data, filters duplicates and capitalised entries, and
/// returns them sorted by CompareLexeme.
/// </summary>
/// <param name="index">index entry for the word being expanded</param>
/// <param name="pos">selected word/POS/sense; Sense &lt; 0 is coerced to 1</param>
/// <returns>sorted candidate array (always includes <paramref name="pos"/> itself unless already traced)</returns>
private static MyWordInfo[] LookupCandidates(Index index, MyWordInfo pos)
{
    if (pos.Sense < 0)
        pos.Sense = 1;
    // NOTE(review): no upper-bounds check — pos.Sense - 1 can exceed
    // index.SynsetOffsets.Length and throw; confirm callers guarantee the range.
    SynSet synset = new Wnlib.SynSet(index.SynsetOffsets[pos.Sense - 1], index.PartOfSpeech, index.Wd, null, pos.Sense - 1);
    ArrayList lexemes = new ArrayList();
    ArrayList synIndex = new ArrayList();   // synset offset parallel to each lexeme
    foreach (Lexeme obj in synset.words)
    {
        lexemes.Add(obj);
        synIndex.Add(index.SynsetOffsets[pos.Sense - 1]);
    }
    if (index.SynsetOffsets.Length > 1)
    {
        if (lexemes.Count <= 1)
        {
            // Selected sense yielded (almost) nothing: widen to every sense.
            // NOTE(review): this re-adds the selected sense's words a second time.
            for (int i = 0; i < index.SynsetOffsets.Length; i++)
            {
                synset = new SynSet(index.SynsetOffsets[i], index.PartOfSpeech, index.Wd, null, i);
                foreach (Lexeme obj in synset.words)
                {
                    synIndex.Add(index.SynsetOffsets[i]);
                    lexemes.Add(obj);
                }
            }
        }
        else
        {
            synset = new SynSet(index.SynsetOffsets[0], index.PartOfSpeech, index.Wd, null, 0);
            int count = 0;
            //get top most frequency word senses
            foreach (Lexeme obj in synset.words)
            {
                lexemes.Add(obj);
                synIndex.Add(index.SynsetOffsets[0]);
                ++count;
                if (count > 4)   // cap at 5 lexemes from the first sense
                    break;
            }
        }
    }
    ArrayList sortedSet = new ArrayList();
    Hashtable trace = new Hashtable();   // words already accepted (dedup)
    int hasSem = 0;                      // how many lexemes have SemCor data so far
    for (int i = 0; i < lexemes.Count; i++)
    {
        Lexeme word = (Lexeme)lexemes[i];
        word.word = word.word.ToLower();
        int senIndex = (int)synIndex[i];
        if (senIndex != -1 && word.wnsns > 0)
        {
            // Attach SemCor frequency data for scoring.
            word.semcor = new Wnlib.SemCor(word, senIndex);
            lexemes[i] = word;
            ++hasSem;
        }
        if (!trace.ContainsKey(word.word))
        {
            // Keep lexemes with positive SemCor frequency, or the first few
            // seen before SemCor data accumulates.
            if ((word.semcor != null && word.semcor.semcor > 0) || (hasSem < 4))
            {
                trace[word.word] = 1;
                sortedSet.Add(word);
            }
        }
        //catch {}
    }
    var words = (Lexeme[])sortedSet.ToArray(typeof(Lexeme));
    ArrayList candidates = new ArrayList();
    for (int i = 0; i < words.Length; i++)
    {
        string word = words[i].word.Replace("_", " ");
        // NOTE(review): skips any word starting with a char <= 'Z' (capitals,
        // digits, punctuation) — presumably to drop proper nouns; confirm.
        if (word[0] <= 'Z')
            continue;
        MyWordInfo newpos = new MyWordInfo(word, pos.Pos);
        newpos.Sense = words[i].wnsns;
        if (words[i].semcor != null)
            newpos.Frequency = words[i].semcor.semcor;
        else
            newpos.Frequency = 0;
        candidates.Add(newpos);
    }
    // Make sure the original word itself is among the candidates.
    if (!trace.ContainsKey(index.Wd))
        candidates.Add(pos);
    if (candidates.Count > 1)
    {
        CompareLexeme comparer = new CompareLexeme();
        candidates.Sort(comparer);
    }
    return (MyWordInfo[])candidates.ToArray(typeof(MyWordInfo));
}