예제 #1
0
		/// <summary>
		/// Finds <paramref name="word"/> in the index file for <paramref name="pos"/> and parses the
		/// matching line into an <see cref="Index"/> (port of index_lookup() from WordNet's search.c).
		/// </summary>
		/// <remarks>
		/// From the WordNet manual (http://wordnet.princeton.edu/man/wnsearch.3WN.html):
		/// the search string must exactly match the form of the word in the index file
		/// (lower case only, hyphens and underscores in the same places).
		/// </remarks>
		/// <param name="word">Exact index form of the word to look up.</param>
		/// <param name="pos">Part of speech whose index file is searched.</param>
		/// <returns>
		/// The parsed index entry, or null when <paramref name="word"/> is null or empty, does not
		/// start with a letter or a number, or has no matching line in the index file.
		/// </returns>
		public static Index lookup(string word, PartOfSpeech pos)
		{
			// A null word used to fall through to word[0] and throw; treat it like the empty string.
			if (string.IsNullOrEmpty(word))
				return null;
			// TDMS 14 Aug 2005 - numbers are allowed as well as letters because the
			// database contains entries that start with numerals.
			if (!char.IsLetter(word[0]) && !char.IsNumber(word[0]))
				return null;
			string line = WNDB.binSearch(word, pos);
			if (line == null)
				return null;

			// The tokens below are consumed strictly in the order they appear on the index line.
			Index idx = new Index();
			StrTok st = new StrTok(line);
			idx.Wd = st.next(); /* the word */
			idx.PartOfSpeech = PartOfSpeech.of(st.next()); /* the part of speech */
			idx.SenseCnt = int.Parse(st.next()); /* collins count */
			int ptruse_cnt = int.Parse(st.next()); /* number of pointer types */
			idx.Ptruse = new PointerType[ptruse_cnt];
			for (int j = 0; j < ptruse_cnt; j++)
				idx.Ptruse[j] = PointerType.of(st.next());
			int off_cnt = int.Parse(st.next()); /* number of synset offsets */
			idx.SynsetOffsets = new int[off_cnt];
			// The tagged-sense count sits between the offset count and the offsets themselves.
			idx.TaggedSensesCount = int.Parse(st.next());
			for (int j = 0; j < off_cnt; j++)
				idx.SynsetOffsets[j] = int.Parse(st.next());
			return idx;
		}
예제 #2
0
        /// <summary>
        /// Looks up <paramref name="pos"/> in the WordNet index and returns its synonym candidates.
        /// </summary>
        /// <param name="pos">
        /// Word to look up. Its Word is lower-cased in place; when a morphological form is the one
        /// that matches, Word is replaced by that form.
        /// </param>
        /// <param name="includeMorphs">
        /// When true and the word itself has no index entry, the forms produced by
        /// Wnlib.MorphStr are tried in order until one matches.
        /// </param>
        /// <returns>The result of LookupCandidates for the matching entry, or null when nothing matches.</returns>
        public MyWordInfo[] FindSynonyms(ref MyWordInfo pos, bool includeMorphs)
        {
            pos.Word = pos.Word.ToLower();
            Wnlib.PartOfSpeech partOfSpeech = Wnlib.PartOfSpeech.of(pos.Pos);
            Wnlib.Index index = Wnlib.Index.lookup(pos.Word, partOfSpeech);

            if (index == null && includeMorphs)
            {
                Wnlib.MorphStr morphs = new Wnlib.MorphStr(pos.Word, partOfSpeech);
                string morph;
                while ((morph = morphs.next()) != null)
                {
                    index = Wnlib.Index.lookup(morph, partOfSpeech);
                    if (index != null)
                    {
                        // Only adopt the morph once it is known to be in the index; previously a
                        // failed search left the caller's word overwritten with the last rejected morph.
                        pos.Word = morph;
                        break;
                    }
                }
            }

            if (index == null)
            {
                return null;
            }

            return LookupCandidates(index, pos);
        }
예제 #3
0
        /// <summary>
        /// Checks whether <paramref name="word"/>, or one of the forms produced by Wnlib.MorphStr
        /// for it, has an index entry for <paramref name="pos"/>.
        /// </summary>
        /// <param name="word">Word to check; it is lower-cased before the lookup.</param>
        /// <param name="pos">Part of speech to search.</param>
        /// <returns>0 when an index entry was found, -1 otherwise.</returns>
        public static int GetSynsetIndex(string word, PartsOfSpeech pos)
        {
            string lowered = word.ToLower();
            Wnlib.Index entry = Wnlib.Index.lookup(lowered, PartOfSpeech.of(pos));

            if (entry == null)
            {
                // Fall back to the morphological forms, stopping at the first one that is indexed.
                Wnlib.MorphStr baseForms = new Wnlib.MorphStr(lowered, Wnlib.PartOfSpeech.of(pos));
                string candidate;
                while (entry == null && (candidate = baseForms.next()) != null)
                {
                    entry = Wnlib.Index.lookup(candidate, Wnlib.PartOfSpeech.of(pos));
                }
            }

            return entry == null ? -1 : 0;
        }
예제 #4
0
        /// <summary>
        /// Runs Disambiguate on SParseTrees, then collects the definition text and sense number of
        /// every word in NewParseTreeSenses, and finally replaces SParseTrees with NewSParseTrees.
        /// </summary>
        public void beginDisambiguate()
        {
            // Presumably fills NewParseTreeSenses and NewSParseTrees, which are read below — confirm.
            Disambiguate(SParseTrees);
            ///////////////////////////get the text of senses ///////////////////////
            for (int i = 0; i < NewParseTreeSenses.Count; i++)
            {
                MyWordInfo[] mwiArr = (MyWordInfo[])NewParseTreeSenses[i];
                // NOTE(review): pt is assigned but never read in this method.
                ParseTree    pt;
                pt = (ParseTree)NewSParseTrees[i];
                // NOTE(review): called on every outer iteration with the same argument;
                // presumably it (re)fills Stems, which the fallback below reads — confirm.
                AddArrStems(NewSParseTrees);
                for (int j = 0; j < mwiArr.Length; j++)
                {
                    Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)mwiArr[j].Pos);
                    try
                    {
                        // First attempt: look the word up as written (lower-cased).
                        //i need the stems here to get index
                        Wnlib.Index index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                        SynSet      sense = new SynSet(index, mwiArr[j].Sense, null);
                        NodesSenses.Add(sense.defn);
                        SensesNos.Add(mwiArr[j].Sense);

                        string s = mwiArr[j].Word.ToLower() + " : " + sense.defn;
                        DisambRes.Add(s);
                    }
                    catch
                    {
                        // Any failure above (presumably a missing index entry — confirm) falls
                        // back to the stem at the same position j.
                        try
                        {
                            Wnlib.Index index = Wnlib.Index.lookup(Stems[j], p);
                            SynSet      sense = new SynSet(index, mwiArr[j].Sense, null);
                            NodesSenses.Add(sense.defn);
                            SensesNos.Add(mwiArr[j].Sense);

                            string s = Stems[j].ToLower() + " : " + sense.defn;
                            DisambRes.Add(s);
                        }
                        // NOTE(review): a failure of the fallback is swallowed silently, so the
                        // word contributes nothing to NodesSenses, SensesNos or DisambRes.
                        catch
                        { };
                    };
                }
                Senses = NodesSenses;
            }
            //////////////////////////add sense text & sense no to the nodes//////////////////////////////
            AddNodesSenses(NewSParseTrees);
            //////////////////////////put the output parsetrees in SparseTree again//////////////////////////////
            SParseTrees = NewSParseTrees;
        }
예제 #5
0
        /// <summary>
        /// Builds one "OVERVIEW" search for every part of speech in which <paramref name="word"/>
        /// has an index entry. Use when the sense of the word is not known.
        /// </summary>
        /// <param name="word">Word to look up.</param>
        /// <returns>One Wnlib.Search per matching part of speech; empty when none matches.</returns>
        public static List <Search> GetWordNetInfo(string word)
        {
            List <Search> found = new List <Search>();

            // Kept from the original: the record is filled in but not consulted afterwards
            // (the sense-count comparison that used it was disabled).
            WnLexicon.WordInfo wordi = new WnLexicon.WordInfo();
            wordi.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;

            Wnlib.PartsOfSpeech[] allParts = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            wordi.senseCounts = new int[allParts.Length];

            foreach (Wnlib.PartsOfSpeech part in allParts)
            {
                // Unknown is not a searchable part of speech.
                if (part == Wnlib.PartsOfSpeech.Unknown)
                {
                    continue;
                }

                // Skip parts of speech in which the word has no index entry.
                if (Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(part)) == null)
                {
                    continue;
                }

                SearchType overview = new SearchType(false, "OVERVIEW");
                found.Add(new Search(word, true, Wnlib.PartOfSpeech.of(part), overview, 0));
            }

            return found;
        }
예제 #6
0
		/// <summary>
		/// Adds a traced synset to senses for every sense in <paramref name="idx"/>: first the senses
		/// that belong to a group in <paramref name="rellist"/>, group by group, then all remaining
		/// senses. A separator line is appended to buf after each non-empty group and after each
		/// ungrouped sense.
		/// </summary>
		/// <param name="idx">Index entry whose offsets are turned into synsets.</param>
		/// <param name="rellist">Linked list of sense groups; may be null.</param>
		void doRelList(Index idx, RelList rellist)
		{
			// Marks the senses that have already been emitted as part of a group.
			BitSet emitted = new BitSet(300);
			prflag = true;

			for (RelList group = rellist; group != null; group = group.next)
			{
				bool groupHasOutput = false;
				for (int sense = 0; sense < idx.offs.Length; sense++)
				{
					if (!group.senses[sense] || emitted[sense])
						continue;
					groupHasOutput = true;
					addTracedSense(idx, sense);
					emitted[sense] = true;
				}
				if (groupHasOutput)
					buf += "--------------\n";
			}

			// Everything that was not part of any group is emitted on its own.
			for (int sense = 0; sense < idx.offs.Length; sense++)
			{
				if (emitted[sense])
					continue;
				addTracedSense(idx, sense);
				buf += "---------------\n";
			}
		}

		/// <summary>
		/// Builds the synset for sense number <paramref name="sense"/> of <paramref name="idx"/>,
		/// traces its "HYPERPTR" pointers and appends it to senses.
		/// </summary>
		private void addTracedSense(Index idx, int sense)
		{
			SynSet traced = new SynSet(idx.offs[sense], pos, "", this, sense);
			traced.strsns(sense + 1);
			traced.tracePtrs(PointerType.of("HYPERPTR"), pos, 0);
			traced.frames.Clear(); // TDMS 03 Jul 2006 - frames get added in wordnet.cs after filtering
			// TDMS 11 Oct 2005 - build hierarchical results
			senses.Add(traced);
		}
예제 #7
0
		/// <summary>
		/// Records that senses <paramref name="rel1"/> and <paramref name="rel2"/> are relatives.
		/// If either sense is already in a group, the other is added to that group (transitivity)
		/// and any other group containing one of the two senses is merged into it. Otherwise a new
		/// group holding both senses is appended to the list.
		/// </summary>
		/// <param name="idx">Not used by this method.</param>
		/// <param name="rel1">Sense number of the first relative.</param>
		/// <param name="rel2">Sense number of the second relative.</param>
		/// <param name="rellist">Head of the group list; may be null.</param>
		/// <returns>The head of the list: the new group when <paramref name="rellist"/> was null, otherwise <paramref name="rellist"/>.</returns>
		RelList addRelatives(Index idx, int rel1, int rel2, RelList rellist)
		{
			RelList rel, last = null;
			for (rel = rellist; rel != null; rel = rel.next)
			{
				if (rel.senses[rel1] || rel.senses[rel2])
				{
					rel.senses[rel1] = rel.senses[rel2] = true;
					/* If part of another relative group, merge the groups */
					for (RelList r = rellist; r != null; r = r.next)
						// Parenthesized on purpose: && binds tighter than ||, so without the
						// parentheses the r != rel guard did not cover the rel2 test and the
						// group was also merged with itself.
						if (r != rel && (r.senses[rel1] || r.senses[rel2]))
							rel.senses = rel.senses.Or(r.senses);
					return rellist;
				}
				last = rel;
			}

			// Neither sense belongs to a group yet: create one and append it.
			rel = new RelList();
			rel.senses[rel1] = rel.senses[rel2] = true;
			if (rellist == null)
				return rel;
			last.next = rel;
			return rellist;
		}
예제 #8
0
		/// <summary>
		/// Reads every sense of <paramref name="idx"/> and, for each "VERBGROUP" pointer whose target
		/// offset is itself one of the senses of <paramref name="idx"/>, groups the two senses via
		/// addRelatives.
		/// </summary>
		/// <param name="idx">Index entry whose senses are scanned.</param>
		/// <param name="rellist">Existing group list to extend; may be null.</param>
		/// <returns>The (possibly new) head of the group list.</returns>
		RelList findVerbGroups(Index idx, RelList rellist)
		{
			for (int sense = 0; sense < idx.offs.Length; sense++)
			{
				SynSet synset = new SynSet(idx.offs[sense], pos, idx.wd, this, sense);
				foreach (Pointer ptr in synset.ptrs)
				{
					if (ptr.ptp.mnemonic != "VERBGROUP")
						continue;

					// Translate the pointer's offset back into a sense number of this word.
					int target = 0;
					while (target < idx.offs.Length && ptr.off != idx.offs[target])
						target++;

					if (target < idx.offs.Length)
						rellist = addRelatives(idx, sense, target, rellist);
				}
			}
			return rellist;
		}
예제 #9
0
		// TDMS - relatives - synonyms of a verb, grouped by similarity of meaning.
		// Does nothing unless the current part of speech is "verb".
		void relatives(Index idx)
		{
			if (pos.name != "verb")
				return;

			RelList verbGroups = findVerbGroups(idx, null);
			doRelList(idx, verbGroups);
		}
예제 #10
0
		/// <summary>
		/// Creates the synset for one sense of an index entry by delegating to the
		/// (offset, part of speech, word, search, sense) constructor.
		/// </summary>
		/// <param name="idx">Index entry supplying the offset list, part of speech and word.</param>
		/// <param name="sens">Position in idx.offs of the wanted sense; also passed on as the sense number.</param>
		/// <param name="sch">Search passed through to the delegated constructor.</param>
		public SynSet(Index idx, int sens, Search sch)
			: this(idx.offs[sens], idx.pos, idx.wd, sch, sens)
		{
		}
예제 #11
0
        /// <summary>
        /// Runs Disambiguate on SParseTrees, then collects the definition text and sense number of
        /// every word in NewParseTreeSenses (verbs are first reduced via GetINFOfVerb), and finally
        /// replaces SParseTrees with NewSParseTrees.
        /// </summary>
        public void beginDisambiguate()
        {
            // Presumably fills NewParseTreeSenses and NewSParseTrees, which are read below — confirm.
            Disambiguate(SParseTrees);
            ///////////////////////////get the text of senses ///////////////////////
            for (int i = 0; i < NewParseTreeSenses.Count; i++)
            {
                MyWordInfo[] mwiArr = (MyWordInfo[])NewParseTreeSenses[i];
                // NOTE(review): pt is assigned but never read in this method.
                ParseTree    pt;
                pt = (ParseTree)NewSParseTrees[i];
                // NOTE(review): called on every outer iteration with the same argument;
                // presumably it (re)fills Stems, which the fallback below reads — confirm.
                AddArrStems(NewSParseTrees);
                for (int j = 0; j < mwiArr.Length; j++)
                {
                    Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)mwiArr[j].Pos);
                    try
                    {
                        // results stays empty for non-verbs; its Count decides below which
                        // spelling is shown in the result text.
                        ArrayList   results = new ArrayList();
                        Wnlib.Index index;
                        //i need the stems here to get index
                        if (mwiArr[j].Pos == PartsOfSpeech.Verb)
                        {
                            // For verbs, prefer the first form returned by GetINFOfVerb
                            // (presumably the infinitive — confirm) over the word as written.
                            SentenceParser dummysp = new SentenceParser();
                            results = dummysp.GetINFOfVerb(mwiArr[j].Word.ToLower());
                            if (results.Count > 0)
                            {
                                index = Wnlib.Index.lookup((string)results[0], p);
                            }
                            else
                            {
                                index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                            }
                        }

                        else
                        {
                            index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                        }
                        SynSet sense = new SynSet(index, mwiArr[j].Sense, null);
                        NodesSenses.Add(sense.defn);
                        SensesNos.Add(mwiArr[j].Sense);

                        string s;
                        if (results.Count > 0)
                        {
                            s = (string)results[0] + " : " + sense.defn;
                        }

                        else
                        {
                            s = mwiArr[j].Word.ToLower() + " : " + sense.defn;
                        }
                        DisambRes.Add(s);
                    }
                    catch
                    {
                        // Any failure above (presumably a missing index entry — confirm) falls
                        // back to the stem at the same position j.
                        try
                        {
                            Wnlib.Index index = Wnlib.Index.lookup(Stems[j], p);
                            SynSet      sense = new SynSet(index, mwiArr[j].Sense, null);
                            NodesSenses.Add(sense.defn);
                            SensesNos.Add(mwiArr[j].Sense);

                            string s = Stems[j].ToLower() + " : " + sense.defn;
                            DisambRes.Add(s);
                        }
                        // NOTE(review): a failure of the fallback is swallowed silently, so the
                        // word contributes nothing to NodesSenses, SensesNos or DisambRes.
                        catch
                        { };
                    };
                }
                Senses = NodesSenses;
            }
            //////////////////////////add sense text & sense no to the nodes//////////////////////////////
            AddNodesSenses(NewSParseTrees);
            //////////////////////////put the output parsetrees in SparseTree again//////////////////////////////
            SParseTrees = NewSParseTrees;
        }
예제 #12
0
        /// <summary>
        /// Collects synonym candidates for <paramref name="pos"/> from the synsets listed in
        /// <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Index entry of the word; supplies the synset offsets, part of speech and word.</param>
        /// <param name="pos">
        /// Word whose synonyms are wanted. Its Sense is used as a 1-based position in index.offs;
        /// values below 1 fall back to sense 1.
        /// </param>
        /// <returns>
        /// The candidate words, sorted with CompareLexeme when there is more than one. Includes
        /// <paramref name="pos"/> itself when index.wd was not among the collected lexemes.
        /// </returns>
        private MyWordInfo[] LookupCandidates(Wnlib.Index index, MyWordInfo pos)
        {
            // Sense is 1-based. The original only reset negative values, so a sense of 0
            // indexed index.offs[-1] and threw.
            if (pos.Sense < 1)
            {
                pos.Sense = 1;
            }
            int senseSlot   = pos.Sense - 1;
            int senseOffset = index.offs[senseSlot];
            SynSet synset = new Wnlib.SynSet(senseOffset, index.pos, index.wd, null, senseSlot);

            // lexemes[i] was read from the synset whose offset is synIndex[i].
            ArrayList lexemes  = new ArrayList();
            ArrayList synIndex = new ArrayList();

            foreach (Lexeme obj in synset.words)
            {
                lexemes.Add(obj);
                synIndex.Add(senseOffset);
            }

            if (index.offs.Length > 1)
            {
                if (lexemes.Count <= 1)
                {
                    // The chosen sense offers at most one lexeme: widen to every sense of the word.
                    for (int i = 0; i < index.offs.Length; i++)
                    {
                        synset = new Wnlib.SynSet(index.offs[i], index.pos, index.wd, null, i);

                        foreach (Lexeme obj in synset.words)
                        {
                            synIndex.Add(index.offs[i]);
                            lexemes.Add(obj);
                        }
                    }
                }
                else
                {
                    // Otherwise add up to five lexemes of the first sense.
                    synset = new Wnlib.SynSet(index.offs[0], index.pos, index.wd, null, 0);
                    int count = 0;                   //get top most frequency word senses
                    foreach (Lexeme obj in synset.words)
                    {
                        lexemes.Add(obj);
                        synIndex.Add(index.offs[0]);
                        ++count;
                        if (count > 4)
                        {
                            break;
                        }
                    }
                }
            }

            ArrayList sortedSet = new ArrayList();
            Hashtable trace     = new Hashtable();   // lower-cased words already accepted
            int       hasSem    = 0;                 // lexemes given a SemCor record so far

            for (int i = 0; i < lexemes.Count; i++)
            {
                Lexeme word = (Lexeme)lexemes[i];
                word.word = word.word.ToLower();

                int senIndex = (int)synIndex[i];
                if (senIndex != -1 && word.wnsns > 0)
                {
                    word.semcor = new Wnlib.SemCor(word, senIndex);
                    lexemes[i]  = word;
                    ++hasSem;
                }

                // Accept each distinct word once, as long as it has a positive SemCor count
                // or fewer than four SemCor records have been created so far.
                if (!trace.ContainsKey(word.word))
                {
                    if ((word.semcor != null && word.semcor.semcor > 0) || (hasSem < 4))
                    {
                        trace[word.word] = 1;
                        sortedSet.Add(word);
                    }
                }
            }

            Lexeme[] words = (Lexeme[])sortedSet.ToArray(typeof(Lexeme));

            ArrayList candidates = new ArrayList();

            for (int i = 0; i < words.Length; i++)
            {
                string word = words[i].word.Replace("_", " ");
                // Skip words whose first character sorts at or before 'Z'
                // (digits, punctuation, upper-case letters).
                if (word[0] <= 'Z')
                {
                    continue;
                }

                MyWordInfo newpos = new MyWordInfo(word, pos.Pos);
                newpos.Sense = words[i].wnsns;
                if (words[i].semcor != null)
                {
                    newpos.Frequency = words[i].semcor.semcor;
                }
                else
                {
                    newpos.Frequency = 0;
                }

                candidates.Add(newpos);
            }

            // Make sure the looked-up word itself is represented.
            if (!trace.ContainsKey(index.wd))
            {
                candidates.Add(pos);
            }

            if (candidates.Count > 1)
            {
                CompareLexeme comparer = new CompareLexeme();
                candidates.Sort(comparer);
            }

            return (MyWordInfo[])candidates.ToArray(typeof(MyWordInfo));
        }
예제 #13
0
        /// <summary>
        /// Reads every concept name from AllConcepts.txt, maps each concept to WordNet senses and
        /// serializes the collected WordOlogy entries to wordology.txt with a BinaryFormatter.
        /// Concepts with an "ENGLISH1" property are mapped through WordSenseDisambiguator; the others
        /// are looked up directly under their own name as noun, verb, adjective and adverb.
        /// Three message boxes with counters are shown at the end.
        /// </summary>
        public void ConstructMapping()
        {
            string    concept        = "";
            // NOTE(review): word is never assigned anything but "" in this method, yet it is
            // copied into WO.Word below.
            string    word           = "";
            int       ID             = -1;
            // NOTE(review): senseNo, Sense and Pos are declared but never used.
            string    senseNo        = "";
            string    Sense          = "";
            string    Pos            = "";
            // NOTE(review): this single WO instance is mutated and added to wordologyArr many
            // times; if WordOlogy is a class, every list entry refers to the same object — confirm.
            WordOlogy WO             = new WordOlogy();
            ArrayList wordologyArr   = new ArrayList();
            int       conceptcounter = 0;

            LoadOntology();
            // NOTE(review): neither stream is wrapped in using/try-finally, so an exception
            // before the Close() calls at the end leaves them open.
            FileStream   allConceptsFile       = new FileStream(_ontologyDirectoryPath + @"\AllConcepts.txt", FileMode.Open);
            StreamReader allConceptsFileReader = new StreamReader(allConceptsFile);

            string _wordologyDirectoryPath =
                @"..\..\..\wordology\";

            // NOTE(review): BinaryFormatter is flagged as insecure by Microsoft and removed in
            // .NET 9; keep in mind if this code is ever retargeted.
            BinaryFormatter bf = new BinaryFormatter();
            FileStream      fs = new FileStream(
                _wordologyDirectoryPath + "\\wordology.txt", FileMode.Create);
            int indxWatcherconceptCounter = 0;
            int NoMapLexConcepts          = 0;
            int CannotGetSenseExeption    = 0;
            int AllSensesMapped           = 0;

            while ((concept = allConceptsFileReader.ReadLine()) != null)
            {
                indxWatcherconceptCounter++;
                // NOTE(review): Conceptpath is computed but never used.
                string   Conceptpath    = _ontologyDirectoryPath + @"\" + concept[0] + @"\" + concept;
                Concept  C              = (Concept)Onto[concept];
                Property maplexProperty = C.FullProperties["ENGLISH1"];

                List <MyWordInfo> maplexsenses = new List <MyWordInfo>();
                MyWordInfo        mwi          = new MyWordInfo();
                int NoOfSensesSucceeded        = 0;
                if (maplexProperty != null)
                {
                    // Step 1: turn each filler of the form "<word parts>-<pos tag>" into a MyWordInfo.
                    for (int i = 0; i < maplexProperty.Fillers.Count; i++)
                    {
                        string   tmp   = maplexProperty.Fillers[i].ScalarFiller;
                        char[]   charr = new char[] { '-', '_' };
                        string[] splt  = tmp.Split(charr);
                        // Some fillers have no type suffix, e.g. "a-bomb".

                        if (splt.Length > 1)
                        {
                            mwi = new MyWordInfo();
                            // Everything before the last part is the word; parts are re-joined with spaces.
                            for (int k = 0; k < splt.Length - 2; k++)
                            {
                                mwi.Word += splt[k] + " ";
                            }
                            mwi.Word += splt[splt.Length - 2];
                            // A two-character last part is treated as a part-of-speech tag whose
                            // first character selects the part of speech.
                            if (splt[splt.Length - 1].Length == 2)
                            {
                                if (splt[splt.Length - 1][0] == 'v')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Verb;
                                }
                                else if (splt[splt.Length - 1][0] == 'n')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Noun;
                                }
                                else if (splt[splt.Length - 1][0] == 'a')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Adj;
                                }
                                else if (splt[splt.Length - 1][0] == 'r')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Adv;
                                }
                                else
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                                }
                            }
                            else
                            {
                                // No tag: the last part belongs to the word itself.
                                mwi.Pos   = Wnlib.PartsOfSpeech.Unknown;
                                mwi.Word += " " + splt[splt.Length - 1];
                            }
                            // Skip an entry that repeats the word and part of speech of the previous one.
                            // NOTE(review): when the first filler (i == 0) is skipped by the
                            // splt.Length check, maplexsenses stays empty and this condition can
                            // never become true for the later fillers — confirm this is intended.
                            if (i == 0 || (maplexsenses.Count > 0 && (mwi.Word != maplexsenses[maplexsenses.Count - 1].Word || mwi.Pos != maplexsenses[maplexsenses.Count - 1].Pos)))
                            {
                                maplexsenses.Add(mwi);
                            }
                        }
                        // We loop over the whole ontology.
                    }


                    // Step 2: disambiguate the collected words and record one entry per result.
                    if (maplexsenses.Count > 0)
                    {
                        MyWordInfo[] maplexArray = new MyWordInfo[maplexsenses.Count];
                        for (int j = 0; j < maplexsenses.Count; j++)
                        {
                            maplexArray[j] = maplexsenses[j];
                        }
                        WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                        MyWordInfo[]           res = new MyWordInfo[maplexArray.Length];
                        res = wsd.Disambiguate(maplexArray);
                        int i = 0;

                        foreach (MyWordInfo wi in res)
                        {
                            // NOTE(review): i counts results, but step 1 may have skipped
                            // fillers, so Fillers[i] is not necessarily the filler that
                            // produced res[i] — confirm.
                            string   tmp   = maplexProperty.Fillers[i].ScalarFiller;
                            char[]   charr = new char[] { '-', '_' };
                            string[] splt  = tmp.Split(charr);

                            if (splt.Length > 1 && splt[splt.Length - 1].Length == 2)
                            {
                                WO.SenseNo = splt[splt.Length - 1];
                            }
                            else
                            {
                                // "sense doesn't have POS";
                            }

                            Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);

                            try
                            {
                                Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                                SynSet      sense = new SynSet(index, res[i].Sense, null);
                                WO.Sense = sense.defn;
                                AllSensesMapped++;
                                NoOfSensesSucceeded++;
                                try
                                {
                                    WO.Pos = p.name;
                                }
                                catch
                                {
                                    WO.Pos = wi.Pos.ToString();
                                }
                                ID++;
                                WO.Word    = wi.Word;
                                WO.ID      = ID;
                                WO.Concept = concept;
                                // NOTE(review): overwrites the WO.Word set three lines above with
                                // the local word, which is still "" — looks unintended.
                                WO.Word    = word;
                            }
                            // NOTE(review): every failure of the lookup or synset construction is
                            // swallowed here, and WO is still added to the list below.
                            catch
                            {
                            };
                            if (NoOfSensesSucceeded == 0)
                            {
                                CannotGetSenseExeption++;
                            }
                            i++;
                            // bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                        }
                        conceptcounter++;
                    }
                }
                else
                {
                    NoMapLexConcepts++;

                    // No "ENGLISH1" property: look the concept name up directly, once per part
                    // of speech, and record every sense for every relatedness option.
                    // NOTE(review): here WO.ID is assigned before ID is incremented, whereas the
                    // branch above increments first — the numbering is inconsistent.


                    Wnlib.Index        index;
                    // NOTE(review): p is declared but never used in this branch.
                    Wnlib.PartOfSpeech p;
                    Search             se;

                    // --- noun ---
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Noun));
                        if (index != null)
                        {
                            WO.Pos = "noun";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("noun"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.Sense   = sense.defn;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    // NOTE(review): failures are swallowed silently (same for the three blocks below).
                    catch
                    { }
                    // --- verb ---
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Verb));
                        if (index != null)
                        {
                            WO.Pos = "verb";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Verb);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("verb"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    // --- adjective ---
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adj));
                        if (index != null)
                        {
                            WO.Pos = "adj";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Adj);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("adj"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    // --- adverb ---
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adv));
                        if (index != null)
                        {
                            WO.Pos = "adv";
                            // NOTE(review): the adverb branch asks for the Noun relatedness
                            // options, unlike the other three branches — possibly a copy/paste
                            // slip; confirm before changing.
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("adv"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }


                    if (NoOfSensesSucceeded != 0)
                    {
                        conceptcounter++;
                    }
                }
            }//end while
            allConceptsFileReader.Close();
            allConceptsFile.Close();
            // The whole list is written in a single Serialize call.
            bf.Serialize(fs, wordologyArr);
            fs.Close();
            MessageBox.Show("no map-lex concepts number = " + NoMapLexConcepts.ToString());
            MessageBox.Show("can't getsense pos number = " + CannotGetSenseExeption.ToString());
            MessageBox.Show(conceptcounter.ToString());
        }
예제 #14
0
        /// <summary>
        /// Maps a concept to WordNet senses using the fillers of its MapLex property.
        /// Each filler is parsed into a word / part-of-speech pair, the pairs are passed
        /// to <c>WordSenseDisambiguator.Disambiguate</c>, and every result whose sense
        /// can be resolved is appended to <c>wordologyArr</c>.
        /// </summary>
        /// <param name="concept">Concept name stored in each record that is produced.</param>
        /// <param name="maplexProperty">Property whose <c>Fillers</c> hold the MapLex strings.</param>
        private void MapConceptsWithMapLex(string concept, Property maplexProperty)
        {
            // NOTE(review): this single WO instance is added on every iteration below;
            // if WordOlogy is a class, every added entry aliases the same object - confirm
            // that it is a value type.
            WordOlogy WO = new WordOlogy();

            List<MyWordInfo> maplexsenses = new List<MyWordInfo>();
            // Parallel to maplexsenses: the two-character suffix of the filler that
            // produced each entry, or null when that filler had no such suffix.
            List<string> senseSuffixes = new List<string>();
            int NoOfSensesSucceeded = 0;

            for (int i = 0; i < maplexProperty.Fillers.Count; i++)
            {
                MyWordInfo mwi;
                string suffix;
                if (!TryParseMaplexFiller(maplexProperty.Fillers[i].ScalarFiller, out mwi, out suffix))
                {
                    continue;
                }

                // Keep the first parsable filler and then only entries that differ from
                // the previously kept one. Testing Count == 0 (rather than i == 0) means an
                // unparsable first filler no longer causes every later filler to be dropped.
                bool differsFromLast = maplexsenses.Count == 0
                    || mwi.Word != maplexsenses[maplexsenses.Count - 1].Word
                    || mwi.Pos != maplexsenses[maplexsenses.Count - 1].Pos;
                if (differsFromLast)
                {
                    maplexsenses.Add(mwi);
                    senseSuffixes.Add(suffix);
                }
            }

            if (maplexsenses.Count == 0)
            {
                return;
            }

            WordSenseDisambiguator wsd = new WordSenseDisambiguator();
            MyWordInfo[] res = wsd.Disambiguate(maplexsenses.ToArray());

            for (int i = 0; i < res.Length; i++)
            {
                MyWordInfo wi = res[i];

                // Use the suffix recorded for this entry. Indexing Fillers by the result
                // position is wrong as soon as a filler was skipped or deduplicated above.
                // When the entry has no suffix, SenseNo keeps its previous value.
                if (i < senseSuffixes.Count && senseSuffixes[i] != null)
                {
                    WO.SenseNo = senseSuffixes[i];
                }

                Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);

                try
                {
                    Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                    if (index != null)
                    {
                        SynSet sense = new SynSet(index, wi.Sense, null);
                        WO.Sense = sense.defn;
                        NoOfSensesSucceeded++;
                        WO.Pos = p != null ? p.name : wi.Pos.ToString();
                        ID++;
                        WO.Word    = wi.Word;
                        WO.ID      = ID;
                        WO.Concept = concept;
                        wordologyArr.Add(WO);
                    }
                }
                catch
                {
                    // Deliberately best-effort: a word whose sense cannot be resolved
                    // (e.g. lookup failure or sense index out of range) is skipped and
                    // is accounted for by the CannotGetSenseExeption counter below.
                }

                if (NoOfSensesSucceeded == 0)
                {
                    CannotGetSenseExeption++;
                }
            }

            conceptcounter++;
        }

        // Splits a MapLex filler on '-' and '_' into a word and a part of speech.
        // Returns false when the filler has no separator (or is null). senseSuffix
        // receives the trailing part when it is exactly two characters long, else null.
        private static bool TryParseMaplexFiller(string filler, out MyWordInfo info, out string senseSuffix)
        {
            info = default(MyWordInfo);
            senseSuffix = null;

            if (filler == null)
            {
                return false;
            }

            string[] parts = filler.Split('-', '_');
            if (parts.Length < 2)
            {
                // Some fillers carry no type part at all.
                return false;
            }

            string last = parts[parts.Length - 1];

            info = new MyWordInfo();
            // Everything before the last part forms the word, joined with spaces.
            info.Word += string.Join(" ", parts, 0, parts.Length - 1);

            if (last.Length == 2)
            {
                senseSuffix = last;
                switch (last[0])
                {
                    case 'v':
                        info.Pos = Wnlib.PartsOfSpeech.Verb;
                        break;
                    case 'n':
                        info.Pos = Wnlib.PartsOfSpeech.Noun;
                        break;
                    case 'a':
                        info.Pos = Wnlib.PartsOfSpeech.Adj;
                        break;
                    case 'r':
                        info.Pos = Wnlib.PartsOfSpeech.Adv;
                        break;
                    default:
                        info.Pos = Wnlib.PartsOfSpeech.Unknown;
                        break;
                }
            }
            else
            {
                // The last part is not a type tag (e.g. "a-bomb"): it belongs to the word.
                info.Pos   = Wnlib.PartsOfSpeech.Unknown;
                info.Word += " " + last;
            }

            return true;
        }
예제 #15
0
		/// <summary>
		/// Builds the synset for one sense of an index entry by delegating to the
		/// constructor overload that takes (offset, part of speech, word, search, sense).
		/// </summary>
		/// <param name="idx">Index entry that supplies the synset offsets, part of speech and word.</param>
		/// <param name="sens">Zero-based position in <c>idx.SynsetOffsets</c>; also forwarded unchanged as the last argument.</param>
		/// <param name="sch">Search object forwarded to the delegated constructor; some callers pass null.</param>
		public SynSet(Index idx, int sens, Search sch)
			: this(idx.SynsetOffsets[sens], idx.PartOfSpeech, idx.Wd, sch, sens)
		{
		}
예제 #16
0
		/// <summary>
		/// Collects candidate words for an index entry from the lexemes of its synsets.
		/// Lexemes are taken from the synset selected by <c>pos.Sense</c>, plus either
		/// every synset of the entry (when the selected one yielded at most one lexeme)
		/// or up to five lexemes of the first synset. Duplicates (by lower-cased word)
		/// are removed, and the result is sorted with <c>CompareLexeme</c>.
		/// </summary>
		/// <param name="index">Index entry whose synset offsets, part of speech and word are used.</param>
		/// <param name="pos">
		/// Word being looked up. <c>pos.Sense</c> is used as a 1-based sense number; a value
		/// outside 1..number of synset offsets is replaced by 1 (the object is modified).
		/// </param>
		/// <returns>The candidates, including <paramref name="pos"/> itself when its word was not among the kept lexemes.</returns>
		private static MyWordInfo[] LookupCandidates(Index index, MyWordInfo pos )
		{
			// The original guard only caught negative values, so Sense == 0 indexed
			// SynsetOffsets[-1] and a too-large sense ran past the end of the array.
			if (pos.Sense < 1 || pos.Sense > index.SynsetOffsets.Length)
			{
				pos.Sense = 1;
			}

			int senseIdx = pos.Sense - 1;
			int primaryOffset = index.SynsetOffsets[senseIdx];
			SynSet synset = new SynSet(primaryOffset, index.PartOfSpeech, index.Wd, null, senseIdx);

			// lexemes[i] was read from the synset whose offset is synIndex[i].
			ArrayList lexemes = new ArrayList();
			ArrayList synIndex = new ArrayList();

			foreach (Lexeme obj in synset.words)
			{
				lexemes.Add(obj);
				synIndex.Add(primaryOffset);
			}

			if (index.SynsetOffsets.Length > 1)
			{
				if (lexemes.Count <= 1)
				{
					// The selected sense gave (almost) nothing: widen to every sense.
					for (int i = 0; i < index.SynsetOffsets.Length; i++)
					{
						synset = new SynSet(index.SynsetOffsets[i], index.PartOfSpeech, index.Wd, null, i);

						foreach (Lexeme obj in synset.words)
						{
							synIndex.Add(index.SynsetOffsets[i]);
							lexemes.Add(obj);
						}
					}
				}
				else
				{
					// Otherwise add at most five lexemes of the first sense.
					synset = new SynSet(index.SynsetOffsets[0], index.PartOfSpeech, index.Wd, null, 0);
					int count = 0;
					foreach (Lexeme obj in synset.words)
					{
						lexemes.Add(obj);
						synIndex.Add(index.SynsetOffsets[0]);
						++count;
						if (count > 4)
						{
							break;
						}
					}
				}
			}

			ArrayList sortedSet = new ArrayList();
			Hashtable trace = new Hashtable();	// lower-cased words already kept
			int hasSem = 0;						// lexemes given a SemCor entry so far
			for (int i = 0; i < lexemes.Count; i++)
			{
				Lexeme word = (Lexeme)lexemes[i];
				word.word = word.word.ToLower();

				int senIndex = (int)synIndex[i];
				if (senIndex != -1 && word.wnsns > 0)
				{
					word.semcor = new Wnlib.SemCor(word, senIndex);
					lexemes[i] = word;
					++hasSem;
				}

				// Keep a word once: always when its SemCor count is positive, otherwise
				// only while fewer than four SemCor-tagged lexemes have been seen.
				bool keep = (word.semcor != null && word.semcor.semcor > 0) || (hasSem < 4);
				if (!trace.ContainsKey(word.word) && keep)
				{
					trace[word.word] = 1;
					sortedSet.Add(word);
				}
			}

			ArrayList candidates = new ArrayList();

			foreach (Lexeme lex in sortedSet)
			{
				string word = lex.word.Replace("_", " ");

				// Skip empty words and words whose first character sorts at or before 'Z'
				// (the words were lower-cased above, so this drops digits/punctuation).
				if (word.Length == 0 || word[0] <= 'Z')
				{
					continue;
				}

				MyWordInfo newpos = new MyWordInfo(word, pos.Pos);
				newpos.Sense = lex.wnsns;
				if (lex.semcor != null)
				{
					newpos.Frequency = lex.semcor.semcor;
				}
				else
				{
					newpos.Frequency = 0;
				}

				candidates.Add(newpos);
			}

			// Make sure the looked-up word itself is represented.
			if (!trace.ContainsKey(index.Wd))
			{
				candidates.Add(pos);
			}

			if (candidates.Count > 1)
			{
				candidates.Sort(new CompareLexeme());
			}

			return (MyWordInfo[])candidates.ToArray(typeof(MyWordInfo));
		}