Ejemplo n.º 1
0
        /// <summary>
        /// Click handler that replaces the contents of the synset list with the synsets related to the
        /// currently selected synset through the currently selected semantic relation.
        /// </summary>
        /// <param name="sender">Event source (not used)</param>
        /// <param name="e">Event data (not used)</param>
        private void getRelatedSynSets_Click(object sender, EventArgs e)
        {
            SynSet origin = synSets.SelectedItem as SynSet;

            // both a synset and a relation must be selected, otherwise there is nothing to show
            if (origin == null || semanticRelations.SelectedIndex == -1)
            {
                return;
            }

            synSets.Items.Clear();

            // the relation name is whatever precedes the first ':' in the selected relation entry
            string relationName = semanticRelations.SelectedItem.ToString().Split(':')[0].Trim();
            var relation = (WordNetEngine.SynSetRelation)Enum.Parse(typeof(WordNetEngine.SynSetRelation), relationName);

            // list every synset reachable through that relation
            foreach (SynSet related in origin.GetRelatedSynSets(relation, false))
            {
                synSets.Items.Add(related);
            }

            // pre-select the first result when there is one
            if (synSets.Items.Count > 0)
            {
                synSets.SelectedIndex = 0;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses a word index line into synset shells. A synset shell is a SynSet instance with only the POS and
        /// Offset members initialized, which is enough to look up the full synset in the corresponding data file.
        /// The method is static to prevent inadvertent references to a current WordNetEngine; the WordNet
        /// instance to use is passed explicitly.
        /// </summary>
        /// <param name="wordIndexLine">Word index line from which to get synset shells</param>
        /// <param name="pos">POS of the given index line</param>
        /// <param name="mostCommonSynSet">Receives the most common synset for the word (the first offset listed on the line)</param>
        /// <param name="wordNet">The WordNet instance</param>
        /// <returns>Synset shells for the given index line, ordered from the last offset on the line to the first</returns>
        /// <exception cref="System.Exception">Failed to get most common synset</exception>
        internal static List <SynSet> GetSynSetShells(string wordIndexLine, WordNetPos pos, out SynSet mostCommonSynSet, WordNet wordNet)
        {
            mostCommonSynSet = null;

            var shells = new List <SynSet>();

            // the third space-separated field holds the number of synsets; the offsets are the trailing fields
            string[] fields            = wordIndexLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            int      declaredCount     = int.Parse(fields[2]);
            int      lowestOffsetField = fields.Length - declaredCount;

            // walk the offset fields from the end of the line back to the first offset
            int cursor = fields.Length;

            while (--cursor >= lowestOffsetField)
            {
                shells.Add(new SynSet(pos, int.Parse(fields[cursor]), wordNet));
            }

            // no shell was created, so there is no most common synset to report
            if (shells.Count == 0)
            {
                throw new Exception("Failed to get most common synset");
            }

            // offsets were read in reverse, so the shell added last belongs to the first offset on the line
            mostCommonSynSet = shells[shells.Count - 1];

            return shells;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Selects the synset for a word. A glossary entry for the word takes precedence when the part of speech
        /// maps to a WordNet POS; in every other case the base class selection is returned.
        /// </summary>
        /// <param name="word">Word to select a synset for</param>
        /// <param name="pos">Part of speech of the word</param>
        /// <returns>The glossary synset when the word is in the glossary, otherwise the base selection</returns>
        protected override SynSet SelectSynset(string word, POS pos)
        {
            // temporary: the base selection is the answer whenever nothing better is found below
            SynSet fallback = base.SelectSynset(word, pos);

            WordNetEngine.POS wordnetPos = pos.ForWordnet();
            if (wordnetPos == WordNetEngine.POS.None)
            {
                return(fallback);
            }

            IGlossaryEntry glossEntry = glossary.FindWord(word);
            if (glossEntry != null)
            {
                return(glossEntry.Synset);
            }

            // placeholder: the candidate synsets are fetched and enumerated, but no selection logic exists yet
            foreach (SynSet candidate in wordnet.GetSynSets(word, wordnetPos))
            {
            }

            return(fallback);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the synset offsets of a word index line and wraps each in a synset shell. A synset shell is an
        /// instance of SynSet with only the POS and Offset members initialized; these are enough to look up the
        /// full synset within the corresponding data file. The method is static to prevent inadvertent references
        /// to a current WordNetEngine, which should be passed via the corresponding parameter.
        /// </summary>
        /// <param name="wordIndexLine">Word index line from which to get synset shells</param>
        /// <param name="pos">POS of the given index line</param>
        /// <param name="mostCommonSynSet">Returns the most common synset for the word</param>
        /// <param name="wordNet">The WordNet instance</param>
        /// <returns>Synset shells for the given index line, last offset first</returns>
        /// <exception cref="System.Exception">Failed to get most common synset</exception>
        internal static List<SynSet> GetSynSetShells(string wordIndexLine, WordNetPos pos, out SynSet mostCommonSynSet, WordNet wordNet) {
            mostCommonSynSet = null;
            List<SynSet> shells = new List<SynSet>();

            // field 2 of the line is the synset count; the offsets occupy that many fields at the end of the line
            string[] tokens = wordIndexLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            int shellCount = int.Parse(tokens[2]);
            int stopIndex = tokens.Length - shellCount;

            // build the shells from the last offset down to the first, remembering the one built most recently
            SynSet newest = null;
            for (int position = tokens.Length - 1; position >= stopIndex; position--) {
                newest = new SynSet(pos, int.Parse(tokens[position]), wordNet);
                shells.Add(newest);
            }

            // nothing was built: the line listed no offsets
            if (newest == null) {
                throw new Exception("Failed to get most common synset");
            }

            // the shell built last corresponds to the first offset on the line, i.e. the most common synset
            mostCommonSynSet = newest;
            return shells;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Click handler: for every pairing of a start-word synset with a destination-word synset, asks for the
        /// closest synset reachable from both through hypernym links and lists the words of each hit in LinkBox.
        /// LinkBox shows "false" when there are no start synsets or no pairing produced a hit.
        /// </summary>
        /// <param name="sender">Event source (not used)</param>
        /// <param name="e">Event data (not used)</param>
        private void findLCS_Click(object sender, EventArgs e)
        {
            LinkBox.Items.Clear();

            // retrieve the synsets of the start word; any failure is reported to the user and aborts the search
            Set <SynSet> startSynSets = null;
            try
            {
                startSynSets = _wordNetEngine.GetSynSets(StartWord.Text, (WordNetEngine.POS)pos.SelectedItem);
            }
            catch (Exception)
            {
                MessageBox.Show("Invalid Start SynSet ID");
                return;
            }

            // same for the destination word
            Set <SynSet> destSynSets = null;
            try
            {
                destSynSets = _wordNetEngine.GetSynSets(DestWord.Text, (WordNetEngine.POS)pos.SelectedItem);
            }
            catch (Exception)
            {
                MessageBox.Show("Invalid Dest SynSet ID");
                return;
            }

            if (!(startSynSets.Count > 0))
            {
                LinkBox.Text = "false";
                return;
            }

            // only hypernym links are followed
            var relations = new[] { WordNetEngine.SynSetRelation.Hypernym };
            bool anyFound = false;

            foreach (SynSet source in startSynSets)
            {
                foreach (SynSet destination in destSynSets)
                {
                    // NOTE(review): the shortest path is computed but its result is never used; the call is
                    // kept so that behavior stays exactly as before — confirm whether it can be dropped
                    source.GetShortestPathTo(destination, relations);

                    SynSet common = source.GetClosestMutuallyReachableSynset(destination, relations);
                    if (common == null)
                    {
                        continue;
                    }

                    // render the synset as "{word1, word2, ...}"
                    StringBuilder label = new StringBuilder("{");
                    bool firstWord = true;
                    foreach (string word in common.Words)
                    {
                        if (!firstWord)
                        {
                            label.Append(", ");
                        }

                        label.Append(word);
                        firstWord = false;
                    }
                    label.Append("}");

                    string text = label.ToString();
                    LinkBox.Items.Add(text);
                    LinkBox.Text = text;
                    anyFound = true;
                }
            }

            if (!anyFound)
            {
                LinkBox.Text = "false";
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Creates an instance from a synset, a gloss text and an index.
 /// </summary>
 /// <param name="synSet">Value stored in <c>SynSet</c></param>
 /// <param name="gloss">Value stored in <c>Gloss</c></param>
 /// <param name="index">Value stored in <c>Index</c></param>
 public SynsetWithGloss(SynSet synSet, string gloss, int index)
 {
     this.SynSet = synSet;
     this.Gloss = gloss;
     this.Index = index;
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds the list of candidate words for a word: the lexemes of the synset for the requested sense,
        /// extended with lexemes of other synsets of the same index entry, de-duplicated by lower-cased word
        /// text, converted to MyWordInfo and sorted with CompareLexeme.
        /// </summary>
        /// <param name="index">Index entry of the word; supplies the synset offsets, part of speech and word text</param>
        /// <param name="pos">Word info whose Sense selects the starting synset and whose Pos is copied to every candidate</param>
        /// <returns>The candidates as an array; <paramref name="pos"/> itself is included when the index word was not among the kept lexemes</returns>
        private static MyWordInfo[] LookupCandidates(Index index, MyWordInfo pos)
        {
            // a negative sense number is replaced by sense 1
            // NOTE(review): a sense of 0 is not adjusted and leads to SynsetOffsets[-1] below — confirm callers never pass 0
            if (pos.Sense < 0)
            {
                pos.Sense = 1;
            }
            // synset of the requested sense (senses are 1-based, offsets are 0-based)
            SynSet synset = new Wnlib.SynSet(index.SynsetOffsets[pos.Sense - 1], index.PartOfSpeech, index.Wd, null, pos.Sense - 1);

            // parallel lists: lexemes[i] was read from the synset whose offset is synIndex[i]
            ArrayList lexemes  = new ArrayList();
            ArrayList synIndex = new ArrayList();

            foreach (Lexeme obj in synset.words)
            {
                lexemes.Add(obj);
                synIndex.Add(index.SynsetOffsets[pos.Sense - 1]);
            }

            // when the word has more than one synset, widen the pool of lexemes
            if (index.SynsetOffsets.Length > 1)
            {
                if (lexemes.Count <= 1)
                {
                    // at most one lexeme so far: add the lexemes of every synset of the word
                    // (the starting synset is visited again; repeated words are filtered by 'trace' further down)
                    for (int i = 0; i < index.SynsetOffsets.Length; i++)
                    {
                        synset = new SynSet(index.SynsetOffsets[i], index.PartOfSpeech, index.Wd, null, i);

                        foreach (Lexeme obj in synset.words)
                        {
                            synIndex.Add(index.SynsetOffsets[i]);
                            lexemes.Add(obj);
                        }
                    }
                }
                else
                {
                    // otherwise add at most five lexemes from the first synset only
                    synset = new SynSet(index.SynsetOffsets[0], index.PartOfSpeech, index.Wd, null, 0);
                    int count = 0;                   //get top most frequency word senses
                    foreach (Lexeme obj in synset.words)
                    {
                        lexemes.Add(obj);
                        synIndex.Add(index.SynsetOffsets[0]);
                        ++count;
                        if (count > 4)
                        {
                            break;
                        }
                    }
                }
            }

            // sortedSet: lexemes that are kept; trace: lower-cased words already kept;
            // hasSem: how many lexemes have been given a SemCor object so far
            ArrayList sortedSet = new ArrayList();
            Hashtable trace     = new Hashtable();
            int       hasSem    = 0;

            for (int i = 0; i < lexemes.Count; i++)
            {
                Lexeme word = (Lexeme)lexemes[i];
                // the word text is lower-cased in place
                word.word = word.word.ToLower();

                int senIndex = (int)synIndex[i];
                if (senIndex != -1 && word.wnsns > 0)
                {
                    // attach SemCor data for this lexeme and write the lexeme back into the list
                    word.semcor = new Wnlib.SemCor(word, senIndex);
                    lexemes[i]  = word;
                    ++hasSem;
                }

                // keep each distinct word once, and only if it has a positive semcor value
                // or fewer than four lexemes have received SemCor data so far
                if (!trace.ContainsKey(word.word))
                {
                    if ((word.semcor != null && word.semcor.semcor > 0) || (hasSem < 4))
                    {
                        trace[word.word] = 1;
                        sortedSet.Add(word);
                    }
                }
                // apparently the remains of a removed try/catch; the empty block below does nothing
                {}
            }

            var words = (Lexeme[])sortedSet.ToArray(typeof(Lexeme));

            ArrayList candidates = new ArrayList();

            // convert the kept lexemes into MyWordInfo candidates
            for (int i = 0; i < words.Length; i++)
            {
                string word = words[i].word.Replace("_", " ");
                // skip words whose first character sorts at or before 'Z' (upper-case letters, digits, most ASCII punctuation)
                // NOTE(review): word[0] throws for an empty word — confirm lexeme text is never empty
                if (word[0] <= 'Z')
                {
                    continue;
                }

                MyWordInfo newpos = new MyWordInfo(word, pos.Pos);
                newpos.Sense = words[i].wnsns;
                // frequency comes from the SemCor data when present, otherwise 0
                if (words[i].semcor != null)
                {
                    newpos.Frequency = words[i].semcor.semcor;
                }
                else
                {
                    newpos.Frequency = 0;
                }

                candidates.Add(newpos);
            }

            // make sure the looked-up word itself is represented when it was not among the kept lexemes
            if (!trace.ContainsKey(index.Wd))
            {
                candidates.Add(pos);
            }

            // order the candidates with CompareLexeme (sorting is skipped for zero or one entry)
            if (candidates.Count > 1)
            {
                CompareLexeme comparer = new CompareLexeme();
                candidates.Sort(comparer);
            }


            return((MyWordInfo[])candidates.ToArray(typeof(MyWordInfo)));
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Walks a parse tree depth-first and fills in Sense and SenseNo on leaf nodes. Nodes with goal "PPN"
        /// get the sense of "Number" or "proper_name"; nodes of the listed noun, verb, adjective and adverb
        /// goals take the next pending entry of NodesSenses / SensesNos when the node's word matches the word
        /// of the current DisambRes entry.
        /// </summary>
        /// <param name="parsetree">Tree the node belongs to; used to obtain the word text of a node</param>
        /// <param name="node">Node to process; its children are processed recursively</param>
        /// <param name="wordinfoArr">Passed through the recursion; not read or written in this method</param>
        /// <param name="j">Index of the current DisambRes entry; incremented whenever a pending sense is consumed</param>
        private void FillSenses(ParseTree parsetree, ParseNode node, ref ArrayList wordinfoArr, ref int j)
        {
            if (node.Children != null)
            {
                // inner node: recurse into every child in order
                for (int i = 0; i < node.Children.Count; i++)
                {
                    ParseNode pn = (ParseNode)node.Children[i];
                    FillSenses(parsetree, pn, ref wordinfoArr, ref j);
                }
            }
            else
            {
                // leaf node
                SentenceParser dummysp = new SentenceParser();
                string         str     = node.Goal;
                if (str == "PPN")
                {
                    // "PPN" leaves are not matched against DisambRes: text that parses as a number gets the
                    // sense of the noun "Number", anything else the sense of the noun "proper_name"
                    WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                    double score;
                    double num;
                    if (double.TryParse(node.Text, out num))
                    {
                        MyWordInfo[] ret = wsd.MMG_Disambiguate(new MyWordInfo[] { new MyWordInfo("Number", PartsOfSpeech.Noun) }, out score);
                        PartOfSpeech pos = PartOfSpeech.of(PartsOfSpeech.Noun);

                        Index  index = Wnlib.Index.lookup(ret[0].Word.ToLower(), pos);
                        SynSet sense = new SynSet(index, ret[0].Sense, null);

                        node.Sense   = sense.defn;
                        node.SenseNo = ret[0].Sense;
                    }
                    else
                    {
                        MyWordInfo[] ret = wsd.MMG_Disambiguate(new MyWordInfo[] { new MyWordInfo("proper_name", PartsOfSpeech.Noun) }, out score);
                        PartOfSpeech pos = PartOfSpeech.of(PartsOfSpeech.Noun);

                        Index  index = Wnlib.Index.lookup(ret[0].Word.ToLower(), pos);
                        SynSet sense = new SynSet(index, ret[0].Sense, null);

                        node.Sense   = sense.defn;
                        node.SenseNo = ret[0].Sense;
                    }
                }
                else
                if (str == "N" || str.Contains("NPP") || (str == "VING") || str.Contains("PPJ") || (str == "VPSP") || (str == "BE1") || (str == "BE2") || (str == "V") || str.Contains("CPADJ") || str.Contains("ADJ") || str.Contains("PADV") || str.Contains("ADV") || str == "VINF")
                {
                    // original note: "ta3deelat 5/7" (presumably "modifications 5/7")
                    // spltstr[0] is the part of the current DisambRes entry before the first ':'
                    string [] spltstr  = DisambRes[j].ToString().Split(':');
                    // the three initial values below are placeholders; each is reassigned before it is read
                    VerbSense VS       = new VerbSense();
                    string[]  spltstr2 = new string[10];
                    string[]  spltstr3 = new string[10];
                    if (node.Senses != null && node.Goal.Contains("V"))
                    {
                        // goal contains "V" and the node carries senses: compare the first word of the
                        // DisambRes entry with the text before '#' in the node's first sense
                        VS       = (VerbSense)node.Senses[0];
                        spltstr2 = VS.Sense.ToString().Split('#');
                        spltstr3 = spltstr[0].Split(' ');

                        // replace that text by the first result of GetINFOfVerb, if any
                        // (presumably the infinitive form — confirm against SentenceParser)
                        ArrayList results = dummysp.GetINFOfVerb(spltstr2[0]);
                        if (results.Count > 0)
                        {
                            spltstr2[0] = (string)results[0];
                        }


                        if (spltstr3[0] == spltstr2[0])
                        {
                            // match: consume the next pending sense and advance to the next DisambRes entry
                            if (NodesSenses.Count > 0)
                            {
                                node.Sense   = (string)NodesSenses[0];
                                node.SenseNo = (int)SensesNos[0];
                                SensesNos.RemoveAt(0);
                                NodesSenses.RemoveAt(0);
                                j++;
                            }
                        }
                    }
                    // NOTE(review): this if/else also runs for nodes already handled by the branch above, and
                    // spltstr still refers to the entry read before j was advanced, so such a node can consume
                    // a second pending sense — confirm this is intended
                    if (str == "N")
                    {
                        // "N" leaves take the next pending sense without comparing words
                        // NOTE(review): no NodesSenses.Count check here, unlike the other branches
                        node.Sense   = (string)NodesSenses[0];
                        node.SenseNo = (int)SensesNos[0];
                        SensesNos.RemoveAt(0);
                        NodesSenses.RemoveAt(0);
                        j++;
                    }
                    else
                    {
                        string NodeWord = SyntacticAnalyzer.SentenceParser.GetWordString(parsetree, node);


                        spltstr3 = spltstr[0].Split(' ');
                        string nodeWord = NodeWord.ToLower();
                        // for these verb goals the node word is replaced by the first GetINFOfVerb result, if any
                        if (node.Goal == "V" || node.Goal == "BE1" || node.Goal == "VINF" || node.Goal == "VPSP" || node.Goal == "VING")
                        {
                            ArrayList results = dummysp.GetINFOfVerb(nodeWord);
                            if (results.Count > 0)
                            {
                                nodeWord = (string)results[0];
                            }
                        }

                        // dummy: the DisambRes word with its last character replaced by "ies"
                        // NOTE(review): Remove(Length - 1) throws for an empty string — confirm the word is never empty
                        string dummy = spltstr3[0];
                        dummy = dummy.Remove(dummy.Length - 1);
                        dummy = dummy + "ies";

                        // dummy2 / dummy3: node word + "ing" / + "ings";
                        // dummy4: node word with its last character replaced by "ing"
                        string dummy2 = nodeWord + "ing";
                        string dummy3 = dummy2 + "s";
                        string dummy4 = nodeWord.Remove(nodeWord.Length - 1);
                        dummy4 = dummy4 + "ing";

                        // accept the entry when the DisambRes word equals the node word or one of the suffix variants built above
                        if (spltstr3[0] == nodeWord || spltstr3[0] + 's' == nodeWord || spltstr3[0] + "es" == nodeWord || dummy == nodeWord || spltstr3[0] == dummy2 || spltstr3[0] == dummy3 || spltstr3[0] == dummy4 || spltstr3[0] == dummy4 + 's')
                        {
                            if (NodesSenses.Count > 0)
                            {
                                node.Sense   = (string)NodesSenses[0];
                                node.SenseNo = (int)SensesNos[0];
                                SensesNos.RemoveAt(0);
                                NodesSenses.RemoveAt(0);
                                j++;
                            }
                        }
                    }
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs disambiguation over SParseTrees, then looks up the definition text of every disambiguated word
        /// and records it in NodesSenses, SensesNos and DisambRes. Finally attaches the senses to the parse tree
        /// nodes and replaces SParseTrees with NewSParseTrees.
        /// </summary>
        public void beginDisambiguate()
        {
            Disambiguate(SParseTrees);
            // get the text of the senses
            for (int i = 0; i < NewParseTreeSenses.Count; i++)
            {
                MyWordInfo[] mwiArr = (MyWordInfo[])NewParseTreeSenses[i];
                // pt is assigned but not read anywhere else in this method
                ParseTree    pt;
                pt = (ParseTree)NewSParseTrees[i];
                // NOTE(review): called once per tree with the whole tree collection — confirm it is meant to be inside the loop
                AddArrStems(NewSParseTrees);
                for (int j = 0; j < mwiArr.Length; j++)
                {
                    Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)mwiArr[j].Pos);
                    try
                    {
                        ArrayList   results = new ArrayList();
                        Wnlib.Index index;
                        // the stems are needed here to get the index
                        if (mwiArr[j].Pos == PartsOfSpeech.Verb)
                        {
                            // verbs: look up the first GetINFOfVerb result when there is one, otherwise the lower-cased word
                            SentenceParser dummysp = new SentenceParser();
                            results = dummysp.GetINFOfVerb(mwiArr[j].Word.ToLower());
                            if (results.Count > 0)
                            {
                                index = Wnlib.Index.lookup((string)results[0], p);
                            }
                            else
                            {
                                index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                            }
                        }

                        else
                        {
                            index = Wnlib.Index.lookup(mwiArr[j].Word.ToLower(), p);
                        }
                        // record the definition and sense number of the chosen sense
                        SynSet sense = new SynSet(index, mwiArr[j].Sense, null);
                        NodesSenses.Add(sense.defn);
                        SensesNos.Add(mwiArr[j].Sense);

                        // DisambRes entries have the form "<word> : <definition>"
                        string s;
                        if (results.Count > 0)
                        {
                            s = (string)results[0] + " : " + sense.defn;
                        }

                        else
                        {
                            s = mwiArr[j].Word.ToLower() + " : " + sense.defn;
                        }
                        DisambRes.Add(s);
                    }
                    catch
                    {
                        // any failure above: retry the lookup with the entry of Stems at the same index
                        try
                        {
                            Wnlib.Index index = Wnlib.Index.lookup(Stems[j], p);
                            SynSet      sense = new SynSet(index, mwiArr[j].Sense, null);
                            NodesSenses.Add(sense.defn);
                            SensesNos.Add(mwiArr[j].Sense);

                            string s = Stems[j].ToLower() + " : " + sense.defn;
                            DisambRes.Add(s);
                        }
                        // NOTE(review): a failure of the retry is swallowed silently, so the word gets no entry at all
                        catch
                        { };
                    };
                }
                // Senses is re-pointed at NodesSenses after every tree
                Senses = NodesSenses;
            }
            // add sense text and sense number to the nodes
            AddNodesSenses(NewSParseTrees);
            // put the output parse trees back into SParseTrees
            SParseTrees = NewSParseTrees;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Chooses the sense whose WordNet text overlaps most with the given context. For every candidate the
        /// score is the weighted sum of three overlap counts (as returned by Intersect): the sense's own
        /// synonyms and gloss (weight 60), its hypernyms and hyponyms (weight 20), and its part holonyms,
        /// part meronyms, instance hypernyms and instance hyponyms (weight 5).
        /// </summary>
        /// <param name="Senses">Candidate senses; must contain at least one entry, otherwise the list indexer throws</param>
        /// <param name="context">Context tokens to compare against</param>
        /// <returns>The first sense with the highest positive score, or the first sense when no sense scores above zero</returns>
        private SynSet Disambiguate(List <SynSet> Senses, string[] context)
        {
            // weights of the three groups of text, from most to least specific
            const int ownWeight    = 60;
            const int strongWeight = 20;
            const int weakWeight   = 5;

            SynSet best      = null;
            int    bestScore = 0;

            //Rada Recommendation (disabled): consider only the first three senses

            foreach (SynSet sense in Senses)
            {
                // the sense itself: synonyms and gloss, with '_' and '-' treated as word separators
                string ownText = string.Join(" ", sense.Synonyms) + " " + sense.Gloss;
                ownText = ownText.Replace("_", " ").Replace("-", " ");
                int overlap = Intersect(_tokenizer.Tokenize(ownText), context) * ownWeight;

                // strong relations
                string strongText = BuildRelatedSenseText(
                    sense,
                    WordNetApi.Core.WordNetEngine.SynSetRelation.Hypernym,
                    WordNetApi.Core.WordNetEngine.SynSetRelation.Hyponym);
                overlap += Intersect(_tokenizer.Tokenize(strongText), context) * strongWeight;

                // weak relations
                string weakText = BuildRelatedSenseText(
                    sense,
                    WordNetApi.Core.WordNetEngine.SynSetRelation.PartHolonym,
                    WordNetApi.Core.WordNetEngine.SynSetRelation.PartMeronym,
                    WordNetApi.Core.WordNetEngine.SynSetRelation.InstanceHypernym,
                    WordNetApi.Core.WordNetEngine.SynSetRelation.InstanceHyponym);
                overlap += Intersect(_tokenizer.Tokenize(weakText), context) * weakWeight;

                // strict comparison: on a tie the earlier sense wins
                if (overlap > bestScore)
                {
                    bestScore = overlap;
                    best      = sense;
                }
            }

            // nothing overlapped with the context: fall back to the first sense
            if (best == null)
            {
                best = Senses[0];
            }

            return(best);
        }

        /// <summary>
        /// Concatenates the synonyms and gloss of every synset directly related to <paramref name="sense"/>
        /// through the given relations. Entries are separated by a single space so that the last word of one
        /// gloss can never fuse with the first synonym of the next synset.
        /// </summary>
        /// <param name="sense">Synset whose related synsets are collected</param>
        /// <param name="relations">Relations to follow, in the order in which they are queried</param>
        /// <returns>The combined text, or an empty string when there are no related synsets</returns>
        private static string BuildRelatedSenseText(SynSet sense, params WordNetApi.Core.WordNetEngine.SynSetRelation[] relations)
        {
            var entries = new List <string>();

            foreach (WordNetApi.Core.WordNetEngine.SynSetRelation relation in relations)
            {
                foreach (SynSet related in sense.GetRelatedSynSets(relation, false))
                {
                    entries.Add(string.Join(" ", related.Synonyms) + " " + related.Gloss);
                }
            }

            return string.Join(" ", entries.ToArray());
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Expands a token sequence into a boosted query string. Every token that is not skipped is emitted as
        /// "token^5 ", followed by "synonym^1 " for each word of the token's most common WordNet synset in every
        /// enabled part of speech, provided that word does not already occur in the expansion built so far.
        /// Progress is written to the console.
        /// </summary>
        /// <param name="tokens">Tokens to expand</param>
        /// <param name="checkForNoun">Whether to add noun synonyms</param>
        /// <param name="checkForAdj">Whether to add adjective synonyms</param>
        /// <param name="checkForVerb">Whether to add verb synonyms</param>
        /// <param name="checkForAdverb">Whether to add adverb synonyms</param>
        /// <returns>The expanded query string (empty when every token was skipped)</returns>
        public String QueryExpansion(IEnumerable <String> tokens, Boolean checkForNoun, Boolean checkForAdj, Boolean checkForVerb, Boolean checkForAdverb)
        {
            String expansion = "";

            foreach (String token in tokens)
            {
                // NOTE(review): this is a substring test, so a token that is merely part of a stop word
                // (and the empty token) is skipped as well — confirm whether an exact match was intended
                bool isToken = false;
                foreach (string sw in STOP_WORDS)
                {
                    if (sw.Contains(token))
                    {
                        isToken = true;
                        break;
                    }
                }

                if (token == "" || token == " " || isToken)
                {
                    // report the skipped token together with the stop-word flag
                    System.Console.WriteLine("Token: {0} : {1}", token, isToken);
                    continue;
                }

                System.Console.WriteLine("\n\n~~~~~~~~~~~~~~~~~~~~~~~~ Getting synonyms from WordNet\n");
                System.Console.WriteLine("Synonyms for token =  " + token + "\n");

                // the token itself carries the highest boost
                expansion += token + "^5 ";

                // For each token, add the synonyms of its most common synset per enabled part of speech.
                // The synset is only looked up when the corresponding flag is set.
                // src: https://developer.syn.co.in/api/Syn.WordNet.WordNetEngine.html
                System.Console.WriteLine("~~~~~~~ Nouns\n");

                if (checkForNoun)
                {
                    expansion = AppendNewSynonyms(expansion, wordNetEngine.GetMostCommonSynSet(token, PartOfSpeech.Noun));
                }

                System.Console.WriteLine("~~~~~~~ Adjectives\n");

                if (checkForAdj)
                {
                    expansion = AppendNewSynonyms(expansion, wordNetEngine.GetMostCommonSynSet(token, PartOfSpeech.Adjective));
                }

                System.Console.WriteLine("~~~~~~~ Verbs\n");

                if (checkForVerb)
                {
                    expansion = AppendNewSynonyms(expansion, wordNetEngine.GetMostCommonSynSet(token, PartOfSpeech.Verb));
                }

                System.Console.WriteLine("~~~~~~~ Adverbs\n");

                if (checkForAdverb)
                {
                    expansion = AppendNewSynonyms(expansion, wordNetEngine.GetMostCommonSynSet(token, PartOfSpeech.Adverb));
                }

                System.Console.WriteLine("~~~~~~~~~~~~~~~~~~~~~~~~ \n\n");
            }

            return(expansion);
        }

        /// <summary>
        /// Appends "word^1 " to the expansion for every word of the synset that does not yet occur in it,
        /// echoing each added word to the console. A missing synset or word list leaves the expansion unchanged.
        /// </summary>
        /// <param name="expansion">Expansion built so far</param>
        /// <param name="synSet">Synset whose words are added; may be null when the token has no synset</param>
        /// <returns>The expansion including the newly added words</returns>
        private static String AppendNewSynonyms(String expansion, SynSet synSet)
        {
            // no synset for this part of speech: nothing to add
            if (synSet == null)
            {
                return expansion;
            }

            var words = synSet.Words;
            if (words == null)
            {
                return expansion;
            }

            foreach (var synonym in words)
            {
                // substring test against everything emitted so far, as in the original implementation
                if (!expansion.Contains(synonym))
                {
                    expansion += synonym + "^1 ";
                    System.Console.WriteLine(synonym);
                }
            }

            return expansion;
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Creates a comparison result from a tag, a value and an optional synset.
 /// </summary>
 /// <param name="tag">Value stored in <c>Tag</c></param>
 /// <param name="value">Value stored in <c>Value</c></param>
 /// <param name="highSynSet">Value stored in <c>HighSynSet</c>; null when omitted</param>
 public Comparison(string tag, double value, SynSet highSynSet = null)
 {
     this.Tag = tag;
     this.Value = value;
     this.HighSynSet = highSynSet;
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Converts the WordNet database files (wndb) into a single serialized dictionary file.
        /// </summary>
        /// <remarks>
        /// Two lookup tables are built: lemma -> synset groups (one group per POS symbol) and
        /// exception (irregular) form -> base forms. Both are wrapped in an <c>ExpDictStorage</c> and
        /// written through a <c>BsonWriter</c>, so the output is BSON-encoded despite the parameter name.
        /// Null members are omitted from the output.
        /// </remarks>
        /// <param name="dictPack">Path to the wndb data files</param>
        /// <param name="jsonFile">Path of the output file; it is created or overwritten</param>
        public static void Convert(string dictPack, string jsonFile)
        {
            WNDB wndb = new WNDB(dictPack);

            // Materialized once: the sequence is walked twice below (data, then exceptions).
            var poses = (new[] { "n", "v", "a", "r" }).Select(s => PartOfSpeech.of(s)).ToArray();

            // Convert to  Dictionary
            // lemma -> { SynSetGroup: PosSymbol, Synsets = { synset: synonyms, definitions, examples } }

            var dict = new Dictionary <string, List <ExpSynSetGroup> >();

            foreach (var pos in poses)
            {
                Console.WriteLine("Process Data of {0}", pos.name);

                foreach (var data in wndb.GetData(pos))
                {
                    // data.adj includes 'a' & 's' pos symbols, so the group key is taken from the
                    // POS being processed rather than from the record itself.
                    char posSymbol = pos.symbol.First();

                    // A synset made of exactly one all-lowercase word needs no synonym list:
                    // the word is identical to the lemma key it is stored under.
                    bool singleWord = false;
                    if (data.origWords.Count() == 1)
                    {
                        var w = data.origWords.First().word;
                        // Invariant casing keeps the keys independent of the machine's culture.
                        singleWord = w == w.ToLowerInvariant();
                    }

                    var synSet = new SynSet
                    {
                        // Skip synonyms if there is a single lowercase word
                        Synonims    = (singleWord) ? null : data.origWords.Select(ow => ow.word).ToArray(),
                        // Singular/plural members are mutually exclusive; the unused one stays null
                        // and is dropped by the serializer.
                        Definition  = (data.definitions.Count() == 1) ? data.definitions.First() : null,
                        Definitions = (data.definitions.Count() > 1) ? data.definitions : null,
                        Example     = (data.examples?.Count() == 1) ? data.examples.First() : null,
                        Examples    = (data.examples?.Count() > 1) ? data.examples : null
                    };

                    // Register the synset under every member word (lowercased) for this POS.
                    foreach (var lemma in data.origWords.Select(ow => ow.word.ToLowerInvariant()))
                    {
                        var synGrps = dict.GetValue(lemma);
                        if (synGrps != null)
                        {
                            var grp = synGrps.FirstOrDefault(g => g.PosSymbol == posSymbol);

                            if (grp == null)
                            {
                                synGrps.Add(new ExpSynSetGroup(posSymbol, synSet));
                            }
                            else
                            {
                                grp.Synsets.Add(synSet);
                            }
                        }
                        else
                        {
                            dict.Add(lemma, new List <ExpSynSetGroup> {
                                new ExpSynSetGroup(posSymbol, synSet)
                            });
                        }
                    }
                }
            }

            // exceptions
            //TODO: remove morphes, ...

            var excepts = new Dictionary <string, List <DictException> >();

            foreach (var pos in poses)
            {
                Console.WriteLine("Process Exceptions of {0}", pos.name);

                // exwords[0] is the irregular form; exwords[1..] are its base forms.
                foreach (var exwords in wndb.GetExceptions(pos))
                {
                    var morph = Morph.GetBasicForm(exwords[0], pos);
                    for (int i = 1; i < exwords.Length; i++)
                    {
                        var baseForm = exwords[i];

                        // Skip entries that map a word to itself or to the form Morph already yields.
                        if (baseForm == exwords[0] || baseForm == morph)
                        {
                            continue;
                        }

                        // Hyphenated base forms may be stored with spaces in the lemma dictionary.
                        List <ExpSynSetGroup> synGrps = dict.GetValue(baseForm);
                        if (synGrps == null && baseForm.Contains('-'))
                        {
                            baseForm = baseForm.Replace('-', ' ');
                            dict.TryGetValue(baseForm, out synGrps);
                        }

                        // Only base forms that exist as lemmas are recorded.
                        if (synGrps != null)
                        {
                            var posSymbols = string.Join("", synGrps.Select(sg => sg.PosSymbol));
                            var except     = new DictException {
                                BasicForm = baseForm, PosSymbols = posSymbols
                            };

                            List <DictException> baseForms;
                            if (excepts.TryGetValue(exwords[0], out baseForms))
                            {
                                // The same irregular form can appear for several POS; avoid duplicates.
                                if (!baseForms.Any(e => e.BasicForm == baseForm))
                                {
                                    baseForms.Add(except);
                                }
                            }
                            else
                            {
                                excepts.Add(exwords[0], new List <DictException> {
                                    except
                                });
                            }
                        }
                    }
                }
            }

            Console.WriteLine("Save changes");

            var storage = new ExpDictStorage
            {
                SynSets    = dict,
                Exceptions = excepts
            };

            var serializer = new JsonSerializer();

            serializer.NullValueHandling = NullValueHandling.Ignore;

            using (var stream = File.Open(jsonFile, FileMode.Create))
                using (var writer = new BsonWriter(stream))
                {
                    serializer.Serialize(writer, storage);
                }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Maps a concept to WordNet senses using the fillers of its lexical-mapping property.
        /// </summary>
        /// <remarks>
        /// Each filler's scalar value is split on '-' and '_'. When the last token is two characters long
        /// its first character selects the part of speech (v/n/a/r) and the remaining tokens form the word;
        /// otherwise the last token is part of the word and the part of speech is Unknown. Consecutive
        /// duplicates (same word and POS) are collapsed, the remaining senses are disambiguated together,
        /// and every sense that can be looked up is appended to <c>wordologyArr</c>.
        /// Lookup failures are counted/ignored rather than propagated.
        /// </remarks>
        /// <param name="concept">Name of the concept being mapped</param>
        /// <param name="maplexProperty">Property whose fillers carry the word/POS encodings</param>
        private void MapConceptsWithMapLex(string concept, Property maplexProperty)
        {
            // NOTE(review): the same WO value is filled and added on every iteration. If WordOlogy is a
            // reference type, all entries added below alias one object - confirm against its declaration.
            WordOlogy WO         = new WordOlogy();
            char[]    separators = new char[] { '-', '_' };

            List <MyWordInfo> maplexsenses = new List <MyWordInfo>();
            // Two-character suffix of the filler each kept sense came from (null when it had none),
            // index-aligned with maplexsenses so the second loop does not have to re-index Fillers.
            List <string> senseSuffixes = new List <string>();
            int NoOfSensesSucceeded     = 0;

            for (int f = 0; f < maplexProperty.Fillers.Count; f++)
            {
                string[] splt = maplexProperty.Fillers[f].ScalarFiller.Split(separators);

                // Fillers without any separator carry no word/type pair and are skipped.
                if (splt.Length <= 1)
                {
                    continue;
                }

                string     suffix = splt[splt.Length - 1];
                MyWordInfo mwi    = new MyWordInfo();

                // All tokens but the last form the (space separated) word.
                mwi.Word += string.Join(" ", splt, 0, splt.Length - 1);

                bool hasPosSuffix = suffix.Length == 2;
                if (hasPosSuffix)
                {
                    switch (suffix[0])
                    {
                    case 'v':
                        mwi.Pos = Wnlib.PartsOfSpeech.Verb;
                        break;

                    case 'n':
                        mwi.Pos = Wnlib.PartsOfSpeech.Noun;
                        break;

                    case 'a':
                        mwi.Pos = Wnlib.PartsOfSpeech.Adj;
                        break;

                    case 'r':
                        mwi.Pos = Wnlib.PartsOfSpeech.Adv;
                        break;

                    default:
                        mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                        break;
                    }
                }
                else
                {
                    // No type suffix (e.g. "a-bomb"): the last token belongs to the word itself.
                    mwi.Pos   = Wnlib.PartsOfSpeech.Unknown;
                    mwi.Word += " " + suffix;
                }

                // Keep the sense unless it repeats the previously kept one. Testing the list for
                // emptiness (instead of the filler index) still accepts senses when the very first
                // filler was skipped above.
                bool isNewSense = maplexsenses.Count == 0;
                if (!isNewSense)
                {
                    MyWordInfo previous = maplexsenses[maplexsenses.Count - 1];
                    isNewSense = mwi.Word != previous.Word || mwi.Pos != previous.Pos;
                }

                if (isNewSense)
                {
                    maplexsenses.Add(mwi);
                    senseSuffixes.Add(hasPosSuffix ? suffix : null);
                }
            }

            if (maplexsenses.Count > 0)
            {
                WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                MyWordInfo[]           res = wsd.Disambiguate(maplexsenses.ToArray());

                for (int i = 0; i < res.Length; i++)
                {
                    MyWordInfo wi = res[i];

                    // Presumably Disambiguate returns one entry per input sense, in order - the
                    // bound check only protects against a longer result.
                    string suffix = i < senseSuffixes.Count ? senseSuffixes[i] : null;
                    if (suffix != null)
                    {
                        WO.SenseNo = suffix;
                    }
                    // else: the sense doesn't have a POS suffix; SenseNo keeps its previous value.

                    Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);

                    try
                    {
                        Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                        SynSet      sense = new SynSet(index, wi.Sense, null);
                        WO.Sense = sense.defn;
                        NoOfSensesSucceeded++;
                        try
                        {
                            WO.Pos = p.name;
                        }
                        catch
                        {
                            WO.Pos = wi.Pos.ToString();
                        }
                        ID++;
                        WO.Word    = wi.Word;
                        WO.ID      = ID;
                        WO.Concept = concept;
                        wordologyArr.Add(WO);
                    }
                    catch
                    {
                        // Best effort: a sense that cannot be looked up is skipped; the counter
                        // below records iterations that ran before any sense succeeded.
                    }

                    if (NoOfSensesSucceeded == 0)
                    {
                        CannotGetSenseExeption++;
                    }
                }
                conceptcounter++;
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Maps a concept that has no lexical-mapping property by looking the concept name itself up in
        /// WordNet as a noun, verb, adjective and adverb, and appending one entry to
        /// <c>wordologyArr</c> per relatedness option and sense found.
        /// </summary>
        /// <remarks>
        /// <c>conceptcounter</c> is incremented when at least one entry was added. Failures for one part
        /// of speech are ignored and do not prevent the others from being tried.
        /// </remarks>
        /// <param name="concept">Concept name, used as the word to look up</param>
        private void MapConceptsWithOutMapLex(string concept)
        {
            // NOTE(review): one WO value is reused for every entry. If WordOlogy is a reference type,
            // all added entries alias the same object - confirm against its declaration.
            WordOlogy WO            = new WordOlogy();
            int NoOfSensesSucceeded = 0;

            NoOfSensesSucceeded += MapSensesForPartOfSpeech(concept, WO, PartsOfSpeech.Noun, "noun");
            NoOfSensesSucceeded += MapSensesForPartOfSpeech(concept, WO, PartsOfSpeech.Verb, "verb");
            NoOfSensesSucceeded += MapSensesForPartOfSpeech(concept, WO, PartsOfSpeech.Adj, "adj");
            // The adverb pass now asks for the adverb relatedness options; it previously requested the
            // noun options, which looks like a copy-paste slip.
            NoOfSensesSucceeded += MapSensesForPartOfSpeech(concept, WO, PartsOfSpeech.Adv, "adv");

            if (NoOfSensesSucceeded != 0)
            {
                conceptcounter++;
            }
        }

        /// <summary>
        /// Adds one entry to <c>wordologyArr</c> for every (relatedness option, sense) pair of
        /// <paramref name="concept"/> under a single part of speech.
        /// </summary>
        /// <param name="concept">Word to look up</param>
        /// <param name="WO">Entry value that is filled in and added for each sense</param>
        /// <param name="pos">Part of speech used for the index lookup and the relatedness options</param>
        /// <param name="posName">Name of the part of speech; stored in the entry and used for the search</param>
        /// <returns>Number of entries added before completion or before the first failure</returns>
        private int MapSensesForPartOfSpeech(string concept, WordOlogy WO, PartsOfSpeech pos, string posName)
        {
            int mapped = 0;

            try
            {
                Wnlib.Index index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(pos));
                if (index != null)
                {
                    WO.Pos = posName;
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(pos);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            Search se    = new Search(concept, true, PartOfSpeech.of(posName), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Sense   = sense.defn;
                            WO.Concept = concept;
                            WO.Word    = concept;
                            WO.ID      = ID;
                            ID++;
                            mapped++;
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch
            {
                // Best effort: whatever was added before the failure is kept and the remaining
                // senses for this part of speech are skipped.
            }

            return mapped;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Imports the WordNet database files (wndb) into the given database context.
        /// </summary>
        /// <remarks>
        /// For each part of speech (n, v, a, r) the method first stores synsets, lemmas, synset-lemma
        /// links and original spellings and saves; it then stores the exception entries whose
        /// main form is a known lemma and saves again.
        /// </remarks>
        /// <param name="dictPack">Path to the wndb data files</param>
        /// <param name="context">Destination database context</param>
        /// <exception cref="System.Exception">A data record's POS symbol does not belong to the POS being processed</exception>
        public static void Convert(string dictPack, WordNetContext context)
        {
            var wndb          = new WNDB(dictPack);
            var membersSeen   = new List <string>();               // lowercased members of the current synset
            var lemmaByWord   = new Dictionary <string, Lemma>();  // lowercased word -> lemma row
            var writingByText = new Dictionary <string, Writing>(); // original spelling -> writing row

            foreach (var symbol in new [] { "n", "v", "a", "r" })
            {
                var pos = PartOfSpeech.of(symbol);

                Console.WriteLine("Process Data of {0}", pos.name);

                foreach (var record in wndb.GetData(pos))
                {
                    // data.adj includes 'a' & 's' pos symbols, so 's' is accepted while processing 'a'.
                    bool posMatches = record.pos == pos.symbol || (record.pos == "s" && pos.symbol == "a");
                    if (!posMatches)
                    {
                        throw new Exception("pos!=data.pos");
                    }

                    var synset = new SynSet {
                        Pos = record.pos
                    };
                    context.SynSets.Add(synset);
                    membersSeen.Clear();

                    foreach (var member in record.origWords)
                    {
                        string original = member.word;
                        string lowered  = original.ToLower();

                        // Find or create the lemma; an existing lemma only gains the new POS symbol.
                        Lemma lemma;
                        if (lemmaByWord.TryGetValue(lowered, out lemma))
                        {
                            if (!lemma.Poses.Contains(record.pos))
                            {
                                lemma.Poses += record.pos;
                            }
                        }
                        else
                        {
                            lemma = new Lemma {
                                Value = lowered, Poses = record.pos
                            };
                            lemmaByWord.Add(lowered, lemma);
                            context.Lemmas.Add(lemma);
                        }

                        // Link synset and lemma once, even if the synset lists several casings of the word.
                        if (!membersSeen.Contains(lowered))
                        {
                            membersSeen.Add(lowered);
                            context.SynsetLemmas.Add(new SynsetLemma
                            {
                                SynSet = synset,
                                Lemma  = lemma
                            });
                        }

                        // The original spelling is stored only when it differs from the lemma.
                        if (lowered == original)
                        {
                            continue;
                        }

                        Writing writing;
                        if (!writingByText.TryGetValue(original, out writing))
                        {
                            writing = new Writing {
                                Value = original, Lemma = lemma
                            };
                            writingByText.Add(original, writing);
                            context.Writings.Add(writing);
                        }
                        else if (writing.Lemma != lemma)
                        {
                            // Same spelling already registered for a different lemma: report and move on.
                            Console.WriteLine("Word mix: {0} {1} {2}", original, lemma.Value, writing.Lemma.Value);
                        }
                    }

                    synset.Definition = string.Join(";", record.definitions);
                    synset.Example    = string.Join(";", record.examples);
                }

                Console.WriteLine("Save changes");
                context.SaveChanges();

                // exceptions
                //TODO: remove morphes, ...

                Console.WriteLine("Process Exceptions of {0}", pos.name);

                foreach (var forms in GetExceptions(wndb, pos))
                {
                    // forms[0] is the exception value; every following entry is a candidate main form.
                    string irregular = forms[0];

                    for (int k = 1; k < forms.Length; k++)
                    {
                        string mainForm = forms[k];
                        if (mainForm == irregular)
                        {
                            continue;
                        }

                        // Try the main form as is, then with hyphens replaced by spaces.
                        // Main forms without a known lemma are dropped silently.
                        Lemma lemma;
                        bool  known = lemmaByWord.TryGetValue(mainForm, out lemma) ||
                                      (mainForm.Contains('-') && lemmaByWord.TryGetValue(mainForm.Replace('-', ' '), out lemma));
                        if (known)
                        {
                            context.Excepts.Add(new Except {
                                Value = irregular, MainForm = mainForm, Lemma = lemma
                            });
                        }
                    }
                }

                Console.WriteLine("Save changes");
                context.SaveChanges();
            }

            // Final save after all parts of speech have been processed.
            context.SaveChanges();
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Initializes a new instance of the <see cref="WordNetMemoryProvider"/> class by reading the
        /// WordNet data and index files for all four parts of speech from the given directory.
        /// </summary>
        /// <param name="dataPath">The data path.</param>
        /// <exception cref="System.ArgumentNullException">dataPath</exception>
        /// <exception cref="System.IO.DirectoryNotFoundException">The data directory does not exist.</exception>
        /// <exception cref="System.IO.FileNotFoundException">A required WordNet file does not exist: [filename]</exception>
        public WordNetMemoryProvider(string dataPath)
        {
            if (string.IsNullOrEmpty(dataPath))
            {
                throw new ArgumentNullException("dataPath");
            }

            if (!new DirectoryInfo(dataPath).Exists)
            {
                throw new DirectoryNotFoundException("The data directory does not exist.");
            }

            var synsetFiles = new [] { "data.adj", "data.adv", "data.noun", "data.verb" }
                              .Select(name => new FileInfo(Path.Combine(dataPath, name)))
                              .ToArray();

            var wordIndexFiles = new [] { "index.adj", "index.adv", "index.noun", "index.verb" }
                                 .Select(name => new FileInfo(Path.Combine(dataPath, name)))
                                 .ToArray();

            // Fail on the first required file that is missing (data files are checked before index files).
            var missingFile = synsetFiles.Union(wordIndexFiles).FirstOrDefault(candidate => !candidate.Exists);
            if (missingFile != null)
            {
                throw new FileNotFoundException("A required WordNet file does not exist: " + missingFile.Name);
            }

            // Pass 1: count synset definition lines (lines that do not start with a space and
            // contain one) so the id dictionary can be sized up front.
            var synsetCount = 0;

            foreach (var synsetFile in synsetFiles)
            {
                using (var reader = new StreamReader(synsetFile.FullName))
                {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        if (line.IndexOf(' ') > 0)
                        {
                            synsetCount++;
                        }
                    }
                }
            }

            // Pass 2: register a synset shell (pos and offset only) for every definition line.
            idSynset = new Dictionary <string, SynSet>(synsetCount);

            foreach (var synsetFile in synsetFiles)
            {
                var pos = WordNetFileProvider.GetFilePos(synsetFile.FullName);

                using (var reader = new StreamReader(synsetFile.FullName))
                {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        var offsetLength = line.IndexOf(' ');
                        if (offsetLength > 0)
                        {
                            // The text before the first space is the synset offset.
                            var shell = new SynSet(pos, int.Parse(line.Substring(0, offsetLength)), null);
                            idSynset.Add(shell.Id, shell);
                        }
                    }
                }
            }

            // Pass 3: instantiate each shell from its line (hooks up relations, sets glosses, etc.),
            // resolving references through the shells registered in pass 2.
            foreach (var synsetFile in synsetFiles)
            {
                var pos = WordNetFileProvider.GetFilePos(synsetFile.FullName);

                using (var reader = new StreamReader(synsetFile.FullName))
                {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        var offsetLength = line.IndexOf(' ');
                        if (offsetLength <= 0)
                        {
                            continue;
                        }

                        // Looked up as "<pos>:<offset>" - the key under which pass 2 stored the shell.
                        var synsetId = pos + ":" + int.Parse(line.Substring(0, offsetLength));
                        idSynset[synsetId].Instantiate(line, idSynset);
                    }
                }
            }

            // Organize synsets by pos and word; also flag the most common synset for
            // word-pos pairs that have more than one synset.
            posWordSynSets = new Dictionary <WordNetPos, Dictionary <string, List <SynSet> > >();

            foreach (var wordIndexFile in wordIndexFiles)
            {
                var pos = WordNetFileProvider.GetFilePos(wordIndexFile.FullName);

                posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary <string, List <SynSet> >));

                using (var reader = new StreamReader(wordIndexFile.FullName))
                {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        // Header lines start with a space (or contain none) and are skipped.
                        var wordLength = line.IndexOf(' ');
                        if (wordLength > 0)
                        {
                            var    word = line.Substring(0, wordLength);
                            SynSet mostCommon;
                            var    shells = WordNetFileProvider.GetSynSetShells(line, pos, out mostCommon, wordNet);

                            // Only an ambiguous word needs its most common synset marked.
                            if (shells.Count > 1)
                            {
                                idSynset[mostCommon.Id].SetAsMostCommonSynsetFor(word);
                            }

                            // Store the fully instantiated synsets from the passes above, not the shells.
                            var instantiated = new List <SynSet>(shells.Count);
                            posWordSynSets[pos].Add(word, instantiated);
                            foreach (var shell in shells)
                            {
                                instantiated.Add(idSynset[shell.Id]);
                            }
                        }
                    }
                }
            }
        }
Ejemplo n.º 18
0
        /*--------------------------------------------------------------------------------------------*/
        /// <summary>
        /// Saves the relations of one synset through the given session: one <c>Lexical</c> row per
        /// lexical relation and one <c>Semantic</c> row per synset related through each semantic relation.
        /// </summary>
        /// <param name="pSess">Session used to save the rows</param>
        /// <param name="pSynSetId">Key of the synset's database row in <c>SynsetCache</c></param>
        /// <param name="pSynSet">Synset whose relations are stored</param>
        private static void InsertLexAndSemForSynSet(ISession pSess, string pSynSetId, SynSet pSynSet)
        {
            Synset sourceRow = SynsetCache[pSynSetId];

            // Word-level (lexical) relations; word rows are cached under "<SsId>|<word>".
            foreach (LexicalRelation lexRel in pSynSet.GetLexicallyRelated())
            {
                var lexRow = new Lexical();
                lexRow.Synset       = sourceRow;
                lexRow.Word         = WordCache[lexRow.Synset.SsId + "|" + lexRel.FromWord];
                lexRow.RelationId   = (byte)lexRel.Relation;
                lexRow.TargetSynset = SynsetCache[lexRel.ToSyn.ID];
                lexRow.TargetWord   = WordCache[lexRow.TargetSynset.SsId + "|" + lexRel.ToWord];
                pSess.Save(lexRow);
            }

            // Synset-level (semantic) relations; the second argument (false) is passed unchanged.
            foreach (WordNetEngine.SynSetRelation relation in pSynSet.SemanticRelations)
            {
                foreach (SynSet related in pSynSet.GetRelatedSynSets(relation, false))
                {
                    var semRow = new Semantic
                    {
                        Synset       = sourceRow,
                        RelationId   = (byte)relation,
                        TargetSynset = SynsetCache[related.ID]
                    };
                    pSess.Save(semRow);
                }
            }
        }
Ejemplo n.º 19
0
        public void ConstructMapping()
        {
            string    concept        = "";
            string    word           = "";
            int       ID             = -1;
            string    senseNo        = "";
            string    Sense          = "";
            string    Pos            = "";
            WordOlogy WO             = new WordOlogy();
            ArrayList wordologyArr   = new ArrayList();
            int       conceptcounter = 0;

            LoadOntology();
            FileStream   allConceptsFile       = new FileStream(_ontologyDirectoryPath + @"\AllConcepts.txt", FileMode.Open);
            StreamReader allConceptsFileReader = new StreamReader(allConceptsFile);

            string _wordologyDirectoryPath =
                @"..\..\..\wordology\";

            BinaryFormatter bf = new BinaryFormatter();
            FileStream      fs = new FileStream(
                _wordologyDirectoryPath + "\\wordology.txt", FileMode.Create);
            int indxWatcherconceptCounter = 0;
            int NoMapLexConcepts          = 0;
            int CannotGetSenseExeption    = 0;
            int AllSensesMapped           = 0;

            while ((concept = allConceptsFileReader.ReadLine()) != null)
            {
                indxWatcherconceptCounter++;
                string   Conceptpath    = _ontologyDirectoryPath + @"\" + concept[0] + @"\" + concept;
                Concept  C              = (Concept)Onto[concept];
                Property maplexProperty = C.FullProperties["ENGLISH1"];

                List <MyWordInfo> maplexsenses = new List <MyWordInfo>();
                MyWordInfo        mwi          = new MyWordInfo();
                int NoOfSensesSucceeded        = 0;
                if (maplexProperty != null)
                {
                    for (int i = 0; i < maplexProperty.Fillers.Count; i++)
                    {
                        string   tmp   = maplexProperty.Fillers[i].ScalarFiller;
                        char[]   charr = new char[] { '-', '_' };
                        string[] splt  = tmp.Split(charr);
                        //there r fillers with no type & a-bomb masalan

                        if (splt.Length > 1)
                        {
                            mwi = new MyWordInfo();
                            for (int k = 0; k < splt.Length - 2; k++)
                            {
                                mwi.Word += splt[k] + " ";
                            }
                            mwi.Word += splt[splt.Length - 2];
                            if (splt[splt.Length - 1].Length == 2)
                            {
                                if (splt[splt.Length - 1][0] == 'v')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Verb;
                                }
                                else if (splt[splt.Length - 1][0] == 'n')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Noun;
                                }
                                else if (splt[splt.Length - 1][0] == 'a')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Adj;
                                }
                                else if (splt[splt.Length - 1][0] == 'r')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Adv;
                                }
                                else
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                                }
                            }
                            else
                            {
                                mwi.Pos   = Wnlib.PartsOfSpeech.Unknown;
                                mwi.Word += " " + splt[splt.Length - 1];
                            }
                            if (i == 0 || (maplexsenses.Count > 0 && (mwi.Word != maplexsenses[maplexsenses.Count - 1].Word || mwi.Pos != maplexsenses[maplexsenses.Count - 1].Pos)))
                            {
                                maplexsenses.Add(mwi);
                            }
                        }
                        //ne loop 3al ontology kolaha
                    }


                    if (maplexsenses.Count > 0)
                    {
                        MyWordInfo[] maplexArray = new MyWordInfo[maplexsenses.Count];
                        for (int j = 0; j < maplexsenses.Count; j++)
                        {
                            maplexArray[j] = maplexsenses[j];
                        }
                        WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                        MyWordInfo[]           res = new MyWordInfo[maplexArray.Length];
                        res = wsd.Disambiguate(maplexArray);
                        int i = 0;

                        foreach (MyWordInfo wi in res)
                        {
                            string   tmp   = maplexProperty.Fillers[i].ScalarFiller;
                            char[]   charr = new char[] { '-', '_' };
                            string[] splt  = tmp.Split(charr);

                            if (splt.Length > 1 && splt[splt.Length - 1].Length == 2)
                            {
                                WO.SenseNo = splt[splt.Length - 1];
                            }
                            else
                            {
                                // "sense doesn't have POS";
                            }

                            Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);

                            try
                            {
                                Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                                SynSet      sense = new SynSet(index, res[i].Sense, null);
                                WO.Sense = sense.defn;
                                AllSensesMapped++;
                                NoOfSensesSucceeded++;
                                try
                                {
                                    WO.Pos = p.name;
                                }
                                catch
                                {
                                    WO.Pos = wi.Pos.ToString();
                                }
                                ID++;
                                WO.Word    = wi.Word;
                                WO.ID      = ID;
                                WO.Concept = concept;
                                WO.Word    = word;
                            }
                            catch
                            {
                            };
                            if (NoOfSensesSucceeded == 0)
                            {
                                CannotGetSenseExeption++;
                            }
                            i++;
                            // bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                        }
                        conceptcounter++;
                    }
                }
                else
                {
                    NoMapLexConcepts++;

                    //new part


                    Wnlib.Index        index;
                    Wnlib.PartOfSpeech p;
                    Search             se;

                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Noun));
                        if (index != null)
                        {
                            WO.Pos = "noun";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("noun"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.Sense   = sense.defn;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Verb));
                        if (index != null)
                        {
                            WO.Pos = "verb";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Verb);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("verb"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adj));
                        if (index != null)
                        {
                            WO.Pos = "adj";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Adj);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("adj"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adv));
                        if (index != null)
                        {
                            WO.Pos = "adv";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("adv"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }


                    if (NoOfSensesSucceeded != 0)
                    {
                        conceptcounter++;
                    }
                }
            }//end while
            allConceptsFileReader.Close();
            allConceptsFile.Close();
            bf.Serialize(fs, wordologyArr);
            fs.Close();
            MessageBox.Show("no map-lex concepts number = " + NoMapLexConcepts.ToString());
            MessageBox.Show("can't getsense pos number = " + CannotGetSenseExeption.ToString());
            MessageBox.Show(conceptcounter.ToString());
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Creates a <see cref="WordNetMemoryProvider"/> by reading the WordNet data and index files found in
        /// <paramref name="dataPath"/> and building the in-memory synset lookups from them.
        /// </summary>
        /// <param name="dataPath">Directory holding the <c>data.*</c> and <c>index.*</c> WordNet files.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="dataPath"/> is null or empty.</exception>
        /// <exception cref="System.IO.DirectoryNotFoundException">The data directory does not exist.</exception>
        /// <exception cref="System.IO.FileNotFoundException">A required WordNet file does not exist: [filename]</exception>
        public WordNetMemoryProvider(string dataPath) {
            if (string.IsNullOrEmpty(dataPath)) {
                throw new ArgumentNullException("dataPath");
            }

            if (!new DirectoryInfo(dataPath).Exists) {
                throw new DirectoryNotFoundException("The data directory does not exist.");
            }

            // One data file and one index file per part of speech.
            var dataPaths = new[] { "data.adj", "data.adv", "data.noun", "data.verb" }
                .Select(name => new FileInfo(Path.Combine(dataPath, name)))
                .ToArray();

            var indexPaths = new[] { "index.adj", "index.adv", "index.noun", "index.verb" }
                .Select(name => new FileInfo(Path.Combine(dataPath, name)))
                .ToArray();

            // Report the first required file that is missing (data files are checked before index files).
            var missingFile = dataPaths.Union(indexPaths).FirstOrDefault(candidate => !candidate.Exists);
            if (missingFile != null) {
                throw new FileNotFoundException("A required WordNet file does not exist: " + missingFile.Name);
            }

            // Pass 1: count the synset definition lines so the id dictionary can be presized.
            // A definition line is one whose first space is not at column 0.
            var synsetCount = 0;
            foreach (var dataInfo in dataPaths) {
                using (var reader = new StreamReader(dataInfo.FullName)) {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine()) {
                        if (line.IndexOf(' ') > 0) {
                            synsetCount++;
                        }
                    }
                }
            }

            // Pass 2: register a synset shell (pos and offset only) for every definition line.
            idSynset = new Dictionary<string, SynSet>(synsetCount);
            foreach (var dataInfo in dataPaths) {
                var pos = WordNetFileProvider.GetFilePos(dataInfo.FullName);

                using (var reader = new StreamReader(dataInfo.FullName)) {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine()) {
                        var offsetLength = line.IndexOf(' ');
                        if (offsetLength > 0) {
                            // the leading token of the line is the synset offset
                            var shell = new SynSet(pos, int.Parse(line.Substring(0, offsetLength)), null);
                            idSynset.Add(shell.Id, shell);
                        }
                    }
                }
            }

            // Pass 3: instantiate every shell from its definition line; the full id table is handed over
            // so that references to other synsets can be resolved against the shells created in pass 2.
            foreach (var dataInfo in dataPaths) {
                var pos = WordNetFileProvider.GetFilePos(dataInfo.FullName);

                using (var reader = new StreamReader(dataInfo.FullName)) {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine()) {
                        var offsetLength = line.IndexOf(' ');
                        if (offsetLength <= 0) {
                            continue;
                        }

                        var offset = int.Parse(line.Substring(0, offsetLength));
                        idSynset[pos + ":" + offset].Instantiate(line, idSynset);
                    }
                }
            }

            // Finally, group the synsets by pos and word, and flag the most common synset
            // for every word-pos pair that maps to more than one synset.
            posWordSynSets = new Dictionary<WordNetPos, Dictionary<string, List<SynSet>>>();

            foreach (var indexInfo in indexPaths) {
                var pos = WordNetFileProvider.GetFilePos(indexInfo.FullName);

                posWordSynSets.EnsureContainsKey(pos, typeof(Dictionary<string, List<SynSet>>));

                // scan the word index file; header lines fail the first-space test and are skipped
                using (var reader = new StreamReader(indexInfo.FullName)) {
                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine()) {
                        var wordLength = line.IndexOf(' ');
                        if (wordLength > 0) {
                            var word = line.Substring(0, wordLength);

                            SynSet mostCommonSynSet;
                            var shells = WordNetFileProvider.GetSynSetShells(line, pos, out mostCommonSynSet, wordNet);

                            // only an ambiguous word needs a "most common" marker
                            if (shells.Count > 1) {
                                idSynset[mostCommonSynSet.Id].SetAsMostCommonSynsetFor(word);
                            }

                            // store the instantiated synsets from the passes above, not the shells from the index line
                            var resolved = new List<SynSet>(shells.Count);
                            posWordSynSets[pos].Add(word, resolved);
                            foreach (var shell in shells) {
                                resolved.Add(idSynset[shell.Id]);
                            }
                        }
                    }
                }
            }
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Double-click handler for <c>ss2</c> (per its name; the event wiring is outside this block).
 /// Puts <c>_origSsLbl</c> back into <c>ss2.Text</c>, clears <c>_semSimSs2</c> and disables <c>computeSemSim</c>.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void ss2_DoubleClick(object sender, EventArgs e)
 {
     // restore the stored label text
     ss2.Text = _origSsLbl;

     // forget the stored value and disable the compute control
     _semSimSs2 = null;
     computeSemSim.Enabled = false;
 }
Ejemplo n.º 22
0
 /// <summary>
 /// Computes a similarity score for a pair of synsets. The member is abstract, so the actual
 /// measure is supplied by the deriving class.
 /// </summary>
 /// <param name="synSet1">First synset of the pair.</param>
 /// <param name="synSet2">Second synset of the pair.</param>
 /// <returns>
 /// The similarity as a <see cref="double"/>.
 /// NOTE(review): the scale and range of the value are not specified here; confirm against the implementations.
 /// </returns>
 public abstract double ComputeSimilarity(SynSet synSet1, SynSet synSet2);