Example #1
0
        /// <summary>
        /// Runs word-sense disambiguation once for every parse tree in <c>SParseTrees</c>,
        /// appending each tree's score to <c>ScoresOfParseTree</c> and its disambiguated words
        /// to <c>Senses</c>, and finally calls <c>checkParseTreeRepitition</c>.
        /// </summary>
        /// <param name="sparsetree">
        /// Not read by this method: the loop iterates <c>SParseTrees</c>, which is not a local
        /// of this method. NOTE(review): confirm that ignoring the argument is intentional.
        /// </param>
        private void Disambiguate(ArrayList sparsetree)
        {
            foreach (ParseTree ps in SParseTrees)
            {
                // Presumably these two calls populate mwi2 for the current tree - TODO confirm.
                ListifyParseTree(ps);
                PrepareInputArrOfDisambiguation();

                double Score;
                WordSenseDisambiguator wsd = new WordSenseDisambiguator();

                // The disambiguator returns its own array, so no placeholder array is
                // allocated beforehand (it would be overwritten immediately).
                DisambiguateRes = wsd.MMG_Disambiguate(mwi2, out Score);

                ScoresOfParseTree.Add(Score);
                Senses.Add(DisambiguateRes);
            }
            checkParseTreeRepitition();
            // AddArrStems(NewSParseTrees);
            // Open issue: disambiguation may be wrong when the incoming parse trees contain
            // several parse trees for a single sentence.
            // If one sentence has 3 parse trees, will this code handle it?
        }
Example #2
0
        /// <summary>
        /// Builds the concept-to-WordNet-sense table: reads every concept name from
        /// <c>AllConcepts.txt</c>, maps each one to WordNet senses, and serializes the
        /// collected <see cref="WordOlogy"/> entries to <c>wordology.txt</c>.
        /// </summary>
        /// <remarks>
        /// For a concept that has an <c>ENGLISH1</c> property, its fillers are parsed into
        /// word/part-of-speech pairs, disambiguated, and one entry is recorded per sense whose
        /// WordNet lookup succeeds. For a concept without that property, the concept name
        /// itself is looked up as noun, verb, adjective and adverb.
        /// Three message boxes report the counters when the run finishes.
        /// NOTE(review): a single <c>WO</c> value is mutated and added repeatedly. If
        /// <c>WordOlogy</c> is a reference type every list element aliases the same object -
        /// confirm it is a value type or allocate one per entry.
        /// </remarks>
        public void ConstructMapping()
        {
            int       ID                     = -1;
            WordOlogy WO                     = new WordOlogy();
            ArrayList wordologyArr           = new ArrayList();
            int       conceptcounter         = 0;
            int       NoMapLexConcepts       = 0;
            int       CannotGetSenseExeption = 0;

            LoadOntology();

            string _wordologyDirectoryPath =
                @"..\..\..\wordology\";

            // NOTE(review): BinaryFormatter is documented as insecure and is removed in
            // recent .NET versions; kept here because the on-disk format is consumed elsewhere.
            BinaryFormatter bf = new BinaryFormatter();

            // 'using' guarantees all three streams are released even if mapping throws.
            using (FileStream allConceptsFile = new FileStream(_ontologyDirectoryPath + @"\AllConcepts.txt", FileMode.Open))
            using (StreamReader allConceptsFileReader = new StreamReader(allConceptsFile))
            using (FileStream fs = new FileStream(_wordologyDirectoryPath + "\\wordology.txt", FileMode.Create))
            {
                char[] separators = new char[] { '-', '_' };
                string concept;

                while ((concept = allConceptsFileReader.ReadLine()) != null)
                {
                    // A blank line carries no concept name; skip it instead of failing.
                    if (concept.Length == 0)
                    {
                        continue;
                    }

                    // Skip names for which the ontology lookup yields nothing.
                    object ontologyEntry = Onto[concept];
                    if (ontologyEntry == null)
                    {
                        continue;
                    }

                    Concept  C                   = (Concept)ontologyEntry;
                    Property maplexProperty      = C.FullProperties["ENGLISH1"];
                    int      NoOfSensesSucceeded = 0;

                    if (maplexProperty != null)
                    {
                        List<MyWordInfo> maplexsenses = new List<MyWordInfo>();
                        // Parallel to maplexsenses: the two-character tag of the filler that
                        // produced each entry, or null when that filler had none.
                        List<string> senseTags = new List<string>();

                        for (int i = 0; i < maplexProperty.Fillers.Count; i++)
                        {
                            string[] splt = maplexProperty.Fillers[i].ScalarFiller.Split(separators);

                            // Some fillers have no separator (and so no type) at all.
                            if (splt.Length <= 1)
                            {
                                continue;
                            }

                            string lastToken = splt[splt.Length - 1];
                            bool   hasTag    = lastToken.Length == 2;

                            MyWordInfo mwi = new MyWordInfo();
                            // Every token except the last is part of the word.
                            mwi.Word += string.Join(" ", splt, 0, splt.Length - 1);

                            if (hasTag)
                            {
                                switch (lastToken[0])
                                {
                                    case 'v':
                                        mwi.Pos = Wnlib.PartsOfSpeech.Verb;
                                        break;
                                    case 'n':
                                        mwi.Pos = Wnlib.PartsOfSpeech.Noun;
                                        break;
                                    case 'a':
                                        mwi.Pos = Wnlib.PartsOfSpeech.Adj;
                                        break;
                                    case 'r':
                                        mwi.Pos = Wnlib.PartsOfSpeech.Adv;
                                        break;
                                    default:
                                        mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                                        break;
                                }
                            }
                            else
                            {
                                // The last token is not a tag, so it belongs to the word
                                // (e.g. "a-bomb").
                                mwi.Pos   = Wnlib.PartsOfSpeech.Unknown;
                                mwi.Word += " " + lastToken;
                            }

                            // Keep the entry unless it repeats the previous word/POS pair.
                            // Testing Count == 0 (not i == 0) keeps later fillers usable when
                            // the first filler was skipped above.
                            bool isNew = maplexsenses.Count == 0;
                            if (!isNew)
                            {
                                MyWordInfo previous = maplexsenses[maplexsenses.Count - 1];
                                isNew = mwi.Word != previous.Word || mwi.Pos != previous.Pos;
                            }
                            if (isNew)
                            {
                                maplexsenses.Add(mwi);
                                senseTags.Add(hasTag ? lastToken : null);
                            }
                        }

                        if (maplexsenses.Count > 0)
                        {
                            WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                            MyWordInfo[]           res = wsd.Disambiguate(maplexsenses.ToArray());

                            for (int r = 0; r < res.Length; r++)
                            {
                                MyWordInfo wi = res[r];

                                // Use the tag of the filler this sense came from; indexing
                                // Fillers by r would drift once any filler was skipped.
                                // Assumes Disambiguate preserves input order - TODO confirm.
                                if (r < senseTags.Count && senseTags[r] != null)
                                {
                                    WO.SenseNo = senseTags[r];
                                }

                                Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);

                                try
                                {
                                    Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                                    SynSet      sense = new SynSet(index, wi.Sense, null);
                                    WO.Sense = sense.defn;
                                    NoOfSensesSucceeded++;
                                    try
                                    {
                                        WO.Pos = p.name;
                                    }
                                    catch
                                    {
                                        WO.Pos = wi.Pos.ToString();
                                    }
                                    ID++;
                                    WO.Word    = wi.Word;
                                    WO.ID      = ID;
                                    WO.Concept = concept;
                                    // Record the entry only when the lookup succeeded, so a
                                    // failed lookup does not re-add the previous entry's data.
                                    wordologyArr.Add(WO);
                                }
                                catch
                                {
                                    // Best effort: a word WordNet cannot resolve is skipped
                                    // and reflected in CannotGetSenseExeption below.
                                }

                                if (NoOfSensesSucceeded == 0)
                                {
                                    CannotGetSenseExeption++;
                                }
                                // bf.Serialize(fs, "\n" + WO);
                            }
                            conceptcounter++;
                        }
                    }
                    else
                    {
                        NoMapLexConcepts++;

                        // No lexical mapping: try the concept name itself under each POS.
                        NoOfSensesSucceeded += AddWordNetSensesForPos(concept, Wnlib.PartsOfSpeech.Noun, "noun", ref WO, wordologyArr, ref ID);
                        NoOfSensesSucceeded += AddWordNetSensesForPos(concept, Wnlib.PartsOfSpeech.Verb, "verb", ref WO, wordologyArr, ref ID);
                        NoOfSensesSucceeded += AddWordNetSensesForPos(concept, Wnlib.PartsOfSpeech.Adj, "adj", ref WO, wordologyArr, ref ID);
                        NoOfSensesSucceeded += AddWordNetSensesForPos(concept, Wnlib.PartsOfSpeech.Adv, "adv", ref WO, wordologyArr, ref ID);

                        if (NoOfSensesSucceeded != 0)
                        {
                            conceptcounter++;
                        }
                    }
                }//end while

                bf.Serialize(fs, wordologyArr);
            }

            MessageBox.Show("no map-lex concepts number = " + NoMapLexConcepts.ToString());
            MessageBox.Show("can't getsense pos number = " + CannotGetSenseExeption.ToString());
            MessageBox.Show(conceptcounter.ToString());
        }

        /// <summary>
        /// Looks <paramref name="concept"/> up in WordNet under one part of speech and appends
        /// one entry to <paramref name="wordologyArr"/> per relatedness option and sense number.
        /// </summary>
        /// <param name="concept">Concept name, used both as the lookup key and as the entry's word.</param>
        /// <param name="pos">Part of speech used for the index lookup and the relatedness options.</param>
        /// <param name="posName">Part-of-speech name stored in the entry and passed to <c>PartOfSpeech.of</c> for the search.</param>
        /// <param name="WO">Shared entry that is filled in and added; passed by reference so field updates reach the caller whether it is a class or a struct.</param>
        /// <param name="wordologyArr">List receiving the entries.</param>
        /// <param name="ID">Running entry identifier; incremented before each entry is assigned, matching the map-lex branch so identifiers stay unique.</param>
        /// <returns>The number of entries added; 0 when the word is not indexed or the lookup fails at once.</returns>
        private static int AddWordNetSensesForPos(string concept, Wnlib.PartsOfSpeech pos, string posName, ref WordOlogy WO, ArrayList wordologyArr, ref int ID)
        {
            int added = 0;
            try
            {
                Wnlib.Index index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(pos));
                if (index != null)
                {
                    WO.Pos = posName;
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(pos);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            Search se    = new Search(concept, true, PartOfSpeech.of(posName), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Concept = concept;
                            WO.Word    = concept;
                            WO.Sense   = sense.defn;
                            ID++;
                            WO.ID = ID;
                            //bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                            added++;
                        }
                    }
                }
            }
            catch
            {
                // Best effort: a failure stops this part of speech only; entries already
                // added stay in the list and are included in the returned count.
            }
            return added;
        }
Example #3
0
        /// <summary>
        /// Walks the parse tree depth-first and writes a sense definition and sense number onto
        /// each leaf node whose grammatical category carries a sense.
        /// </summary>
        /// <remarks>
        /// "PPN" leaves are disambiguated on the spot using the placeholder word "Number" (when
        /// the node text parses as a double) or "proper_name". Other sense-bearing leaves take
        /// the next queued sense from <c>NodesSenses</c> / <c>SensesNos</c> when the node's word
        /// matches the current entry of <c>DisambRes</c>.
        /// NOTE(review): a verb leaf that matches in the first check is tested again in the
        /// inflection check below and may consume a second queued sense - confirm intent.
        /// </remarks>
        /// <param name="parsetree">Tree that owns <paramref name="node"/>; used to obtain the node's word.</param>
        /// <param name="node">Current node; inner nodes recurse into their children.</param>
        /// <param name="wordinfoArr">Passed through the recursion unchanged; not read here.</param>
        /// <param name="j">Index into <c>DisambRes</c>; advanced each time a queued sense is consumed.</param>
        private void FillSenses(ParseTree parsetree, ParseNode node, ref ArrayList wordinfoArr, ref int j)
        {
            if (node.Children != null)
            {
                for (int i = 0; i < node.Children.Count; i++)
                {
                    FillSenses(parsetree, (ParseNode)node.Children[i], ref wordinfoArr, ref j);
                }
                return;
            }

            SentenceParser dummysp = new SentenceParser();
            string         str     = node.Goal;

            if (str == "PPN")
            {
                WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                double score;
                double num;

                // Numbers and proper names differ only in the placeholder word looked up.
                string placeholder = double.TryParse(node.Text, out num) ? "Number" : "proper_name";

                MyWordInfo[] ret = wsd.MMG_Disambiguate(new MyWordInfo[] { new MyWordInfo(placeholder, PartsOfSpeech.Noun) }, out score);
                PartOfSpeech pos = PartOfSpeech.of(PartsOfSpeech.Noun);

                Index  index = Wnlib.Index.lookup(ret[0].Word.ToLower(), pos);
                SynSet sense = new SynSet(index, ret[0].Sense, null);

                node.Sense   = sense.defn;
                node.SenseNo = ret[0].Sense;
                return;
            }

            bool carriesSense =
                str == "N" || str.Contains("NPP") || (str == "VING") || str.Contains("PPJ") ||
                (str == "VPSP") || (str == "BE1") || (str == "BE2") || (str == "V") ||
                str.Contains("CPADJ") || str.Contains("ADJ") || str.Contains("PADV") ||
                str.Contains("ADV") || str == "VINF";
            if (!carriesSense)
            {
                return;
            }

            // Modifications made on 5/7.
            // First space-separated token of the text before ':' in the current entry.
            string[] spltstr    = DisambRes[j].ToString().Split(':');
            string   disambWord = spltstr[0].Split(' ')[0];

            if (node.Senses != null && node.Goal.Contains("V"))
            {
                VerbSense VS   = (VerbSense)node.Senses[0];
                string    verb = VS.Sense.ToString().Split('#')[0];

                // Replace the verb with the first form GetINFOfVerb returns, if any.
                ArrayList results = dummysp.GetINFOfVerb(verb);
                if (results.Count > 0)
                {
                    verb = (string)results[0];
                }

                if (disambWord == verb)
                {
                    TryAssignNextSense(node, ref j);
                }
            }

            if (str == "N")
            {
                // Guarded like the other consume sites, so an exhausted queue leaves the
                // node untouched instead of throwing.
                TryAssignNextSense(node, ref j);
            }
            else
            {
                string nodeWord = SyntacticAnalyzer.SentenceParser.GetWordString(parsetree, node).ToLower();

                if (node.Goal == "V" || node.Goal == "BE1" || node.Goal == "VINF" || node.Goal == "VPSP" || node.Goal == "VING")
                {
                    ArrayList results = dummysp.GetINFOfVerb(nodeWord);
                    if (results.Count > 0)
                    {
                        nodeWord = (string)results[0];
                    }
                }

                if (MatchesInflectedForm(disambWord, nodeWord))
                {
                    TryAssignNextSense(node, ref j);
                }
            }
        }

        /// <summary>
        /// Moves the first queued sense from <c>NodesSenses</c> / <c>SensesNos</c> onto
        /// <paramref name="node"/> and advances <paramref name="j"/>.
        /// </summary>
        /// <returns><c>false</c>, with nothing changed, when <c>NodesSenses</c> is empty.</returns>
        private bool TryAssignNextSense(ParseNode node, ref int j)
        {
            if (NodesSenses.Count <= 0)
            {
                return false;
            }

            node.Sense   = (string)NodesSenses[0];
            node.SenseNo = (int)SensesNos[0];
            SensesNos.RemoveAt(0);
            NodesSenses.RemoveAt(0);
            j++;
            return true;
        }

        /// <summary>
        /// Tells whether <paramref name="disambWord"/> and <paramref name="nodeWord"/> are equal
        /// or differ only by one of the suffix patterns checked below.
        /// </summary>
        /// <remarks>
        /// Patterns that strip a final character are skipped for empty strings, which would
        /// otherwise make <c>Remove</c> throw.
        /// </remarks>
        private static bool MatchesInflectedForm(string disambWord, string nodeWord)
        {
            // nodeWord is disambWord, or disambWord plus "s" / "es".
            if (disambWord == nodeWord || disambWord + 's' == nodeWord || disambWord + "es" == nodeWord)
            {
                return true;
            }

            // nodeWord is disambWord with its last character replaced by "ies".
            if (disambWord.Length > 0 && disambWord.Remove(disambWord.Length - 1) + "ies" == nodeWord)
            {
                return true;
            }

            // disambWord is nodeWord plus "ing" / "ings".
            string withIng = nodeWord + "ing";
            if (disambWord == withIng || disambWord == withIng + "s")
            {
                return true;
            }

            // disambWord is nodeWord with its last character replaced by "ing" / "ings".
            if (nodeWord.Length > 0)
            {
                string droppedLastIng = nodeWord.Remove(nodeWord.Length - 1) + "ing";
                if (disambWord == droppedLastIng || disambWord == droppedLastIng + 's')
                {
                    return true;
                }
            }

            return false;
        }
Example #4
0
        /// <summary>
        /// Maps one concept to WordNet senses through its lexical-mapping property: parses the
        /// property's fillers into word/part-of-speech pairs, disambiguates them, and appends a
        /// <see cref="WordOlogy"/> entry to <c>wordologyArr</c> for every sense whose WordNet
        /// lookup succeeds.
        /// </summary>
        /// <remarks>
        /// Updates <c>ID</c>, <c>wordologyArr</c>, <c>CannotGetSenseExeption</c> and
        /// <c>conceptcounter</c>, none of which are locals of this method.
        /// NOTE(review): a single <c>WO</c> value is mutated and added repeatedly. If
        /// <c>WordOlogy</c> is a reference type every added element aliases the same object -
        /// confirm it is a value type or allocate one per entry.
        /// </remarks>
        /// <param name="concept">Concept name stored on each produced entry.</param>
        /// <param name="maplexProperty">Property whose fillers look like <c>word-pos#</c>; nothing is done when it is null.</param>
        private void MapConceptsWithMapLex(string concept, Property maplexProperty)
        {
            if (maplexProperty == null)
            {
                return;
            }

            WordOlogy WO = new WordOlogy();

            List<MyWordInfo> maplexsenses = new List<MyWordInfo>();
            // Parallel to maplexsenses: the two-character tag of the filler that produced each
            // entry, or null when that filler had none.
            List<string> senseTags           = new List<string>();
            int          NoOfSensesSucceeded = 0;
            char[]       separators          = new char[] { '-', '_' };

            for (int i = 0; i < maplexProperty.Fillers.Count; i++)
            {
                string[] splt = maplexProperty.Fillers[i].ScalarFiller.Split(separators);

                // Some fillers have no separator (and so no type) at all.
                if (splt.Length <= 1)
                {
                    continue;
                }

                string lastToken = splt[splt.Length - 1];
                bool   hasTag    = lastToken.Length == 2;

                MyWordInfo mwi = new MyWordInfo();
                // Every token except the last is part of the word.
                mwi.Word += string.Join(" ", splt, 0, splt.Length - 1);

                if (hasTag)
                {
                    switch (lastToken[0])
                    {
                        case 'v':
                            mwi.Pos = Wnlib.PartsOfSpeech.Verb;
                            break;
                        case 'n':
                            mwi.Pos = Wnlib.PartsOfSpeech.Noun;
                            break;
                        case 'a':
                            mwi.Pos = Wnlib.PartsOfSpeech.Adj;
                            break;
                        case 'r':
                            mwi.Pos = Wnlib.PartsOfSpeech.Adv;
                            break;
                        default:
                            mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                            break;
                    }
                }
                else
                {
                    // The last token is not a tag, so it belongs to the word (e.g. "a-bomb").
                    mwi.Pos   = Wnlib.PartsOfSpeech.Unknown;
                    mwi.Word += " " + lastToken;
                }

                // Keep the entry unless it repeats the previous word/POS pair. Testing
                // Count == 0 (not i == 0) keeps later fillers usable when the first filler
                // was skipped above.
                bool isNew = maplexsenses.Count == 0;
                if (!isNew)
                {
                    MyWordInfo previous = maplexsenses[maplexsenses.Count - 1];
                    isNew = mwi.Word != previous.Word || mwi.Pos != previous.Pos;
                }
                if (isNew)
                {
                    maplexsenses.Add(mwi);
                    senseTags.Add(hasTag ? lastToken : null);
                }
            }

            if (maplexsenses.Count == 0)
            {
                return;
            }

            WordSenseDisambiguator wsd = new WordSenseDisambiguator();
            MyWordInfo[]           res = wsd.Disambiguate(maplexsenses.ToArray());

            for (int r = 0; r < res.Length; r++)
            {
                MyWordInfo wi = res[r];

                // Use the tag of the filler this sense came from; indexing Fillers by r would
                // drift once any filler was skipped.
                // Assumes Disambiguate preserves input order - TODO confirm.
                if (r < senseTags.Count && senseTags[r] != null)
                {
                    WO.SenseNo = senseTags[r];
                }

                Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);

                try
                {
                    Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                    SynSet      sense = new SynSet(index, wi.Sense, null);
                    WO.Sense = sense.defn;
                    // AllSensesMapped++;
                    NoOfSensesSucceeded++;
                    try
                    {
                        WO.Pos = p.name;
                    }
                    catch
                    {
                        WO.Pos = wi.Pos.ToString();
                    }
                    ID++;
                    WO.Word    = wi.Word;
                    WO.ID      = ID;
                    WO.Concept = concept;
                    wordologyArr.Add(WO);
                }
                catch
                {
                    // Best effort: a word WordNet cannot resolve is skipped and reflected in
                    // CannotGetSenseExeption below.
                }

                if (NoOfSensesSucceeded == 0)
                {
                    CannotGetSenseExeption++;
                }
                //bf.Serialize(fs, "\n" + WO);
            }
            conceptcounter++;
        }