Пример #1
0
        /// <summary>
        /// Reports whether <paramref name="word"/> (lower-cased) or one of the
        /// forms produced by <c>MorphStr</c> has an index entry for the given
        /// part of speech.
        /// </summary>
        /// <param name="word">Word to look up; it is lower-cased before the lookup.</param>
        /// <param name="pos">Part of speech used for every lookup.</param>
        /// <returns>0 when an index entry was found, -1 otherwise.</returns>
        public static int GetSynsetIndex(string word, PartsOfSpeech pos)
        {
            word = word.ToLower();

            Index found = Wnlib.Index.lookup(word, PartOfSpeech.of(pos));
            if (found == null)
            {
                // Direct lookup failed: walk the morphological variants until
                // one resolves or the generator is exhausted.
                MorphStr variants = new MorphStr(word, PartOfSpeech.of(pos));
                while (found == null)
                {
                    string candidate = variants.next();
                    if (candidate == null)
                    {
                        break;
                    }

                    found = Index.lookup(candidate, PartOfSpeech.of(pos));
                }
            }

            return found == null ? -1 : 0;
        }
Пример #2
0
        /// <summary>
        /// Runs a search for <paramref name="t"/> and, when no match has been
        /// recorded in <c>HasMatch</c>, for each of its morphological forms,
        /// accumulating the <c>WNDB.is_defined</c> results.
        /// </summary>
        /// <param name="t">Term to search for.</param>
        /// <param name="p">Part-of-speech name passed to <c>PartOfSpeech.of</c>.</param>
        /// <param name="b">Receives <c>NonEmpty</c> of the accumulated search set.</param>
        /// <param name="obj">Receives the accumulated search set.</param>
        /// <param name="list">Forwarded unchanged to <c>AddSearchFor</c>.</param>
        public void OverviewFor(string t, string p, ref bool b, ref SearchSet obj, ArrayList list)
        {
            PartOfSpeech partOfSpeech = PartOfSpeech.of(p);
            SearchSet    defined      = WNDB.is_defined(t, partOfSpeech);
            MorphStr     forms        = new MorphStr(t, partOfSpeech);

            // Search the term as given; a hit is remembered on the instance.
            if (AddSearchFor(t, partOfSpeech, list))
            {
                HasMatch = true;
            }

            // HasMatch is instance state, so it may already be true on entry.
            if (!HasMatch)
            {
                for (string form = forms.next(); form != null; form = forms.next())
                {
                    if (form == t)
                    {
                        // The unmodified term was already searched above.
                        continue;
                    }

                    defined = defined + WNDB.is_defined(form, partOfSpeech);
                    AddSearchFor(form, partOfSpeech, list);
                }
            }

            b   = defined.NonEmpty;
            obj = defined;
        }
Пример #3
0
        /// <summary>
        /// Looks up the index entry for <paramref name="pos"/> and passes it to
        /// <c>LookupCandidates</c>. The word is lower-cased in place first.
        /// </summary>
        /// <param name="pos">
        /// Word and part of speech to look up. <c>Word</c> is modified: it is
        /// lower-cased, and during the morph fallback it is replaced by each
        /// morph that is tried.
        /// </param>
        /// <param name="includeMorphs">
        /// When true, morphological forms are tried after the direct lookup fails.
        /// </param>
        /// <returns>
        /// The result of <c>LookupCandidates</c>, or null when no index entry was found.
        /// </returns>
        public MyWordInfo[] FindSynonyms(ref MyWordInfo pos, bool includeMorphs)
        {
            pos.Word = pos.Word.ToLower();

            Index entry = Index.lookup(pos.Word, PartOfSpeech.of(pos.Pos));

            if (entry == null && !includeMorphs)
            {
                return null;
            }

            if (entry == null)
            {
                var variants = new MorphStr(pos.Word, PartOfSpeech.of(pos.Pos));
                while (entry == null)
                {
                    string candidate = variants.next();
                    if (candidate == null)
                    {
                        break;
                    }

                    entry = Index.lookup(candidate, PartOfSpeech.of(pos.Pos));

                    // NOTE(review): Word is overwritten even when this lookup
                    // failed, so after an unsuccessful search the caller sees
                    // the last morph tried. Confirm whether that is intended.
                    pos.Word = candidate;
                }
            }

            if (entry == null)
            {
                return null;
            }

            return LookupCandidates(entry, pos);
        }
Пример #4
0
        /// <summary>
        /// Classifies a sense key of the form <c>word#pos#senseNumber</c> by
        /// running a "HYPERPTR" search and handing the resulting senses to
        /// <c>IsHumanHelper</c>.
        /// </summary>
        /// <param name="strSense">
        /// Sense key with '#'-separated fields: word, part-of-speech tag, sense number.
        /// </param>
        /// <returns>
        /// 1 when <c>IsHumanHelper</c> accepts the senses, 2 when it rejects them,
        /// and 0 when the key is null, empty, lacks a numeric sense-number field,
        /// or the search produced no senses.
        /// </returns>
        public static int GetType(string strSense)
        {
            if (string.IsNullOrEmpty(strSense))
            {
                return(0);
            }

            string[] fields = strSense.Split('#');

            // The sense number lives in the third field. A key without it, or
            // with a non-numeric value, cannot be searched, so it is reported
            // the same way as an empty key instead of throwing.
            int senseNumber;
            if (fields.Length < 3 || !int.TryParse(fields[2], out senseNumber))
            {
                return(0);
            }

            // Only the noun tag is mapped; any other tag leaves pos null and is
            // passed to Search as such.
            PartOfSpeech pos = null;
            if (fields[1] == "n")
            {
                pos = PartOfSpeech.of(PartsOfSpeech.Noun);
            }

            SearchType sty   = new SearchType(true, "HYPERPTR");
            Search     se    = new Search(fields[0], true, pos, sty, senseNumber);
            SynSetList slist = se.senses;

            if (slist == null || slist.Count <= 0)
            {
                return(0);
            }

            return(IsHumanHelper(slist) ? 1 : 2);
        }
Пример #5
0
        /// <summary>
        /// Lazily yields morphological candidates for <paramref name="str"/>.
        /// </summary>
        /// <remarks>
        /// Strategies are tried in order and the first one that applies wins:
        /// the exception list, the verb-with-preposition handler, a whole-string
        /// morph (non-verbs only), and finally a word-by-word morph whose result
        /// is yielded only when <c>IsDefinded</c> accepts it.
        /// </remarks>
        /// <param name="str">Word or space-separated phrase.</param>
        /// <param name="pos">
        /// Part of speech; when null the method recurses for "n", "v" and "a".
        /// </param>
        /// <param name="context">Forwarded to every helper call.</param>
        /// <returns>A deferred sequence of candidate strings.</returns>
        public static IEnumerable<string> GetMorphs(string str, PartOfSpeech pos, WordNetContext context)
        {
            if (pos == null)
            {
                // No part of speech given: concatenate the results for each one.
                foreach (string symbol in new string[] { "n", "v", "a" })
                {
                    foreach (var candidate in GetMorphs(str, PartOfSpeech.of(symbol), context))
                    {
                        yield return candidate;
                    }
                }

                yield break;
            }

            if (pos.clss == "SATELLITE")
            {
                pos = PartOfSpeech.of("adj");
            }

            var parts = str.Split(' ');
            string tmp = null;

            /* first try exception list */
            var exceptions = GetExceptions(str, pos, context);
            if (exceptions.Any())
            {
                // The sequence is enumerated a second time here, after Any().
                foreach (var exception in exceptions)
                {
                    yield return exception;
                }

                yield break;
            }

            if (pos.name == "verb" && parts.Length > 1 && HasPreposition(parts))
            {
                // MorphPreposition's result is yielded as-is, including null.
                yield return MorphPreposition(str, parts, pos, context);
                yield break;
            }

            /* then try simply morph on original string */
            if (pos.name != "verb" && MorphWord(str, pos, morph => tmp = morph, context))
            {
                yield return tmp;
                yield break;
            }

            // Last resort: morph each space-separated word (and each
            // hyphen-separated piece of it) individually.
            bool anyChanged = false;
            for (int i = 0; i < parts.Length; i++)
            {
                string token = parts[i];

                if (!token.Contains('-'))
                {
                    if (MorphWord(token, pos, morph => parts[i] = morph, context))
                    {
                        anyChanged = true;
                    }

                    continue;
                }

                var pieces = token.Split('-');
                bool pieceChanged = false;
                for (int j = 0; j < pieces.Length; j++)
                {
                    if (MorphWord(pieces[j], pos, morph => pieces[j] = morph, context))
                    {
                        pieceChanged = true;
                    }
                }

                if (pieceChanged)
                {
                    parts[i]   = string.Join("-", pieces);
                    anyChanged = true;
                }
            }

            if (anyChanged)
            {
                var rebuilt = string.Join(" ", parts);
                if (IsDefinded(rebuilt, pos, context))
                {
                    yield return rebuilt;
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Finds a base form for a verb phrase such as "looking up words".
        /// The first word is assumed to be the verb; it is replaced by its
        /// exception-list forms and suffix-stripped forms, and each result is
        /// tested with the remainder of the phrase attached.
        /// </summary>
        /// <param name="str">The full phrase.</param>
        /// <param name="parts"><paramref name="str"/> split on spaces.</param>
        /// <param name="pos">Part of speech used to morph the last word of the phrase.</param>
        /// <param name="context">Forwarded to every helper call.</param>
        /// <returns>
        /// The first candidate phrase accepted by <c>IsDefinded</c>; otherwise the
        /// original verb followed by the phrase with its last word morphed, when
        /// such a morph exists; otherwise null. Also null when the first word
        /// contains a character that is neither a letter nor a digit.
        /// </returns>
        private static string MorphPreposition(string str, string[] parts, PartOfSpeech pos, WordNetContext context)
        {
            string word     = parts[0];
            string lastWord = parts[parts.Length - 1];

            // Everything after the verb, including the leading space.
            string rest = str.Substring(word.Length);

            // Variant of rest whose last word has been morphed; stays null for
            // two-word phrases or when the last word has no morph.
            string end = null;
            if (parts.Length > 2 && rest.Length >= lastWord.Length)
            {
                // Keep rest up to (not including) the last word, then append the
                // morph. The prefix length is rest.Length - lastWord.Length;
                // using lastWord.Length alone cut rest at the wrong position.
                string restWithoutLast = rest.Substring(0, rest.Length - lastWord.Length);
                MorphWord(lastWord, pos, morph => end = restWithoutLast + morph, context);
            }

            if (!word.All(c => char.IsLetterOrDigit(c)))
            {
                return null;
            }

            PartOfSpeech verb = PartOfSpeech.of("verb");
            string       match;

            /* First try to find the verb in the exception list */
            foreach (string excWord in GetExceptions(word, verb, context))
            {
                match = FindDefinedVerbPhrase(excWord, rest, end, verb, context);
                if (match != null)
                {
                    return match;
                }
            }

            // Then try every verb suffix rule whose suffix matches the word.
            var suffixes     = sufx[verb.ident];
            var replacements = addr[verb.ident];
            for (int i = 0; i < suffixes.Length; i++)
            {
                string suffix = suffixes[i];
                if (!word.EndsWith(suffix))
                {
                    continue;
                }

                string stem = word.Substring(0, word.Length - suffix.Length) + replacements[i];

                match = FindDefinedVerbPhrase(stem, rest, end, verb, context);
                if (match != null)
                {
                    return match;
                }
            }

            // Nothing was defined: fall back to the unchanged verb plus the
            // morphed tail, if there is one.
            return end != null ? word + end : null;
        }

        /// <summary>
        /// Returns the first of <c>verbForm + rest</c> and <c>verbForm + end</c>
        /// (the latter only when <paramref name="end"/> is not null) that
        /// <c>IsDefinded</c> accepts as a verb, or null when neither is accepted.
        /// </summary>
        private static string FindDefinedVerbPhrase(string verbForm, string rest, string end, PartOfSpeech verb, WordNetContext context)
        {
            string candidate = verbForm + rest;
            if (IsDefinded(candidate, verb, context))
            {
                return candidate;
            }

            if (end != null)
            {
                candidate = verbForm + end;
                if (IsDefinded(candidate, verb, context))
                {
                    return candidate;
                }
            }

            return null;
        }
Пример #7
0
        /// <summary>
        /// Walks the parse tree depth-first and, for each leaf node (a node whose
        /// <c>Children</c> is null), fills in <c>Sense</c> and <c>SenseNo</c>
        /// depending on the node's <c>Goal</c> tag.
        /// </summary>
        /// <remarks>
        /// Reads the members <c>DisambRes</c>, <c>NodesSenses</c> and
        /// <c>SensesNos</c>, which are declared outside this method. Each time a
        /// sense is assigned from them, the first element of <c>NodesSenses</c>
        /// and <c>SensesNos</c> is removed and <paramref name="j"/> is incremented.
        /// </remarks>
        /// <param name="parsetree">Tree the node belongs to; used to obtain the node's word text.</param>
        /// <param name="node">Node to process; its children are processed recursively.</param>
        /// <param name="wordinfoArr">Not read here; only forwarded to the recursive calls.</param>
        /// <param name="j">Index into <c>DisambRes</c> of the next entry to match.</param>
        private void FillSenses(ParseTree parsetree, ParseNode node, ref ArrayList wordinfoArr, ref int j)
        {
            if (node.Children != null)
            {
                // Inner node: recurse into every child in order.
                for (int i = 0; i < node.Children.Count; i++)
                {
                    ParseNode pn = (ParseNode)node.Children[i];
                    FillSenses(parsetree, pn, ref wordinfoArr, ref j);
                }
            }
            else
            {
                // dummysp is only used for its GetINFOfVerb method below.
                SentenceParser dummysp = new SentenceParser();
                string         str     = node.Goal;
                if (str == "PPN")
                {
                    // PPN leaves get the sense of a fixed placeholder noun:
                    // "Number" when the text parses as a double, "proper_name"
                    // otherwise. The two branches differ only in that word.
                    WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                    double score;
                    double num;
                    if (double.TryParse(node.Text, out num))
                    {
                        MyWordInfo[] ret = wsd.MMG_Disambiguate(new MyWordInfo[] { new MyWordInfo("Number", PartsOfSpeech.Noun) }, out score);
                        PartOfSpeech pos = PartOfSpeech.of(PartsOfSpeech.Noun);

                        Index  index = Wnlib.Index.lookup(ret[0].Word.ToLower(), pos);
                        SynSet sense = new SynSet(index, ret[0].Sense, null);

                        node.Sense   = sense.defn;
                        node.SenseNo = ret[0].Sense;
                    }
                    else
                    {
                        MyWordInfo[] ret = wsd.MMG_Disambiguate(new MyWordInfo[] { new MyWordInfo("proper_name", PartsOfSpeech.Noun) }, out score);
                        PartOfSpeech pos = PartOfSpeech.of(PartsOfSpeech.Noun);

                        Index  index = Wnlib.Index.lookup(ret[0].Word.ToLower(), pos);
                        SynSet sense = new SynSet(index, ret[0].Sense, null);

                        node.Sense   = sense.defn;
                        node.SenseNo = ret[0].Sense;
                    }
                }
                else
                if (str == "N" || str.Contains("NPP") || (str == "VING") || str.Contains("PPJ") || (str == "VPSP") || (str == "BE1") || (str == "BE2") || (str == "V") || str.Contains("CPADJ") || str.Contains("ADJ") || str.Contains("PADV") || str.Contains("ADV") || str == "VINF")
                {
                    // modifications 5/7
                    // The text before the first ':' of DisambRes[j] is split on
                    // spaces further down; its first token is what gets compared
                    // against this node's word.
                    string [] spltstr  = DisambRes[j].ToString().Split(':');
                    VerbSense VS       = new VerbSense();
                    // The initial 10-element arrays are placeholders; both
                    // variables are reassigned before they are read.
                    string[]  spltstr2 = new string[10];
                    string[]  spltstr3 = new string[10];
                    if (node.Senses != null && node.Goal.Contains("V"))
                    {
                        // Verb-like goal with attached senses: compare the part
                        // of the first sense before '#' (replaced by the first
                        // GetINFOfVerb result, if any) with the disambiguated word.
                        VS       = (VerbSense)node.Senses[0];
                        spltstr2 = VS.Sense.ToString().Split('#');
                        spltstr3 = spltstr[0].Split(' ');

                        ArrayList results = dummysp.GetINFOfVerb(spltstr2[0]);
                        if (results.Count > 0)
                        {
                            spltstr2[0] = (string)results[0];
                        }


                        if (spltstr3[0] == spltstr2[0])
                        {
                            if (NodesSenses.Count > 0)
                            {
                                node.Sense   = (string)NodesSenses[0];
                                node.SenseNo = (int)SensesNos[0];
                                SensesNos.RemoveAt(0);
                                NodesSenses.RemoveAt(0);
                                j++;
                            }
                        }
                    }
                    // NOTE(review): this if/else is not chained to the verb
                    // block above, so a verb node that already took a sense
                    // there still runs the else branch below and may take a
                    // second one. Confirm this is intended.
                    if (str == "N")
                    {
                        // NOTE(review): unlike the other branches, this one does
                        // not check NodesSenses.Count before reading element 0.
                        node.Sense   = (string)NodesSenses[0];
                        node.SenseNo = (int)SensesNos[0];
                        SensesNos.RemoveAt(0);
                        NodesSenses.RemoveAt(0);
                        j++;
                    }
                    else
                    {
                        string NodeWord = SyntacticAnalyzer.SentenceParser.GetWordString(parsetree, node);


                        spltstr3 = spltstr[0].Split(' ');
                        string nodeWord = NodeWord.ToLower();
                        if (node.Goal == "V" || node.Goal == "BE1" || node.Goal == "VINF" || node.Goal == "VPSP" || node.Goal == "VING")
                        {
                            // Replace the node word with the first GetINFOfVerb
                            // result, when there is one.
                            ArrayList results = dummysp.GetINFOfVerb(nodeWord);
                            if (results.Count > 0)
                            {
                                nodeWord = (string)results[0];
                            }
                        }

                        // Hand-built inflection variants used by the comparison
                        // below: dummy drops the last character of the
                        // disambiguated word and appends "ies"; dummy2..dummy4
                        // are "-ing" / "-ings" forms derived from the node word.
                        // NOTE(review): Remove(Length - 1) throws when the
                        // string is empty.
                        string dummy = spltstr3[0];
                        dummy = dummy.Remove(dummy.Length - 1);
                        dummy = dummy + "ies";

                        string dummy2 = nodeWord + "ing";
                        string dummy3 = dummy2 + "s";
                        string dummy4 = nodeWord.Remove(nodeWord.Length - 1);
                        dummy4 = dummy4 + "ing";

                        // Accept the next sense when the disambiguated word and
                        // the node word agree directly or via one of the
                        // variants above.
                        if (spltstr3[0] == nodeWord || spltstr3[0] + 's' == nodeWord || spltstr3[0] + "es" == nodeWord || dummy == nodeWord || spltstr3[0] == dummy2 || spltstr3[0] == dummy3 || spltstr3[0] == dummy4 || spltstr3[0] == dummy4 + 's')
                        {
                            if (NodesSenses.Count > 0)
                            {
                                node.Sense   = (string)NodesSenses[0];
                                node.SenseNo = (int)SensesNos[0];
                                SensesNos.RemoveAt(0);
                                NodesSenses.RemoveAt(0);
                                j++;
                            }
                        }
                    }
                }
            }
        }
Пример #8
0
        /// <summary>
        /// Builds a list of <c>WordOlogy</c> entries for every concept named in
        /// <c>AllConcepts.txt</c> and serializes the list to <c>wordology.txt</c>
        /// with a <c>BinaryFormatter</c>. Three message boxes with counters are
        /// shown at the end.
        /// </summary>
        /// <remarks>
        /// For a concept that has an "ENGLISH1" property, its fillers are parsed
        /// into word/part-of-speech pairs, disambiguated, and each result is
        /// looked up to obtain a definition. For a concept without that property,
        /// the concept name itself is looked up as noun, verb, adjective and
        /// adverb in turn. Every exception raised in those lookups is swallowed
        /// by an empty catch.
        /// NOTE(review): neither file stream is wrapped in using/try-finally, so
        /// an exception outside the inner try blocks leaves both files open.
        /// NOTE(review): the locals senseNo, Sense, Pos, Conceptpath and the
        /// outer p are never read; indxWatcherconceptCounter and AllSensesMapped
        /// are only incremented.
        /// </remarks>
        public void ConstructMapping()
        {
            string    concept        = "";
            string    word           = "";
            int       ID             = -1;
            string    senseNo        = "";
            string    Sense          = "";
            string    Pos            = "";
            // NOTE(review): this single WordOlogy instance is mutated and added
            // to wordologyArr repeatedly. If WordOlogy is a reference type, every
            // list element refers to the same object — confirm against its
            // declaration.
            WordOlogy WO             = new WordOlogy();
            ArrayList wordologyArr   = new ArrayList();
            int       conceptcounter = 0;

            LoadOntology();
            FileStream   allConceptsFile       = new FileStream(_ontologyDirectoryPath + @"\AllConcepts.txt", FileMode.Open);
            StreamReader allConceptsFileReader = new StreamReader(allConceptsFile);

            // The directory literal already ends with a backslash and another
            // one is prepended to the file name below, so the resulting path
            // contains a doubled separator.
            string _wordologyDirectoryPath =
                @"..\..\..\wordology\";

            BinaryFormatter bf = new BinaryFormatter();
            FileStream      fs = new FileStream(
                _wordologyDirectoryPath + "\\wordology.txt", FileMode.Create);
            int indxWatcherconceptCounter = 0;
            int NoMapLexConcepts          = 0;
            int CannotGetSenseExeption    = 0;
            int AllSensesMapped           = 0;

            // One concept name per line.
            while ((concept = allConceptsFileReader.ReadLine()) != null)
            {
                indxWatcherconceptCounter++;
                string   Conceptpath    = _ontologyDirectoryPath + @"\" + concept[0] + @"\" + concept;
                Concept  C              = (Concept)Onto[concept];
                Property maplexProperty = C.FullProperties["ENGLISH1"];

                List <MyWordInfo> maplexsenses = new List <MyWordInfo>();
                MyWordInfo        mwi          = new MyWordInfo();
                int NoOfSensesSucceeded        = 0;
                if (maplexProperty != null)
                {
                    // Pass 1: turn each filler into a word + part of speech.
                    for (int i = 0; i < maplexProperty.Fillers.Count; i++)
                    {
                        string   tmp   = maplexProperty.Fillers[i].ScalarFiller;
                        char[]   charr = new char[] { '-', '_' };
                        string[] splt  = tmp.Split(charr);
                        // there are fillers with no type, e.g. "a-bomb"

                        if (splt.Length > 1)
                        {
                            // All pieces except the last form the word, joined
                            // with spaces.
                            mwi = new MyWordInfo();
                            for (int k = 0; k < splt.Length - 2; k++)
                            {
                                mwi.Word += splt[k] + " ";
                            }
                            mwi.Word += splt[splt.Length - 2];
                            // A two-character last piece is read as a tag whose
                            // first character selects the part of speech;
                            // anything else is treated as part of the word.
                            if (splt[splt.Length - 1].Length == 2)
                            {
                                if (splt[splt.Length - 1][0] == 'v')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Verb;
                                }
                                else if (splt[splt.Length - 1][0] == 'n')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Noun;
                                }
                                else if (splt[splt.Length - 1][0] == 'a')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Adj;
                                }
                                else if (splt[splt.Length - 1][0] == 'r')
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Adv;
                                }
                                else
                                {
                                    mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                                }
                            }
                            else
                            {
                                mwi.Pos   = Wnlib.PartsOfSpeech.Unknown;
                                mwi.Word += " " + splt[splt.Length - 1];
                            }
                            // Skip an entry identical to the one just added.
                            // NOTE(review): when filler 0 is skipped (no
                            // separator) the list is still empty at i > 0, so
                            // this condition is false and nothing is ever added
                            // for this concept — confirm.
                            if (i == 0 || (maplexsenses.Count > 0 && (mwi.Word != maplexsenses[maplexsenses.Count - 1].Word || mwi.Pos != maplexsenses[maplexsenses.Count - 1].Pos)))
                            {
                                maplexsenses.Add(mwi);
                            }
                        }
                        // NOTE(review): original comment here appears to say
                        // "loop over the whole ontology".
                    }


                    if (maplexsenses.Count > 0)
                    {
                        // Pass 2: disambiguate the collected words and record a
                        // definition for each result.
                        MyWordInfo[] maplexArray = new MyWordInfo[maplexsenses.Count];
                        for (int j = 0; j < maplexsenses.Count; j++)
                        {
                            maplexArray[j] = maplexsenses[j];
                        }
                        WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                        MyWordInfo[]           res = new MyWordInfo[maplexArray.Length];
                        res = wsd.Disambiguate(maplexArray);
                        int i = 0;

                        foreach (MyWordInfo wi in res)
                        {
                            // NOTE(review): i counts entries of res, but pass 1
                            // may have skipped fillers, so Fillers[i] is not
                            // guaranteed to be the filler that produced wi.
                            string   tmp   = maplexProperty.Fillers[i].ScalarFiller;
                            char[]   charr = new char[] { '-', '_' };
                            string[] splt  = tmp.Split(charr);

                            if (splt.Length > 1 && splt[splt.Length - 1].Length == 2)
                            {
                                WO.SenseNo = splt[splt.Length - 1];
                            }
                            else
                            {
                                // "sense doesn't have POS";
                            }

                            Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);

                            try
                            {
                                Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                                SynSet      sense = new SynSet(index, res[i].Sense, null);
                                WO.Sense = sense.defn;
                                AllSensesMapped++;
                                NoOfSensesSucceeded++;
                                try
                                {
                                    WO.Pos = p.name;
                                }
                                catch
                                {
                                    // Falls back to the enum text when reading
                                    // p.name throws.
                                    WO.Pos = wi.Pos.ToString();
                                }
                                ID++;
                                WO.Word    = wi.Word;
                                WO.ID      = ID;
                                WO.Concept = concept;
                                // NOTE(review): `word` is never assigned after
                                // its "" initialisation, so this overwrites the
                                // Word set three lines above with an empty string.
                                WO.Word    = word;
                            }
                            catch
                            {
                                // Any failure in the lookup above is silently
                                // ignored; WO keeps whatever values it had.
                            };
                            // Incremented once per result for as long as this
                            // concept has no successful sense yet.
                            if (NoOfSensesSucceeded == 0)
                            {
                                CannotGetSenseExeption++;
                            }
                            i++;
                            // bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                        }
                        conceptcounter++;
                    }
                }
                else
                {
                    NoMapLexConcepts++;

                    // No "ENGLISH1" property: look the concept name itself up
                    // under each part of speech. The four try blocks below are
                    // the same code with a different part of speech.
                    // NOTE(review): in all four, WO.ID is assigned before ID is
                    // incremented, whereas the branch above increments first;
                    // with ID starting at -1 the first entry written here can
                    // get ID -1.


                    Wnlib.Index        index;
                    Wnlib.PartOfSpeech p;
                    Search             se;

                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Noun));
                        if (index != null)
                        {
                            WO.Pos = "noun";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                            // One entry is added per (relatedness option, sense
                            // number) pair.
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("noun"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.Sense   = sense.defn;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Verb));
                        if (index != null)
                        {
                            WO.Pos = "verb";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Verb);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("verb"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adj));
                        if (index != null)
                        {
                            WO.Pos = "adj";
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Adj);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("adj"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }
                    try
                    {
                        index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adv));
                        if (index != null)
                        {
                            WO.Pos = "adv";
                            // NOTE(review): the adverb branch requests the Noun
                            // relatedness options, unlike the other three
                            // branches — looks like a copy/paste slip; confirm.
                            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                            foreach (Opt o in relatedness)
                            {
                                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                                {
                                    se = new Search(concept, true, PartOfSpeech.of("adv"), o.sch, senseNumber);
                                    SynSet sense = new SynSet(index, senseNumber, se);
                                    WO.Sense   = sense.defn;
                                    WO.Concept = concept;
                                    WO.Word    = concept;
                                    WO.ID      = ID;
                                    ID++;
                                    NoOfSensesSucceeded++;
                                    AllSensesMapped++;
                                    //bf.Serialize(fs, "\n" + WO);
                                    wordologyArr.Add(WO);
                                }
                            }
                        }
                    }
                    catch
                    { }


                    // The concept counts as mapped when any part of speech
                    // produced at least one sense.
                    if (NoOfSensesSucceeded != 0)
                    {
                        conceptcounter++;
                    }
                }
            }//end while
            allConceptsFileReader.Close();
            allConceptsFile.Close();
            // The whole list is serialized in one call.
            bf.Serialize(fs, wordologyArr);
            fs.Close();
            MessageBox.Show("no map-lex concepts number = " + NoMapLexConcepts.ToString());
            MessageBox.Show("can't getsense pos number = " + CannotGetSenseExeption.ToString());
            MessageBox.Show(conceptcounter.ToString());
        }
Пример #9
0
        /// <summary>
        /// Convert method creates db file from wndb files
        /// dictpath - path to wndb data files
        /// context - dest db context
        /// </summary>
        public static void Convert(string dictPack, string jsonFile)
        {
            WNDB wndb  = new WNDB(dictPack);
            var  poses = (new[] { "n", "v", "a", "r" }).Select(s => PartOfSpeech.of(s));

            // Convert to  Dictionary
            // lemma -> { SynSetGroup: PosSymbol, Synsets = { synset: synonims, definitions, examples } }

            var dict = new Dictionary <string, List <ExpSynSetGroup> >();

            foreach (var pos in poses)
            {
                Console.WriteLine("Process Data of {0}", pos.name);

                foreach (var data in wndb.GetData(pos))
                {
                    //data.adj includes 'a' & 's' pos symbols
                    char posSymbol = pos.symbol.First();

                    bool singleWord = false;
                    if (data.origWords.Count() == 1)
                    {
                        var w = data.origWords.First().word;
                        singleWord = w == w.ToLower();
                    }

                    var synSet = new SynSet
                    {
                        // Skip synonims if where is a single lowercase word
                        Synonims    = (singleWord) ? null : data.origWords.Select(ow => ow.word).ToArray(),
                        Definition  = (data.definitions.Count() == 1) ? data.definitions.First() : null,
                        Definitions = (data.definitions.Count() > 1) ? data.definitions : null,
                        Example     = (data.examples?.Count() == 1) ? data.examples.First() : null,
                        Examples    = (data.examples?.Count() > 1) ? data.examples : null
                    };

                    foreach (var lemma in data.origWords.Select(ow => ow.word.ToLower()))
                    {
                        var synGrps = dict.GetValue(lemma);
                        if (synGrps != null)
                        {
                            var grp = synGrps.FirstOrDefault(g => g.PosSymbol == posSymbol);

                            if (grp == null)
                            {
                                synGrps.Add(new ExpSynSetGroup(posSymbol, synSet));
                            }
                            else
                            {
                                grp.Synsets.Add(synSet);
                            }
                        }
                        else
                        {
                            dict.Add(lemma, new List <ExpSynSetGroup> {
                                new ExpSynSetGroup(posSymbol, synSet)
                            });
                        }
                    }
                }
            }

            // exceptions
            //TODO: remove morphes, ...

            var excepts = new Dictionary <string, List <DictException> >();

            foreach (var pos in poses)
            {
                Console.WriteLine("Process Exceptions of {0}", pos.name);

                foreach (var exwords in wndb.GetExceptions(pos))
                {
                    var morph = Morph.GetBasicForm(exwords[0], pos);
                    for (int i = 1; i < exwords.Length; i++)
                    {
                        var baseForm = exwords[i];
                        if (baseForm == exwords[0] || baseForm == morph)
                        {
                            //Console.WriteLine($"Skip: {(exwords[0])} -> {baseForm}/{morph}");
                            continue;
                        }

                        List <ExpSynSetGroup> synGrps = dict.GetValue(baseForm);
                        if (synGrps == null && baseForm.Contains('-'))
                        {
                            baseForm = baseForm.Replace('-', ' ');
                            dict.TryGetValue(baseForm, out synGrps);
                        }

                        if (synGrps != null)
                        {
                            var posSymbols = string.Join("", synGrps.Select(sg => sg.PosSymbol));
                            var except     = new DictException {
                                BasicForm = baseForm, PosSymbols = posSymbols
                            };

                            List <DictException> baseForms;
                            if (excepts.TryGetValue(exwords[0], out baseForms))
                            {
                                if (!baseForms.Any(e => e.BasicForm == baseForm))
                                {
                                    baseForms.Add(except);
                                }
                            }
                            else
                            {
                                excepts.Add(exwords[0], new List <DictException> {
                                    except
                                });
                            }
                        }
                    }
                }
            }

            Console.WriteLine("Save changes");

            var storage = new ExpDictStorage
            {
                SynSets    = dict,
                Exceptions = excepts
            };

            var serializer = new JsonSerializer();

            serializer.NullValueHandling = NullValueHandling.Ignore;

            using (var stream = File.Open(jsonFile, FileMode.Create))
                using (var writer = new BsonWriter(stream))
                {
                    serializer.Serialize(writer, storage);
                }
        }
Пример #10
0
        /// <summary>
        /// Looks <paramref name="concept"/> up in the WordNet index as a noun, verb, adjective and adverb,
        /// and appends one <c>WordOlogy</c> entry to <c>wordologyArr</c> for every (relation option, sense)
        /// pair of each part of speech that has an index entry. <c>ID</c> is incremented per appended entry
        /// and <c>conceptcounter</c> is incremented once if at least one entry was appended.
        /// </summary>
        /// <param name="concept">
        /// Concept text. It is lower-cased for the index lookup only; the original casing is what gets
        /// stored in the entry and passed to <c>Search</c>.
        /// </param>
        private void MapConceptsWithOutMapLex(string concept)
        {
            // Parallel tables: entry k describes one part-of-speech pass.
            PartsOfSpeech[] lookupPoses = { PartsOfSpeech.Noun, PartsOfSpeech.Verb, PartsOfSpeech.Adj, PartsOfSpeech.Adv };
            string[]        posNames    = { "noun", "verb", "adj", "adv" };

            // NOTE(review): the adverb pass has always used the *noun* relatedness options. This looks like a
            // copy-paste slip (PartsOfSpeech.Adv was probably intended), but it is preserved here because it
            // determines how many entries are appended per sense - confirm against GetRelatedness before changing.
            PartsOfSpeech[] relatednessPoses = { PartsOfSpeech.Noun, PartsOfSpeech.Verb, PartsOfSpeech.Adj, PartsOfSpeech.Noun };

            // NOTE(review): a single WordOlogy instance is mutated and added for every sense, as before.
            // If WordOlogy is a reference type, all appended entries alias this one object - confirm intent.
            WordOlogy WO = new WordOlogy();
            int NoOfSensesSucceeded = 0;

            for (int k = 0; k < lookupPoses.Length; k++)
            {
                try
                {
                    Wnlib.Index index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(lookupPoses[k]));
                    if (index == null)
                    {
                        continue;
                    }

                    WO.Pos = posNames[k];
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(relatednessPoses[k]);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            Search se    = new Search(concept, true, PartOfSpeech.of(posNames[k]), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Concept = concept;
                            WO.Word    = concept;
                            WO.Sense   = sense.defn;
                            WO.ID      = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            wordologyArr.Add(WO);
                        }
                    }
                }
                catch
                {
                    // Deliberate best-effort: a failure while processing one part of speech must not
                    // prevent the remaining ones from being tried. Entries appended before the failure are kept.
                }
            }

            if (NoOfSensesSucceeded != 0)
            {
                conceptcounter++;
            }
        }
Пример #11
0
        /// <summary>
        /// Creates the database content from WordNet (wndb) data files.
        /// For each part of speech ("n", "v", "a", "r") it first adds synsets, lemmas, synset-lemma links
        /// and original (non-lowercase) writings to <paramref name="context"/> and saves, then adds the
        /// exception-list entries whose main form maps to a known lemma and saves again.
        /// </summary>
        /// <param name="dictPack">Path to the wndb data files; passed to the <c>WNDB</c> constructor.</param>
        /// <param name="context">Destination database context.</param>
        /// <exception cref="InvalidOperationException">
        /// A data record's pos symbol does not match the part of speech being processed
        /// (records with symbol "s" are accepted while processing "a").
        /// </exception>
        public static void Convert(string dictPack, WordNetContext context)
        {
            WNDB wndb = new WNDB(dictPack);

            // Lower-cased words already linked to the synset currently being processed.
            var synWords = new HashSet<string>();

            var wordToLemma = new Dictionary<string, Lemma>();
            var words       = new Dictionary<string, Writing>();

            var poses = (new [] { "n", "v", "a", "r" }).Select(s => PartOfSpeech.of(s));

            foreach (var pos in poses)
            {
                Console.WriteLine("Process Data of {0}", pos.name);

                foreach (var data in wndb.GetData(pos))
                {
                    if (data.pos != pos.symbol && !(data.pos == "s" && pos.symbol == "a")) //data.adj includes 'a' & 's' pos symbols
                    {
                        throw new InvalidOperationException("pos!=data.pos");
                    }

                    var synset = new SynSet {
                        Pos = data.pos
                    };
                    context.SynSets.Add(synset);
                    synWords.Clear();

                    foreach (var oword in data.origWords)
                    {
                        Lemma  lemma;
                        string lcWord = oword.word.ToLower();

                        // add lemma, or extend the pos symbols of an existing one
                        if (!wordToLemma.TryGetValue(lcWord, out lemma))
                        {
                            lemma = new Lemma {
                                Value = lcWord, Poses = data.pos
                            };
                            wordToLemma.Add(lcWord, lemma);
                            context.Lemmas.Add(lemma);
                        }
                        else if (!lemma.Poses.Contains(data.pos))
                        {
                            lemma.Poses += data.pos;
                        }

                        // add SynSet <-> Lemma relation, once per distinct lower-cased word in this synset
                        if (synWords.Add(lcWord))
                        {
                            context.SynsetLemmas.Add(new SynsetLemma
                            {
                                SynSet = synset,
                                Lemma  = lemma
                            });
                        }

                        // add original word if it differs from lemma
                        if (lcWord != oword.word)
                        {
                            Writing word;
                            if (!words.TryGetValue(oword.word, out word))
                            {
                                word = new Writing {
                                    Value = oword.word, Lemma = lemma
                                };
                                words.Add(oword.word, word);
                                context.Writings.Add(word);
                            }
                            else if (word.Lemma != lemma)
                            {
                                // Same writing already registered for a different lemma: report it and keep the first mapping.
                                Console.WriteLine("Word mix: {0} {1} {2}", oword.word, lemma.Value, word.Lemma.Value);
                            }
                        }
                    }

                    synset.Definition = string.Join(";", data.definitions);

                    // string.Join throws ArgumentNullException for a null sequence; treat a missing examples
                    // collection like an empty one. NOTE(review): presumably examples is null for synsets
                    // without examples - confirm against the WNDB data reader.
                    synset.Example = data.examples == null ? string.Empty : string.Join(";", data.examples);
                }
                Console.WriteLine("Save changes");
                context.SaveChanges();

                // exceptions
                //TODO: remove morphes, ...

                Console.WriteLine("Process Exceptions of {0}", pos.name);

                foreach (var exwords in GetExceptions(wndb, pos))
                {
                    // exwords[0] is stored as Except.Value; every following element is a candidate main form.
                    for (int i = 1; i < exwords.Length; i++)
                    {
                        string mainForm = exwords[i];
                        if (mainForm == exwords[0])
                        {
                            continue;
                        }

                        // Try the main form as-is, then with hyphens replaced by spaces.
                        // Main forms without a known lemma are silently skipped.
                        Lemma lemma;
                        if (wordToLemma.TryGetValue(mainForm, out lemma) ||
                            (mainForm.Contains('-') && wordToLemma.TryGetValue(mainForm.Replace('-', ' '), out lemma)))
                        {
                            context.Excepts.Add(new Except {
                                Value = exwords[0], MainForm = mainForm, Lemma = lemma
                            });
                        }
                    }
                }
                Console.WriteLine("Save changes");
                context.SaveChanges();
            }

            // Every loop iteration ends with a save, so this final call normally has nothing left to persist.
            context.SaveChanges();
        }
Пример #12
0
        /// <summary>
        /// Lazily yields candidate base forms of <paramref name="str"/> for the given part of speech.
        /// </summary>
        /// <remarks>
        /// Attempts are made in this order and the first one that applies ends the sequence:
        /// 1. all forms returned by the storage's exception list;
        /// 2. for a multi-word verb for which HasPreposition is true, the result of MorphPreposition;
        /// 3. for a non-verb, the result of MorphWord on the whole string, if it succeeds;
        /// 4. MorphWord applied to every space-separated word (and to every hyphen-separated piece of a
        ///    hyphenated word); the re-joined string is yielded only if something changed and the storage
        ///    reports it as defined.
        /// A part of speech whose class is "SATELLITE" is treated as "adj".
        /// </remarks>
        /// <param name="str">Word or space-separated collocation to morph.</param>
        /// <param name="pos">Part of speech used for the lookups.</param>
        /// <returns>A deferred sequence of base-form candidates; may be empty.</returns>
        public IEnumerable <string> GetMorphs(string str, PartOfSpeech pos)
        {
            if (pos.clss == "SATELLITE")
            {
                pos = PartOfSpeech.of("adj");
            }

            string[] words = str.Split(' ');

            /* 1: exception list */
            var exceptionForms = _storage.GetBasicForms(str, pos);
            if (exceptionForms.Any())
            {
                foreach (var form in exceptionForms)
                {
                    yield return(form);
                }

                yield break;
            }

            bool isVerb = pos.name == "verb";

            /* 2: verb phrase with a preposition */
            if (isVerb && words.Length > 1 && HasPreposition(words))
            {
                yield return(MorphPreposition(str, words, pos));

                yield break;
            }

            /* 3: morph the original string as a whole (non-verbs only) */
            string wholeMorph = null;
            if (!isVerb && MorphWord(str, pos, morph => wholeMorph = morph))
            {
                yield return(wholeMorph);

                yield break;
            }

            /* 4: morph word by word; the callbacks write the result back into the arrays */
            bool anyWordChanged = false;
            for (int i = 0; i < words.Length; i++)
            {
                string current = words[i];

                if (current.IndexOf('-') < 0)
                {
                    if (MorphWord(current, pos, morph => words[i] = morph))
                    {
                        anyWordChanged = true;
                    }

                    continue;
                }

                // Hyphenated word: morph each piece, then re-join only if at least one piece changed.
                string[] pieces         = current.Split('-');
                bool     anyPieceChanged = false;
                for (int j = 0; j < pieces.Length; j++)
                {
                    if (MorphWord(pieces[j], pos, morph => pieces[j] = morph))
                    {
                        anyPieceChanged = true;
                    }
                }

                if (anyPieceChanged)
                {
                    words[i]       = string.Join("-", pieces);
                    anyWordChanged = true;
                }
            }

            if (!anyWordChanged)
            {
                yield break;
            }

            string candidate = string.Join(" ", words);
            if (_storage.IsDefinded(candidate, pos))
            {
                yield return(candidate);
            }
        }