Beispiel #1
0
        /// <summary>
        /// Loads the term models for <paramref name="word"/> through the lexicon constructor and, for each
        /// model, collects Apertium dictionary results and WordNet lookups into the model before saving it.
        /// </summary>
        /// <param name="word">The term whose models are loaded and enriched.</param>
        /// <param name="response">Log receiver; it is written to unconditionally and also forwarded to the WordNet queries.</param>
        /// <returns>
        /// The loaded model set. When it contains no models, <paramref name="word"/> is added to its
        /// <c>missingLemmas</c> and the set is returned without further processing.
        /// </returns>
        public static termExploreModelSet exploreWithApertiumAndWordnet(string word, ILogBuilder response)
        {
            termExploreModelSet models = semanticLexiconManager.manager.constructor.loadTermModels(word);

            string termTag   = "term[" + word + "]";
            int    modelCount = Enumerable.Count(models);

            response.AppendLine(termTag + "->models[" + modelCount + "]");

            // Nothing stored for this term: record it as a missing lemma and stop.
            if (modelCount == 0)
            {
                models.missingLemmas.Add(word);
                response.AppendLine(termTag + "->missingLemma[]");
                return models;
            }

            int index = 0;

            foreach (termExploreModel current in models)
            {
                string lemmaForm = current.lemma.inputForm;

                // --- Apertium: exact-match synonym query on the lemma form
                var dictResult      = languageManagerApertium.manager.queryForSynonyms(lemmaForm, apertiumDictQueryScope.exact);
                var nativeWords     = dictResult.GetNativeWords();
                var translatedWords = dictResult.GetTranslatedWords();

                current.translations.AddRange(translatedWords);
                current.synonyms.AddRange(nativeWords);

                response.AppendLine(
                    termTag + "->model[" + index.ToString() + "]->lemma[" + lemmaForm + "]"
                    + " --> Apertium.dic ==> srb[" + nativeWords.Count() + "] eng[" + translatedWords.Count() + "]");

                // --- WordNet: query with the collected translations, then with the resulting keys
                wordnetSymsetResults firstPass = languageManagerWordnet.manager.query_eng(current.translations, response);
                current.wordnetSecondarySymsets.AddRange(firstPass.GetKeys());

                wordnetSymsetResults secondPass = languageManagerWordnet.manager.query_eng_symset(current.wordnetSecondarySymsets, response);
                current.wordnetSynonyms.AddRange(secondPass.GetEnglish());

                // --- Apertium again: look the WordNet synonyms up on the translated side
                var backTranslation = languageManagerApertium.manager.query(
                    current.wordnetSynonyms,
                    apertiumDictQueryScope.exact,
                    apertiumDictNeedleSide.translated);

                current.wordnetSynonymSerbian.AddRange(backTranslation.GetNativeWords());

                response.AppendLine(
                    "WordNet(" + translatedWords.Count() + ")"
                    + " ==> synsets[" + Enumerable.Count(current.wordnetSecondarySymsets) + "]"
                    + "  synEng[" + Enumerable.Count(current.wordnetSynonyms) + "]"
                    + " ==> synSrb[" + Enumerable.Count(current.wordnetSynonymSerbian) + "]");

                semanticLexiconManager.manager.constructor.saveTermModel(current);

                index++;
            }

            return models;
        }
        /// <summary>
        /// Explores the definition of an unknown term by passing it through successive lookup stages:
        /// Hunspell, Apertium, named entities, Serbian WordNet and a final grammar-discovery check.
        /// </summary>
        /// <remarks>
        /// Before any lookup the method returns early when the trimmed term is already in
        /// <c>modelRegistry</c>, is listed in <c>missing</c>, or is a number. When every stage is
        /// exhausted without success, a temporary model is returned and the term is added to <c>missing</c>.
        /// </remarks>
        /// <param name="term">The term to explore; it is trimmed before use.</param>
        /// <param name="loger">
        /// Log receiver. Every direct use inside this method is guarded by a null check; it is also passed
        /// to helper calls whose null handling is not visible here.
        /// </param>
        /// <param name="shortExplore">
        /// If set to <c>true</c>, the method returns as soon as the Apertium, named-entity or Serbian WordNet
        /// stage has registered a model instead of continuing with the remaining stages.
        /// </param>
        /// <param name="debug">
        /// If set to <c>true</c> and <paramref name="loger"/> is not null, extra log lines are written and the
        /// model is dumped and saved after each successful stage. The flag is also forwarded to <c>discoverGram</c>.
        /// </param>
        /// <param name="exploreModel">
        /// NOTE(review): the supplied value is never read; it is always replaced by the Hunspell exploration
        /// result. Confirm whether callers are expected to be able to pass a pre-built model.
        /// </param>
        /// <returns>
        /// The models registered for the term (via <c>modelRegistry</c> or <c>GetModels</c>), or a single-item
        /// list holding a temporary model when exploration failed.
        /// </returns>
        public List<termExploreModel> explore(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, termExploreModel exploreModel = null)
        {
            term = term.Trim();

            // Neither debug nor loger is reassigned below, so the combined flag can be computed once.
            bool logDebug = debug && (loger != null);

            if (modelRegistry.ContainsKey(term))
            {
                return modelRegistry[term];
            }

            if (missing.Contains(term))
            {
                return GetModels(term);
            }

            if (term.isNumber())
            {
                termExploreModel tmp = makeTempModel(term, pos_type.NUMnumerical);
                tmp.flags = termExploreItemEnumFlag.datapoint;
                if (loger != null)
                {
                    loger.AppendLine("Term [" + term + "] detected to be number.");
                }
                AddModel(tmp);
                return GetModels(term);
            }

            // <------------------ HUNSPELL: builds the working model and its instances
            exploreModel = termExploreProcedures.exploreWithHunspell(new termExploreItem(term), loger);

            // Every instance form plus the root word is used as a query needle in the stages below.
            List<string> suggests = new List<string>();
            exploreModel.instances.ForEach(x => suggests.Add(x.inputForm));
            suggests.Add(exploreModel.rootWord);

            // <------------------ APERTIUM
            apertiumDictionaryResult result = languageManagerApertium.manager.query(suggests, apertiumDictQueryScope.exact, apertiumDictNeedleSide.serbian);

            if (result.Any())
            {
                List<termExploreItem> gramCheck = new List<termExploreItem>();

                if (result.termVsGramFlags.ContainsKey(exploreModel.inputForm))
                {
                    exploreModel.gramSet.Add(new gramFlags(result.termVsGramFlags[exploreModel.inputForm]));

                    if (exploreModel.lemma == null)
                    {
                        exploreModel.lemma = exploreModel.instances[exploreModel.inputForm];
                    }

                    gramCheck.Add(exploreModel);

                    if (logDebug)
                    {
                        loger.AppendLine("Apertium discovered model [" + exploreModel.inputForm + "]");
                    }
                }

                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (!result.termVsGramFlags.ContainsKey(item.inputForm))
                    {
                        continue;
                    }

                    item.gramSet.Add(new gramFlags(result.termVsGramFlags[item.inputForm]));

                    // Queue the instance that just received gram flags. Previously the model itself was
                    // queued here, once per matching instance, so the instances were never checked.
                    gramCheck.Add(item);

                    exploreModel.lemmaForm = item.inputForm;
                    if (exploreModel.lemma == null)
                    {
                        exploreModel.lemma = item;
                    }

                    if (logDebug)
                    {
                        loger.AppendLine("Apertium discovered model [" + item.inputForm + "]");
                    }
                }

                exploreModel.translations.AddRange(result.GetEnglish());

                // Drop candidates whose part-of-speech type has no patterns registered in posConverter.
                gramCheck.RemoveAll(x => posConverter.posTypeVsPattern[x.gramSet.getPosType()].Count() == 0);

                int disc = 0;
                foreach (termExploreItem gram in gramCheck)
                {
                    if (discoverGram(gram, loger, debug))
                    {
                        disc++;
                    }
                }

                if (loger != null)
                {
                    loger.AppendLine("Gram [" + term + "] autodiscovered for [" + disc + "] / [" + gramCheck.Count + "]");
                }

                if (logDebug)
                {
                    exploreModel.ToString(loger, true, false);
                    manager.constructor.saveTermModel(exploreModel, "Apertium_");
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.aper;

                if (shortExplore)
                {
                    return GetModels(term);
                }
            }
            else if (loger != null)
            {
                loger.AppendLine("Apertium failed to discover any information on [" + term + "]");
            }

            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ NAMED ENTITY
            foreach (string s in suggests)
            {
                languageManagerDBNamedEntities.manager.exploreEntities(s, exploreModel);
            }

            if (exploreModel.flags == termExploreItemEnumFlag.namedEntity)
            {
                AddModel(exploreModel);

                if (logDebug)
                {
                    exploreModel.ToString(loger, true, false);
                    manager.constructor.saveTermModel(exploreModel, "NamedEntity_");
                    loger.AppendLine("Named entities discovered model [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                }

                if (shortExplore)
                {
                    return GetModels(term);
                }
            }
            else if (logDebug)
            {
                loger.AppendLine("Named entities found nothing for [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
            }

            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ SERBIAN WORD NET
            wordnetSymsetResults resSrWordnet = languageManagerWordnet.manager.query_srb(suggests, loger);
            bool found = false;

            if (resSrWordnet.Any())
            {
                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (resSrWordnet.GetByKey(item.inputForm).Any())
                    {
                        exploreModel.lemma     = item;
                        exploreModel.lemmaForm = item.inputForm;
                        exploreModel.translations.AddRange(resSrWordnet.GetValues());
                        exploreModel.synonyms.AddRange(resSrWordnet.GetKeys());
                        exploreModel.flags = termExploreItemEnumFlag.srWNet;
                        found = true;

                        item.gramSet.Add(new gramFlags(new Enum[] { resSrWordnet.models[item.inputForm].gramSet.getPosType() }));
                    }
                }

                foreach (termExploreItem item in exploreModel.instances)
                {
                    discoverGram(item, loger, debug);
                }
            }

            if (found)
            {
                if (loger != null)
                {
                    loger.AppendLine("SerbianWordNet discovered model [" + term + "]:" + exploreModel.gramSet.ToString());
                }

                if (logDebug)
                {
                    exploreModel.ToString(loger, true, false);
                    manager.constructor.saveTermModel(exploreModel, "SrWordNet_");
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.srWNet;

                if (shortExplore)
                {
                    return GetModels(term);
                }
            }
            else if (logDebug)
            {
                loger.AppendLine("Serbian wordnet found nothing for [" + term + "]");
            }

            // <------------------ LAST CHECK
            // NOTE(review): elsewhere in this method a true result from discoverGram is counted as a
            // success, yet here it seeds `failed`; verify the intended polarity.
            bool failed = discoverGram(exploreModel, loger, debug);

            exploreModel.instances.ForEach(x => discoverGram(x, loger, debug));

            // NOTE(review): lastCheck is never populated, so the loop below does not iterate, d stays 0,
            // `failed` is always forced to true, and the termExplorer branch further down cannot currently
            // be reached. Behaviour is kept as-is until the intended content of lastCheck is confirmed.
            int d = 0;
            List<termExploreItem> lastCheck = new List<termExploreItem>();

            foreach (termExploreItem gram in lastCheck)
            {
                if (discoverGram(gram, loger, debug))
                {
                    d++;
                }
            }

            if (logDebug)
            {
                loger.AppendLine("The last check [" + term + "] autodiscovered for [" + d + "] / [" + lastCheck.Count + "]");
            }

            if (d == 0)
            {
                failed = true;
            }

            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            if (!failed)
            {
                exploreModel.flags = termExploreItemEnumFlag.termExplorer;
                AddModel(exploreModel);
                return GetModels(term);
            }

            // Exploration failed: hand back a temporary model and remember the term as missing.
            if (logDebug)
            {
                loger.AppendLine("Exploration failed for [" + term + "] -- creating temporary term model");
            }

            List<termExploreModel> output = new List<termExploreModel>();
            output.Add(makeTempModel(term, pos_type.TEMP));
            missing.Add(term);
            return output;
        }