/// <summary>
        /// Explores the definition of an unknown term by consulting several sources in turn:
        /// the model registry and the missing-term list, a numeric check, Hunspell, Apertium,
        /// the named-entity database and the Serbian WordNet, followed by a final gram discovery pass.
        /// </summary>
        /// <param name="term">The term to explore; it is trimmed before any lookup.</param>
        /// <param name="loger">Optional log builder. Every direct use in this method is guarded by a null check; it is also passed on to the helper calls as-is.</param>
        /// <param name="shortExplore">if set to <c>true</c>, the method returns as soon as the Apertium, named-entity or Serbian WordNet stage has registered a model.</param>
        /// <param name="debug">if set to <c>true</c>, extra log lines are written and intermediate models are saved (both only when <paramref name="loger"/> is not null).</param>
        /// <param name="exploreModel">NOTE(review): the incoming value is never read - it is overwritten with the Hunspell result before its first use.</param>
        /// <returns>
        /// The registry entry for the term, the result of <c>GetModels(term)</c>, or - when exploration fails -
        /// a new list holding a single temporary model.
        /// </returns>
        public List <termExploreModel> explore(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, termExploreModel exploreModel = null)
        {
            term = term.Trim();
            List <termExploreModel> output = new List <termExploreModel>();


            // Already explored: answer straight from the registry.
            if (modelRegistry.ContainsKey(term))
            {
                return(modelRegistry[term]);
            }
            // Previously recorded as missing (see the failure branch at the end of this method).
            if (missing.Contains(term))
            {
                return(GetModels(term));
            }

            // Numbers get a temporary numerical model flagged as a datapoint.
            if (term.isNumber())
            {
                termExploreModel tmp = makeTempModel(term, pos_type.NUMnumerical);
                tmp.flags = termExploreItemEnumFlag.datapoint;
                if (loger != null)
                {
                    loger.AppendLine("Term [" + term + "] detected to be number.");
                }
                AddModel(tmp);
                return(GetModels(term));
            }

            // <----- second test: Hunspell
            // NOTE(review): this assignment discards whatever the caller passed in as exploreModel.
            exploreModel = termExploreProcedures.exploreWithHunspell(new termExploreItem(term), loger);
            List <string> suggests = new List <string>();

            exploreModel.instances.ForEach(x => suggests.Add(x.inputForm));


            //languageManagerDBNamedEntities.manager.exploreEntities(exploreModel.rootWord, exploreModel);

            suggests.Add(exploreModel.rootWord);

            // The query terms are the input forms of all Hunspell instances plus the root word.



            apertiumDictionaryResult result = languageManagerApertium.manager.query(suggests, apertiumDictQueryScope.exact, apertiumDictNeedleSide.serbian);

            if (result.Any())
            {
                // Items that received gram flags from Apertium and should go through gram discovery.
                List <termExploreItem> gramCheck = new List <termExploreItem>();

                // NOTE(review): 'gr' is never used in this method.
                gramFlags gr = null;



                // First the model's own input form ...
                if (result.termVsGramFlags.ContainsKey(exploreModel.inputForm))
                {
                    exploreModel.gramSet.Add(new gramFlags(result.termVsGramFlags[exploreModel.inputForm]));

                    if (exploreModel.lemma == null)
                    {
                        exploreModel.lemma = exploreModel.instances[exploreModel.inputForm];
                    }

                    gramCheck.Add(exploreModel);
                    if (debug)
                    {
                        if (loger != null)
                        {
                            loger.AppendLine("Apertium discovered model [" + exploreModel.inputForm + "]");
                        }
                    }
                }
                else
                {
                    //if (loger != null) loger.AppendLine("Apertium failed to discover [" + exploreModel.inputForm + "]");
                }

                // ... then every instance form.
                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (result.termVsGramFlags.ContainsKey(item.inputForm))
                    {
                        item.gramSet.Add(new gramFlags(result.termVsGramFlags[item.inputForm]));
                        // NOTE(review): this adds the model itself, not 'item', so the model can end up in
                        // gramCheck once per matching instance - confirm whether 'item' was intended.
                        gramCheck.Add(exploreModel);
                        // The last matching instance wins as lemmaForm; the first one wins as lemma (if none was set).
                        exploreModel.lemmaForm = item.inputForm;
                        if (exploreModel.lemma == null)
                        {
                            exploreModel.lemma = item;
                        }


                        if (debug)
                        {
                            if (loger != null)
                            {
                                loger.AppendLine("Apertium discovered model [" + item.inputForm + "]");
                            }
                        }
                    }
                    else
                    {
                        //if (loger != null) loger.AppendLine("Apertium failed to discover [" + item.inputForm + "]");
                    }
                }

                exploreModel.translations.AddRange(result.GetEnglish());



                // Drop entries whose POS type has no patterns registered in posConverter.
                // NOTE(review): presumably posTypeVsPattern is a dictionary; a POS type without an entry would throw here - confirm.
                gramCheck.RemoveAll(x => posConverter.posTypeVsPattern[x.gramSet.getPosType()].Count() == 0);

                // Count how many of the remaining entries discoverGram reports as discovered.
                int disc = 0;
                foreach (var gram in gramCheck)
                {
                    if (discoverGram(gram, loger, debug))
                    {
                        disc++;
                    }
                }

                if (loger != null)
                {
                    loger.AppendLine("Gram [" + term + "] autodiscovered for [" + disc + "] / [" + gramCheck.Count() + "]");
                }
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "Apertium_");
                    }
                }

                // The model is registered first and flagged afterwards.
                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.aper;


                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    loger.AppendLine("Apertium failed to discover any information on [" + term + "]");
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ APERTIUM ^^

            // Named-entity lookup for every suggested form; the results are written into exploreModel.
            foreach (string s in suggests)
            {
                languageManagerDBNamedEntities.manager.exploreEntities(s, exploreModel);
            }

            // The stage counts as successful only when the model's flags equal namedEntity afterwards.
            if (exploreModel.flags == termExploreItemEnumFlag.namedEntity)
            {
                AddModel(exploreModel);

                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "NamedEntity_");
                        loger.AppendLine("Named entities discovered model [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }



                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Named entities found nothing for [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ NAMED ENTITY ^^

            // <----------------- Serbian WordNet
            wordnetSymsetResults resSrWordnet = languageManagerWordnet.manager.query_srb(suggests, loger);
            bool found = false;

            if (resSrWordnet.Any())
            {
                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (resSrWordnet.GetByKey(item.inputForm).Any())
                    {
                        // Each matching instance overwrites lemma/lemmaForm, so the last match wins;
                        // translations and synonyms are appended once per matching instance.
                        exploreModel.lemma     = item;
                        exploreModel.lemmaForm = item.inputForm;
                        exploreModel.translations.AddRange(resSrWordnet.GetValues());
                        exploreModel.synonyms.AddRange(resSrWordnet.GetKeys());
                        exploreModel.flags = termExploreItemEnumFlag.srWNet;
                        found = true;

                        item.gramSet.Add(new gramFlags(new Enum[] { resSrWordnet.models[item.inputForm].gramSet.getPosType() }));
                    }
                }

                // Gram discovery runs for every instance, matched or not; the results are ignored here.
                foreach (termExploreItem item in exploreModel.instances)
                {
                    discoverGram(item, loger, debug);
                }
            }

            if (found)
            {
                if (loger != null)
                {
                    loger.AppendLine("SerbianWordNet discovered model [" + term + "]:" + exploreModel.gramSet.ToString());
                }
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        // The doubled semicolon below is a harmless empty statement.
                        manager.constructor.saveTermModel(exploreModel, "SrWordNet_");;
                    }
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.srWNet;

                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Serbian wordnet found nothing for [" + term + "]");
                    }
                }
            }

            // <------------------ SERBIAN WORD NET ^^

            // NOTE(review): elsewhere in this method a true result from discoverGram is counted as a
            // discovery, yet here the result is stored in a variable named 'failed' - confirm the polarity.
            bool failed = discoverGram(exploreModel, loger, debug);

            exploreModel.instances.ForEach(x => discoverGram(x, loger, debug));

            int d = 0;

            // NOTE(review): lastCheck is created empty and never filled, so the loop below never runs,
            // d stays 0, 'failed' is forced to true and the success branch at the end cannot be reached.
            List <termExploreItem> lastCheck = new List <termExploreItem>();

            foreach (var gram in lastCheck)
            {
                if (discoverGram(gram, loger, debug))
                {
                    d++;
                }
            }

            if (debug)
            {
                if (loger != null)
                {
                    loger.AppendLine("The last check [" + term + "] autodiscovered for [" + d + "] / [" + lastCheck.Count() + "]");
                }
            }

            if (d == 0)
            {
                failed = true;
            }

            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ LAST CHECK ^^



            if (!failed)
            {
                exploreModel.flags = termExploreItemEnumFlag.termExplorer;
                AddModel(exploreModel);
                return(GetModels(term));
            }
            else
            {
                if (debug)
                {
                    if (loger != null)
                    {
                        loger.AppendLine("Exploration failed for [" + term + "] -- creating temporary term model");
                    }
                }
                // The temporary model is returned directly (AddModel is not called for it) and the term
                // is recorded as missing, so the next call takes the 'missing' shortcut at the top.
                output.Add(makeTempModel(term, pos_type.TEMP));
                missing.Add(term);
                return(output);
            }
        }
Exemple #2
0
        /// <summary>
        /// The stage two exploration: loads the stored term models for a lemma and enriches each of them
        /// with synonyms obtained through Apertium and through WordNet via Apertium.
        /// </summary>
        /// <param name="lemma">The lemma whose stored term models are loaded.</param>
        /// <param name="response">Not used by this method; kept so existing callers keep compiling.</param>
        /// <param name="savemodel">if set to <c>true</c>, the model graph description is saved and the model is either saved or, when its exploration failed, added to the failed models of the returned set.</param>
        /// <param name="debug">if set to <c>true</c>, the model and its per-model log are written to a <c>_log.md</c> file in the logs folder.</param>
        /// <param name="verbose">if set to <c>true</c>, the per-model log is attached to the console output under the name "stage2" while the model is being explored.</param>
        /// <param name="task">Optional task; when given and <paramref name="savemodel"/> is set, its title is stored as the stage that last modified the model.</param>
        /// <returns>The loaded model set; when nothing was loaded, the lemma is added to its missing lemmas.</returns>
        public static termExploreModelSet exploreStageTwo(string lemma, ILogBuilder response, bool savemodel, bool debug, bool verbose, lexiconTaskBase task = null)
        {
            lexiconConstructor  constructor = semanticLexiconManager.manager.constructor;
            termExploreModelSet outset      = constructor.loadTermModels(lemma, true);

            if (!Enumerable.Any(outset))
            {
                outset.missingLemmas.Add(lemma);
                return(outset);
            }

            foreach (termExploreModel mod in outset)
            {
                builderForLog logout = new builderForLog();
                if (verbose)
                {
                    aceLog.consoleControl.setAsOutput(logout, "stage2");
                }

                termExploreModel model;
                try
                {
                    model = getSynonymsWithApertium(mod, logout);

                    // The POS label is read before the WordNet step and reused in the file names below.
                    // NOTE(review): assumes model.lemma is never null for a stored model - confirm.
                    string pt = model.lemma.gramSet.getPosType().ToString();

                    model = getSynonymsWithWordnetViaApertium(model, logout, true, false);

                    if (savemodel)
                    {
                        model.graph.saveDescription(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].path, pt + "_concepts");
                    }

                    model.PostProcess();

                    if (debug)
                    {
                        model.ToString(logout, true, true);
                        string fn = model.lemma.inputForm + "_" + pt + "_log.md";
                        logout.ToString(false).saveStringToFile(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(fn), getWritableFileMode.overwrite);
                    }
                }
                finally
                {
                    // Detach the per-model log even when a step above throws; otherwise it would stay
                    // registered as a console output after the failed call.
                    if (verbose)
                    {
                        aceLog.consoleControl.removeFromOutput(logout);
                    }
                }

                if (savemodel)
                {
                    if (task != null)
                    {
                        model.lastModifiedByStage = task.taskTitle;
                    }
                    else
                    {
                        model.lastModifiedByStage = "stageTwo-exploreProcedure";
                    }

                    // Failed explorations are reported through the result set instead of being saved.
                    if (!model.wasExploreFailed)
                    {
                        constructor.saveTermModel(model);
                    }
                    else
                    {
                        outset.failedModels.Add(model);
                    }
                }
            }
            return(outset);
        }
Exemple #3
0
        /// <summary>
        /// Explores a word with the procedure selected by <paramref name="mode"/>.
        /// When stored models exist for the word, the first one is the starting model; otherwise a fresh
        /// model with an empty lemma form is used.
        /// </summary>
        /// <param name="word">The word to explore.</param>
        /// <param name="response">Optional log builder for the header and the final model dump; it is handed to the selected procedure only when <paramref name="verbose"/> is set.</param>
        /// <param name="mode">Selects the exploration procedure; <c>none</c> leaves the starting model as it is.</param>
        /// <param name="verbose">if set to <c>true</c>, the selected procedure logs to <paramref name="response"/>; the value is also passed to the final model dump.</param>
        /// <returns>The explored model, after <c>PostProcess</c> has been called on it.</returns>
        public static termExploreModel explore(string word, ILogBuilder response, termExploreMode mode, bool verbose = false)
        {
            termExploreModel    model  = new termExploreModel(word);
            termExploreModelSet stored = semanticLexiconManager.manager.constructor.loadTermModels(word, true);
            bool hasLog = response != null;

            if (hasLog)
            {
                response.consoleAltColorToggle();
                response.AppendHorizontalLine();
                response.AppendLine("Exploring term[" + model.inputForm + "] with [" + mode.ToString() + "]");
                response.consoleAltColorToggle();
            }

            // Pick the starting model first, then report which case applied.
            bool known = Enumerable.Any(stored);
            if (known)
            {
                model = Enumerable.First(stored);
            }
            else
            {
                model.lemmaForm = "";
            }

            if (hasLog)
            {
                if (known)
                {
                    response.AppendLine("term[" + model.inputForm + "]->lemma[" + model.lemma.inputForm + "]");
                }
                else
                {
                    response.AppendLine("term[" + word + "]->missingLemma[]");
                }
            }

            // The summary always goes to the caller's log; the procedures log only in verbose mode.
            ILogBuilder summaryLog = response;
            ILogBuilder stepLog    = verbose ? response : null;

            switch (mode)
            {
            case termExploreMode.none:
                break;

            case termExploreMode.apertium_direct:
                model = getSynonymsWithApertium(model, stepLog);
                break;

            case termExploreMode.apertium_wordnet_eng:
                model = getSynonymsWithWordnetViaApertium(model, stepLog);
                break;

            case termExploreMode.apertium_wordnet_srb:
                model = getSynonymsWithSerbianWordNetAndApertium(model, stepLog);
                break;

            case termExploreMode.wordnet_srb:
                model = getSynonymsWithSerbianWordNet(model, stepLog);
                break;

            case termExploreMode.hunspell_srb:
                model = getSynonymsWithHunspell(model, stepLog);
                break;

            case termExploreMode.corpus:
                model = getSynonymsByCorpus(model, stepLog);
                break;

            case termExploreMode.unitex:
                // Unitex starts again from the raw word; the starting model is not passed on.
                model = exploreWithUnitex(word, stepLog);
                break;
            }

            model.PostProcess();

            if (summaryLog != null)
            {
                model.ToString(summaryLog, verbose, false);
            }

            return(model);
        }
Exemple #4
0
        /// <summary>
        /// Builds a term model for a word using the Unitex DELAF operator
        /// (<c>languageManagerUnitex.manager.operatorDelaf</c>): resolves the lemma, reads the lemma's
        /// grammatical declarations and collects the instances of the lemma.
        /// </summary>
        /// <param name="word">The word to explore.</param>
        /// <param name="response">Optional log builder; nothing is logged when it is <c>null</c>.</param>
        /// <param name="wordIsLemma">if set to <c>true</c>, the word is trusted to be a lemma and the lemma lookup is skipped.</param>
        /// <returns>
        /// The model already stored for the word when the lexicon manager resolves it; otherwise a new model,
        /// with <c>wasExploreFailed</c> set when no lemma definition was found.
        /// </returns>
        public static termExploreModel exploreWithUnitex(string word, ILogBuilder response, bool wordIsLemma = false)
        {
            termExploreModel output = new termExploreModel();

            output.modelSource = termExploreModelSource.fromToken;
            output.inputForm   = word;

            // Until a lookup says otherwise, the word itself is used as the lemma.
            string lemma = word;

            var tls = semanticLexiconManager.manager.resolve(word);

            if (tls != null && Enumerable.Any(tls))
            {
                if (response != null)
                {
                    response.AppendLine("#1 Lemma already defined in the triplestore [" + word + "] ");
                }
                output = semanticLexiconManager.manager.constructor.getTermModel(Enumerable.First(tls));
                return(output);
            }

            // <------------------------------------------------------------  1. finding the lemma
            if (!wordIsLemma)
            {
                if (response != null)
                {
                    response.AppendLine("#1 Finding Lemma for [" + word + "] ");
                }
                // NOTE(review): 'word' is inserted into the pattern unescaped - confirm it cannot contain regex metacharacters.
                string query = string.Format(posConverter.REGEX_UNITEX_InstanceToLemmaFormat, word);
                fileTextSearchResult reslt = languageManagerUnitex.manager.operatorDelaf.Search(query, true, 1, RegexOptions.IgnoreCase);

                if (response != null)
                {
                    reslt.ToString(response, true);
                }

                if (reslt.Count() > 0)
                {
                    var lnp = reslt.First();

                    // The search ignores case but this match does not, so it can fail on a line the
                    // search returned. Keep the word as the lemma then, instead of the empty string a
                    // failed match yields.
                    Match  mch      = new Regex(query).Match(lnp.Value);
                    string captured = mch.Groups[1].Value;
                    if (mch.Success && !string.IsNullOrEmpty(captured))
                    {
                        lemma = captured;
                    }
                }
            }
            else
            {
                if (response != null)
                {
                    response.AppendLine("#1 The word is trusted to be a lemma [" + word + "] - skipping search");
                }
            }

            // <------------------------------------------------------------  preparing the cache
            // The searches in steps 2 and 3 run against this cached result, not the whole dictionary.
            var cache = languageManagerUnitex.manager.operatorDelaf.Search(lemma, false, 300);

            if (response != null)
            {
                response.AppendLine("Cached definitions [" + cache.Count() + "] ");
            }

            // <------------------------------------------------------------  2. finding lemma definition
            output.lemmaForm = lemma;
            output.lemma     = new termExploreItem(lemma);

            if (response != null)
            {
                response.AppendLine("#2 Finding Lemma definition [" + lemma + "] ");
            }

            string lemmaQuery                = string.Format(posConverter.REGEX_UNITEX_DeclarationForLemma, lemma);
            Regex  lemmaQueryRegex           = new Regex(lemmaQuery);
            fileTextSearchResult lemmaResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, lemmaQuery, true, 5, RegexOptions.IgnoreCase);

            if (response != null)
            {
                lemmaResult.ToString(response, true);
            }

            if (lemmaResult.Count() == 0)
            {
                if (response != null)
                {
                    response.consoleAltColorToggle();
                    response.AppendLine("Failed to find lemma definition for [" + word + "]. Aborting exploration.");
                    response.consoleAltColorToggle();
                }
                output.wasExploreFailed = true;
                return(output);
            }

            foreach (var lr_lnp in lemmaResult)
            {
                Match lmch = lemmaQueryRegex.Match(lr_lnp.Value);
                if (lmch.Success)
                {
                    output.lemma.gramSet.Add(lmch.Groups[1].Value);
                }
            }
            if (response != null)
            {
                output.lemma.ToString(response);
            }

            // <------------------------------------------------------------  3. getting all instances for the lemma
            if (response != null)
            {
                response.AppendLine("#3 Extracting all instances for the Lemma [" + lemma + "] ");
            }

            string instanceQuery                = string.Format(posConverter.REGEX_UNITEX_LemmaToInstanceFormat, lemma);
            string instanceUnitexQuery          = "," + lemma + ".";
            Regex  instanceQueryRegex           = new Regex(instanceQuery);
            fileTextSearchResult instanceResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, instanceUnitexQuery, false, 100, RegexOptions.IgnoreCase);

            if (response != null)
            {
                instanceResult.ToString(response, true);
            }

            foreach (var lr_lnp in instanceResult)
            {
                // The lines come from a plain-text, case-insensitive search, so some may not match the
                // stricter regex; skip those instead of adding an instance with empty values.
                Match lmch = instanceQueryRegex.Match(lr_lnp.Value);
                if (lmch.Success)
                {
                    output.instances.Add(lmch.Groups[1].Value, lmch.Groups[2].Value);
                }
            }

            // <------------------------------------------------------------  4. Resulting term model
            if (response != null)
            {
                response.AppendLine("#4 Resulting term model [" + lemma + "] ");
                output.ToString(response);
            }

            return(output);
        }
        /// <summary>
        /// Builds a term model for the item from the Hunspell suggestions for its input form:
        /// picks a root word, keeps the suggestions containing that root as instances and takes
        /// the shortest of them as the lemma form.
        /// </summary>
        /// <param name="item">The item whose input form is sent to Hunspell.</param>
        /// <param name="log">The log builder the resulting model is written to; it is passed on without a null check.</param>
        /// <returns>
        /// The new model. When Hunspell suggests nothing, lemma form, root word and input form
        /// are all the item's input form and no instances are added.
        /// </returns>
        public static termExploreModel exploreWithHunspell(this termExploreItem item, ILogBuilder log)
        {
            termExploreModel model = new termExploreModel();

            // Replace every "\-" sequence in the suggestions with a plain "-".
            List <string> suggestions = new List <string>();
            foreach (string raw in imbLanguageFrameworkManager.serbian.basic.hunspellEngine.Suggest(item.inputForm))
            {
                suggestions.Add(raw.Replace("\\-", "-"));
            }

            // No suggestions: the input form stands in for everything.
            if (!Enumerable.Any(suggestions))
            {
                model.lemmaForm = item.inputForm;
                model.rootWord  = item.inputForm;
                model.inputForm = item.inputForm;

                model.ToString(log);
                return(model);
            }

            // Difference between the shortest suggestion and the input form; when it is not zero,
            // the prefix used for filtering is derived from the input form with substring(tocut).
            int    shortest = Enumerable.Min(suggestions, s => s.Length);
            int    tocut    = shortest - item.inputForm.Length;
            string prefix   = item.inputForm;
            if (tocut != 0)
            {
                prefix = prefix.substring(tocut);
            }

            List <string> accepted  = new List <string>();
            string        root      = "";
            int           rootSplit = 0;

            foreach (string candidate in suggestions)
            {
                // Suggestions containing a space are ignored.
                if (candidate.Contains(" "))
                {
                    continue;
                }

                // Suggestions without a hyphen are kept when they start with the prefix.
                if (!candidate.Contains("-"))
                {
                    if (candidate.StartsWith(prefix))
                    {
                        accepted.Add(candidate);
                    }
                    continue;
                }

                // Hyphenated suggestion: the part before the hyphen becomes the root when the hyphen
                // sits further right than in any earlier suggestion.
                int dashAt = candidate.IndexOf("-");
                if (dashAt > rootSplit)
                {
                    rootSplit = dashAt;
                    root      = candidate.Substring(0, dashAt).Trim('-');
                }
            }

            // Nothing matched the prefix and nothing was cut: fall back to all suggestions.
            if (tocut == 0 && accepted.Count == 0)
            {
                accepted.AddRange(suggestions);
            }
            accepted.Add(item.inputForm);

            // No hyphenated suggestion set a root: use the item MinItem selects by length.
            if (rootSplit == 0)
            {
                root = accepted.MinItem(x => x.Length);
            }

            // Re-filter a copy of the accepted terms, refilling the accepted list with those
            // that contain the root.
            List <string> pool = accepted.Clone();
            accepted.Clear();

            string shortestMatch = "";
            foreach (string candidate in pool)
            {
                if (!candidate.Contains(root, StringComparison.CurrentCultureIgnoreCase))
                {
                    continue;
                }

                accepted.Add(candidate);

                // The first match seeds the lemma form; after that only strictly shorter matches replace it.
                if (shortestMatch.isNullOrEmpty() || candidate.Length < shortestMatch.Length)
                {
                    shortestMatch = candidate;
                }
            }

            model.lemmaForm = shortestMatch;
            model.rootWord  = root;
            model.inputForm = item.inputForm;

            foreach (string instance in accepted)
            {
                model.instances.Add(instance);
            }

            model.ToString(log);

            return(model);
        }