Esempio n. 1
0
        //public static termExploreModelSet exploreAndExpandWithUnitex(String word, ILogBuilder response)
        //{
        //    termExploreModelSet output = new termExploreModelSet();
        //    return output;
        //}


        //public static termExploreItem constructTermItemFromUnitex(String unitexDelaf)
        //{

        //}

        /// <summary>
        /// Placeholder for a Hunspell-based synonym lookup. The implementation is currently disabled,
        /// so the method always throws.
        /// </summary>
        /// <param name="model">The term model that the disabled implementation would extend with suggestions.</param>
        /// <param name="log">The log builder; not used while the method is disabled.</param>
        /// <returns>Never returns normally.</returns>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public static termExploreModel getSynonymsWithHunspell(this termExploreModel model, ILogBuilder log)
        {
            // Disabled implementation, kept for reference:
            // List<string> suggest = imbLanguageFrameworkManager.serbian.basic.hunspellEngine.Suggest(model.lemma.inputForm);
            // model.synonyms.AddRange(suggest);
            throw new NotImplementedException();
        }
Esempio n. 2
0
        /// <summary>
        /// Runs two WordNet queries (source <c>WordnetSource.serbian</c>) on a graph rooted at the model's lemma:
        /// first symset codes by word, then words by symset code.
        /// </summary>
        /// <param name="model">The model whose <c>lemma.inputForm</c> seeds the graph; its
        /// <c>wordnetSecondarySymsets</c>, <c>synonyms</c> and <c>graph</c> members are updated.</param>
        /// <param name="response">Optional log builder; when not null the graph treeview is appended after each query.</param>
        /// <returns>The same <paramref name="model"/> instance.</returns>
        public static termExploreModel getSynonymsWithSerbianWordNet(termExploreModel model, ILogBuilder response)
        {
            bool hasLog = response != null;
            tokenGraph graph = new tokenGraph(model.lemma.inputForm);

            // Pass 1: word -> symset codes
            languageManagerWordnet.manager.queryWithGraph(graph, response, WordnetSource.serbian, WordnetQueryType.getSymsetCodesByWord);
            var symsetCodes = graph.getAllLeafs().getNames();
            model.wordnetSecondarySymsets.AddRange(symsetCodes);

            if (hasLog)
            {
                response.consoleAltColorToggle();
                response.Append(graph.ToStringTreeview());
                response.consoleAltColorToggle();
            }

            // Pass 2: symset codes -> words
            languageManagerWordnet.manager.queryWithGraph(graph, response, WordnetSource.serbian, WordnetQueryType.getWordsBySymsetCode);
            var words = graph.getAllLeafs().getNames();
            model.synonyms.AddRange(words);

            if (hasLog)
            {
                response.consoleAltColorToggle();
                response.Append(graph.ToStringTreeview());
                response.consoleAltColorToggle();
            }

            model.graph = graph;
            return model;
        }
Esempio n. 3
0
 /// <summary>
 /// Registers the model in <c>modelRegistry</c> under the lower-cased input form of the model itself
 /// and under the lower-cased input form of each of its instances.
 /// </summary>
 /// <param name="model">The model to register.</param>
 public void AddModel(termExploreModel model)
 {
     string modelKey = model.inputForm.ToLower();
     modelRegistry.Add(modelKey, model);

     // Every instance form points back to the same model.
     foreach (termExploreItem instance in model.instances)
     {
         string instanceKey = instance.inputForm.ToLower();
         modelRegistry.Add(instanceKey, model);
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Builds a model for <paramref name="term"/> that is flagged as <c>termExploreItemEnumFlag.temp</c>.
        /// </summary>
        /// <param name="term">The term used for both the model and its lemma item.</param>
        /// <param name="type">The part-of-speech type; its string form is added to the model's gram set.</param>
        /// <returns>The newly created model.</returns>
        public termExploreModel makeTempModel(string term, pos_type type)
        {
            termExploreModel tempModel = new termExploreModel(term);
            tempModel.lemma = new termExploreItem(term);

            gramFlags posFlags = new gramFlags(type.ToString());
            tempModel.gramSet.Add(posFlags);

            tempModel.flags = termExploreItemEnumFlag.temp;
            return tempModel;
        }
Esempio n. 5
0
        /// <summary>
        /// Converts a term model into a <c>TermLemma</c> and creates one <c>TermInstance</c> per model instance.
        /// </summary>
        /// <param name="termModel">The model to convert.</param>
        /// <returns>The new lemma. The created instances are linked to it only through their <c>lemma</c>
        /// member; this method does not add them to any collection.</returns>
        public TermLemma modelToLemma(termExploreModel termModel)
        {
            TermLemma lemmaEntity = new TermLemma();
            //lemmaEntity = manager.lexiconContext.TermLemmas.Create();

            lemmaEntity.name    = termModel.inputForm;
            // "N" is passed to toStringSafe as its argument for the lemma type.
            lemmaEntity.type    = termModel.gramSet.getPosType().toStringSafe("N");
            lemmaEntity.gramSet = termModel.gramSet.GetAll();

            foreach (termExploreItem source in termModel.instances)
            {
                ITermInstance instanceEntity = new TermInstance(); //manager.lexiconContext.TermInstances.Create();

                instanceEntity.name    = source.inputForm;
                instanceEntity.type    = source.gramSet.getPosType().ToString();
                instanceEntity.gramSet = source.gramSet.GetAll();
                instanceEntity.lemma   = lemmaEntity;
                // manager.lexiconContext.TermInstances.Add(instanceEntity);
            }

            return lemmaEntity;
        }
Esempio n. 6
0
        /// <summary>
        /// Searches the corpus source file for the model's lemma and stores the hits as synonyms.
        /// </summary>
        /// <param name="model">The model whose <c>lemma.inputForm</c> is searched; its <c>synonyms</c>
        /// and <c>graph</c> members are updated.</param>
        /// <param name="response">Optional log builder; when not null the graph treeview is appended.</param>
        /// <returns>The same <paramref name="model"/> instance.</returns>
        public static termExploreModel getSynonymsByCorpus(termExploreModel model, ILogBuilder response)
        {
            tokenGraph graph = new tokenGraph(model.lemma.inputForm);

            var corpusOperator = semanticLexiconManager.manager.settings.sourceFiles.getOperater(lexiconSourceTypeEnum.corpus);
            var hits = corpusOperator.Search(model.lemma.inputForm);

            // Each matching line is added to the graph as a word_srb node.
            graph.Add(hits.getLineContentList(), tokenGraphNodeType.word_srb);

            var leafNames = graph.getAllLeafs().getNames();
            model.synonyms.AddRange(leafNames);

            if (response != null)
            {
                response.consoleAltColorToggle();
                response.Append(graph.ToStringTreeview());
                response.consoleAltColorToggle();
            }

            model.graph = graph;
            return model;
        }
Esempio n. 7
0
        /// <summary>
        /// Runs <c>getSynonymsWithSerbianWordNet</c> and then expands the resulting graph with two Apertium
        /// queries: one with needle side <c>native</c> (leaf names stored as translations) and one with needle
        /// side <c>translated</c> (leaf names stored as synonyms).
        /// </summary>
        /// <param name="model">The model to extend; its <c>translations</c>, <c>synonyms</c> and <c>graph</c> members are updated.</param>
        /// <param name="response">Optional log builder; when not null the final graph treeview is appended.</param>
        /// <returns>The model returned by the WordNet step, further extended by the Apertium queries.</returns>
        public static termExploreModel getSynonymsWithSerbianWordNetAndApertium(termExploreModel model, ILogBuilder response)
        {
            model = getSynonymsWithSerbianWordNet(model, response);
            tokenGraph result = model.graph;

            languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.native);
            model.translations.AddRange(result.getAllLeafs().getNames());

            languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.translated);

            // Render the treeview only when there is a log to write it to.
            if (response != null)
            {
                response.Append(result.ToStringTreeview());
            }

            model.synonyms.AddRange(result.getAllLeafs().getNames());

            model.graph = result;
            return(model);
        }
Esempio n. 8
0
        /// <summary>
        /// Queries Apertium for the model's lemma and, when the query graph is not empty, stores its leaf
        /// names as translations, runs a second query with needle side <c>translated</c> and stores the
        /// resulting leaf names as synonyms.
        /// </summary>
        /// <param name="model">The model whose <c>lemma.inputForm</c> is queried. Its <c>translations</c>,
        /// <c>synonyms</c> and <c>graph</c> members are updated; <c>wasExploreFailed</c> is set to true
        /// when the first query returns an empty graph.</param>
        /// <param name="response">Optional log builder; when not null the graph treeview is appended.</param>
        /// <returns>The same <paramref name="model"/> instance.</returns>
        public static termExploreModel getSynonymsWithApertium(termExploreModel model, ILogBuilder response)
        {
            tokenGraph result = languageManagerApertium.manager.queryForGraph(model.lemma.inputForm, apertiumDictQueryScope.exact);

            if (result.Count() == 0)
            {
                model.wasExploreFailed = true;
            }
            else
            {
                model.translations.AddRange(result.getAllLeafs().getNames());

                languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.translated);

                // Render the treeview only when there is a log to write it to.
                if (response != null)
                {
                    response.Append(result.ToStringTreeview());
                }

                model.synonyms.AddRange(result.getAllLeafs().getNames());
            }

            // The graph is stored even when the exploration failed.
            model.graph = result;
            return(model);
        }
Esempio n. 9
0
        /// <summary>
        /// Explores the definition of an unknown term by trying, in order: the model registry, number detection,
        /// Hunspell suggestions with an Apertium dictionary query, named entities, Serbian WordNet and a final
        /// gram-discovery check. If nothing succeeds, a temporary model is returned and the term is recorded
        /// in <c>missing</c>.
        /// </summary>
        /// <param name="term">The term; it is trimmed before use.</param>
        /// <param name="loger">Optional log builder; every log call is guarded by a null check.</param>
        /// <param name="shortExplore">If set to <c>true</c>, the method returns as soon as the Apertium,
        /// named-entity or WordNet stage has registered the model.</param>
        /// <param name="debug">If set to <c>true</c>, additional log lines are written and term models are saved
        /// through <c>manager.constructor.saveTermModel</c> (only when <paramref name="loger"/> is not null).</param>
        /// <param name="exploreModel">NOTE(review): this argument is overwritten by the Hunspell step before it is
        /// ever read, so a value passed by the caller has no effect -- confirm whether that is intended.</param>
        /// <returns>The models registered for the term, or a list holding a single temporary model when the exploration failed.</returns>
        public List <termExploreModel> explore(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, termExploreModel exploreModel = null)
        {
            term = term.Trim();
            List <termExploreModel> output = new List <termExploreModel>();


            // NOTE(review): the lookup uses the term as-is; if AddModel stores lower-cased keys,
            // mixed-case terms would never hit here -- confirm against AddModel / GetModels.
            if (modelRegistry.ContainsKey(term))
            {
                return(modelRegistry[term]);
            }
            // Terms that already failed once are answered through GetModels without re-exploring.
            if (missing.Contains(term))
            {
                return(GetModels(term));
            }

            // Numbers get a temporary model flagged as datapoint and are registered immediately.
            if (term.isNumber())
            {
                termExploreModel tmp = makeTempModel(term, pos_type.NUMnumerical);
                tmp.flags = termExploreItemEnumFlag.datapoint;
                if (loger != null)
                {
                    loger.AppendLine("Term [" + term + "] detected to be number.");
                }
                AddModel(tmp);
                return(GetModels(term));
            }

            // <----- second test
            exploreModel = termExploreProcedures.exploreWithHunspell(new termExploreItem(term), loger);
            List <string> suggests = new List <string>();

            // The query candidates are all instance forms plus the root word.
            exploreModel.instances.ForEach(x => suggests.Add(x.inputForm));


            //languageManagerDBNamedEntities.manager.exploreEntities(exploreModel.rootWord, exploreModel);

            suggests.Add(exploreModel.rootWord);




            apertiumDictionaryResult result = languageManagerApertium.manager.query(suggests, apertiumDictQueryScope.exact, apertiumDictNeedleSide.serbian);

            if (result.Any())
            {
                // Items whose gram flags came from Apertium and that are passed to discoverGram below.
                List <termExploreItem> gramCheck = new List <termExploreItem>();

                // NOTE(review): gr is never used in this method.
                gramFlags gr = null;



                if (result.termVsGramFlags.ContainsKey(exploreModel.inputForm))
                {
                    exploreModel.gramSet.Add(new gramFlags(result.termVsGramFlags[exploreModel.inputForm]));

                    if (exploreModel.lemma == null)
                    {
                        exploreModel.lemma = exploreModel.instances[exploreModel.inputForm];
                    }

                    gramCheck.Add(exploreModel);
                    if (debug)
                    {
                        if (loger != null)
                        {
                            loger.AppendLine("Apertium discovered model [" + exploreModel.inputForm + "]");
                        }
                    }
                }
                else
                {
                    //if (loger != null) loger.AppendLine("Apertium failed to discover [" + exploreModel.inputForm + "]");
                }

                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (result.termVsGramFlags.ContainsKey(item.inputForm))
                    {
                        item.gramSet.Add(new gramFlags(result.termVsGramFlags[item.inputForm]));
                        // NOTE(review): the model is added here, not the item whose gram set was just
                        // updated, so the model can appear in gramCheck several times -- confirm intent.
                        gramCheck.Add(exploreModel);
                        // lemmaForm is overwritten on every hit, so the last matching instance wins;
                        // lemma itself is only set when it is still null.
                        exploreModel.lemmaForm = item.inputForm;
                        if (exploreModel.lemma == null)
                        {
                            exploreModel.lemma = item;
                        }


                        if (debug)
                        {
                            if (loger != null)
                            {
                                loger.AppendLine("Apertium discovered model [" + item.inputForm + "]");
                            }
                        }
                    }
                    else
                    {
                        //if (loger != null) loger.AppendLine("Apertium failed to discover [" + item.inputForm + "]");
                    }
                }

                exploreModel.translations.AddRange(result.GetEnglish());



                // Drop entries whose POS type has no patterns registered in posConverter.posTypeVsPattern.
                gramCheck.RemoveAll(x => posConverter.posTypeVsPattern[x.gramSet.getPosType()].Count() == 0);

                // Count how many of the remaining entries discoverGram reports as discovered.
                int disc = 0;
                foreach (var gram in gramCheck)
                {
                    if (discoverGram(gram, loger, debug))
                    {
                        disc++;
                    }
                }

                if (loger != null)
                {
                    loger.AppendLine("Gram [" + term + "] autodiscovered for [" + disc + "] / [" + gramCheck.Count() + "]");
                }
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "Apertium_");
                    }
                }

                // The model is registered first and flagged afterwards.
                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.aper;


                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    loger.AppendLine("Apertium failed to discover any information on [" + term + "]");
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ APERTIUM ^^

            // Named-entity stage: every candidate is passed to exploreEntities together with the model;
            // the outcome is read from exploreModel.flags afterwards.
            foreach (string s in suggests)
            {
                languageManagerDBNamedEntities.manager.exploreEntities(s, exploreModel);
            }

            if (exploreModel.flags == termExploreItemEnumFlag.namedEntity)
            {
                AddModel(exploreModel);

                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "NamedEntity_");
                        loger.AppendLine("Named entities discovered model [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }



                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Named entities found nothing for [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ NAMED ENTITY ^^

            // <----------------- Wordnet
            wordnetSymsetResults resSrWordnet = languageManagerWordnet.manager.query_srb(suggests, loger);
            bool found = false;

            if (resSrWordnet.Any())
            {
                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (resSrWordnet.GetByKey(item.inputForm).Any())
                    {
                        // Lemma and lemmaForm are overwritten on every hit, so the last matching instance wins.
                        exploreModel.lemma     = item;
                        exploreModel.lemmaForm = item.inputForm;
                        // The complete value and key sets are appended once per matching instance.
                        exploreModel.translations.AddRange(resSrWordnet.GetValues());
                        exploreModel.synonyms.AddRange(resSrWordnet.GetKeys());
                        exploreModel.flags = termExploreItemEnumFlag.srWNet;
                        found = true;

                        item.gramSet.Add(new gramFlags(new Enum[] { resSrWordnet.models[item.inputForm].gramSet.getPosType() }));
                    }
                }

                foreach (termExploreItem item in exploreModel.instances)
                {
                    discoverGram(item, loger, debug);
                }
            }

            if (found)
            {
                if (loger != null)
                {
                    loger.AppendLine("SerbianWordNet discovered model [" + term + "]:" + exploreModel.gramSet.ToString());
                }
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "SrWordNet_");;
                    }
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.srWNet;

                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Serbian wordnet found nothing for [" + term + "]");
                    }
                }
            }

            // <------------------ SERBIAN WORD NET ^^

            // NOTE(review): elsewhere in this method a true result from discoverGram is counted as a
            // discovery, yet here it is stored in a variable named "failed" -- confirm the intended polarity.
            bool failed = discoverGram(exploreModel, loger, debug);

            exploreModel.instances.ForEach(x => discoverGram(x, loger, debug));

            int d = 0;

            // NOTE(review): lastCheck is never populated, so the loop below does not execute, d stays 0
            // and "failed" is forced to true further down. As written, the (!failed) branch at the end
            // is unreachable and every call that gets this far returns a temporary model.
            List <termExploreItem> lastCheck = new List <termExploreItem>();

            foreach (var gram in lastCheck)
            {
                if (discoverGram(gram, loger, debug))
                {
                    d++;
                }
            }

            if (debug)
            {
                if (loger != null)
                {
                    loger.AppendLine("The last check [" + term + "] autodiscovered for [" + d + "] / [" + lastCheck.Count() + "]");
                }
            }

            if (d == 0)
            {
                failed = true;
            }

            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ LAST CHECK ^^



            if (!failed)
            {
                exploreModel.flags = termExploreItemEnumFlag.termExplorer;
                AddModel(exploreModel);
                return(GetModels(term));
            }
            else
            {
                if (debug)
                {
                    if (loger != null)
                    {
                        loger.AppendLine("Exploration failed for [" + term + "] -- creating temporary term model");
                    }
                }
                // The temporary model is returned directly and is not registered through AddModel;
                // the term is remembered in "missing" instead.
                output.Add(makeTempModel(term, pos_type.TEMP));
                missing.Add(term);
                return(output);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// The stage two exploration: loads the term models stored for a lemma and extends each of them with
        /// <c>getSynonymsWithApertium</c> and <c>getSynonymsWithWordnetViaApertium</c>, followed by
        /// <c>PostProcess</c>.
        /// </summary>
        /// <param name="lemma">The lemma whose term models are loaded.</param>
        /// <param name="response">The response log. It is not used by this method: each model gets its own log builder.</param>
        /// <param name="savemodel">If set to <c>true</c>, the concept graph description is saved, successfully
        /// explored models are saved through the constructor and failed ones are added to <c>failedModels</c>.</param>
        /// <param name="debug">If set to <c>true</c>, the per-model log is written to a <c>_log.md</c> file in the logs folder.</param>
        /// <param name="verbose">If set to <c>true</c>, the per-model log is attached to the console output while the model is explored.</param>
        /// <param name="task">Optional task; when given, its title is stored as <c>lastModifiedByStage</c>.</param>
        /// <returns>The loaded model set. When no models were loaded, the lemma is added to <c>missingLemmas</c>.</returns>
        public static termExploreModelSet exploreStageTwo(string lemma, ILogBuilder response, bool savemodel, bool debug, bool verbose, lexiconTaskBase task = null)
        {
            lexiconConstructor  constructor = semanticLexiconManager.manager.constructor;
            termExploreModelSet outset      = constructor.loadTermModels(lemma, true);

            if (!Enumerable.Any(outset))
            {
                outset.missingLemmas.Add(lemma);
                return(outset);
            }

            foreach (termExploreModel mod in outset)
            {
                builderForLog    logout = new builderForLog();
                termExploreModel model  = null;

                if (verbose)
                {
                    aceLog.consoleControl.setAsOutput(logout, "stage2");
                }

                try
                {
                    model = getSynonymsWithApertium(mod, logout);

                    // The POS type is captured before the WordNet step and reused in the file names below.
                    string pt = model.lemma.gramSet.getPosType().ToString();

                    model = getSynonymsWithWordnetViaApertium(model, logout, true, false);

                    if (savemodel)
                    {
                        model.graph.saveDescription(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].path, pt + "_concepts");
                        // model.graph.savePaths(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].path, pt + "_concepts");
                    }

                    model.PostProcess();

                    if (debug)
                    {
                        model.ToString(logout, true, true);
                        string fn = model.lemma.inputForm + "_" + pt + "_log.md";
                        logout.ToString(false).saveStringToFile(constructor.projectFolderStructure[lexiconConstructorProjectFolder.logs].pathFor(fn), getWritableFileMode.overwrite);
                    }
                }
                finally
                {
                    // Detach the per-model log even when the exploration throws, so that it does not
                    // stay registered as a console output.
                    if (verbose)
                    {
                        aceLog.consoleControl.removeFromOutput(logout);
                    }
                }

                if (savemodel)
                {
                    if (task != null)
                    {
                        model.lastModifiedByStage = task.taskTitle;
                    }
                    else
                    {
                        model.lastModifiedByStage = "stageTwo-exploreProcedure";
                    }

                    if (!model.wasExploreFailed)
                    {
                        constructor.saveTermModel(model);
                    }
                    else
                    {
                        outset.failedModels.Add(model);
                    }
                }
            }
            return(outset);
        }
Esempio n. 11
0
        /// <summary>
        /// Placeholder for the Hunspell-based exploration of a term. The implementation is currently
        /// disabled (kept below as a comment), so the method always throws.
        /// </summary>
        /// <param name="item">The item whose input form the disabled implementation would pass to Hunspell.</param>
        /// <param name="log">The log; not used while the method is disabled.</param>
        /// <returns>Never returns normally.</returns>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public static termExploreModel exploreWithHunspell(this termExploreItem item, ILogBuilder log)
        {
            throw new NotImplementedException();

            // Disabled implementation, kept for reference. It would need a local
            // "termExploreModel output = new termExploreModel();" and end with "return output;".
            //
            //List<string> suggest = imbLanguageFrameworkManager.serbian.basic.hunspellEngine.Suggest(item.inputForm);
            //
            //List<string> sug2 = new List<string>();
            //suggest.ForEach(x=>sug2.Add(x.Replace("\\-", "-")));
            //suggest = sug2;
            //if (Enumerable.Any(suggest))
            //{
            //    int min_l = Enumerable.Min(suggest, x => x.Length);
            //
            //    List<string> possibleTerm = new List<string>();
            //
            //    int tocut = min_l - item.inputForm.Length;
            //
            //    string start = item.inputForm;
            //
            //    if (tocut != 0) start = start.substring(tocut);
            //
            //    string rootComposite = "";
            //    int rootCompositeSplit = 0;
            //    foreach (string sug in suggest)
            //    {
            //        if (!sug.Contains(" "))
            //        {
            //            if (sug.Contains("-"))
            //            {
            //                int rcSplit = sug.IndexOf("-");
            //                if (rcSplit > rootCompositeSplit)
            //                {
            //                    rootCompositeSplit = rcSplit;
            //                    rootComposite = sug.Substring(0, rootCompositeSplit).Trim(Enumerable.ToArray("-"));
            //                }
            //            }
            //            else
            //            {
            //                if (sug.StartsWith(start))
            //                {
            //                    possibleTerm.Add(sug);
            //                }
            //            }
            //        }
            //    }
            //
            //    if (tocut == 0)
            //    {
            //        if (possibleTerm.Count == 0)
            //        {
            //            possibleTerm.AddRange(suggest);
            //        }
            //    }
            //    possibleTerm.Add(item.inputForm);
            //
            //    if (rootCompositeSplit == 0)
            //    {
            //        rootComposite = possibleTerm.MinItem(x => x.Length);
            //    }
            //
            //    suggest = possibleTerm.Clone();
            //    possibleTerm.Clear();
            //
            //    string lemmaForm = "";
            //
            //    foreach (string sug in suggest)
            //    {
            //        if (sug.Contains(rootComposite, StringComparison.CurrentCultureIgnoreCase))
            //        {
            //            possibleTerm.Add(sug);
            //            if (lemmaForm.isNullOrEmpty())
            //            {
            //                lemmaForm = sug;
            //            }
            //            if (sug.Length < lemmaForm.Length)
            //            {
            //                lemmaForm = sug;
            //            }
            //        }
            //    }
            //
            //    output.lemmaForm = lemmaForm;
            //    output.rootWord = rootComposite;
            //    output.inputForm = item.inputForm;
            //
            //    foreach (string sug in possibleTerm)
            //    {
            //        output.instances.Add(sug);
            //        //log.log(sug);
            //    }
            //} else
            //{
            //    output.lemmaForm = item.inputForm;
            //    output.rootWord = item.inputForm;
            //    output.inputForm = item.inputForm;
            //}
            //
            ////log.log("Input term: " + item.inputForm);
            ////log.log("Root: " + output.rootWord);
            ////log.log("Lemma: " + output.lemmaForm);
            ////log.log("Instances: ");
            //
            //output.ToString(log);
        }
Esempio n. 12
0
        /// <summary>
        /// Builds a term model from a word using the Unitex DELAF operator: resolves the lemma (unless the word
        /// is trusted to be one), reads the lemma's gram declarations and collects all instances of the lemma.
        /// </summary>
        /// <param name="word">The word to explore.</param>
        /// <param name="response">Optional log builder; every log call is guarded by a null check.</param>
        /// <param name="wordIsLemma">If set to <c>true</c>, the word is used as the lemma and the lemma search is skipped.</param>
        /// <returns>
        /// The model already stored for the word when <c>semanticLexiconManager.manager.resolve</c> returns any
        /// entry; otherwise a new model, with <c>wasExploreFailed</c> set to true when no lemma definition was found.
        /// </returns>
        public static termExploreModel exploreWithUnitex(string word, ILogBuilder response, bool wordIsLemma = false)
        {
            termExploreModel output = new termExploreModel();

            output.modelSource = termExploreModelSource.fromToken;
            output.inputForm   = word;

            // Falls back to the word itself when no lemma can be extracted below.
            string lemma = word;

            var tls = semanticLexiconManager.manager.resolve(word);

            if (tls != null && Enumerable.Any(tls))
            {
                if (response != null)
                {
                    response.AppendLine("#1 Lemma already defined in the triplestore [" + word + "] ");
                }
                output = semanticLexiconManager.manager.constructor.getTermModel(Enumerable.First(tls));
                return(output);
            }

            if (!wordIsLemma)
            {
                if (response != null)
                {
                    response.AppendLine("#1 Finding Lemma for [" + word + "] ");
                }
                string query = string.Format(posConverter.REGEX_UNITEX_InstanceToLemmaFormat, word);
                fileTextSearchResult reslt = languageManagerUnitex.manager.operatorDelaf.Search(query, true, 1, RegexOptions.IgnoreCase);

                if (response != null)
                {
                    reslt.ToString(response, true);
                }

                Regex instanceToLemmaReg = new Regex(query);

                if (reslt.Count() > 0)
                {
                    var lnp = reslt.First();

                    Match mch = instanceToLemmaReg.Match(lnp.Value);

                    // The search above ignores case while this regex does not, so the match can fail on a
                    // line the search returned. Keep the fallback lemma instead of taking the empty string
                    // that Groups[1].Value yields for a failed match.
                    if (mch.Success)
                    {
                        lemma = mch.Groups[1].Value;
                    }
                }
            }
            else
            {
                if (response != null)
                {
                    response.AppendLine("#1 The word is trusted to be a lemma [" + word + "] - skipping search");
                }
            }
            // <------------------------------------------------------------------- preparing cache ---------------

            // The lines found for the lemma are reused by the two searches below.
            var cache = languageManagerUnitex.manager.operatorDelaf.Search(lemma, false, 300);

            if (response != null)
            {
                response.AppendLine("Cached definitions [" + cache.Count() + "] ");
            }

            // <------------------------------------------------------------  2. finding lemma definition

            output.lemmaForm = lemma;
            output.lemma     = new termExploreItem(lemma);

            if (response != null)
            {
                response.AppendLine("#2 Finding Lemma definition [" + lemma + "] ");
            }

            string lemmaQuery                = string.Format(posConverter.REGEX_UNITEX_DeclarationForLemma, lemma);
            Regex  lemmaQueryRegex           = new Regex(lemmaQuery);
            fileTextSearchResult lemmaResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, lemmaQuery, true, 5, RegexOptions.IgnoreCase);

            if (response != null)
            {
                lemmaResult.ToString(response, true);
            }

            if (lemmaResult.Count() == 0)
            {
                if (response != null)
                {
                    response.consoleAltColorToggle();
                    response.AppendLine("Failed to find lemma definition for [" + word + "]. Aborting exploration.");
                    response.consoleAltColorToggle();
                }
                output.wasExploreFailed = true;
                return(output);
            }

            foreach (var lr_lnp in lemmaResult)
            {
                Match lmch = lemmaQueryRegex.Match(lr_lnp.Value);
                if (lmch.Success)
                {
                    output.lemma.gramSet.Add(lmch.Groups[1].Value);
                }
            }
            if (response != null)
            {
                output.lemma.ToString(response);
            }

            // <------------------------------------------------------------  3. getting all instances for the lemma
            if (response != null)
            {
                response.AppendLine("#3 Extracting all instances for the Lemma [" + lemma + "] ");
            }

            string instanceQuery                = string.Format(posConverter.REGEX_UNITEX_LemmaToInstanceFormat, lemma);
            string instanceUnitexQuery          = "," + lemma + ".";
            Regex  instanceQueryRegex           = new Regex(instanceQuery);
            fileTextSearchResult instanceResult = languageManagerUnitex.manager.operatorDelaf.Search(cache, instanceUnitexQuery, false, 100, RegexOptions.IgnoreCase);

            if (response != null)
            {
                instanceResult.ToString(response, true);
            }

            foreach (var lr_lnp in instanceResult)
            {
                Match lmch = instanceQueryRegex.Match(lr_lnp.Value);

                // The lines come from a plain-text, case-insensitive search, so some of them may not match
                // the instance regex. Skip those instead of adding an entry built from empty groups.
                if (lmch.Success)
                {
                    output.instances.Add(lmch.Groups[1].Value, lmch.Groups[2].Value);
                }
            }

            // <------------------------------------------------------------  4. Resulting term model
            if (response != null)
            {
                response.AppendLine("#4 Resulting term model [" + lemma + "] ");
                output.ToString(response);
            }

            return(output);
        }
Esempio n. 13
0
        /// <summary>
        /// Collects synonym candidates for the lemma of <paramref name="model"/> by a round trip:
        /// word -- translation (Apertium) -- synset codes (English WordNet) -- all words of those synsets -- translation back (Apertium) -- word.
        /// </summary>
        /// <remarks>
        /// A fresh graph is always built from <c>model.lemma.inputForm</c>; whatever was stored in <c>model.graph</c> before the call
        /// is not consulted and is replaced by the new graph on return.
        /// Collections written on the model, in order:
        /// <c>translations</c> (after the first Apertium query),
        /// <c>wordnetSecondarySymsets</c> (after the synset-code query, before the prefix filter),
        /// <c>wordnetPrimarySymsets</c> (after the prefix filter),
        /// <c>translationRelated</c> (after the words-by-synset query),
        /// <c>wordnetSynonyms</c> (after translating back).
        /// When the branch filter runs, <c>wordnetPrimarySymsets</c>, <c>translations</c> and <c>synonyms</c> are then
        /// overwritten with the names found on graph levels 3, 4 and 5 respectively.
        /// </remarks>
        /// <param name="model">The model whose lemma is explored; it is modified in place and returned.</param>
        /// <param name="response">Log receiving the intermediate graph dumps and statistics; pass <c>null</c> to suppress that output.</param>
        /// <param name="disableCodePrefixFilter">if set to <c>true</c> the synset codes are not reduced by the code prefix derived from the lemma's POS type.</param>
        /// <param name="disableCodeBranchFilter">if set to <c>true</c> the level-3 branch filter (and the final overwrite of the model collections) is skipped.</param>
        /// <returns>The same <paramref name="model"/> instance, with <c>graph</c> set to the graph built by this call.</returns>
        public static termExploreModel getSynonymsWithWordnetViaApertium(termExploreModel model, ILogBuilder response, bool disableCodePrefixFilter = false, bool disableCodeBranchFilter = false)
        {
            // <---- 1. lemma -> translations (Apertium)
            tokenGraph result = languageManagerApertium.manager.queryForGraph(model.lemma.inputForm, apertiumDictQueryScope.exact);
            model.translations.AddRange(result.getAllLeafs().getNames());

            appendGraphTreeview(response, result);

            // <---- 2. translations -> synset codes (English WordNet)
            languageManagerWordnet.manager.queryWithGraph(result, response, WordnetSource.english, WordnetQueryType.getSymsetCodesByWord);

            appendGraphTreeview(response, result);

            model.wordnetSecondarySymsets.AddRange(result.getAllLeafs().getDeepest().getNames());

            if (!disableCodePrefixFilter)
            {
                string codeStart    = model.lemma.gramSet.getPosType().GetWordNetCodeStart().ToString();
                Regex  codeCriteria = new Regex("^" + codeStart);

                // NOTE(review): the second argument presumably inverts the match, so that the leaves NOT starting
                // with the expected code prefix are returned -- confirm against getAllLeafs.
                var badCodes = result.getAllLeafs(codeCriteria, true);

                if (response != null)
                {
                    response.AppendHorizontalLine();
                    response.AppendLine("Reducing to proper codes [" + codeStart + "]->filtered-out[" + badCodes.Count() + "]");
                }

                badCodes.removeFromParent();

                appendGraphTreeview(response, result);
            }

            model.wordnetPrimarySymsets.AddRange(result.getAllLeafs().getDeepest().getNames());

            // <---- 3. synset codes -> all words of those synsets (English WordNet)
            languageManagerWordnet.manager.queryWithGraph(result, response, WordnetSource.english, WordnetQueryType.getWordsBySymsetCode);

            model.translationRelated.AddRange(result.getAllLeafs().getDeepest().getNames());

            if (response != null)
            {
                response.AppendHorizontalLine();
                response.AppendLine("Getting English words by symsetcodes via WordNet");
            }
            appendGraphTreeview(response, result);

            // <---- 4. words of the synsets -> translated back (Apertium)
            languageManagerApertium.manager.queryByGraphNode(result, apertiumDictQueryScope.exact, apertiumDictNeedleSide.translated);
            model.wordnetSynonyms.AddRange(result.getAllLeafs().getDeepest().getNames());

            if (response != null)
            {
                response.AppendHorizontalLine();
                response.AppendLine("Translating back to Serbian via Apertium");
            }
            appendGraphTreeview(response, result);

            if (!disableCodeBranchFilter)
            {
                // <---- 5. branch filter on the level-3 nodes.
                // A level-3 node is removed as soon as ONE of its children has no child whose name equals the lemma input form.
                // NOTE(review): the original inline comment described this as removing nodes that contain "none of" the
                // first-level translation words, which is a weaker condition than what the loop implements -- confirm intent.
                var codeLevel = result.getAllChildren().getOnLevel(3);
                List <IObjectWithPathAndChildren> toTakeOut = new List <IObjectWithPathAndChildren>();

                foreach (var clb in codeLevel)
                {
                    foreach (var clb_c in clb)
                    {
                        bool takeOut = true;
                        foreach (var clb_cc in clb_c)
                        {
                            if (clb_cc.name == model.lemma.inputForm)
                            {
                                takeOut = false;
                                break;
                            }
                        }
                        if (takeOut)
                        {
                            if (response != null)
                            {
                                response.AppendLine("-- take out: " + clb.path);
                            }
                            toTakeOut.Add(clb);
                            break; // one failing child is enough, do not add the same node twice
                        }
                    }
                }

                toTakeOut.removeFromParent();

                // Counts taken before the model collections are overwritten; used only for the reduction report below.
                int wps = Enumerable.Count(model.wordnetSecondarySymsets);
                int tr  = Enumerable.Count(model.translationRelated);
                int ws  = Enumerable.Count(model.wordnetSynonyms);

                if (response != null)
                {
                    response.AppendLine("----- Branch-node filter ----");

                    response.AppendLine("Symsets: " + wps);
                    response.AppendLine("Translations: " + tr);
                    response.AppendLine("Terms: " + ws);
                }
                appendGraphTreeview(response, result);

                model.wordnetPrimarySymsets = result.getAllChildren().getOnLevel(3).getNames(true);
                model.translations          = result.getAllChildren().getOnLevel(4).getNames(true);
                model.synonyms              = result.getAllChildren().getOnLevel(5).getNames(true);

                wps = wps - Enumerable.Count(model.wordnetPrimarySymsets);
                tr  = tr - Enumerable.Count(model.translations);
                ws  = ws - Enumerable.Count(model.synonyms);

                if (response != null)
                {
                    response.AppendLine("Reduction of Symsets: " + wps);
                    response.AppendLine("Reduction of Translations: " + tr);
                    response.AppendLine("Reduction of Terms: " + ws);
                }
                appendGraphTreeview(response, result);
            }

            model.graph = result;
            return(model);
        }

        /// <summary>
        /// Writes the tree view of <paramref name="graph"/> to <paramref name="response"/>, wrapped in a pair of console alt-color toggles.
        /// Does nothing (and does not render the tree) when <paramref name="response"/> is <c>null</c>.
        /// </summary>
        /// <param name="response">The log to write to, or <c>null</c>.</param>
        /// <param name="graph">The graph to dump.</param>
        private static void appendGraphTreeview(ILogBuilder response, tokenGraph graph)
        {
            if (response == null)
            {
                return;
            }

            response.consoleAltColorToggle();
            string treeview = graph.ToStringTreeview();

            response.Append(treeview);
            response.consoleAltColorToggle();
        }
Esempio n. 14
0
        /// <summary>
        /// Loads (or creates) the term model for <paramref name="word"/> and runs the exploration selected by <paramref name="mode"/> on it.
        /// </summary>
        /// <param name="word">The word to explore.</param>
        /// <param name="response">Log for the header, the lemma lookup line and the final model summary; may be <c>null</c>.</param>
        /// <param name="mode">Which exploration procedure to run.</param>
        /// <param name="verbose">if set to <c>true</c> the log is also handed to the exploration procedure itself; otherwise that procedure receives <c>null</c>.</param>
        /// <returns>The explored model, after <c>PostProcess()</c> was called on it.</returns>
        public static termExploreModel explore(string word, ILogBuilder response, termExploreMode mode, bool verbose = false)
        {
            termExploreModel    model        = new termExploreModel(word);
            termExploreModelSet storedModels = semanticLexiconManager.manager.constructor.loadTermModels(word, true);

            bool hasLog = response != null;

            if (hasLog)
            {
                response.consoleAltColorToggle();
                response.AppendHorizontalLine();
                response.AppendLine("Exploring term[" + model.inputForm + "] with [" + mode.ToString() + "]");
                response.consoleAltColorToggle();
            }

            // Prefer the first model already known for the word; otherwise keep the fresh one with an empty lemma form.
            bool lemmaKnown = Enumerable.Any(storedModels);
            if (lemmaKnown)
            {
                model = Enumerable.First(storedModels);
            }
            else
            {
                model.lemmaForm = "";
            }

            if (hasLog)
            {
                response.AppendLine(lemmaKnown
                    ? "term[" + model.inputForm + "]->lemma[" + model.lemma.inputForm + "]"
                    : "term[" + word + "]->missingLemma[]");
            }

            // The exploration step only gets the log in verbose mode; the final summary always uses the caller's log.
            ILogBuilder detailLog = verbose ? response : null;

            switch (mode)
            {
            case termExploreMode.none:
                break;

            case termExploreMode.apertium_direct:
                model = getSynonymsWithApertium(model, detailLog);
                break;

            case termExploreMode.apertium_wordnet_eng:
                model = getSynonymsWithWordnetViaApertium(model, detailLog);
                break;

            case termExploreMode.apertium_wordnet_srb:
                model = getSynonymsWithSerbianWordNetAndApertium(model, detailLog);
                break;

            case termExploreMode.wordnet_srb:
                model = getSynonymsWithSerbianWordNet(model, detailLog);
                break;

            case termExploreMode.hunspell_srb:
                model = getSynonymsWithHunspell(model, detailLog);
                break;

            case termExploreMode.corpus:
                model = getSynonymsByCorpus(model, detailLog);
                break;

            case termExploreMode.unitex:
                model = exploreWithUnitex(word, detailLog);
                break;
            }

            model.PostProcess();

            if (hasLog)
            {
                model.ToString(response, verbose, false);
            }

            return(model);
        }