示例#1
0
        /// <summary>
        /// Creates a <see cref="morphRule"/> from the given suffix pattern and gram flags and
        /// appends it to <c>rules</c>. In the suffix pattern, <c>$</c> is the last letter of the
        /// nominative and <c>#</c> is n, t or s.
        /// </summary>
        /// <param name="sufix">The suffix pattern of the new rule.</param>
        /// <param name="gram">The gram flags of the new rule; its <c>type</c> is set to this instance's <c>type</c> before the rule is built.</param>
        /// <returns>The rule that was created and added.</returns>
        public morphRule Add(string sufix, gramFlags gram)
        {
            // The rule always carries the type of the owning set, whatever the caller passed in.
            gram.type = type;

            var rule = new morphRule(sufix, gram);
            rules.Add(rule);
            return rule;
        }
        /// <summary>
        /// Selects the rows of <c>table</c> whose <c>SRB_COLUMN_TOKEN</c> matches any of the given
        /// tokens (via a <c>LIKE</c> filter) and collects them as code/token pairs.
        /// </summary>
        /// <param name="srb_tokens">Tokens to look up. Single quotes inside a token are escaped before the filter is built.</param>
        /// <param name="response">Log builder; not used by this method.</param>
        /// <param name="buildModel">
        /// When <c>true</c>, a <see cref="termExploreModel"/> is created (or reused) in
        /// <c>output.models</c> for every key of the result and receives a gram entry and the pair value.
        /// </param>
        /// <returns>The collected results, including the models when <paramref name="buildModel"/> is set.</returns>
        public wordnetSymsetResults query_srb(List <String> srb_tokens, ILogBuilder response, Boolean buildModel = true)
        {
            wordnetSymsetResults output = new wordnetSymsetResults();

            getReady();

            List <DataRow> matches = new List <DataRow>();

            foreach (String tkn in srb_tokens)
            {
                // A single quote ends the string literal inside a DataTable filter expression, so a
                // token such as "d'or" would make Select() throw. Doubling the quote escapes it.
                // A null token keeps its old behaviour of producing the filter LIKE ''.
                String literal = (tkn ?? String.Empty).Replace("'", "''");

                matches.AddRangeUnique(table.Select(SRB_COLUMN_TOKEN + " LIKE '" + literal + "'"));
            }

            foreach (DataRow dr in matches)
            {
                // Note: the local is named "eng" but it is read from the SRB token column.
                String eng  = dr[SRB_COLUMN_TOKEN].toStringSafe();
                String code = dr[SRB_COLUMN_CODE].toStringSafe();
                output.Add(code, eng);
            }

            if (buildModel)
            {
                foreach (var pair in output)
                {
                    termExploreModel md  = null;
                    String           srb = pair.Key;

                    // One model per key: create it on first sight, reuse it afterwards.
                    if (!output.models.ContainsKey(srb))
                    {
                        md = new termExploreModel(srb);
                        output.models.Add(srb, md);
                    }
                    else
                    {
                        md = output.models[srb];
                    }

                    // The first character of the pair value selects the part-of-speech type;
                    // pos_type.none is passed as the fallback argument to getValue.
                    String   symc = pair.Value[0].ToString();
                    pos_type pt   = posConverter.wordNetFirstNumToPosType.getValue(symc, pos_type.none);

                    gramFlags gr = new gramFlags();
                    gr.Set(pt);
                    md.gramSet.Add(gr);

                    md.wordnetPrimarySymsets.AddUnique(pair.Value);
                }
            }

            return(output);
        }
示例#3
0
        /// <summary>
        /// Queries <c>termDiscoveryResolver</c> with the item's input form and, when votes come back,
        /// builds a <see cref="gramFlags"/> from the first vote of each grammatical category and adds
        /// it to the item's gram set.
        /// </summary>
        /// <param name="item">The item to resolve; its <c>gramSet</c> receives the discovered flags.</param>
        /// <param name="loger">Optional log output; may be <c>null</c>.</param>
        /// <param name="debug">Not used by this method.</param>
        /// <returns>
        /// <c>true</c> only when the resolver returned no votes and the input form is at least four
        /// characters long; <c>false</c> in every other case, including when a gram entry was added.
        /// </returns>
        public bool discoverGram(termExploreItem item, ILogBuilder loger, bool debug = true)
        {
            bool failed = false;

            if (loger != null)
            {
                loger.AppendLine("Item:" + item.inputForm);
            }

            instanceCountCollection <object> res = termDiscoveryResolver.resolveQuery(item.inputForm);

            res.reCalculate();

            if (res.Count > 0)
            {
                List <object> sorted = res.getSorted();

                // When the item already has a POS type, the POS votes are discarded so they cannot override it.
                if (item.gramSet.getPosType() != pos_type.none)
                {
                    sorted.RemoveAll(x => x is pos_type);
                }

                gramFlags gf = new gramFlags();

                if (sorted.Any(x => x is pos_type))
                {
                    gf.Set((pos_type)sorted.First(x => x is pos_type));
                }

                // Keep only the votes whose enum type is listed for the selected POS type.
                var tl = posConverter.posTypeVsPattern[gf.type];
                sorted.RemoveAll(x => !tl.Contains(x.GetType()));

                if (loger != null)
                {
                    loger.AppendLine("Votes:");

                    // Log at most the first 20 votes. The bound has to be the smaller of the two
                    // values: with Math.Max the loop indexed past the end of the list whenever
                    // fewer than 20 votes were left.
                    int shown = Math.Min(sorted.Count, 20);
                    for (int i = 0; i < shown; i++)
                    {
                        loger.Append(sorted[i].ToString() + "; ");
                    }
                }

                // For each grammatical category the first vote in sorted order is applied.
                if (sorted.Any(x => x is pos_gender))
                {
                    gf.Set((pos_gender)sorted.First(x => x is pos_gender));
                }
                if (sorted.Any(x => x is pos_gramaticalCase))
                {
                    gf.Set((pos_gramaticalCase)sorted.First(x => x is pos_gramaticalCase));
                }
                if (sorted.Any(x => x is pos_verbform))
                {
                    gf.Set((pos_verbform)sorted.First(x => x is pos_verbform));
                }
                if (sorted.Any(x => x is pos_number))
                {
                    gf.Set((pos_number)sorted.First(x => x is pos_number));
                }
                if (sorted.Any(x => x is pos_degree))
                {
                    gf.Set((pos_degree)sorted.First(x => x is pos_degree));
                }
                if (sorted.Any(x => x is pos_person))
                {
                    gf.Set((pos_person)sorted.First(x => x is pos_person));
                }

                if (loger != null)
                {
                    loger.AppendLine("Final gram:" + gf.ToString());
                }
                item.gramSet.Add(gf);
            }
            else
            {
                // Input forms shorter than four characters are not reported as failed.
                if (item.inputForm.Length < 4)
                {
                    return(false);
                }
                failed = true;
            }

            return(failed);
        }
示例#4
0
        /// <summary>
        /// Explores definition on an unknown term. The term is run through a sequence of sources
        /// (cached registry, number test, Hunspell, Apertium, named entities, Serbian WordNet and a
        /// final gram discovery); the first source that produces a model ends the exploration when
        /// <paramref name="shortExplore"/> is set.
        /// </summary>
        /// <param name="term">The term. It is trimmed before any lookup.</param>
        /// <param name="loger">The loger. May be <c>null</c>, in which case nothing is logged.</param>
        /// <param name="shortExplore">if set to <c>true</c> [short explore]: return as soon as Apertium, named entities or Serbian WordNet produced a model.</param>
        /// <param name="debug">if set to <c>true</c> [debug]: emit additional log lines and save the term model (only when a logger is supplied).</param>
        /// <param name="exploreModel">The value passed in is never read: it is overwritten with the Hunspell result before its first use.</param>
        /// <returns>
        /// The registered models for the term, or - when exploration fails - a list holding a single
        /// temporary model of type <c>pos_type.TEMP</c>.
        /// </returns>
        public List <termExploreModel> explore(string term, ILogBuilder loger, bool shortExplore = true, bool debug = true, termExploreModel exploreModel = null)
        {
            term = term.Trim();
            List <termExploreModel> output = new List <termExploreModel>();


            // Already explored: return the registered models.
            if (modelRegistry.ContainsKey(term))
            {
                return(modelRegistry[term]);
            }
            // Previously recorded as missing (see the failure branch at the end of this method).
            // NOTE(review): the failure branch does not call AddModel for the temporary model, so what
            // GetModels returns here depends on code outside this method - confirm.
            if (missing.Contains(term))
            {
                return(GetModels(term));
            }

            // Numbers get a temporary numerical model flagged as a datapoint and skip every other source.
            if (term.isNumber())
            {
                termExploreModel tmp = makeTempModel(term, pos_type.NUMnumerical);
                tmp.flags = termExploreItemEnumFlag.datapoint;
                if (loger != null)
                {
                    loger.AppendLine("Term [" + term + "] detected to be number.");
                }
                AddModel(tmp);
                return(GetModels(term));
            }

            // <----- second test
            // The exploreModel argument is replaced here unconditionally.
            exploreModel = termExploreProcedures.exploreWithHunspell(new termExploreItem(term), loger);
            List <string> suggests = new List <string>();

            // The query list is every instance form found by Hunspell plus the root word.
            exploreModel.instances.ForEach(x => suggests.Add(x.inputForm));


            //languageManagerDBNamedEntities.manager.exploreEntities(exploreModel.rootWord, exploreModel);

            suggests.Add(exploreModel.rootWord);

            // s



            apertiumDictionaryResult result = languageManagerApertium.manager.query(suggests, apertiumDictQueryScope.exact, apertiumDictNeedleSide.serbian);

            if (result.Any())
            {
                // Items that received gram flags from Apertium and are candidates for gram discovery.
                List <termExploreItem> gramCheck = new List <termExploreItem>();

                // NOTE(review): gr is assigned but never used in this method.
                gramFlags gr = null;



                if (result.termVsGramFlags.ContainsKey(exploreModel.inputForm))
                {
                    exploreModel.gramSet.Add(new gramFlags(result.termVsGramFlags[exploreModel.inputForm]));

                    if (exploreModel.lemma == null)
                    {
                        exploreModel.lemma = exploreModel.instances[exploreModel.inputForm];
                    }

                    gramCheck.Add(exploreModel);
                    if (debug)
                    {
                        if (loger != null)
                        {
                            loger.AppendLine("Apertium discovered model [" + exploreModel.inputForm + "]");
                        }
                    }
                }
                else
                {
                    //if (loger != null) loger.AppendLine("Apertium failed to discover [" + exploreModel.inputForm + "]");
                }

                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (result.termVsGramFlags.ContainsKey(item.inputForm))
                    {
                        item.gramSet.Add(new gramFlags(result.termVsGramFlags[item.inputForm]));
                        // NOTE(review): this adds the model itself, not the matched item, once per
                        // matching instance - possibly "item" was intended; confirm.
                        gramCheck.Add(exploreModel);
                        // The last matching instance wins for lemmaForm; lemma keeps the first one set.
                        exploreModel.lemmaForm = item.inputForm;
                        if (exploreModel.lemma == null)
                        {
                            exploreModel.lemma = item;
                        }


                        if (debug)
                        {
                            if (loger != null)
                            {
                                loger.AppendLine("Apertium discovered model [" + item.inputForm + "]");
                            }
                        }
                    }
                    else
                    {
                        //if (loger != null) loger.AppendLine("Apertium failed to discover [" + item.inputForm + "]");
                    }
                }

                exploreModel.translations.AddRange(result.GetEnglish());



                // Drop candidates whose POS type has no entries in posTypeVsPattern.
                gramCheck.RemoveAll(x => posConverter.posTypeVsPattern[x.gramSet.getPosType()].Count() == 0);

                // NOTE(review): the discoverGram shown alongside this method returns true only on its
                // failure path, while this counter is logged as "autodiscovered" - confirm which
                // meaning of the return value is intended.
                int disc = 0;
                foreach (var gram in gramCheck)
                {
                    if (discoverGram(gram, loger, debug))
                    {
                        disc++;
                    }
                }

                if (loger != null)
                {
                    loger.AppendLine("Gram [" + term + "] autodiscovered for [" + disc + "] / [" + gramCheck.Count() + "]");
                }
                // The model is saved only when debug is on AND a logger was supplied.
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "Apertium_");
                    }
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.aper;


                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    loger.AppendLine("Apertium failed to discover any information on [" + term + "]");
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ APERTIUM ^^

            foreach (string s in suggests)
            {
                languageManagerDBNamedEntities.manager.exploreEntities(s, exploreModel);
            }

            // The flag is compared after exploreEntities ran; presumably exploreEntities sets it on a hit - confirm.
            if (exploreModel.flags == termExploreItemEnumFlag.namedEntity)
            {
                AddModel(exploreModel);

                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "NamedEntity_");
                        loger.AppendLine("Named entities discovered model [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }



                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Named entities found nothing for [" + exploreModel.inputForm + "]:" + exploreModel.gramSet.ToString());
                    }
                }
            }


            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ NAMED ENTITY ^^

            // <----------------- Wordnet
            wordnetSymsetResults resSrWordnet = languageManagerWordnet.manager.query_srb(suggests, loger);
            bool found = false;

            if (resSrWordnet.Any())
            {
                foreach (termExploreItem item in exploreModel.instances)
                {
                    if (resSrWordnet.GetByKey(item.inputForm).Any())
                    {
                        // Every matching instance overwrites lemma/lemmaForm and appends ALL values and
                        // keys of the result again, so several matches append the same entries repeatedly.
                        exploreModel.lemma     = item;
                        exploreModel.lemmaForm = item.inputForm;
                        exploreModel.translations.AddRange(resSrWordnet.GetValues());
                        exploreModel.synonyms.AddRange(resSrWordnet.GetKeys());
                        exploreModel.flags = termExploreItemEnumFlag.srWNet;
                        found = true;

                        // assumes resSrWordnet.models has an entry for this input form - TODO confirm
                        item.gramSet.Add(new gramFlags(new Enum[] { resSrWordnet.models[item.inputForm].gramSet.getPosType() }));
                    }
                }

                // Gram discovery runs for every instance, matched or not; the return value is ignored.
                foreach (termExploreItem item in exploreModel.instances)
                {
                    discoverGram(item, loger, debug);
                }
            }

            if (found)
            {
                if (loger != null)
                {
                    loger.AppendLine("SerbianWordNet discovered model [" + term + "]:" + exploreModel.gramSet.ToString());
                }
                if (debug)
                {
                    if (loger != null)
                    {
                        exploreModel.ToString(loger, true, false);
                        manager.constructor.saveTermModel(exploreModel, "SrWordNet_");;
                    }
                }

                AddModel(exploreModel);
                exploreModel.flags = termExploreItemEnumFlag.srWNet;

                if (shortExplore)
                {
                    return(GetModels(term));
                }
            }
            else
            {
                if (loger != null)
                {
                    if (debug)
                    {
                        loger.AppendLine("Serbian wordnet found nothing for [" + term + "]");
                    }
                }
            }

            // <------------------ SERBIAN WORD NET ^^

            // Here the return value of discoverGram is read as "failed".
            bool failed = discoverGram(exploreModel, loger, debug);

            exploreModel.instances.ForEach(x => discoverGram(x, loger, debug));

            int d = 0;

            // NOTE(review): lastCheck is created empty and never filled, so the loop below never runs,
            // d stays 0 and failed is always forced to true further down. As written, the "!failed"
            // branch at the end of the method cannot be reached.
            List <termExploreItem> lastCheck = new List <termExploreItem>();

            foreach (var gram in lastCheck)
            {
                if (discoverGram(gram, loger, debug))
                {
                    d++;
                }
            }

            if (debug)
            {
                if (loger != null)
                {
                    loger.AppendLine("The last check [" + term + "] autodiscovered for [" + d + "] / [" + lastCheck.Count() + "]");
                }
            }

            if (d == 0)
            {
                failed = true;
            }

            if (loger != null)
            {
                loger.consoleAltColorToggle();
            }

            // <------------------ LAST CHECK ^^



            if (!failed)
            {
                exploreModel.flags = termExploreItemEnumFlag.termExplorer;
                AddModel(exploreModel);
                return(GetModels(term));
            }
            else
            {
                if (debug)
                {
                    if (loger != null)
                    {
                        loger.AppendLine("Exploration failed for [" + term + "] -- creating temporary term model");
                    }
                }
                // The temporary model is returned directly and the term is remembered as missing;
                // AddModel is not called for it.
                output.Add(makeTempModel(term, pos_type.TEMP));
                missing.Add(term);
                return(output);
            }
        }
示例#5
0
 /// <summary>
 /// Initializes a rule from a suffix pattern and the gram flags assigned to it.
 /// </summary>
 /// <param name="__sufix">The suffix pattern, stored in <c>sufix</c>.</param>
 /// <param name="__gram">The gram flags, stored in <c>gramEntry</c>.</param>
 public morphRule(string __sufix, gramFlags __gram)
 {
     this.sufix = __sufix;
     this.gramEntry = __gram;
 }