コード例 #1
0
        /// <summary>
        /// Builds a lexicQuery graph for the given word: each line returned by the resource file
        /// search is split into inflected form, lemma and grammar tag, and registered in the query
        /// as an inflection under its lemma / part-of-speech node.
        /// </summary>
        /// <param name="word">The word to look for</param>
        /// <param name="limit">Limit on number of entries to take; -1 selects GENERAL_LEXIC_QUERY_SEARCH_LIMIT</param>
        /// <param name="logger">The logger - currently not used by this method</param>
        /// <returns>The query graph populated from the search result lines</returns>
        public lexicQuery GetLexicQuery(string word, int limit = -1, ILogBuilder logger = null)
        {
            // -1 is the "use the default" sentinel
            int effectiveLimit = (limit == -1) ? GENERAL_LEXIC_QUERY_SEARCH_LIMIT : limit;

            lexicQuery query   = new lexicQuery(word);
            String     pattern = GetSearchRegex(word, "", "");

            var searchResult = resourceFileOperater.Search(pattern, true, effectiveLimit, RegexOptions.IgnoreCase);

            foreach (string entry in searchResult.getLineContentList())
            {
                string form;
                string entryLemma;
                string tagText;

                SelectFromLine(entry, out form, out entryLemma, out tagText);

                var      tags         = grammTagConverter.ConvertFromString(tagText);
                pos_type partOfSpeech = tags.Get<pos_type>(pos_type.none);

                // Attach the inflected form to its lemma node, then record the grammar tags on it
                lexicLemmaInTypeNode lemmaNode      = query.AddLemmaInType(entryLemma, partOfSpeech);
                lexicInflection      inflectionNode = lemmaNode.AddInflection(form);
                inflectionNode.AddGrammarCase(tags);
            }

            return query;
        }
コード例 #2
0
 /// <summary>
 /// Stores the given inflection graph and adds the tags of each of its grammar cases to the flag bag.
 /// </summary>
 /// <param name="_graph">The inflection graph to store and read grammar cases from</param>
 public void AddGraph(lexicInflection _graph)
 {
     graph = _graph;

     foreach (lexicGrammarCase grammarCase in graph)
     {
         var caseTags = grammarCase.tags.GetTags();
         flagBag.AddRange(caseTags, true);
     }
 }
コード例 #3
0
        /// <summary>
        /// Returns single lexicInflection graph for the word, with one grammar case added per line
        /// returned by the resource file search (different grammTags of the same word).
        /// </summary>
        /// <param name="word">The word.</param>
        /// <param name="limit">Limit on number of entries to take; -1 selects GENERAL_INFLECTION_SEARCH_LIMIT</param>
        /// <param name="logger">Optional logger; when supplied, lines whose grammar tag could not be extracted are reported to it.</param>
        /// <returns>The inflection graph; its lemmaForm is taken from the first result line</returns>
        public lexicInflection GetInflectionGraph(String word, int limit = -1, ILogBuilder logger = null)
        {
            if (limit == -1)
            {
                limit = GENERAL_INFLECTION_SEARCH_LIMIT;
            }

            lexicInflection output = new lexicInflection();

            output.name = word;

            String reg = GetSearchRegex(word, "", "");

            Boolean firstSetup = true;

            var primResult = resourceFileOperater.Search(reg, true, limit, RegexOptions.IgnoreCase);

            foreach (string line in primResult.getLineContentList())
            {
                string inflectForm = "";
                string lemma       = "";
                string gramTag     = "";

                SelectFromLine(line, out inflectForm, out lemma, out gramTag);

                if (gramTag.isNullOrEmpty())
                {
                    // Something is wrong with gramTag selection.
                    // Report only when a logger was supplied: the previous check was inverted
                    // (logger == null) and dereferenced a null logger.
                    if (logger != null)
                    {
                        String msg = "GramTag failed to be found in [" + line + "] for [" + word + "]";
                        msg += Environment.NewLine + "inflectedForm [" + inflectForm.toStringSafe("not found") + "]";
                        msg += Environment.NewLine + "lemma         [" + lemma.toStringSafe("not found") + "]";
                        msg += Environment.NewLine + "gramTag       [" + gramTag.toStringSafe("not found") + "]";

                        logger.log(msg);
                    }
                }

                // The line is still processed even when the tag is empty, as before
                var gramTagColl = grammTagConverter.ConvertFromString(gramTag);

                if (firstSetup)
                {
                    // Lemma of the first result line becomes the lemma form of the graph
                    output.lemmaForm = lemma;
                    firstSetup       = false;
                }

                output.AddGrammarCase(gramTagColl);
            }

            return(output);
        }
コード例 #4
0
        /// <summary>
        /// Queries the lexic inflections for several words at once, processing the search results with Parallel.ForEach.
        /// </summary>
        /// <param name="words">The words to query inflection graphs for. It may contain duplicates, as the list is reduced to unique tokens first.</param>
        /// <param name="limitPerWord">The limit in results per word; -1 selects GENERAL_INFLECTION_SEARCH_LIMIT.</param>
        /// <param name="logger">The logger - currently not used by this method.</param>
        /// <param name="regOpt">Regex options passed to the resource file search.</param>
        /// <returns>Set of inflection graphs, keyed by the queried word</returns>
        public lexicGraphSet <lexicInflection> GetLexicInflection(IEnumerable <String> words, int limitPerWord = -1, ILogBuilder logger = null, RegexOptions regOpt = RegexOptions.IgnoreCase)
        {
            lexicGraphSet<lexicInflection> graphs = new lexicGraphSet<lexicInflection>();

            // -1 is the "use the default" sentinel
            if (limitPerWord == -1)
            {
                limitPerWord = GENERAL_INFLECTION_SEARCH_LIMIT;
            }

            // Drop duplicate tokens before building the search patterns
            List<String> distinctWords = new List<string>();
            foreach (String candidate in words)
            {
                distinctWords.AddUnique(candidate);
            }

            // Maps each search pattern back to the word it was generated for
            var wordByPattern = new Dictionary<String, String>();
            foreach (String distinctWord in distinctWords)
            {
                String pattern = GetSearchRegex(distinctWord, "", "");
                wordByPattern.Add(pattern, distinctWord);
            }

            // Arguments: needles, useRegex, regexOptions, limitResult
            var searchResults = resourceFileOperater.Search(wordByPattern.Keys, true, regOpt, limitPerWord);

            // NOTE(review): graphs.GetOrAdd and AddGrammarCase are called from parallel workers -
            // confirm that lexicGraphSet and lexicInflection are safe for that use.
            Parallel.ForEach(searchResults, hit =>
            {
                String sourceWord = wordByPattern[hit.needle];

                lexicInflection graph = graphs.GetOrAdd(sourceWord, new lexicInflection(sourceWord));

                foreach (string entry in hit.getLineContentList())
                {
                    string form;
                    string entryLemma;
                    string tagText;

                    SelectFromLine(entry, out form, out entryLemma, out tagText);

                    var tags = grammTagConverter.ConvertFromString(tagText);
                    graph.AddGrammarCase(tags);

                    // The first lemma seen for the word is kept as the lemma form
                    if (graph.lemmaForm.isNullOrEmpty())
                    {
                        graph.lemmaForm = entryLemma;
                    }
                }
            });

            return graphs;
        }