/// <summary>
/// Builds a <c>lexicQuery</c> graph for the given word: every matching line of the resource file
/// contributes a lemma-in-type node, an inflection under it, and a grammar case on that inflection.
/// </summary>
/// <param name="word">The word to look for; it is turned into a search regex via <c>GetSearchRegex(word, "", "")</c>.</param>
/// <param name="limit">Maximum number of entries to take; -1 selects <c>GENERAL_LEXIC_QUERY_SEARCH_LIMIT</c>.</param>
/// <param name="logger">Optional logger; not used by this method.</param>
/// <returns>The query graph populated from the matching resource lines.</returns>
public lexicQuery GetLexicQuery(string word, int limit = -1, ILogBuilder logger = null)
{
    // -1 is the "use the default" sentinel.
    int entryLimit = (limit == -1) ? GENERAL_LEXIC_QUERY_SEARCH_LIMIT : limit;

    lexicQuery query = new lexicQuery(word);

    String pattern = GetSearchRegex(word, "", "");
    var matches = resourceFileOperater.Search(pattern, true, entryLimit, RegexOptions.IgnoreCase);

    foreach (string entry in matches.getLineContentList())
    {
        // Split the resource line into its three fields.
        string form;
        string lemmaForm;
        string tag;
        SelectFromLine(entry, out form, out lemmaForm, out tag);

        var tagSet = grammTagConverter.ConvertFromString(tag);

        // Part-of-speech read from the tag set, with pos_type.none passed as the default argument.
        pos_type partOfSpeech = tagSet.Get<pos_type>(pos_type.none);

        lexicLemmaInTypeNode typedLemma = query.AddLemmaInType(lemmaForm, partOfSpeech);
        lexicInflection inflectionNode = typedLemma.AddInflection(form);
        inflectionNode.AddGrammarCase(tagSet);
    }

    return query;
}
/// <summary>
/// Returns a single <c>lexicInflection</c> graph for the word, collecting one grammar case
/// per matching line of the resource file.
/// </summary>
/// <param name="word">The inflected word to look up; it becomes the <c>name</c> of the returned graph.</param>
/// <param name="limit">Maximum number of entries to take; -1 selects <c>GENERAL_INFLECTION_SEARCH_LIMIT</c>.</param>
/// <param name="logger">Optional logger; when supplied, it receives a diagnostic message for each line whose gram tag could not be extracted.</param>
/// <returns>The inflection graph; its <c>lemmaForm</c> is taken from the first matching line.</returns>
public lexicInflection GetInflectionGraph(String word, int limit = -1, ILogBuilder logger = null)
{
    // -1 is the "use the default" sentinel.
    if (limit == -1)
    {
        limit = GENERAL_INFLECTION_SEARCH_LIMIT;
    }

    lexicInflection output = new lexicInflection();
    output.name = word;

    String reg = GetSearchRegex(word, "", "");
    Boolean firstSetup = true;

    var primResult = resourceFileOperater.Search(reg, true, limit, RegexOptions.IgnoreCase);

    foreach (string line in primResult.getLineContentList())
    {
        string inflectForm = "";
        string lemma = "";
        string gramTag = "";

        SelectFromLine(line, out inflectForm, out lemma, out gramTag);

        if (gramTag.isNullOrEmpty())
        {
            // Something is wrong with gramTag selection - report it, but only when a logger
            // was actually supplied (the previous check was inverted and dereferenced null).
            if (logger != null)
            {
                String msg = "GramTag failed to be found in [" + line + "] for [" + word + "]";
                msg += Environment.NewLine + "inflectedForm [" + inflectForm.toStringSafe("not found") + "]";
                msg += Environment.NewLine + "lemma [" + lemma.toStringSafe("not found") + "]";
                msg += Environment.NewLine + "gramTag [" + gramTag.toStringSafe("not found") + "]";
                logger.log(msg);
            }
        }

        // The line is still processed after a failed tag extraction, as before.
        var gramTagColl = grammTagConverter.ConvertFromString(gramTag);

        // Only the first matching line defines the lemma form of the graph.
        if (firstSetup)
        {
            output.lemmaForm = lemma;
            firstSetup = false;
        }

        output.AddGrammarCase(gramTagColl);
    }

    return output;
}
/// <summary>
/// Queries the lexic inflections for several words in one search, processing the results in parallel.
/// </summary>
/// <param name="words">The words to query inflection graphs for. Duplicates are allowed; the list is reduced to unique tokens first.</param>
/// <param name="limitPerWord">Result limit passed to the search; -1 selects <c>GENERAL_INFLECTION_SEARCH_LIMIT</c>.</param>
/// <param name="logger">Optional logger; not used by this method.</param>
/// <param name="regOpt">Regex options passed to the resource file search.</param>
/// <returns>A graph set holding one <c>lexicInflection</c> per word that produced search results.</returns>
public lexicGraphSet<lexicInflection> GetLexicInflection(IEnumerable<String> words, int limitPerWord = -1, ILogBuilder logger = null, RegexOptions regOpt = RegexOptions.IgnoreCase)
{
    // -1 is the "use the default" sentinel.
    if (limitPerWord == -1)
    {
        limitPerWord = GENERAL_INFLECTION_SEARCH_LIMIT;
    }

    lexicGraphSet<lexicInflection> graphs = new lexicGraphSet<lexicInflection>();

    // Reduce the input to distinct tokens.
    List<String> distinctWords = new List<string>();
    foreach (String candidate in words)
    {
        distinctWords.AddUnique(candidate);
    }

    // Map each search regex back to the word it was built from.
    var wordByPattern = new Dictionary<String, String>();
    foreach (String token in distinctWords)
    {
        String pattern = GetSearchRegex(token, "", "");
        wordByPattern.Add(pattern, token);
    }

    // Single search over all patterns, in regex mode.
    var searchResults = resourceFileOperater.Search(wordByPattern.Keys, true, regOpt, limitPerWord);

    // NOTE(review): the body below runs concurrently; this presumes graphs.GetOrAdd and the
    // per-inflection mutations are safe under parallel execution - confirm against lexicGraphSet.
    Parallel.ForEach(searchResults, result =>
    {
        String sourceWord = wordByPattern[result.needle];
        var graph = graphs.GetOrAdd(sourceWord, new lexicInflection(sourceWord));

        foreach (string entry in result.getLineContentList())
        {
            string form;
            string lemmaForm;
            string tag;
            SelectFromLine(entry, out form, out lemmaForm, out tag);

            var tagSet = grammTagConverter.ConvertFromString(tag);
            graph.AddGrammarCase(tagSet);

            // The lemma form is only filled in while it is still empty.
            if (graph.lemmaForm.isNullOrEmpty())
            {
                graph.lemmaForm = lemmaForm;
            }
        }
    });

    return graphs;
}