/// <summary>
/// Convert an adverb into an adjective (currently intended for Russian only).
/// </summary>
/// <param name="adverb">the adverb, expected in upper case (only the endings 'О' and 'Е' are recognized)</param>
/// <param name="bi">gender, number and case the resulting adjective should be inflected to; may be null</param>
/// <returns>
/// null when <paramref name="adverb"/> is null or shorter than 4 characters;
/// the adverb itself when it does not end with 'О' or 'Е';
/// otherwise the derived adjective, inflected according to <paramref name="bi"/> when it is supplied
/// </returns>
public static string ConvertAdverbToAdjective(string adverb, MorphBaseInfo bi)
{
    bool tooShort = adverb == null || adverb.Length < 4;
    if (tooShort)
    {
        return null;
    }

    char ending = adverb[adverb.Length - 1];
    bool hasAdverbEnding = ending == 'О' || ending == 'Е';
    if (!hasAdverbEnding)
    {
        return adverb;
    }

    // Two candidate adjectives are built from the same stem: "...ИЙ" and "...ЫЙ".
    string stem = adverb.Substring(0, adverb.Length - 1);
    string softCandidate = stem + "ИЙ";
    string hardCandidate = stem + "ЫЙ";

    MorphWordForm softInfo = GetWordBaseInfo(softCandidate, null, false, false);
    MorphWordForm hardInfo = GetWordBaseInfo(hardCandidate, null, false, false);

    // The "...ИЙ" candidate is the default; "...ЫЙ" wins only when it alone is recognized as an adjective.
    string adjective = (!softInfo.Class.IsAdjective && hardInfo.Class.IsAdjective)
        ? hardCandidate
        : softCandidate;

    if (bi == null)
    {
        return adjective;
    }

    string inflected = m_Morph.GetWordform(adjective, MorphClass.Adjective, bi.Gender, bi.Case, bi.Number, MorphLang.Unknown, null);
    return inflected ?? adjective;
}
/// <summary>
/// Get the spelling of a word form that matches the given morphological information.
/// </summary>
/// <param name="word">the source word</param>
/// <param name="morphInfo">the target morphological information (class, gender, case, number, language)</param>
/// <returns>
/// the generated word form; the original <paramref name="word"/> is returned unchanged when
/// <paramref name="morphInfo"/> is null, when <paramref name="word"/> is null or empty,
/// or when no form could be generated
/// </returns>
public static string GetWordform(string word, MorphBaseInfo morphInfo)
{
    if (morphInfo == null || string.IsNullOrEmpty(word))
    {
        return word;
    }

    // When the requested class is undefined, fall back to the class detected for the word itself.
    MorphClass cla = morphInfo.Class;
    if (cla.IsUndefined)
    {
        MorphWordForm mi0 = GetWordBaseInfo(word, null, false, false);
        if (mi0 != null)
        {
            cla = mi0.Class;
        }
    }

    // The word is passed to the engine in upper case. Use the invariant culture so that the
    // result does not depend on the current thread culture (e.g. Turkish 'i' -> 'İ').
    string word1 = word;
    foreach (char ch in word)
    {
        if (char.IsLower(ch))
        {
            word1 = word.ToUpperInvariant();
            break;
        }
    }

    // If morphInfo is a full word form, it is additionally passed to the engine (null otherwise).
    MorphWordForm wf = morphInfo as MorphWordForm;
    string res = m_Morph.GetWordform(word1, cla, morphInfo.Gender, morphInfo.Case, morphInfo.Number, morphInfo.Language, wf);
    if (string.IsNullOrEmpty(res))
    {
        return word;
    }
    return res;
}
/// <summary>
/// Copy the state of another word form into this instance: first the base-class data
/// (via <c>base.CopyFrom</c>), then the word-form specific members.
/// </summary>
/// <param name="src">the word form to copy from</param>
public void CopyFromWordForm(MorphWordForm src)
{
    // Base-class part first.
    base.CopyFrom(src);

    // Members specific to the word form.
    this.UndefCoef = src.UndefCoef;
    this.NormalCase = src.NormalCase;
    this.NormalFull = src.NormalFull;
    this.Misc = src.Misc;
}
/// <summary>
/// Get the combined class, gender, number and case information for a word form.
/// </summary>
/// <param name="word">the word form to analyze</param>
/// <param name="lang">the possible language (may be null)</param>
/// <param name="isCaseNominative">
/// when true, variants whose case is neither nominative nor undefined are skipped;
/// otherwise variants in any case are accepted
/// </param>
/// <param name="inDictOnly">when true, no fallback to non-dictionary variants is made</param>
/// <returns>
/// the accumulated morphological information; never null, but left empty when the analysis
/// produced no token or no word forms
/// </returns>
public static MorphWordForm GetWordBaseInfo(string word, MorphLang lang = null, bool isCaseNominative = false, bool inDictOnly = false)
{
    List<MorphToken> mt = m_Morph.Run(word, false, lang, false, null);
    MorphWordForm bi = new MorphWordForm();
    MorphClass cla = new MorphClass();

    // Only the first token is inspected. A token without word forms contributes nothing
    // (guarded explicitly to avoid a NullReferenceException in the loop below).
    if (mt != null && mt.Count > 0 && mt[0] != null && mt[0].WordForms != null)
    {
        // Pass 0 considers dictionary variants only; pass 1 considers non-dictionary variants only.
        for (int k = 0; k < 2; k++)
        {
            bool ok = false;
            foreach (MorphWordForm wf in mt[0].WordForms)
            {
                if (k == 0)
                {
                    if (!wf.IsInDictionary)
                    {
                        continue;
                    }
                }
                else if (wf.IsInDictionary)
                {
                    continue;
                }

                if (isCaseNominative)
                {
                    if (!wf.Case.IsNominative && !wf.Case.IsUndefined)
                    {
                        continue;
                    }
                }

                // Accumulate the characteristics of every accepted variant.
                cla.Value |= wf.Class.Value;
                bi.Gender |= wf.Gender;
                bi.Case |= wf.Case;
                bi.Number |= wf.Number;

                // Keep the first non-null Misc encountered.
                if (wf.Misc != null && bi.Misc == null)
                {
                    bi.Misc = wf.Misc;
                }
                ok = true;
            }

            // Stop after the dictionary pass when it matched something or when
            // non-dictionary variants are not wanted.
            if (ok || inDictOnly)
            {
                break;
            }
        }
    }

    bi.Class = cla;
    return bi;
}
/// <summary>
/// Lemma (a morphologically normalized variant of the token).
/// </summary>
/// <returns>
/// the stored lemma when one is set; otherwise a normal form chosen from the word forms
/// (with a few ending corrections applied); when nothing suitable is found, the term itself,
/// or "?" when the term is null
/// </returns>
public string GetLemma()
{
    if (m_Lemma != null)
    {
        return m_Lemma;
    }

    string lemma = null;
    bool hasForms = WordForms != null && WordForms.Count > 0;
    if (hasForms)
    {
        // A single variant: take its normal form directly.
        if (WordForms.Count == 1)
        {
            lemma = WordForms[0].NormalFull ?? WordForms[0].NormalCase;
        }

        // Not an all-lower-case token: look for a surname ending with "ОВ"/"ЕВ"
        // or a dictionary proper name.
        if (lemma == null && !CharInfo.IsAllLower)
        {
            foreach (MorphWordForm form in WordForms)
            {
                if (form.Class.IsProperSurname)
                {
                    string normal = form.NormalFull ?? form.NormalCase ?? "";
                    if (LanguageHelper.EndsWithEx(normal, "ОВ", "ЕВ", null, null))
                    {
                        lemma = normal;
                        break;
                    }
                    continue;
                }

                if (form.Class.IsProperName && form.IsInDictionary)
                {
                    // Returned as is, without the ending corrections below.
                    return form.NormalCase;
                }
            }
        }

        // Otherwise pick the variant that CompareForms ranks first.
        if (lemma == null)
        {
            MorphWordForm best = null;
            foreach (MorphWordForm form in WordForms)
            {
                if (best == null || this.CompareForms(best, form) > 0)
                {
                    best = form;
                }
            }
            lemma = best.NormalFull ?? best.NormalCase;
        }
    }

    if (lemma == null)
    {
        return Term ?? "?";
    }

    // "...АНЫЙ" / "...ЕНЫЙ" -> "...АННЫЙ" / "...ЕННЫЙ".
    if (LanguageHelper.EndsWithEx(lemma, "АНЫЙ", "ЕНЫЙ", null, null))
    {
        return lemma.Substring(0, lemma.Length - 3) + "ННЫЙ";
    }

    // "...ЙСЯ" -> "...Й".
    if (LanguageHelper.EndsWith(lemma, "ЙСЯ"))
    {
        return lemma.Substring(0, lemma.Length - 2);
    }

    // "...АНИЙ" equal to the term itself: keep it when any variant is in the dictionary,
    // otherwise replace the final letter with "Е".
    if (LanguageHelper.EndsWith(lemma, "АНИЙ") && lemma == Term)
    {
        foreach (MorphWordForm form in WordForms)
        {
            if (form.IsInDictionary)
            {
                return lemma;
            }
        }
        return lemma.Substring(0, lemma.Length - 1) + "Е";
    }

    return lemma;
}
/// <summary>
/// Rank two word-form variants against each other for lemma selection.
/// </summary>
/// <param name="x">the first variant</param>
/// <param name="y">the second variant</param>
/// <returns>
/// a negative value when <paramref name="x"/> is preferable, a positive value when
/// <paramref name="y"/> is preferable, 0 when neither is preferred
/// (GetLemma replaces its current best variant when the result is positive)
/// </returns>
int CompareForms(MorphWordForm x, MorphWordForm y)
{
    string vx = x.NormalFull ?? x.NormalCase;
    string vy = y.NormalFull ?? y.NormalCase;
    if (vx == vy)
    {
        return 0;
    }
    // A variant without a normal form always loses.
    if (string.IsNullOrEmpty(vx))
    {
        return 1;
    }
    if (string.IsNullOrEmpty(vy))
    {
        return -1;
    }
    char lastx = vx[vx.Length - 1];
    char lasty = vy[vy.Length - 1];

    // Surnames with typical endings take precedence when the token is not all lower case.
    if (x.Class.IsProperSurname && !CharInfo.IsAllLower)
    {
        if (LanguageHelper.EndsWithEx(vx, "ОВ", "ЕВ", "ИН", null))
        {
            if (!y.Class.IsProperSurname)
            {
                return -1;
            }
        }
    }
    if (y.Class.IsProperSurname && !CharInfo.IsAllLower)
    {
        if (LanguageHelper.EndsWithEx(vy, "ОВ", "ЕВ", "ИН", null))
        {
            if (!x.Class.IsProperSurname)
            {
                return 1;
            }
            // Both are surnames here: the longer normal form is ranked first.
            if (vx.Length > vy.Length)
            {
                return -1;
            }
            if (vx.Length < vy.Length)
            {
                return 1;
            }
            return 0;
        }
    }

    // Same class: decide by ending / number heuristics, then by length (shorter first).
    if (x.Class == y.Class)
    {
        if (x.Class.IsAdjective)
        {
            if (lastx == 'Й' && lasty != 'Й')
            {
                return -1;
            }
            if (lastx != 'Й' && lasty == 'Й')
            {
                return 1;
            }
            if (!LanguageHelper.EndsWith(vx, "ОЙ") && LanguageHelper.EndsWith(vy, "ОЙ"))
            {
                return -1;
            }
            if (LanguageHelper.EndsWith(vx, "ОЙ") && !LanguageHelper.EndsWith(vy, "ОЙ"))
            {
                return 1;
            }
        }
        if (x.Class.IsNoun)
        {
            if (x.Number == MorphNumber.Singular && y.Number == MorphNumber.Plural && vx.Length <= (vy.Length + 1))
            {
                return -1;
            }
            if (x.Number == MorphNumber.Plural && y.Number == MorphNumber.Singular && vx.Length >= (vy.Length - 1))
            {
                return 1;
            }
        }
        if (vx.Length < vy.Length)
        {
            return -1;
        }
        if (vx.Length > vy.Length)
        {
            return 1;
        }
        return 0;
    }

    // Different classes: decide by the class of x first, then by the class of y.
    if (x.Class.IsAdverb)
    {
        return 1;
    }
    if (x.Class.IsNoun && x.IsInDictionary)
    {
        if (y.Class.IsAdjective && y.IsInDictionary)
        {
            // The adjective wins unless it carries the "к.ф." attribute
            // (presumably "short form" - confirm). A variant without Misc data
            // is treated as not carrying the attribute.
            if (y.Misc == null || !y.Misc.Attrs.Contains("к.ф."))
            {
                return 1;
            }
        }
        return -1;
    }
    if (x.Class.IsAdjective)
    {
        if (!x.IsInDictionary && y.Class.IsNoun && y.IsInDictionary)
        {
            return 1;
        }
        return -1;
    }
    if (x.Class.IsVerb)
    {
        if (y.Class.IsNoun || y.Class.IsAdjective || y.Class.IsPreposition)
        {
            return 1;
        }
        return -1;
    }
    if (y.Class.IsAdverb)
    {
        return -1;
    }
    if (y.Class.IsNoun && y.IsInDictionary)
    {
        return 1;
    }
    if (y.Class.IsAdjective)
    {
        if ((x.Class.IsNoun || x.Class.IsProperSecname) && x.IsInDictionary)
        {
            return -1;
        }
        if (x.Class.IsNoun && !y.IsInDictionary)
        {
            if (vx.Length < vy.Length)
            {
                return -1;
            }
        }
        return 1;
    }
    if (y.Class.IsVerb)
    {
        if (x.Class.IsNoun || x.Class.IsAdjective || x.Class.IsPreposition)
        {
            return -1;
        }
        if (x.Class.IsProper)
        {
            return -1;
        }
        return 1;
    }

    // No class-based rule applied: the shorter normal form is ranked first.
    if (vx.Length < vy.Length)
    {
        return -1;
    }
    if (vx.Length > vy.Length)
    {
        return 1;
    }
    return 0;
}