Exemple #1
0
        static string GetNameWithoutBrackets(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, bool normalizeFirstNounGroup = false, bool normalFirstGroupSingle = false, bool ignoreGeoReferent = false)
        {
            string res = null;

            if (BracketHelper.CanBeStartOfSequence(begin, false, false) && BracketHelper.CanBeEndOfSequence(end, false, begin, false))
            {
                begin = begin.Next;
                end   = end.Previous;
            }
            if (normalizeFirstNounGroup && !begin.Morph.Class.IsPreposition)
            {
                NounPhraseToken npt = NounPhraseHelper.TryParse(begin, NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
                if (npt != null)
                {
                    if (npt.Noun.GetMorphClassInDictionary().IsUndefined&& npt.Adjectives.Count == 0)
                    {
                        npt = null;
                    }
                }
                if (npt != null && npt.EndToken.EndChar > end.EndChar)
                {
                    npt = null;
                }
                if (npt != null)
                {
                    res = npt.GetNormalCaseText(null, (normalFirstGroupSingle ? Pullenti.Morph.MorphNumber.Singular : Pullenti.Morph.MorphNumber.Undefined), Pullenti.Morph.MorphGender.Undefined, false);
                    Pullenti.Ner.Token te = npt.EndToken.Next;
                    if (((te != null && te.Next != null && te.IsComma) && (te.Next is Pullenti.Ner.TextToken) && te.Next.EndChar <= end.EndChar) && te.Next.Morph.Class.IsVerb && te.Next.Morph.Class.IsAdjective)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo it in te.Next.Morph.Items)
                        {
                            if (it.Gender == npt.Morph.Gender || ((it.Gender & npt.Morph.Gender)) != Pullenti.Morph.MorphGender.Undefined)
                            {
                                if (!((it.Case & npt.Morph.Case)).IsUndefined)
                                {
                                    if (it.Number == npt.Morph.Number || ((it.Number & npt.Morph.Number)) != Pullenti.Morph.MorphNumber.Undefined)
                                    {
                                        string var = (te.Next as Pullenti.Ner.TextToken).Term;
                                        if (it is Pullenti.Morph.MorphWordForm)
                                        {
                                            var = (it as Pullenti.Morph.MorphWordForm).NormalCase;
                                        }
                                        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo()
                                        {
                                            Class = Pullenti.Morph.MorphClass.Adjective, Gender = npt.Morph.Gender, Number = npt.Morph.Number, Language = npt.Morph.Language
                                        };
                                        var = Pullenti.Morph.MorphologyService.GetWordform(var, bi);
                                        if (var != null)
                                        {
                                            res = string.Format("{0}, {1}", res, var);
                                            te  = te.Next.Next;
                                        }
                                        break;
                                    }
                                }
                            }
                        }
                    }
                    if (te != null && te.EndChar <= end.EndChar)
                    {
                        string s = GetNameEx(te, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
                        if (!string.IsNullOrEmpty(s))
                        {
                            if (!char.IsLetterOrDigit(s[0]))
                            {
                                res = string.Format("{0}{1}", res, s);
                            }
                            else
                            {
                                res = string.Format("{0} {1}", res, s);
                            }
                        }
                    }
                }
                else if ((begin is Pullenti.Ner.TextToken) && begin.Chars.IsCyrillicLetter)
                {
                    Pullenti.Morph.MorphClass mm = begin.GetMorphClassInDictionary();
                    if (!mm.IsUndefined)
                    {
                        res = begin.GetNormalCaseText(mm, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (begin.EndChar < end.EndChar)
                        {
                            res = string.Format("{0} {1}", res, GetNameEx(begin.Next, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, false));
                        }
                    }
                }
            }
            if (res == null)
            {
                res = GetNameEx(begin, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
            }
            if (!string.IsNullOrEmpty(res))
            {
                int k = 0;
                for (int i = res.Length - 1; i >= 0; i--, k++)
                {
                    if (res[i] == '*' || char.IsWhiteSpace(res[i]))
                    {
                    }
                    else
                    {
                        break;
                    }
                }
                if (k > 0)
                {
                    if (k == res.Length)
                    {
                        return(null);
                    }
                    res = res.Substring(0, res.Length - k);
                }
            }
            return(res);
        }
Exemple #2
0
        public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
        {
            if (end == null || begin == null)
            {
                return(null);
            }
            if (begin.EndChar > end.BeginChar && begin != end)
            {
                return(null);
            }
            StringBuilder res    = new StringBuilder();
            string        prefix = null;

            for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= end.EndChar; t = t.Next)
            {
                if (res.Length > 1000)
                {
                    break;
                }
                if (t.IsTableControlChar)
                {
                    continue;
                }
                if (ignoreBracketsAndHiphens)
                {
                    if (BracketHelper.IsBracket(t, false))
                    {
                        if (t == end)
                        {
                            break;
                        }
                        if (t.IsCharOf("(<["))
                        {
                            BracketSequenceToken br = BracketHelper.TryParse(t, BracketParseAttr.No, 100);
                            if (br != null && br.EndChar <= end.EndChar)
                            {
                                string tmp = GetNameEx(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                                if (tmp != null)
                                {
                                    if ((br.EndChar == end.EndChar && br.BeginToken.Next == br.EndToken.Previous && !br.BeginToken.Next.Chars.IsLetter) && !(br.BeginToken.Next is Pullenti.Ner.ReferentToken))
                                    {
                                    }
                                    else
                                    {
                                        res.AppendFormat(" {0}{1}{2}", t.GetSourceText(), tmp, br.EndToken.GetSourceText());
                                    }
                                }
                                t = br.EndToken;
                            }
                        }
                        continue;
                    }
                    if (t.IsHiphen)
                    {
                        if (t == end)
                        {
                            break;
                        }
                        else if (t.IsWhitespaceBefore || t.IsWhitespaceAfter)
                        {
                            continue;
                        }
                    }
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (!ignoreBracketsAndHiphens)
                    {
                        if ((tt.Next != null && tt.Next.IsHiphen && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt != end && tt.Next != end)
                        {
                            if (prefix == null)
                            {
                                prefix = tt.Term;
                            }
                            else
                            {
                                prefix = string.Format("{0}-{1}", prefix, tt.Term);
                            }
                            t = tt.Next;
                            if (t == end)
                            {
                                break;
                            }
                            else
                            {
                                continue;
                            }
                        }
                    }
                    string s = null;
                    if (cla.Value != 0 || !mc.IsUndefined || gender != Pullenti.Morph.MorphGender.Undefined)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo wff in tt.Morph.Items)
                        {
                            Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                            if (wf == null)
                            {
                                continue;
                            }
                            if (cla.Value != 0)
                            {
                                if (((wf.Class.Value & cla.Value)) == 0)
                                {
                                    continue;
                                }
                            }
                            if (!mc.IsUndefined)
                            {
                                if (((wf.Case & mc)).IsUndefined)
                                {
                                    continue;
                                }
                            }
                            if (gender != Pullenti.Morph.MorphGender.Undefined)
                            {
                                if (((wf.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
                                {
                                    continue;
                                }
                            }
                            if (s == null || wf.NormalCase == tt.Term)
                            {
                                s = wf.NormalCase;
                            }
                        }
                        if (s == null && gender != Pullenti.Morph.MorphGender.Undefined)
                        {
                            foreach (Pullenti.Morph.MorphBaseInfo wff in tt.Morph.Items)
                            {
                                Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                                if (wf == null)
                                {
                                    continue;
                                }
                                if (cla.Value != 0)
                                {
                                    if (((wf.Class.Value & cla.Value)) == 0)
                                    {
                                        continue;
                                    }
                                }
                                if (!mc.IsUndefined)
                                {
                                    if (((wf.Case & mc)).IsUndefined)
                                    {
                                        continue;
                                    }
                                }
                                if (s == null || wf.NormalCase == tt.Term)
                                {
                                    s = wf.NormalCase;
                                }
                            }
                        }
                    }
                    if (s == null)
                    {
                        s = tt.Term;
                        if (tt.Chars.IsLastLower && tt.LengthChar > 2)
                        {
                            s = tt.GetSourceText();
                            for (int i = s.Length - 1; i >= 0; i--)
                            {
                                if (char.IsUpper(s[i]))
                                {
                                    s = s.Substring(0, i + 1);
                                    break;
                                }
                            }
                        }
                    }
                    if (prefix != null)
                    {
                        string delim = "-";
                        if (ignoreBracketsAndHiphens)
                        {
                            delim = " ";
                        }
                        s = string.Format("{0}{1}{2}", prefix, delim, s);
                    }
                    prefix = null;
                    if (res.Length > 0 && s.Length > 0)
                    {
                        if (char.IsLetterOrDigit(s[0]))
                        {
                            char ch0 = res[res.Length - 1];
                            if (ch0 == '-')
                            {
                            }
                            else
                            {
                                res.Append(' ');
                            }
                        }
                        else if (!ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(tt, false, false))
                        {
                            res.Append(' ');
                        }
                    }
                    res.Append(s);
                }
                else if (t is Pullenti.Ner.NumberToken)
                {
                    if (res.Length > 0)
                    {
                        if (!t.IsWhitespaceBefore && res[res.Length - 1] == '-')
                        {
                        }
                        else
                        {
                            res.Append(' ');
                        }
                    }
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    if ((t.Morph.Class.IsAdjective && nt.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.BeginToken == nt.EndToken) && (nt.BeginToken is Pullenti.Ner.TextToken))
                    {
                        res.Append((nt.BeginToken as Pullenti.Ner.TextToken).Term);
                    }
                    else
                    {
                        res.Append(nt.Value);
                    }
                }
                else if (t is Pullenti.Ner.MetaToken)
                {
                    if ((ignoreGeoReferent && t != begin && t.GetReferent() != null) && t.GetReferent().TypeName == "GEO")
                    {
                        continue;
                    }
                    string s = GetNameEx((t as Pullenti.Ner.MetaToken).BeginToken, (t as Pullenti.Ner.MetaToken).EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
                    if (!string.IsNullOrEmpty(s))
                    {
                        if (res.Length > 0)
                        {
                            if (!t.IsWhitespaceBefore && res[res.Length - 1] == '-')
                            {
                            }
                            else
                            {
                                res.Append(' ');
                            }
                        }
                        res.Append(s);
                    }
                }
                if (t == end)
                {
                    break;
                }
            }
            if (res.Length == 0)
            {
                return(null);
            }
            return(res.ToString());
        }