예제 #1
0
        /// <summary>
        /// Попытаться выделить предлог с указанного токена
        /// </summary>
        /// <param name="t">начальный токен</param>
        /// <return>результат или null</return>
        public static PrepositionToken TryParse(Pullenti.Ner.Token t)
        {
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);

            if (tok != null)
            {
                return new PrepositionToken(t, tok.EndToken)
                       {
                           Normal = tok.Termin.CanonicText, NextCase = (Pullenti.Morph.MorphCase)tok.Termin.Tag
                       }
            }
            ;
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (!mc.IsPreposition)
            {
                return(null);
            }
            PrepositionToken res = new PrepositionToken(t, t);

            res.Normal   = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Preposition, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            res.NextCase = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition(res.Normal);
            if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.TextToken) && t.Next.Next.GetMorphClassInDictionary().IsPreposition)
            {
                res.EndToken = t.Next.Next;
            }
            return(res);
        }
예제 #2
0
 public NounPhraseItemTextVar(Pullenti.Morph.MorphBaseInfo src = null, Pullenti.Ner.Token t = null) : base()
 {
     if (src != null)
     {
         this.CopyFrom(src);
     }
     Pullenti.Morph.MorphWordForm wf = src as Pullenti.Morph.MorphWordForm;
     if (wf != null)
     {
         NormalValue = wf.NormalCase;
         if (wf.Number == Pullenti.Morph.MorphNumber.Plural && wf.NormalFull != null)
         {
             SingleNumberValue = wf.NormalFull;
         }
         UndefCoef = wf.UndefCoef;
     }
     else if (t != null)
     {
         NormalValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
     }
     if (Case.IsUndefined && src != null)
     {
         if (src.ContainsAttr("неизм.", null))
         {
             Case = Pullenti.Morph.MorphCase.AllCases;
         }
     }
 }
예제 #3
0
        public override string ToString()
        {
            string res = UnknownName ?? ((ExtOnto == null ? Unit.ToString() : ExtOnto.ToString()));

            if (Pow != 1)
            {
                res = string.Format("{0}<{1}>", res, Pow);
            }
            if (IsDoubt)
            {
                res += "?";
            }
            if (Keyword != null)
            {
                res = string.Format("{0} (<-{1})", res, Keyword.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
            }
            return(res);
        }
예제 #4
0
        public static bool CheckKeyword(Pullenti.Ner.Measure.MeasureKind ki, Pullenti.Ner.Token t)
        {
            if (t == null || ki == Pullenti.Ner.Measure.MeasureKind.Undefined)
            {
                return(false);
            }
            if (t is Pullenti.Ner.MetaToken)
            {
                for (Pullenti.Ner.Token tt = (t as Pullenti.Ner.MetaToken).BeginToken; tt != null && tt.EndChar <= t.EndChar; tt = tt.Next)
                {
                    if (CheckKeyword(ki, tt))
                    {
                        return(true);
                    }
                }
                return(false);
            }
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(false);
            }
            string term = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);

            foreach (Unit u in Units)
            {
                if (u.Kind == ki)
                {
                    if (u.Keywords.Contains(term))
                    {
                        return(true);
                    }
                }
            }
            if (m_KindsKeywords.ContainsKey(ki))
            {
                if (m_KindsKeywords[ki].Contains(term))
                {
                    return(true);
                }
            }
            return(false);
        }
예제 #5
0
 public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
 {
     if (t == null)
     {
         return(null);
     }
     if (t is Pullenti.Ner.ReferentToken)
     {
         Pullenti.Ner.Referent r = t.GetReferent();
         if ((r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent)) || r.TypeName == "ORGANIZATION")
         {
             return new NamedItemToken(t, t)
                    {
                        Ref = r, Morph = t.Morph
                    }
         }
         ;
         return(null);
     }
     Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (typ != null)
     {
         if (!(t is Pullenti.Ner.TextToken))
         {
             return(null);
         }
         NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken)
         {
             Morph = typ.Morph, Chars = typ.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
         res.TypeValue = typ.Termin.CanonicText;
         if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind)
         {
             res.NameValue   = nam.Termin.CanonicText;
             res.IsWellknown = true;
         }
         return(res);
     }
     if (nam != null)
     {
         if (nam.BeginToken.Chars.IsAllLower)
         {
             return(null);
         }
         NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken)
         {
             Morph = nam.Morph, Chars = nam.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
         res.NameValue = nam.Termin.CanonicText;
         bool ok = true;
         if (!t.IsWhitespaceBefore && t.Previous != null)
         {
             ok = false;
         }
         else if (!t.IsWhitespaceAfter && t.Next != null)
         {
             if (t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter)
             {
             }
             else
             {
                 ok = false;
             }
         }
         if (ok)
         {
             res.IsWellknown = true;
             res.TypeValue   = nam.Termin.Tag2 as string;
         }
         return(res);
     }
     Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
     if (adj != null)
     {
         if (adj.Morph.Class.IsNoun)
         {
             if (adj.EndToken.IsValue("ВОСТОК", null))
             {
                 if (adj.BeginToken == adj.EndToken)
                 {
                     return(null);
                 }
                 NamedItemToken re = new NamedItemToken(t, adj.EndToken)
                 {
                     Morph = adj.Morph
                 };
                 re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
                 re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                 re.IsWellknown = true;
                 return(re);
             }
             return(null);
         }
         if (adj.WhitespacesAfterCount > 2)
         {
             return(null);
         }
         if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
         {
             NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next)
             {
                 Morph = adj.EndToken.Next.Morph
             };
             re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
             re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
             re.IsWellknown = true;
             re.Ref         = adj.EndToken.Next.GetReferent();
             return(re);
         }
         NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
         if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
         {
             string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
             if (s != null)
             {
                 if (res.NameValue == null)
                 {
                     res.NameValue = s.ToUpper();
                 }
                 else
                 {
                     res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                     res.TypeValue = null;
                 }
                 res.BeginToken  = t;
                 res.Chars       = t.Chars;
                 res.IsWellknown = true;
                 return(res);
             }
         }
     }
     if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
     {
         Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
         if (npt != null && npt.Adjectives.Count > 0)
         {
             NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
             if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null)
             {
                 test.BeginToken = t;
                 StringBuilder tmp = new StringBuilder();
                 foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
                 {
                     string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                     if (tmp.Length > 0)
                     {
                         tmp.Append(' ');
                     }
                     tmp.Append(s);
                 }
                 test.NameValue = tmp.ToString();
                 test.Chars     = t.Chars;
                 if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
                 {
                     test.IsWellknown = true;
                 }
                 return(test);
             }
         }
     }
     if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower)
     {
         Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
         if (br != null)
         {
             NamedItemToken res = new NamedItemToken(t, br.EndToken);
             res.IsInBracket = true;
             res.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
             nam             = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
             if (nam != null && nam.EndToken == br.EndToken.Previous)
             {
                 res.Kind        = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                 res.IsWellknown = true;
                 res.NameValue   = nam.Termin.CanonicText;
             }
             return(res);
         }
     }
     if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
     {
         NamedItemToken res = new NamedItemToken(t, t)
         {
             Morph = t.Morph
         };
         string str = (t as Pullenti.Ner.TextToken).Term;
         if (str.EndsWith("О") || str.EndsWith("И") || str.EndsWith("Ы"))
         {
             res.NameValue = str;
         }
         else
         {
             res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         }
         res.Chars = t.Chars;
         if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter)
         {
             t             = (res.EndToken = t.Next.Next);
             res.NameValue = string.Format("{0}-{1}", res.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
         }
         return(res);
     }
     return(null);
 }
예제 #6
0
 public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
 {
     Pullenti.Ner.Token          noun = npt.Noun.BeginToken;
     Pullenti.Semantic.SemObject sem  = new Pullenti.Semantic.SemObject(gr);
     sem.Tokens.Add(npt.Noun);
     sem.Typ = Pullenti.Semantic.SemObjectType.Noun;
     if (npt.Noun.Morph.Class.IsPersonalPronoun)
     {
         sem.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
     }
     else if (npt.Noun.Morph.Class.IsPronoun)
     {
         sem.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
     }
     if (npt.Noun.BeginToken != npt.Noun.EndToken)
     {
         sem.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         sem.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         sem.Morph.Class      = Pullenti.Morph.MorphClass.Noun;
         sem.Morph.Number     = npt.Morph.Number;
         sem.Morph.Gender     = npt.Morph.Gender;
         sem.Morph.Case       = npt.Morph.Case;
     }
     else if (noun is Pullenti.Ner.TextToken)
     {
         foreach (Pullenti.Morph.MorphBaseInfo wf in noun.Morph.Items)
         {
             if (wf.CheckAccord(npt.Morph, false, false) && (wf is Pullenti.Morph.MorphWordForm))
             {
                 _setMorph(sem, wf as Pullenti.Morph.MorphWordForm);
                 break;
             }
         }
         if (sem.Morph.NormalCase == null)
         {
             sem.Morph.NormalCase = noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
             sem.Morph.NormalFull = noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         }
         List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(sem.Morph.NormalFull, true, null);
         if (grs != null && grs.Count > 0)
         {
             sem.Concept = grs[0];
         }
     }
     else if (noun is Pullenti.Ner.ReferentToken)
     {
         Pullenti.Ner.Referent r = (noun as Pullenti.Ner.ReferentToken).Referent;
         if (r == null)
         {
             return(null);
         }
         sem.Morph.NormalFull = (sem.Morph.NormalCase = r.ToString());
         sem.Concept          = r;
     }
     else if (noun is Pullenti.Ner.NumberToken)
     {
         Pullenti.Ner.NumberToken num = noun as Pullenti.Ner.NumberToken;
         sem.Morph.Gender = noun.Morph.Gender;
         sem.Morph.Number = noun.Morph.Number;
         if (num.IntValue != null)
         {
             sem.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(num.IntValue.Value, noun.Morph.Gender, noun.Morph.Number);
             sem.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(num.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
         }
         else
         {
             sem.Morph.NormalFull = (sem.Morph.NormalCase = noun.GetSourceText().ToUpper());
         }
     }
     noun.Tag = sem;
     if (npt.Adjectives.Count > 0)
     {
         foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
         {
             if (npt.MultiNouns && a != npt.Adjectives[0])
             {
                 break;
             }
             Pullenti.Semantic.SemObject asem = CreateNptAdj(gr, npt, a);
             if (asem != null)
             {
                 gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, sem, asem, "какой", false, null);
             }
         }
     }
     if (npt.InternalNoun != null)
     {
         Pullenti.Semantic.SemObject intsem = CreateNounGroup(gr, npt.InternalNoun);
         if (intsem != null)
         {
             gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, sem, intsem, null, false, null);
         }
     }
     gr.Objects.Add(sem);
     return(sem);
 }
예제 #7
0
        static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse)
        {
            VerbPhraseToken res = null;

            Pullenti.Ner.Token t0         = t;
            Pullenti.Ner.Token not        = null;
            bool             hasVerb      = false;
            bool             verbBeBefore = false;
            PrepositionToken prep         = null;

            for (; t != null; t = t.Next)
            {
                if (!(t is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                bool isParticiple         = false;
                if (tt.Term == "НЕ")
                {
                    not = t;
                    continue;
                }
                int    ty   = 0;
                string norm = null;
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (tt.Term == "НЕТ")
                {
                    if (hasVerb)
                    {
                        break;
                    }
                    ty = 1;
                }
                else if (tt.Term == "ДОПУСТИМО")
                {
                    ty = 3;
                }
                else if (mc.IsAdverb && !mc.IsVerb)
                {
                    ty = 2;
                }
                else if (tt.IsPureVerb || tt.IsVerbBe)
                {
                    ty = 1;
                    if (hasVerb)
                    {
                        if (!tt.Morph.ContainsAttr("инф.", null))
                        {
                            if (verbBeBefore)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }
                else if (mc.IsVerb)
                {
                    if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun)
                    {
                    }
                    else if (mc.IsNoun)
                    {
                        if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ")
                        {
                            ty = 1;
                        }
                        else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt))
                        {
                            ty = 1;
                        }
                        else if (mc.IsAdjective && canBePartition)
                        {
                            ty = 1;
                        }
                        else if (forceParse)
                        {
                            ty = 1;
                        }
                    }
                    else if (mc.IsProper)
                    {
                        if (tt.Chars.IsAllLower)
                        {
                            ty = 1;
                        }
                    }
                    else
                    {
                        ty = 1;
                    }
                    if (mc.IsAdjective)
                    {
                        isParticiple = true;
                    }
                    if (!tt.Morph.Case.IsUndefined)
                    {
                        isParticiple = true;
                    }
                    if (!canBePartition && isParticiple)
                    {
                        break;
                    }
                    if (hasVerb)
                    {
                        if (tt.Morph.ContainsAttr("инф.", null))
                        {
                        }
                        else if (!isParticiple)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О")) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null)
                {
                    ty = 2;
                }
                else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition)))
                {
                    if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition)
                    {
                        break;
                    }
                    norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    if (norm.EndsWith("ЙШИЙ"))
                    {
                    }
                    else
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null);
                        if (grs != null && grs.Count > 0)
                        {
                            bool hVerb = false;
                            bool hPart = false;
                            foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                            {
                                foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                {
                                    if (w.Class.IsAdjective && w.Class.IsVerb)
                                    {
                                        if (w.Spelling == norm)
                                        {
                                            hPart = true;
                                        }
                                    }
                                    else if (w.Class.IsVerb)
                                    {
                                        hVerb = true;
                                    }
                                }
                            }
                            if (hPart && hVerb)
                            {
                                ty = 3;
                            }
                            else if (canBeAdjPartition)
                            {
                                ty = 3;
                            }
                            if (ty != 3 && !string.IsNullOrEmpty(grs[0].Prefix) && norm.StartsWith(grs[0].Prefix))
                            {
                                hVerb = false;
                                hPart = false;
                                string norm1 = norm.Substring(grs[0].Prefix.Length);
                                grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null);
                                if (grs != null && grs.Count > 0)
                                {
                                    foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                                    {
                                        foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                        {
                                            if (w.Class.IsAdjective && w.Class.IsVerb)
                                            {
                                                if (w.Spelling == norm1)
                                                {
                                                    hPart = true;
                                                }
                                            }
                                            else if (w.Class.IsVerb)
                                            {
                                                hVerb = true;
                                            }
                                        }
                                    }
                                }
                                if (hPart && hVerb)
                                {
                                    ty = 3;
                                }
                            }
                        }
                    }
                }
                if (ty == 0 && t == t0 && canBePartition)
                {
                    prep = PrepositionHelper.TryParse(t);
                    if (prep != null)
                    {
                        t = prep.EndToken;
                        continue;
                    }
                }
                if (ty == 0)
                {
                    break;
                }
                if (res == null)
                {
                    res = new VerbPhraseToken(t0, t);
                }
                res.EndToken = t;
                VerbPhraseItemToken it = new VerbPhraseItemToken(t, t)
                {
                    Morph = new Pullenti.Ner.MorphCollection(t.Morph)
                };
                if (not != null)
                {
                    it.BeginToken = not;
                    it.Not        = true;
                    not           = null;
                }
                it.IsAdverb = ty == 2;
                if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0)
                {
                    if (((prep.NextCase & t.Morph.Case)).IsUndefined)
                    {
                        return(null);
                    }
                    it.Morph.RemoveItems(prep.NextCase);
                    res.Preposition = prep;
                }
                if (norm == null)
                {
                    norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    if (ty == 1 && !tt.Morph.Case.IsUndefined)
                    {
                        Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm()
                        {
                            Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine
                        };
                        foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items)
                        {
                            if (mit is Pullenti.Morph.MorphWordForm)
                            {
                                mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc;
                                break;
                            }
                        }
                        string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + (t as Pullenti.Ner.TextToken).Term, mi);
                        if (nnn != null)
                        {
                            norm = nnn.Substring(2);
                        }
                    }
                }
                it.Normal = norm;
                res.Items.Add(it);
                if (!hasVerb && ((ty == 1 || ty == 3)))
                {
                    res.Morph = it.Morph;
                    hasVerb   = true;
                }
                if (ty == 1 || ty == 3)
                {
                    if (ty == 1 && tt.IsVerbBe)
                    {
                        verbBeBefore = true;
                    }
                    else
                    {
                        verbBeBefore = false;
                    }
                }
            }
            if (!hasVerb)
            {
                return(null);
            }
            for (int i = res.Items.Count - 1; i > 0; i--)
            {
                if (res.Items[i].IsAdverb)
                {
                    res.Items.RemoveAt(i);
                    res.EndToken = res.Items[i - 1].EndToken;
                }
                else
                {
                    break;
                }
            }
            return(res);
        }
예제 #8
0
        // Основная функция выделения телефонов
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            bool hasDenoms = false;

            foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
            {
                if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
                {
                    hasDenoms = true;
                }
            }
            if (!hasDenoms)
            {
                Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
                a.Process(kit);
            }
            List <KeywordReferent> li   = new List <KeywordReferent>();
            StringBuilder          tmp  = new StringBuilder();
            List <string>          tmp2 = new List <string>();
            int max = 0;

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                max++;
            }
            int cur = 0;

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    t = this._addReferents(ad, t, cur, max);
                    continue;
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (!t.Chars.IsLetter || (t.LengthChar < 3))
                {
                    continue;
                }
                string term = (t as Pullenti.Ner.TextToken).Term;
                if (term == "ЕСТЬ")
                {
                    if ((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb)
                    {
                    }
                    else
                    {
                        continue;
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = null;
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                if (npt == null)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsVerb && !mc.IsPreposition)
                    {
                        if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                        {
                            continue;
                        }
                        if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                        {
                            continue;
                        }
                        KeywordReferent kref = new KeywordReferent()
                        {
                            Typ = KeywordType.Predicate
                        };
                        string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                        if (norm == null)
                        {
                            norm = (t as Pullenti.Ner.TextToken).Lemma;
                        }
                        if (norm.EndsWith("ЬСЯ"))
                        {
                            norm = norm.Substring(0, norm.Length - 2);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                        _addNormals(kref, drv, norm);
                        kref = ad.RegisterReferent(kref) as KeywordReferent;
                        _setRank(kref, cur, max);
                        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t)
                        {
                            Morph = t.Morph
                        };
                        kit.EmbedToken(rt1);
                        t = rt1;
                        continue;
                    }
                    continue;
                }
                if (npt.InternalNoun != null)
                {
                    continue;
                }
                if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
                {
                    if (npt.Preposition != null)
                    {
                        t = npt.EndToken;
                        continue;
                    }
                }
                if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
                {
                    t = npt.EndToken;
                    continue;
                }
                if (npt.BeginToken == npt.EndToken)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsPreposition)
                    {
                        continue;
                    }
                    else if (mc.IsAdverb)
                    {
                        if (t.IsValue("ПОТОМ", null))
                        {
                            continue;
                        }
                    }
                }
                else
                {
                }
                li.Clear();
                Pullenti.Ner.Token t0 = t;
                for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        continue;
                    }
                    if (tt.IsValue("NATURAL", null))
                    {
                    }
                    if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
                    {
                        if (tt.IsValue("ОТНОШЕНИЕ", null))
                        {
                        }
                        else
                        {
                            continue;
                        }
                    }
                    if (mc.IsMisc)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                        {
                            continue;
                        }
                    }
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    string norm = (tt as Pullenti.Ner.TextToken).Lemma;
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                    if (norm != "ЕСТЬ")
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                        _addNormals(kref, drv, norm);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt)
                    {
                        Morph = tt.Morph
                    };
                    kit.EmbedToken(rt1);
                    if (tt == t && li.Count == 0)
                    {
                        t0 = rt1;
                    }
                    t = rt1;
                    li.Add(kref);
                }
                if (li.Count > 1)
                {
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    tmp.Length = 0;
                    tmp2.Clear();
                    bool hasNorm = false;
                    foreach (KeywordReferent kw in li)
                    {
                        string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                        string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                        if (n != null)
                        {
                            hasNorm = true;
                            tmp2.Add(n);
                        }
                        else
                        {
                            tmp2.Add(s);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
                    }
                    string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);
                    tmp.Length = 0;
                    tmp2.Sort();
                    foreach (string s in tmp2)
                    {
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                    }
                    string norm = tmp.ToString();
                    if (norm != val)
                    {
                        kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t)
                    {
                        Morph = npt.Morph
                    };
                    kit.EmbedToken(rt1);
                    t = rt1;
                }
            }
            cur = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                KeywordReferent kw = t.GetReferent() as KeywordReferent;
                if (kw == null || kw.Typ != KeywordType.Object)
                {
                    continue;
                }
                if (t.Next == null || kw.ChildWords > 2)
                {
                    continue;
                }
                Pullenti.Ner.Token t1 = t.Next;
                if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
                {
                    t1 = t1.Next;
                    if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
                    {
                        t1 = t1.Next;
                    }
                }
                else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
                {
                    continue;
                }
                KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
                if (kw2 == null)
                {
                    continue;
                }
                if (kw == kw2)
                {
                    continue;
                }
                if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
                {
                    continue;
                }
                KeywordReferent kwUn = new KeywordReferent();
                kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
                kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
                _setRank(kwUn, cur, max);
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1)
                {
                    Morph = t.Morph
                };
                kit.EmbedToken(rt1);
                t = rt1;
            }
            if (SortKeywordsByRank)
            {
                List <Pullenti.Ner.Referent> all = new List <Pullenti.Ner.Referent>(ad.Referents);
                all.Sort(new CompByRank());
                ad.Referents = all;
            }
            if (AnnotationMaxSentences > 0)
            {
                KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
                if (ano != null)
                {
                    ad.RegisterReferent(ano);
                }
            }
        }
예제 #9
0
        /// <summary>
        /// Попробовать восстановить последовательность, обрамляемую кавычками или скобками. Поддерживается
        /// вложенность, возможность отсутствия закрывающего элемента и др.
        /// </summary>
        /// <param name="t">начальный токен</param>
        /// <param name="attrs">параметры выделения</param>
        /// <param name="maxTokens">максимально токенов (вдруг забыли закрывающую кавычку)</param>
        /// <return>метатокен BracketSequenceToken</return>
        public static BracketSequenceToken TryParse(Pullenti.Ner.Token t, BracketParseAttr attrs = BracketParseAttr.No, int maxTokens = 100)
        {
            Pullenti.Ner.Token t0 = t;
            int cou = 0;

            if (!CanBeStartOfSequence(t0, false, false))
            {
                return(null);
            }
            List <Bracket> brList = new List <Bracket>();

            brList.Add(new Bracket(t0));
            cou = 0;
            int crlf = 0;

            Pullenti.Ner.Token last = null;
            int  lev     = 1;
            bool isAssim = brList[0].Char != '«' && m_AssymOPenChars.IndexOf(brList[0].Char) >= 0;
            bool genCase = false;

            for (t = t0.Next; t != null; t = t.Next)
            {
                if (t.IsTableControlChar)
                {
                    break;
                }
                last = t;
                if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                {
                    if (t.IsNewlineBefore && ((attrs & BracketParseAttr.CanBeManyLines)) == BracketParseAttr.No)
                    {
                        if (t.WhitespacesBeforeCount > 10 || CanBeStartOfSequence(t, false, false))
                        {
                            if (t.IsChar('(') && !t0.IsChar('('))
                            {
                            }
                            else
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    Bracket bb = new Bracket(t);
                    brList.Add(bb);
                    if (brList.Count > 20)
                    {
                        break;
                    }
                    if ((brList.Count == 3 && brList[1].CanBeOpen && bb.CanBeClose) && MustBeCloseChar(bb.Char, brList[1].Char) && MustBeCloseChar(bb.Char, brList[0].Char))
                    {
                        bool ok = false;
                        for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                break;
                            }
                            if (tt.IsChar(','))
                            {
                                break;
                            }
                            if (tt.IsChar('.'))
                            {
                                for (tt = tt.Next; tt != null; tt = tt.Next)
                                {
                                    if (tt.IsNewlineBefore)
                                    {
                                        break;
                                    }
                                    else if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars))
                                    {
                                        Bracket bb2 = new Bracket(tt);
                                        if (BracketHelper.CanBeEndOfSequence(tt, false, null, false) && CanBeCloseChar(bb2.Char, brList[0].Char))
                                        {
                                            ok = true;
                                        }
                                        break;
                                    }
                                }
                                break;
                            }
                            if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars))
                            {
                                ok = true;
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                    if (isAssim)
                    {
                        if (bb.CanBeOpen && !bb.CanBeClose && bb.Char == brList[0].Char)
                        {
                            lev++;
                        }
                        else if (bb.CanBeClose && !bb.CanBeOpen && m_OpenChars.IndexOf(brList[0].Char) == m_CloseChars.IndexOf(bb.Char))
                        {
                            lev--;
                            if (lev == 0)
                            {
                                break;
                            }
                        }
                    }
                }
                else
                {
                    if ((++cou) > maxTokens)
                    {
                        break;
                    }
                    if (((attrs & BracketParseAttr.CanContainsVerbs)) == BracketParseAttr.No)
                    {
                        if (t.Morph.Language.IsCyrillic)
                        {
                            if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
                            {
                                if (!t.Morph.Class.IsAdjective && !t.Morph.ContainsAttr("страд.з.", null))
                                {
                                    if (t.Chars.IsAllLower)
                                    {
                                        string norm = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                                        if (!Pullenti.Morph.LanguageHelper.EndsWith(norm, "СЯ"))
                                        {
                                            if (brList.Count > 1)
                                            {
                                                break;
                                            }
                                            if (brList[0].Char != '(')
                                            {
                                                break;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        else if (t.Morph.Language.IsEn)
                        {
                            if (t.Morph.Class == Pullenti.Morph.MorphClass.Verb && t.Chars.IsAllLower)
                            {
                                break;
                            }
                        }
                        Pullenti.Ner.Referent r = t.GetReferent();
                        if (r != null && r.TypeName == "ADDRESS")
                        {
                            if (!t0.IsChar('('))
                            {
                                break;
                            }
                        }
                    }
                }
                if (((attrs & BracketParseAttr.CanBeManyLines)) != BracketParseAttr.No)
                {
                    if (t.IsNewlineBefore)
                    {
                        if (t.NewlinesBeforeCount > 1)
                        {
                            break;
                        }
                        crlf++;
                    }
                    continue;
                }
                if (t.IsNewlineBefore)
                {
                    if (t.WhitespacesBeforeCount > 15)
                    {
                        last = t.Previous;
                        break;
                    }
                    crlf++;
                    if (!t.Chars.IsAllLower)
                    {
                        if (MiscHelper.CanBeStartOfSentence(t))
                        {
                            bool has = false;
                            for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                            {
                                if (tt.IsNewlineBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_OpenChars) && tt.IsWhitespaceBefore)
                                {
                                    break;
                                }
                                else if (tt.LengthChar == 1 && tt.IsCharOf(m_CloseChars) && !tt.IsWhitespaceBefore)
                                {
                                    has = true;
                                    break;
                                }
                            }
                            if (!has)
                            {
                                last = t.Previous;
                                break;
                            }
                        }
                    }
                    if ((t.Previous is Pullenti.Ner.MetaToken) && CanBeEndOfSequence((t.Previous as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                    {
                        last = t.Previous;
                        break;
                    }
                }
                if (crlf > 1)
                {
                    if (brList.Count > 1)
                    {
                        break;
                    }
                    if (crlf > 10)
                    {
                        break;
                    }
                }
                if (t.IsChar(';') && t.IsNewlineAfter)
                {
                    break;
                }
                NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (t.IsNewlineBefore)
                    {
                        genCase = npt.Morph.Case.IsGenitive;
                    }
                    last = (t = npt.EndToken);
                }
            }
            if ((brList.Count == 1 && brList[0].CanBeOpen && (last is Pullenti.Ner.MetaToken)) && last.IsNewlineAfter)
            {
                if (BracketHelper.CanBeEndOfSequence((last as Pullenti.Ner.MetaToken).EndToken, false, null, false))
                {
                    return(new BracketSequenceToken(t0, last));
                }
            }
            if ((brList.Count == 1 && brList[0].CanBeOpen && genCase) && last.IsNewlineAfter && crlf <= 2)
            {
                return(new BracketSequenceToken(t0, last));
            }
            if (brList.Count < 1)
            {
                return(null);
            }
            for (int i = 1; i < (brList.Count - 1); i++)
            {
                if (brList[i].Char == '<' && brList[i + 1].Char == '>')
                {
                    brList[i].CanBeOpen      = true;
                    brList[i + 1].CanBeClose = true;
                }
            }
            List <BracketSequenceToken> internals = null;

            while (brList.Count > 3)
            {
                int i = brList.Count - 1;
                if ((brList[i].CanBeClose && brList[i - 1].CanBeOpen && !CanBeCloseChar(brList[i].Char, brList[0].Char)) && CanBeCloseChar(brList[i].Char, brList[i - 1].Char))
                {
                    brList.RemoveRange(brList.Count - 2, 2);
                    continue;
                }
                break;
            }
            while (brList.Count >= 4)
            {
                bool changed = false;
                for (int i = 1; i < (brList.Count - 2); i++)
                {
                    if ((brList[i].CanBeOpen && !brList[i].CanBeClose && brList[i + 1].CanBeClose) && !brList[i + 1].CanBeOpen)
                    {
                        bool ok = false;
                        if (MustBeCloseChar(brList[i + 1].Char, brList[i].Char) || brList[i].Char != brList[0].Char)
                        {
                            ok = true;
                            if ((i == 1 && ((i + 2) < brList.Count) && brList[i + 2].Char == ')') && brList[i + 1].Char != ')' && CanBeCloseChar(brList[i + 1].Char, brList[i - 1].Char))
                            {
                                brList[i + 2] = brList[i + 1];
                            }
                        }
                        else if (i > 1 && ((i + 2) < brList.Count) && MustBeCloseChar(brList[i + 2].Char, brList[i - 1].Char))
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            if (internals == null)
                            {
                                internals = new List <BracketSequenceToken>();
                            }
                            internals.Add(new BracketSequenceToken(brList[i].Source, brList[i + 1].Source));
                            brList.RemoveRange(i, 2);
                            changed = true;
                            break;
                        }
                    }
                }
                if (!changed)
                {
                    break;
                }
            }
            BracketSequenceToken res = null;

            if ((brList.Count >= 4 && brList[1].CanBeOpen && brList[2].CanBeClose) && brList[3].CanBeClose && !brList[3].CanBeOpen)
            {
                if (CanBeCloseChar(brList[3].Char, brList[0].Char))
                {
                    res = new BracketSequenceToken(brList[0].Source, brList[3].Source);
                    if (brList[0].Source.Next != brList[1].Source || brList[2].Source.Next != brList[3].Source)
                    {
                        res.Internal.Add(new BracketSequenceToken(brList[1].Source, brList[2].Source));
                    }
                    if (internals != null)
                    {
                        res.Internal.AddRange(internals);
                    }
                }
            }
            if ((res == null && brList.Count >= 3 && brList[2].CanBeClose) && !brList[2].CanBeOpen)
            {
                if (((attrs & BracketParseAttr.NearCloseBracket)) != BracketParseAttr.No)
                {
                    if (CanBeCloseChar(brList[1].Char, brList[0].Char))
                    {
                        return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                    }
                }
                bool ok = true;
                if (CanBeCloseChar(brList[2].Char, brList[0].Char) && CanBeCloseChar(brList[1].Char, brList[0].Char) && brList[1].CanBeClose)
                {
                    for (t = brList[1].Source; t != brList[2].Source && t != null; t = t.Next)
                    {
                        if (t.IsNewlineBefore)
                        {
                            ok = false;
                            break;
                        }
                        if (t.Chars.IsLetter && t.Chars.IsAllLower)
                        {
                            ok = false;
                            break;
                        }
                        NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null);
                        if (npt != null)
                        {
                            t = npt.EndToken;
                        }
                    }
                    if (ok)
                    {
                        for (t = brList[0].Source.Next; t != brList[1].Source && t != null; t = t.Next)
                        {
                            if (t.IsNewlineBefore)
                            {
                                return(new BracketSequenceToken(brList[0].Source, t.Previous));
                            }
                        }
                    }
                    int lev1 = 0;
                    for (Pullenti.Ner.Token tt = brList[0].Source.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter || tt.IsTableControlChar)
                        {
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            continue;
                        }
                        if (tt.Chars.IsLetter || tt.LengthChar > 1)
                        {
                            continue;
                        }
                        char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                        if (CanBeCloseChar(ch, brList[0].Char))
                        {
                            lev1++;
                        }
                        else if (CanBeCloseChar(brList[1].Char, ch))
                        {
                            lev1--;
                            if (lev1 < 0)
                            {
                                return(new BracketSequenceToken(brList[0].Source, brList[1].Source));
                            }
                        }
                    }
                }
                if (ok && CanBeCloseChar(brList[2].Char, brList[0].Char))
                {
                    BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                    res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                    res.Internal.Add(intern);
                }
                else if (ok && CanBeCloseChar(brList[2].Char, brList[1].Char) && brList[0].CanBeOpen)
                {
                    if (CanBeCloseChar(brList[2].Char, brList[0].Char))
                    {
                        BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source);
                        res = new BracketSequenceToken(brList[0].Source, brList[2].Source);
                        res.Internal.Add(intern);
                    }
                    else if (brList.Count == 3)
                    {
                        return(null);
                    }
                }
            }
            if (res == null && brList.Count > 1 && brList[1].CanBeClose)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count > 1 && CanBeCloseChar(brList[1].Char, brList[0].Char))
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res == null && brList.Count == 2 && brList[0].Char == brList[1].Char)
            {
                res = new BracketSequenceToken(brList[0].Source, brList[1].Source);
            }
            if (res != null && internals != null)
            {
                foreach (BracketSequenceToken i in internals)
                {
                    if (i.BeginChar < res.EndChar)
                    {
                        res.Internal.Add(i);
                    }
                }
            }
            if (res == null)
            {
                cou = 0;
                for (Pullenti.Ner.Token tt = t0.Next; tt != null; tt = tt.Next, cou++)
                {
                    if (tt.IsTableControlChar)
                    {
                        break;
                    }
                    if (MiscHelper.CanBeStartOfSentence(tt))
                    {
                        break;
                    }
                    if (maxTokens > 0 && cou > maxTokens)
                    {
                        break;
                    }
                    Pullenti.Ner.MetaToken mt = tt as Pullenti.Ner.MetaToken;
                    if (mt == null)
                    {
                        continue;
                    }
                    if (mt.EndToken is Pullenti.Ner.TextToken)
                    {
                        if ((mt.EndToken as Pullenti.Ner.TextToken).IsCharOf(m_CloseChars))
                        {
                            Bracket bb = new Bracket(mt.EndToken as Pullenti.Ner.TextToken);
                            if (bb.CanBeClose && CanBeCloseChar(bb.Char, brList[0].Char))
                            {
                                return(new BracketSequenceToken(t0, tt));
                            }
                        }
                    }
                }
            }
            return(res);
        }
예제 #10
0
        static string GetNameWithoutBrackets(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, bool normalizeFirstNounGroup = false, bool normalFirstGroupSingle = false, bool ignoreGeoReferent = false)
        {
            string res = null;

            if (BracketHelper.CanBeStartOfSequence(begin, false, false) && BracketHelper.CanBeEndOfSequence(end, false, begin, false))
            {
                begin = begin.Next;
                end   = end.Previous;
            }
            if (normalizeFirstNounGroup && !begin.Morph.Class.IsPreposition)
            {
                NounPhraseToken npt = NounPhraseHelper.TryParse(begin, NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
                if (npt != null)
                {
                    if (npt.Noun.GetMorphClassInDictionary().IsUndefined&& npt.Adjectives.Count == 0)
                    {
                        npt = null;
                    }
                }
                if (npt != null && npt.EndToken.EndChar > end.EndChar)
                {
                    npt = null;
                }
                if (npt != null)
                {
                    res = npt.GetNormalCaseText(null, (normalFirstGroupSingle ? Pullenti.Morph.MorphNumber.Singular : Pullenti.Morph.MorphNumber.Undefined), Pullenti.Morph.MorphGender.Undefined, false);
                    Pullenti.Ner.Token te = npt.EndToken.Next;
                    if (((te != null && te.Next != null && te.IsComma) && (te.Next is Pullenti.Ner.TextToken) && te.Next.EndChar <= end.EndChar) && te.Next.Morph.Class.IsVerb && te.Next.Morph.Class.IsAdjective)
                    {
                        foreach (Pullenti.Morph.MorphBaseInfo it in te.Next.Morph.Items)
                        {
                            if (it.Gender == npt.Morph.Gender || ((it.Gender & npt.Morph.Gender)) != Pullenti.Morph.MorphGender.Undefined)
                            {
                                if (!((it.Case & npt.Morph.Case)).IsUndefined)
                                {
                                    if (it.Number == npt.Morph.Number || ((it.Number & npt.Morph.Number)) != Pullenti.Morph.MorphNumber.Undefined)
                                    {
                                        string var = (te.Next as Pullenti.Ner.TextToken).Term;
                                        if (it is Pullenti.Morph.MorphWordForm)
                                        {
                                            var = (it as Pullenti.Morph.MorphWordForm).NormalCase;
                                        }
                                        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo()
                                        {
                                            Class = Pullenti.Morph.MorphClass.Adjective, Gender = npt.Morph.Gender, Number = npt.Morph.Number, Language = npt.Morph.Language
                                        };
                                        var = Pullenti.Morph.MorphologyService.GetWordform(var, bi);
                                        if (var != null)
                                        {
                                            res = string.Format("{0}, {1}", res, var);
                                            te  = te.Next.Next;
                                        }
                                        break;
                                    }
                                }
                            }
                        }
                    }
                    if (te != null && te.EndChar <= end.EndChar)
                    {
                        string s = GetNameEx(te, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
                        if (!string.IsNullOrEmpty(s))
                        {
                            if (!char.IsLetterOrDigit(s[0]))
                            {
                                res = string.Format("{0}{1}", res, s);
                            }
                            else
                            {
                                res = string.Format("{0} {1}", res, s);
                            }
                        }
                    }
                }
                else if ((begin is Pullenti.Ner.TextToken) && begin.Chars.IsCyrillicLetter)
                {
                    Pullenti.Morph.MorphClass mm = begin.GetMorphClassInDictionary();
                    if (!mm.IsUndefined)
                    {
                        res = begin.GetNormalCaseText(mm, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                        if (begin.EndChar < end.EndChar)
                        {
                            res = string.Format("{0} {1}", res, GetNameEx(begin.Next, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, false));
                        }
                    }
                }
            }
            if (res == null)
            {
                res = GetNameEx(begin, end, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, true, ignoreGeoReferent);
            }
            if (!string.IsNullOrEmpty(res))
            {
                int k = 0;
                for (int i = res.Length - 1; i >= 0; i--, k++)
                {
                    if (res[i] == '*' || char.IsWhiteSpace(res[i]))
                    {
                    }
                    else
                    {
                        break;
                    }
                }
                if (k > 0)
                {
                    if (k == res.Length)
                    {
                        return(null);
                    }
                    res = res.Substring(0, res.Length - k);
                }
            }
            return(res);
        }