예제 #1
0
 /// <summary>
 /// Creates a delimiter token; both arguments are forwarded to the base constructor
 /// together with a null third argument.
 /// </summary>
 /// <param name="b">Begin token passed to the base constructor.</param>
 /// <param name="e">End token passed to the base constructor.</param>
 public DelimToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e) : base(b, e, null)
 {
 }
예제 #2
0
 /// <summary>
 /// Tries to build a <c>NamedItemToken</c> starting at <paramref name="t"/>.
 /// The alternatives are tried in this order: an already recognized referent,
 /// a type word from <c>m_Types</c>, a known name from <c>m_Names</c>, a compass-direction
 /// prefix, a capitalized adjective + type noun phrase, a bracketed name, and finally
 /// any non-lowercase word longer than two characters.
 /// </summary>
 /// <param name="t">Token to start from; null yields null.</param>
 /// <param name="locOnto">Only forwarded to the recursive calls of this method.</param>
 /// <returns>The recognized item, or null when none of the alternatives applies.</returns>
 public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
 {
     if (t == null)
     {
         return null;
     }

     // 1. The token already wraps a referent: accept only person / geo / organization kinds.
     if (t is Pullenti.Ner.ReferentToken)
     {
         Pullenti.Ner.Referent r = t.GetReferent();
         // Guard against a missing referent before reading TypeName.
         if (r != null && (r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent) || r.TypeName == "ORGANIZATION"))
         {
             return new NamedItemToken(t, t)
             {
                 Ref = r, Morph = t.Morph
             };
         }
         return null;
     }

     Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

     // 2. A type word; it may simultaneously be a well-known name of the same kind.
     if (typ != null)
     {
         if (!(t is Pullenti.Ner.TextToken))
         {
             return null;
         }
         NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken)
         {
             Morph = typ.Morph, Chars = typ.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
         res.TypeValue = typ.Termin.CanonicText;
         if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind)
         {
             res.NameValue   = nam.Termin.CanonicText;
             res.IsWellknown = true;
         }
         return res;
     }

     // 3. A known name (must not start with a lowercase token).
     if (nam != null)
     {
         if (nam.BeginToken.Chars.IsAllLower)
         {
             return null;
         }
         NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken)
         {
             Morph = nam.Morph, Chars = nam.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
         res.NameValue = nam.Termin.CanonicText;

         // The name is treated as well-known only when it stands apart from its neighbours:
         // whitespace (or text start) before, and whitespace / text end / a punctuation mark
         // followed by whitespace after.
         // NOTE(review): both checks look at t (the first token), not nam.EndToken - confirm
         // this is intended for multi-token names.
         bool ok = true;
         if (!t.IsWhitespaceBefore && t.Previous != null)
         {
             ok = false;
         }
         else if (!t.IsWhitespaceAfter && t.Next != null)
         {
             if (!(t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter))
             {
                 ok = false;
             }
         }
         if (ok)
         {
             res.IsWellknown = true;
             res.TypeValue   = nam.Termin.Tag2 as string;
         }
         return res;
     }

     // 4. A compass-direction word or adjective recognized by TryAttachNordWest.
     Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
     if (adj != null)
     {
         if (adj.Morph.Class.IsNoun)
         {
             // Noun form: only a multi-token phrase ending with "ВОСТОК" is accepted.
             if (adj.EndToken.IsValue("ВОСТОК", null))
             {
                 if (adj.BeginToken == adj.EndToken)
                 {
                     return null;
                 }
                 NamedItemToken re = new NamedItemToken(t, adj.EndToken)
                 {
                     Morph = adj.Morph
                 };
                 re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
                 re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                 re.IsWellknown = true;
                 return re;
             }
             return null;
         }
         if (adj.WhitespacesAfterCount > 2)
         {
             return null;
         }
         // Direction adjective followed by an already recognized geo referent.
         if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
         {
             NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next)
             {
                 Morph = adj.EndToken.Next.Morph
             };
             re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
             re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
             re.IsWellknown = true;
             re.Ref         = adj.EndToken.Next.GetReferent();
             return re;
         }
         // Direction adjective followed by something that itself parses as a location:
         // prepend the normalized adjective to the name of the nested result.
         NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
         if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
         {
             string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
             if (s != null)
             {
                 if (res.NameValue == null)
                 {
                     res.NameValue = s.ToUpper();
                 }
                 else
                 {
                     res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                     res.TypeValue = null;
                 }
                 res.BeginToken  = t;
                 res.Chars       = t.Chars;
                 res.IsWellknown = true;
                 return res;
             }
         }
     }

     // 5. Capitalized noun phrase (not at a possible sentence start) whose noun is a type word:
     //    the adjectives become the name.
     if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
     {
         Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
         if (npt != null && npt.Adjectives.Count > 0)
         {
             NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
             if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null)
             {
                 test.BeginToken = t;
                 StringBuilder tmp = new StringBuilder();
                 foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
                 {
                     string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                     if (tmp.Length > 0)
                     {
                         tmp.Append(' ');
                     }
                     tmp.Append(s);
                 }
                 test.NameValue = tmp.ToString();
                 test.Chars     = t.Chars;
                 if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
                 {
                     test.IsWellknown = true;
                 }
                 return test;
             }
         }
     }

     // 6. A bracketed / quoted sequence that starts with a non-lowercase letter token.
     if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower)
     {
         Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
         if (br != null)
         {
             NamedItemToken res = new NamedItemToken(t, br.EndToken);
             res.IsInBracket = true;
             res.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
             // If the whole bracket content is a known name, take its kind and canonical text.
             nam = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
             if (nam != null && nam.EndToken == br.EndToken.Previous)
             {
                 res.Kind        = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                 res.IsWellknown = true;
                 res.NameValue   = nam.Termin.CanonicText;
             }
             return res;
         }
     }

     // 7. Fallback: any non-lowercase word longer than two characters.
     if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
     {
         NamedItemToken res = new NamedItemToken(t, t)
         {
             Morph = t.Morph
         };
         string str = (t as Pullenti.Ner.TextToken).Term;
         // Words ending in О / И / Ы are kept as written; everything else is normalized.
         if (str.EndsWith("О") || str.EndsWith("И") || str.EndsWith("Ы"))
         {
             res.NameValue = str;
         }
         else
         {
             res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         }
         res.Chars = t.Chars;
         // Glue a hyphenated second part written in the same alphabet without spaces.
         if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter)
         {
             t             = (res.EndToken = t.Next.Next);
             res.NameValue = string.Format("{0}-{1}", res.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
         }
         return res;
     }
     return null;
 }
예제 #3
0
        /// <summary>
        /// Recomputes <c>Rank</c> for the token span <c>BeginToken</c>..<c>EndToken</c> by walking
        /// every token and adding or subtracting heuristic scores, and on success stores the
        /// name boundaries in <c>BeginNameToken</c> / <c>EndNameToken</c>. <c>TypeValue</c> and
        /// <c>Speciality</c> are filled in when corresponding title items are met.
        /// </summary>
        /// <param name="minNewlinesCount">
        /// Tokens (other than the first) preceded by more newlines than this are penalized
        /// by the excess.
        /// </param>
        /// <returns>
        /// false when the span is rejected (disqualifying item met, more than four line breaks,
        /// no words with a rank below 50, or inconsistent name boundaries); true otherwise.
        /// </returns>
        bool CalcRankAndValue(int minNewlinesCount)
        {
            Rank = 0;
            if (BeginToken.Chars.IsAllLower)
            {
                Rank -= 30;
            }
            int words      = 0;
            int upWords    = 0;
            int notwords   = 0;
            int lineNumber = 0;

            Pullenti.Ner.Token tstart = BeginToken;
            Pullenti.Ner.Token tend   = EndToken;
            for (Pullenti.Ner.Token t = BeginToken; t != EndToken.Next && t != null && t.EndChar <= EndToken.EndChar; t = t.Next)
            {
                // --- title keywords (theme / type / speciality / people roles) ---
                TitleItemToken tit = TitleItemToken.TryAttach(t);
                if (tit != null)
                {
                    if (tit.Typ == TitleItemToken.Types.Theme || tit.Typ == TitleItemToken.Types.TypAndTheme)
                    {
                        if (t != BeginToken)
                        {
                            if (lineNumber > 0)
                            {
                                return false;
                            }
                            // Everything counted before the theme marker is discarded.
                            words = (upWords = (notwords = 0));
                        }
                        t = tit.EndToken;
                        if (t.Next == null)
                        {
                            return false;
                        }
                        if (t.Next.Chars.IsLetter && t.Next.Chars.IsAllLower)
                        {
                            Rank += 20;
                        }
                        else
                        {
                            Rank += 100;
                        }
                        // The name starts right after the theme marker.
                        tstart = t.Next;
                        if (tit.Typ == TitleItemToken.Types.TypAndTheme)
                        {
                            TypeValue = tit.Value;
                        }
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Typ)
                    {
                        if (t == BeginToken)
                        {
                            if (tit.EndToken.IsNewlineAfter)
                            {
                                TypeValue = tit.Value;
                                Rank     += 5;
                                tstart    = tit.EndToken.Next;
                            }
                        }
                        t = tit.EndToken;
                        words++;
                        if (tit.BeginToken != tit.EndToken)
                        {
                            words++;
                        }
                        if (tit.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Dust || tit.Typ == TitleItemToken.Types.Speciality)
                    {
                        if (t == BeginToken)
                        {
                            return false;
                        }
                        Rank -= 20;
                        if (tit.Typ == TitleItemToken.Types.Speciality)
                        {
                            Speciality = tit.Value;
                        }
                        t = tit.EndToken;
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Consultant || tit.Typ == TitleItemToken.Types.Boss || tit.Typ == TitleItemToken.Types.Editor)
                    {
                        t = tit.EndToken;
                        if (t.Next != null && ((t.Next.IsCharOf(":") || t.Next.IsHiphen || t.WhitespacesAfterCount > 4)))
                        {
                            Rank -= 10;
                        }
                        else
                        {
                            Rank -= 2;
                        }
                        continue;
                    }
                    // Any other title item type disqualifies the span.
                    return false;
                }

                // --- bibliographic-reference markers ---
                Pullenti.Ner.Booklink.Internal.BookLinkToken blt = Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParse(t, 0);
                if (blt != null)
                {
                    if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Misc || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Pages)
                    {
                        Rank -= 10;
                    }
                    // NOTE(review): the original also tested BookLinkTyp.N here, which the branch
                    // above already consumes; the unreachable test was dropped, behaviour unchanged.
                    else if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.PageRange)
                    {
                        Rank -= 20;
                    }
                }
                if (t == BeginToken && Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined) != null)
                {
                    Rank -= 20;
                }

                // --- line breaks inside the span ---
                if (t.IsNewlineBefore && t != BeginToken)
                {
                    lineNumber++;
                    if (lineNumber > 4)
                    {
                        return false;
                    }
                    if (t.Chars.IsAllLower)
                    {
                        Rank += 10;
                    }
                    else if (t.Previous.IsChar('.'))
                    {
                        Rank -= 10;
                    }
                    else if (t.Previous.IsCharOf(",-"))
                    {
                        Rank += 10;
                    }
                    else
                    {
                        // A noun phrase that starts on the previous line and reaches this token
                        // suggests the line break is inside one phrase.
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Previous, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.EndChar >= t.EndChar)
                        {
                            Rank += 10;
                        }
                    }
                }
                if (t != BeginToken && t.NewlinesBeforeCount > minNewlinesCount)
                {
                    Rank -= (t.NewlinesBeforeCount - minNewlinesCount);
                }

                // --- quoted text at the very beginning ---
                Pullenti.Ner.Core.BracketSequenceToken bst = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (bst != null && bst.IsQuoteType && bst.EndToken.EndChar <= EndToken.EndChar)
                {
                    if (words == 0)
                    {
                        tstart = bst.BeginToken;
                        Rank  += 10;
                        if (bst.EndToken == EndToken)
                        {
                            // No-op today: tend is never moved away from EndToken in this method.
                            tend  = EndToken;
                            Rank += 10;
                        }
                    }
                }

                // --- tokens that carry referents ---
                List<Pullenti.Ner.Referent> rli = t.GetReferents();
                if (rli != null)
                {
                    foreach (Pullenti.Ner.Referent r in rli)
                    {
                        if (r is Pullenti.Ner.Org.OrganizationReferent)
                        {
                            if (t.IsNewlineBefore)
                            {
                                Rank -= 10;
                            }
                            else
                            {
                                Rank -= 4;
                            }
                            continue;
                        }
                        if ((r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Person.PersonReferent))
                        {
                            if (t.IsNewlineBefore)
                            {
                                Rank -= 5;
                                if (t.IsNewlineAfter || t.Next == null)
                                {
                                    Rank -= 20;
                                }
                                else if (t.Next.IsHiphen || (t.Next is Pullenti.Ner.NumberToken) || (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                                {
                                    Rank -= 20;
                                }
                                else if (t != BeginToken)
                                {
                                    Rank -= 20;
                                }
                            }
                            continue;
                        }
                        // NOTE(review): the original also tested GeoReferent here, which the branch
                        // above already consumes; the unreachable test was dropped.
                        if (r is Pullenti.Ner.Denomination.DenominationReferent)
                        {
                            continue;
                        }
                        if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Phone.PhoneReferent))
                        {
                            return false;
                        }
                        if (t.IsNewlineBefore)
                        {
                            Rank -= 4;
                        }
                        else
                        {
                            Rank -= 2;
                        }
                        if (t == BeginToken && (EndToken.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                        {
                            Rank -= 10;
                        }
                    }
                    words++;
                    if (t.Chars.IsAllUpper)
                    {
                        upWords++;
                    }
                    if (t == BeginToken)
                    {
                        if (t.IsNewlineAfter)
                        {
                            Rank -= 10;
                        }
                        else if (t.Next != null && t.Next.IsChar('.') && t.Next.IsNewlineAfter)
                        {
                            Rank -= 10;
                        }
                    }
                    continue;
                }

                // --- numbers: spelled-out ones count as words, digits as non-words ---
                if (t is Pullenti.Ner.NumberToken)
                {
                    if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words)
                    {
                        words++;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                    }
                    else
                    {
                        notwords++;
                    }
                    continue;
                }

                // --- person attributes (positions, titles) ---
                Pullenti.Ner.Person.Internal.PersonAttrToken pat = Pullenti.Ner.Person.Internal.PersonAttrToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonAttrToken.PersonAttrAttachAttrs.No);
                if (pat != null)
                {
                    if (t.IsNewlineBefore)
                    {
                        // Penalize only a line-starting attribute that is neither in an oblique
                        // case nor written in all capitals.
                        bool obliqueCase = !pat.Morph.Case.IsUndefined && !pat.Morph.Case.IsNominative;
                        if (!obliqueCase && !pat.Chars.IsAllUpper)
                        {
                            Rank -= 20;
                        }
                    }
                    else if (t.Chars.IsAllLower)
                    {
                        Rank--;
                    }
                    for (; t != null; t = t.Next)
                    {
                        words++;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                        if (t == pat.EndToken)
                        {
                            break;
                        }
                    }
                    if (t == null)
                    {
                        // The chain ended before pat.EndToken was met. Falling through to
                        // 'continue' would execute 't = t.Next' on null, so stop scanning instead.
                        break;
                    }
                    continue;
                }

                // --- organization type words ---
                Pullenti.Ner.Org.Internal.OrgItemTypeToken oitt = Pullenti.Ner.Org.Internal.OrgItemTypeToken.TryAttach(t, true, null);
                if (oitt != null)
                {
                    if (oitt.Morph.Number != Pullenti.Morph.MorphNumber.Plural && !oitt.IsDoubtRootWord)
                    {
                        if (!oitt.Morph.Case.IsUndefined && !oitt.Morph.Case.IsNominative)
                        {
                            words++;
                            if (t.Chars.IsAllUpper)
                            {
                                upWords++;
                            }
                        }
                        else
                        {
                            Rank -= 4;
                            if (t == BeginToken)
                            {
                                Rank -= 5;
                            }
                        }
                    }
                    else
                    {
                        words += 1;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                    }
                    t = oitt.EndToken;
                    continue;
                }

                // --- plain text tokens ---
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (tt.IsChar('©'))
                    {
                        Rank -= 10;
                    }
                    if (tt.IsChar('_'))
                    {
                        Rank--;
                    }
                    if (tt.Chars.IsLetter)
                    {
                        if (tt.LengthChar > 2)
                        {
                            words++;
                            if (t.Chars.IsAllUpper)
                            {
                                upWords++;
                            }
                        }
                    }
                    else if (!tt.IsChar(','))
                    {
                        notwords++;
                    }
                    if (tt.IsPureVerb)
                    {
                        // A pure verb ends the scan with a heavy penalty.
                        Rank -= 30;
                        words--;
                        break;
                    }
                    if (tt == EndToken)
                    {
                        if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)
                        {
                            Rank -= 10;
                        }
                        else if (tt.IsChar('.'))
                        {
                            Rank += 5;
                        }
                    }
                    else if (tt.IsCharOf("._"))
                    {
                        Rank -= 5;
                    }
                }
            }

            Rank += words;
            Rank -= notwords;
            if ((words < 1) && (Rank < 50))
            {
                return false;
            }
            if (tstart == null || tend == null)
            {
                return false;
            }
            if (tstart.EndChar > tend.EndChar)
            {
                return false;
            }

            // A type or speciality item right after the span raises the rank.
            TitleItemToken tit1 = TitleItemToken.TryAttach(EndToken.Next);

            if (tit1 != null && ((tit1.Typ == TitleItemToken.Types.Typ || tit1.Typ == TitleItemToken.Types.Speciality)))
            {
                if (tit1.EndToken.IsNewlineAfter)
                {
                    Rank += 15;
                }
                else
                {
                    Rank += 10;
                }
                if (tit1.Typ == TitleItemToken.Types.Speciality)
                {
                    Speciality = tit1.Value;
                }
            }
            // Mostly upper-case text directly after a person referent gets a bonus.
            if (upWords > 4 && upWords > ((int)((0.8 * words))))
            {
                if (tstart.Previous != null && (tstart.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                {
                    Rank += (5 + upWords);
                }
            }
            BeginNameToken = tstart;
            EndNameToken   = tend;
            return true;
        }
예제 #4
0
 /// <summary>
 /// Walks the token chain and glues a word to the following word (optionally across a
 /// hyphen that is not at a line end) when at least one of the two is not found in the
 /// dictionary, while their concatenation is processed by the morphology service as a
 /// single token that is found in the dictionary. The pair is then replaced in the chain
 /// by one new <c>TextToken</c>.
 /// </summary>
 /// <param name="lang">Language passed to <c>MorphologyService.Process</c> for the merged word.</param>
 void CorrectWordsByMerging(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token t = FirstToken; t != null && t.Next != null; t = t.Next)
     {
         if (!t.Chars.IsLetter || (t.LengthChar < 2))
         {
             continue;
         }
         Pullenti.Morph.MorphClass mc0 = t.GetMorphClassInDictionary();
         if (t.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }

         // Candidate second half: the next token, or the one after a hyphen.
         Pullenti.Ner.Token t1 = t.Next;
         if (t1.IsHiphen && t1.Next != null && !t1.IsNewlineAfter)
         {
             t1 = t1.Next;
         }
         if (t1.LengthChar == 1)
         {
             continue;
         }
         // Both halves must be letters of the same alphabet.
         if (!t1.Chars.IsLetter || !t.Chars.IsLetter || t1.Chars.IsLatinLetter != t.Chars.IsLatinLetter)
         {
             continue;
         }
         // Case filters, in the original order (each one rejects the pair).
         if (t1.Chars.IsAllUpper && !t.Chars.IsAllUpper)
         {
             continue;
         }
         if (!t1.Chars.IsAllLower)
         {
             continue;
         }
         if (t.Chars.IsAllUpper)
         {
             continue;
         }
         if (t1.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }
         // If both halves are dictionary words there is nothing to repair.
         Pullenti.Morph.MorphClass mc1 = t1.GetMorphClassInDictionary();
         if (!mc1.IsUndefined && !mc0.IsUndefined)
         {
             continue;
         }

         // Chars.IsLetter does not prove the token is a TextToken; skip the pair instead of
         // dereferencing a failed 'as' cast.
         Pullenti.Ner.TextToken head = t as Pullenti.Ner.TextToken;
         Pullenti.Ner.TextToken tail = t1 as Pullenti.Ner.TextToken;
         if (head == null || tail == null)
         {
             continue;
         }
         if ((head.Term.Length + tail.Term.Length) < 6)
         {
             continue;
         }
         string corw = head.Term + tail.Term;
         List<Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
         if (ccc == null || ccc.Count != 1)
         {
             continue;
         }
         // NOTE(review): corw is at least 6 characters long at this point, so these two
         // 4-letter exclusions can never match as written; kept unchanged pending confirmation
         // of the intended check.
         if (corw == "ПОСТ" || corw == "ВРЕД")
         {
             continue;
         }
         Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(ccc[0], this, t.BeginChar, t1.EndChar);
         if (tt.GetMorphClassInDictionary().IsUndefined)
         {
             continue;
         }
         tt.Chars = t.Chars;

         // Splice the merged token into the chain in place of t..t1.
         if (t == FirstToken)
         {
             FirstToken = tt;
         }
         else
         {
             t.Previous.Next = tt;
         }
         if (t1.Next != null)
         {
             tt.Next = t1.Next;
         }
         t = tt;
     }
 }
예제 #5
0
        /// <summary>
        /// Looks for runs of one-character tokens separated by exactly one whitespace
        /// (letter-spaced text) and replaces a run with a single <c>TextToken</c> when the
        /// morphology service returns exactly one token for the joined text and at least one
        /// of its word forms is in the dictionary.
        /// </summary>
        /// <remarks>
        /// A run must be preceded by more than two whitespaces, or by exactly two when the
        /// previous run was merged. It must contain more than three following letters, or more
        /// than one when the previous run was merged.
        /// </remarks>
        void MergeLetters()
        {
            bool          beforeWord = false;
            StringBuilder tmp        = new StringBuilder();

            for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
            {
                // A token that is not a TextToken cannot start a run; the original dereferenced
                // the failed cast here.
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null || !tt.Chars.IsLetter || tt.LengthChar != 1)
                {
                    beforeWord = false;
                    continue;
                }
                int i = t.WhitespacesBeforeCount;
                if (!(i > 2 || (i == 2 && beforeWord)))
                {
                    beforeWord = false;
                    continue;
                }

                // Collect the following single-character tokens; i counts the letters after the first.
                i = 0;
                Pullenti.Ner.Token t1;
                tmp.Length = 0;
                tmp.Append(tt.GetSourceText());
                for (t1 = t; t1.Next != null; t1 = t1.Next)
                {
                    tt = t1.Next as Pullenti.Ner.TextToken;
                    // A non-text token ends the run (the original would have thrown here).
                    if (tt == null || tt.LengthChar != 1 || tt.WhitespacesBeforeCount != 1)
                    {
                        break;
                    }
                    i++;
                    tmp.Append(tt.GetSourceText());
                }
                if (!(i > 3 || (i > 1 && beforeWord)))
                {
                    beforeWord = false;
                    continue;
                }

                beforeWord = false;
                List<Pullenti.Morph.MorphToken> mt = Pullenti.Morph.MorphologyService.Process(tmp.ToString(), null, null);
                if (mt == null || mt.Count != 1)
                {
                    // Not a single word: skip the whole run.
                    t = t1;
                    continue;
                }
                foreach (Pullenti.Morph.MorphWordForm wf in mt[0].WordForms)
                {
                    if (wf.IsInDictionary)
                    {
                        beforeWord = true;
                        break;
                    }
                }
                if (!beforeWord)
                {
                    t = t1;
                    continue;
                }

                // Replace t..t1 in the chain with the merged token.
                tt = new Pullenti.Ner.TextToken(mt[0], this, t.BeginChar, t1.EndChar);
                if (t == FirstToken)
                {
                    FirstToken = tt;
                }
                else
                {
                    tt.Previous = t.Previous;
                }
                tt.Next = t1.Next;
                t       = tt;
            }
        }
예제 #6
0
 /// <summary>
 /// Private constructor; both arguments are forwarded to the base constructor together
 /// with a null third argument.
 /// </summary>
 /// <param name="begin">Begin token passed to the base constructor.</param>
 /// <param name="end">End token passed to the base constructor.</param>
 private Line(Pullenti.Ner.Token begin, Pullenti.Ner.Token end) : base(begin, end, null)
 {
 }
예제 #7
0
 /// <summary>
 /// Scans the token chain for long stretches of tokens that <c>CalcAbnormalCoef</c> scores
 /// as abnormal and unlinks such a stretch from the chain when it is both heavy
 /// (accumulated score above 20) and long (more than 500 characters, not counting
 /// characters equal to their right neighbour).
 /// </summary>
 void ClearDust()
 {
     Pullenti.Ner.Token cur = FirstToken;
     while (cur != null)
     {
         int total = CalcAbnormalCoef(cur);
         if (total >= 1)
         {
             // Extend the stretch forward. The scan starts at cur itself, so cur's own
             // coefficient is added to the total a second time (as in the original code).
             // Zero scores are ignored; a second negative score in a row stops the scan.
             int calmStreak = 0;
             Pullenti.Ner.Token last = cur;
             for (Pullenti.Ner.Token probe = cur; probe != null; probe = probe.Next)
             {
                 int coef = CalcAbnormalCoef(probe);
                 if (coef > 0)
                 {
                     calmStreak = 0;
                     total     += coef;
                     last       = probe;
                 }
                 else if (coef < 0)
                 {
                     calmStreak++;
                     if (calmStreak > 1)
                     {
                         break;
                     }
                 }
             }

             int span = last.EndChar - cur.BeginChar;
             if (total > 20 && span > 500)
             {
                 // Discount every character that repeats its right neighbour.
                 for (int pos = cur.BeginChar; pos < last.EndChar; pos++)
                 {
                     if (Sofa.Text[pos] == Sofa.Text[pos + 1])
                     {
                         span--;
                     }
                 }
                 if (span > 500)
                 {
                     // Still long enough: cut cur..last out of the chain.
                     if (cur.Previous == null)
                     {
                         FirstToken = last.Next;
                     }
                     else
                     {
                         cur.Previous.Next = last.Next;
                     }
                 }
             }

             // Whether or not anything was removed, resume after the examined stretch.
             cur = last;
         }
         cur = cur.Next;
     }
 }
예제 #8
0
        /// <summary>
        /// Finds referents that are strict generalizations of other referents and either merges
        /// the pair or records the relation in <c>GeneralReferent</c>.
        /// Every referent is temporarily wrapped in a <c>Node</c> stored in its <c>Tag</c>; all tags
        /// are reset to null before the method returns.
        /// </summary>
        /// <param name="proc">Processor whose <c>Analyzers</c> are enumerated.</param>
        /// <param name="kit">Analysis kit that supplies per-analyzer data and the first token of the chain.</param>
        public static void RefreshGenerals(Pullenti.Ner.Processor proc, Pullenti.Ner.Core.AnalysisKit kit)
        {
            // Index: analyzer name -> compare string -> referents that produced that string.
            Dictionary <string, Dictionary <string, List <Pullenti.Ner.Referent> > > all = new Dictionary <string, Dictionary <string, List <Pullenti.Ner.Referent> > >();
            List <Node> allRefs = new List <Node>();

            // Pass 1: wrap every referent in a Node and fill the index.
            foreach (Pullenti.Ner.Analyzer a in proc.Analyzers)
            {
                Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(a);
                if (ad == null)
                {
                    continue;
                }
                foreach (Pullenti.Ner.Referent r in ad.Referents)
                {
                    Node nod = new Node()
                    {
                        Ref = r, Ad = ad
                    };
                    allRefs.Add(nod);
                    // The node is attached before the compare strings are checked, so referents
                    // without compare strings still take part in the slot-reference pass below.
                    r.Tag = nod;
                    Dictionary <string, List <Pullenti.Ner.Referent> > si;
                    if (!all.TryGetValue(a.Name, out si))
                    {
                        all.Add(a.Name, (si = new Dictionary <string, List <Pullenti.Ner.Referent> >()));
                    }
                    List <string> strs = r.GetCompareStrings();
                    if (strs == null || strs.Count == 0)
                    {
                        continue;
                    }
                    foreach (string s in strs)
                    {
                        if (s == null)
                        {
                            continue;
                        }
                        List <Pullenti.Ner.Referent> li;
                        if (!si.TryGetValue(s, out li))
                        {
                            si.Add(s, (li = new List <Pullenti.Ner.Referent>()));
                        }
                        li.Add(r);
                    }
                }
            }
            // Pass 2: record which nodes reference which through referent-valued slots.
            foreach (Node r in allRefs)
            {
                foreach (Pullenti.Ner.Slot s in r.Ref.Slots)
                {
                    if (s.Value is Pullenti.Ner.Referent)
                    {
                        Pullenti.Ner.Referent to = s.Value as Pullenti.Ner.Referent;
                        Node tn = to.Tag as Node;
                        // Referents whose Tag is not a Node were not wrapped in pass 1; skip them.
                        if (tn == null)
                        {
                            continue;
                        }
                        if (tn.RefsFrom == null)
                        {
                            tn.RefsFrom = new List <Node>();
                        }
                        tn.RefsFrom.Add(r);
                        if (r.RefsTo == null)
                        {
                            r.RefsTo = new List <Node>();
                        }
                        r.RefsTo.Add(tn);
                    }
                }
            }
            // Pass 3: inside each bucket of referents sharing a compare string, look for pairs where
            // CanBeGeneralFor holds in exactly one direction. n1 is the general node, n2 the specific one.
            foreach (Dictionary <string, List <Pullenti.Ner.Referent> > ty in all.Values)
            {
                foreach (List <Pullenti.Ner.Referent> li in ty.Values)
                {
                    if (li.Count < 2)
                    {
                        continue;
                    }
                    // NOTE(review): the 3000 cap presumably bounds the quadratic pair scan below - confirm.
                    if (li.Count > 3000)
                    {
                        continue;
                    }
                    for (int i = 0; i < li.Count; i++)
                    {
                        for (int j = i + 1; j < li.Count; j++)
                        {
                            Node n1 = null;
                            Node n2 = null;
                            if (li[i].CanBeGeneralFor(li[j]) && !li[j].CanBeGeneralFor(li[i]))
                            {
                                n1 = li[i].Tag as Node;
                                n2 = li[j].Tag as Node;
                            }
                            else if (li[j].CanBeGeneralFor(li[i]) && !li[i].CanBeGeneralFor(li[j]))
                            {
                                n1 = li[j].Tag as Node;
                                n2 = li[i].Tag as Node;
                            }
                            if (n1 != null && n2 != null)
                            {
                                // GenFrom of the general node lists its specific nodes;
                                // GenTo of the specific node lists its general nodes. Both are kept duplicate-free.
                                if (n1.GenFrom == null)
                                {
                                    n1.GenFrom = new List <Node>();
                                }
                                if (!n1.GenFrom.Contains(n2))
                                {
                                    n1.GenFrom.Add(n2);
                                }
                                if (n2.GenTo == null)
                                {
                                    n2.GenTo = new List <Node>();
                                }
                                if (!n2.GenTo.Contains(n1))
                                {
                                    n2.GenTo.Add(n1);
                                }
                            }
                        }
                    }
                }
            }
            // Pass 4: when a node has several general nodes, drop each general node p for which
            // another remaining general node reports IsInGenParentsOrHigher(p).
            // NOTE(review): presumably this removes generals already reachable through a nearer one - confirm.
            foreach (Node n in allRefs)
            {
                if (n.GenTo != null && n.GenTo.Count > 1)
                {
                    // Iterate backwards so RemoveAt(i) does not shift the entries still to be visited.
                    for (int i = n.GenTo.Count - 1; i >= 0; i--)
                    {
                        Node p   = n.GenTo[i];
                        bool del = false;
                        for (int j = 0; j < n.GenTo.Count; j++)
                        {
                            if (j != i && n.GenTo[j].IsInGenParentsOrHigher(p))
                            {
                                del = true;
                            }
                        }
                        if (del)
                        {
                            p.GenFrom.Remove(n);
                            n.GenTo.RemoveAt(i);
                        }
                    }
                }
            }
            // Pass 5: resolve nodes that are left with exactly one general node.
            foreach (Node n in allRefs)
            {
                if (!n.Deleted && n.GenTo != null && n.GenTo.Count == 1)
                {
                    Node p = n.GenTo[0];
                    if (p.GenFrom.Count == 1)
                    {
                        // One-to-one pair: the specific referent absorbs the general one's slots and
                        // occurrences, and the general node is marked for removal.
                        n.Ref.MergeSlots(p.Ref, true);
                        // The absorbed referent's Tag now points at the surviving referent instead of a Node.
                        // NOTE(review): presumably _correctReferents relies on this to redirect tokens - confirm.
                        p.Ref.Tag = n.Ref;
                        p.ReplaceValues(n);
                        foreach (Pullenti.Ner.TextAnnotation o in p.Ref.Occurrence)
                        {
                            n.Ref.AddOccurence(o);
                        }
                        p.Deleted = true;
                    }
                    else
                    {
                        // The general referent is shared by several specific ones: keep both and link them.
                        n.Ref.GeneralReferent = p.Ref;
                    }
                }
            }
            // Pass 6: let _correctReferents process every token of the chain.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                _correctReferents(t);
            }
            // Pass 7: remove absorbed referents from their analyzer data and clear every temporary Tag.
            foreach (Node n in allRefs)
            {
                if (n.Deleted)
                {
                    n.Ad.RemoveReferent(n.Ref);
                }
                n.Ref.Tag = null;
            }
        }
예제 #9
0
        /// <summary>
        /// Tries to assemble a <c>BankDataReferent</c> from the tokens starting at <paramref name="t"/>.
        /// The scan collects URI referents whose scheme passes <c>_isBankReq</c>, plus ORGANIZATION
        /// referents that act as the bank and, after a "К/С" item preceded by "В", the correspondent bank.
        /// </summary>
        /// <param name="t">First token to inspect; null yields null.</param>
        /// <param name="keyWord">
        /// When false, the scan gives up (returns null) as soon as it meets a token that is not a bank
        /// requisite before any requisite has been collected, unless a bank organization was already seen.
        /// </param>
        /// <returns>
        /// A referent token from the start token to the last accepted token, or null when no requisites
        /// were found, when neither "Р/С" nor "Л/С" is among them, or when there is a single requisite
        /// and no organization.
        /// </returns>
        Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool keyWord)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0       = t;
            Pullenti.Ner.Token t1       = t;
            List <string>      urisKeys = null;
            List <Pullenti.Ner.Uri.UriReferent> uris = null;

            Pullenti.Ner.Referent org    = null;
            Pullenti.Ner.Referent corOrg = null;
            bool orgIsBank = false;
            // Number of consecutive tokens that carried no bank information; reset on every hit.
            int  empty     = 0;

            Pullenti.Ner.Uri.UriReferent lastUri = null;
            for (; t != null; t = t.Next)
            {
                if (t.IsTableControlChar && t != t0)
                {
                    break;
                }
                if (t.IsComma || t.Morph.Class.IsPreposition || t.IsCharOf("/\\"))
                {
                    continue;
                }
                bool bankKeyword = false;
                // Skip the "ПОЛНЫЙ НАИМЕНОВАНИЕ / НАЗВАНИЕ" label pair.
                if (t.IsValue("ПОЛНЫЙ", null) && t.Next != null && ((t.Next.IsValue("НАИМЕНОВАНИЕ", null) || t.Next.IsValue("НАЗВАНИЕ", null))))
                {
                    t = t.Next.Next;
                    if (t == null)
                    {
                        break;
                    }
                }
                if (t.IsValue("БАНК", null))
                {
                    // The word may itself be an organization token, in which case it already marks a bank.
                    if ((t is Pullenti.Ner.ReferentToken) && t.GetReferent().TypeName == "ORGANIZATION")
                    {
                        bankKeyword = true;
                    }
                    // Look past an optional noun phrase and an optional colon after the keyword.
                    Pullenti.Ner.Token tt = t.Next;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        tt = npt.EndToken.Next;
                    }
                    if (tt != null && tt.IsChar(':'))
                    {
                        tt = tt.Next;
                    }
                    if (tt != null)
                    {
                        if (!bankKeyword)
                        {
                            t           = tt;
                            bankKeyword = true;
                        }
                        else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "ORGANIZATION")
                        {
                            t = tt;
                        }
                    }
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null && r.TypeName == "ORGANIZATION")
                {
                    // An organization counts as a bank if it or one of up to three parents has KIND "Bank".
                    bool isBank = false;
                    int  kk     = 0;
                    for (Pullenti.Ner.Referent rr = r; rr != null && (kk < 4); rr = rr.ParentReferent, kk++)
                    {
                        // KIND is a machine identifier, so compare ordinally rather than by current culture.
                        isBank = string.Equals(rr.GetStringValue("KIND") ?? "", "Bank", System.StringComparison.OrdinalIgnoreCase);
                        if (isBank)
                        {
                            break;
                        }
                    }
                    if (!isBank && bankKeyword)
                    {
                        isBank = true;
                    }
                    // A non-bank organization following an already collected "ИНН" aborts the attempt.
                    if (!isBank && uris != null && urisKeys.Contains("ИНН"))
                    {
                        return(null);
                    }
                    if ((lastUri != null && lastUri.Scheme == "К/С" && t.Previous != null) && t.Previous.IsValue("В", null))
                    {
                        // "К/С ... В <org>": the organization is the correspondent bank.
                        corOrg = r;
                        t1     = t;
                    }
                    else if (org == null || ((!orgIsBank && isBank)))
                    {
                        // First organization seen, or a real bank replacing a non-bank organization.
                        org       = r;
                        t1        = t;
                        orgIsBank = isBank;
                        if (isBank)
                        {
                            continue;
                        }
                    }
                    if (uris == null && !keyWord)
                    {
                        return(null);
                    }
                    continue;
                }
                if (r is Pullenti.Ner.Uri.UriReferent)
                {
                    Pullenti.Ner.Uri.UriReferent u = r as Pullenti.Ner.Uri.UriReferent;
                    if (uris == null)
                    {
                        // The first URI must be a bank requisite, and a lone "ИНН" at line end is rejected.
                        if (!_isBankReq(u.Scheme))
                        {
                            return(null);
                        }
                        if (u.Scheme == "ИНН" && t.IsNewlineAfter)
                        {
                            return(null);
                        }
                        uris     = new List <Pullenti.Ner.Uri.UriReferent>();
                        urisKeys = new List <string>();
                    }
                    else
                    {
                        // Later URIs end the sequence when they are not requisites, repeat a scheme,
                        // or are an "ИНН" separated from the previous item by unrelated tokens.
                        if (!_isBankReq(u.Scheme))
                        {
                            break;
                        }
                        if (urisKeys.Contains(u.Scheme))
                        {
                            break;
                        }
                        if (u.Scheme == "ИНН")
                        {
                            if (empty > 0)
                            {
                                break;
                            }
                        }
                    }
                    urisKeys.Add(u.Scheme);
                    uris.Add(u);
                    lastUri = u;
                    t1      = t;
                    empty   = 0;
                    continue;
                }
                else if (uris == null && !keyWord && !orgIsBank)
                {
                    return(null);
                }
                if (r != null && ((r.TypeName == "GEO" || r.TypeName == "ADDRESS")))
                {
                    empty++;
                    continue;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    if (t.IsValue("ПОЛНЫЙ", null) || t.IsValue("НАИМЕНОВАНИЕ", null) || t.IsValue("НАЗВАНИЕ", null))
                    {
                        // Label words are tolerated without counting as empty tokens.
                    }
                    else if (t.Chars.IsLetter)
                    {
                        Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok != null)
                        {
                            t     = tok.EndToken;
                            empty = 0;
                        }
                        else
                        {
                            empty++;
                            // A new line starting with "<noun phrase>:" looks like the next labelled field.
                            if (t.IsNewlineBefore)
                            {
                                Pullenti.Ner.Core.NounPhraseToken nnn = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (nnn != null && nnn.EndToken.Next != null && nnn.EndToken.Next.IsChar(':'))
                                {
                                    break;
                                }
                            }
                        }
                        if (uris == null)
                        {
                            break;
                        }
                    }
                }
                if (empty > 2)
                {
                    break;
                }
                if (empty > 0 && t.IsChar(':') && t.IsNewlineAfter)
                {
                    break;
                }
                if (((t is Pullenti.Ner.NumberToken) && t.IsNewlineBefore && t.Next != null) && !t.Next.Chars.IsLetter)
                {
                    break;
                }
            }
            if (uris == null)
            {
                return(null);
            }
            // An account number ("Р/С" or "Л/С") is mandatory.
            if (!urisKeys.Contains("Р/С") && !urisKeys.Contains("Л/С"))
            {
                return(null);
            }
            if ((uris.Count < 2) && org == null)
            {
                return(null);
            }
            BankDataReferent bdr = new BankDataReferent();

            foreach (Pullenti.Ner.Uri.UriReferent u in uris)
            {
                bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
            }
            if (org != null)
            {
                bdr.AddSlot(BankDataReferent.ATTR_BANK, org, false, 0);
            }
            if (corOrg != null)
            {
                bdr.AddSlot(BankDataReferent.ATTR_CORBANK, corOrg, false, 0);
            }
            // If an organization immediately precedes the block, borrow its requisites
            // for schemes that were not found in the block itself.
            Pullenti.Ner.Referent org0 = (t0.Previous == null ? null : t0.Previous.GetReferent());
            if (org0 != null && org0.TypeName == "ORGANIZATION")
            {
                foreach (Pullenti.Ner.Slot s in org0.Slots)
                {
                    if (s.Value is Pullenti.Ner.Uri.UriReferent)
                    {
                        Pullenti.Ner.Uri.UriReferent u = s.Value as Pullenti.Ner.Uri.UriReferent;
                        if (_isBankReq(u.Scheme))
                        {
                            if (!urisKeys.Contains(u.Scheme))
                            {
                                bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0);
                            }
                        }
                    }
                }
            }
            return(new Pullenti.Ner.ReferentToken(bdr, t0, t1));
        }
예제 #10
0
 /// <summary>
 /// Creates a parenthesis token from tokens <paramref name="b"/> and <paramref name="e"/>,
 /// forwarding them to the base constructor with a null third argument.
 /// </summary>
 public ParenthesisToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e) : base(b, e, null)
 {
 }
예제 #11
0
        /// <summary>
        /// Tries to recognize an introductory (parenthetical) expression that starts at <paramref name="t"/>.
        /// Checks, in order: the termin dictionary, adverb / comparative-adjective patterns,
        /// "В СООТВЕТСТВИЕ ...", "СОГЛАСНО ...", "В СИЛА ..." and "В &lt;adjective&gt; ВИД/СЛУЧАЙ/СФЕРА" constructions,
        /// an optional "НЕ" followed by a preposition and a noun phrase, and finally the word "ВЕДЬ".
        /// </summary>
        /// <param name="t">First token of the candidate expression; null yields null.</param>
        /// <returns>The recognized token, or null when nothing matches.</returns>
        public static ParenthesisToken TryAttach(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                ParenthesisToken res = new ParenthesisToken(t, tok.EndToken);
                return(res);
            }
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            bool ok = false;

            Pullenti.Ner.Token t1;
            if (mc.IsAdverb)
            {
                ok = true;
            }
            else if (mc.IsAdjective)
            {
                // Only comparative qualitative adjectives qualify.
                if (t.Morph.ContainsAttr("сравн.", null) && t.Morph.ContainsAttr("кач.прил.", null))
                {
                    ok = true;
                }
            }
            if (ok && t.Next != null)
            {
                if (t.Next.IsChar(','))
                {
                    return(new ParenthesisToken(t, t));
                }
                t1 = t.Next;
                // Adverb followed by a present-tense, imperfective, active-voice verb.
                if (t1.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
                {
                    if (t1.Morph.ContainsAttr("н.вр.", null) && t1.Morph.ContainsAttr("нес.в.", null) && t1.Morph.ContainsAttr("дейст.з.", null))
                    {
                        return(new ParenthesisToken(t, t1));
                    }
                }
            }
            // From here on t1 marks the token that follows a recognized lead-in phrase, if any.
            t1 = null;
            if ((t.IsValue("В", null) && t.Next != null && t.Next.IsValue("СООТВЕТСТВИЕ", null)) && t.Next.Next != null && t.Next.Next.Morph.Class.IsPreposition)
            {
                t1 = t.Next.Next.Next;
            }
            else if (t.IsValue("СОГЛАСНО", null))
            {
                t1 = t.Next;
            }
            else if (t.IsValue("В", null) && t.Next != null)
            {
                if (t.Next.IsValue("СИЛА", null))
                {
                    t1 = t.Next.Next;
                }
                else if (t.Next.Morph.Class.IsAdjective || t.Next.Morph.Class.IsPronoun)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null)
                    {
                        if (npt.Noun.IsValue("ВИД", null) || npt.Noun.IsValue("СЛУЧАЙ", null) || npt.Noun.IsValue("СФЕРА", null))
                        {
                            return(new ParenthesisToken(t, npt.EndToken));
                        }
                    }
                }
            }
            if (t1 != null)
            {
                if (t1.Next != null)
                {
                    // Skip an intermediate "НОРМА / ПОЛОЖЕНИЕ / УКАЗАНИЕ" noun phrase.
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null)
                    {
                        if (npt1.Noun.IsValue("НОРМА", null) || npt1.Noun.IsValue("ПОЛОЖЕНИЕ", null) || npt1.Noun.IsValue("УКАЗАНИЕ", null))
                        {
                            t1 = npt1.EndToken.Next;
                        }
                    }
                }
            }
            // Re-check t1: the skip above leaves it null when the noun phrase ends the text,
            // and dereferencing it would throw.
            if (t1 != null)
            {
                Pullenti.Ner.Referent r = t1.GetReferent();
                if (r != null)
                {
                    ParenthesisToken res = new ParenthesisToken(t, t1)
                    {
                        Ref = r
                    };
                    if (t1.Next != null && t1.Next.IsComma)
                    {
                        // Extend over a following ", ... СИЛА/ДЕЙСТВИЕ ... ," clause when it closes with a comma.
                        bool sila = false;
                        for (Pullenti.Ner.Token ttt = t1.Next.Next; ttt != null; ttt = ttt.Next)
                        {
                            if (ttt.IsValue("СИЛА", null) || ttt.IsValue("ДЕЙСТВИЕ", null))
                            {
                                sila = true;
                                continue;
                            }
                            if (ttt.IsComma)
                            {
                                if (sila)
                                {
                                    res.EndToken = ttt.Previous;
                                }
                                break;
                            }
                            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(ttt, false, false))
                            {
                                break;
                            }
                        }
                    }
                    return(res);
                }
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    return(new ParenthesisToken(t, npt.EndToken));
                }
            }
            Pullenti.Ner.Token tt = t;
            if (tt.IsValue("НЕ", null))
            {
                // May become null when "НЕ" is the last token; guarded below.
                tt = tt.Next;
            }
            // The null test must come first: it used to follow the dereference and never protected it.
            if (tt != null && tt.Morph.Class.IsPreposition)
            {
                tt = tt.Next;
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt1 != null)
                {
                    tt = npt1.EndToken;
                    if (tt.Next != null && tt.Next.IsComma)
                    {
                        // The closing comma is included in the resulting token.
                        return(new ParenthesisToken(t, tt.Next));
                    }
                    if (npt1.Noun.IsValue("ОЧЕРЕДЬ", null))
                    {
                        return(new ParenthesisToken(t, tt));
                    }
                }
            }
            if (t.IsValue("ВЕДЬ", null))
            {
                return(new ParenthesisToken(t, t));
            }
            return(null);
        }
예제 #12
0
        /// <summary>
        /// Completes a money number token: selects the currency postfix that starts at
        /// <paramref name="t1"/>, stores its canonic text in <c>ExTypParam</c>, and absorbs a trailing
        /// small-money part (a number or the word "НОЛЬ" followed by a small-money term) when present.
        /// </summary>
        /// <param name="res">Token being completed; it is modified in place and returned.</param>
        /// <param name="t1">Token where the currency postfix is expected.</param>
        /// <returns><paramref name="res"/>, or null when no currency postfix can be determined.</returns>
        static Pullenti.Ner.Core.NumberExToken _correctMoney(Pullenti.Ner.Core.NumberExToken res, Pullenti.Ner.Token t1)
        {
            if (t1 == null)
            {
                return null;
            }
            List <Pullenti.Ner.Core.TerminToken> candidates = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
            if (candidates == null || candidates.Count == 0)
            {
                return null;
            }

            // A GEO referent right after the postfix may name the country of the currency.
            Pullenti.Ner.Token cursor = candidates[0].EndToken.Next;
            string countryCode = null;
            if (cursor != null)
            {
                Pullenti.Ner.Referent geo = cursor.GetReferent();
                if (geo != null && geo.TypeName == "GEO")
                {
                    countryCode = geo.GetStringValue("ALPHA2");
                }
            }

            if (countryCode != null)
            {
                // Keep only the candidates whose canonic text starts with the country code.
                int k = candidates.Count;
                while (k-- > 0)
                {
                    if (!candidates[k].Termin.CanonicText.StartsWith(countryCode))
                    {
                        candidates.RemoveAt(k);
                    }
                }
                // Nothing survived the filter: start again from the unfiltered list.
                if (candidates.Count == 0)
                {
                    candidates = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
                }
            }

            if (candidates.Count > 1)
            {
                // Still ambiguous: fall back to a default country for the most common units.
                string unitName = candidates[0].Termin.Terms[0].CanonicalText;
                switch (unitName)
                {
                    case "РУБЛЬ":
                    case "RUBLE":
                        countryCode = "RU";
                        break;
                    case "ДОЛЛАР":
                    case "ДОЛАР":
                    case "DOLLAR":
                        countryCode = "US";
                        break;
                    case "ФУНТ":
                    case "POUND":
                        countryCode = "UK";
                        break;
                    default:
                        countryCode = null;
                        break;
                }
                if (countryCode != null)
                {
                    int k = candidates.Count;
                    while (k-- > 0)
                    {
                        string code = candidates[k].Termin.CanonicText;
                        if (!(code.StartsWith(countryCode) || code == "GBP"))
                        {
                            candidates.RemoveAt(k);
                        }
                    }
                }
                // In the ambiguous case the GEO token is never absorbed into the result.
                countryCode = null;
            }

            if (candidates.Count < 1)
            {
                return null;
            }
            res.ExTypParam = candidates[0].Termin.CanonicText;
            if (countryCode != null && cursor != null)
            {
                res.EndToken = cursor;
            }

            // Look for the small-money part after the main amount, past an optional comma/and.
            cursor = res.EndToken.Next;
            if (cursor != null && cursor.IsCommaAnd)
            {
                cursor = cursor.Next;
            }

            Pullenti.Ner.NumberToken minor = cursor as Pullenti.Ner.NumberToken;
            if (minor != null && cursor.Next != null && (cursor.WhitespacesAfterCount < 4))
            {
                Pullenti.Ner.Token after = cursor.Next;
                // Skip a bracketed repetition of the same number, e.g. "50 (50)".
                bool bracketedNumber = (after != null && after.IsChar('(') && (after.Next is Pullenti.Ner.NumberToken)) && after.Next.Next != null && after.Next.Next.IsChar(')');
                if (bracketedNumber)
                {
                    if (minor.Value == (after.Next as Pullenti.Ner.NumberToken).Value)
                    {
                        after = after.Next.Next.Next;
                    }
                }
                Pullenti.Ner.Core.TerminToken coin = m_SmallMoney.TryParse(after, Pullenti.Ner.Core.TerminParseAttr.No);
                if (coin == null && after != null && after.IsChar(')'))
                {
                    coin = m_SmallMoney.TryParse(after.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                }
                if (coin != null && minor.IntValue != null)
                {
                    // The termin tag holds the number of small units in one main unit.
                    int unitsPerMain = (int)coin.Termin.Tag;
                    int amount       = minor.IntValue.Value;
                    if (amount < unitsPerMain)
                    {
                        double share = (double)amount / unitsPerMain;

                        // Main value: a conflicting existing fraction is reported through AltRestMoney,
                        // otherwise the small part is added to a whole value.
                        double frac     = res.RealValue - ((long)res.RealValue);
                        int    hundreds = (int)(((frac * 100) + 0.0001));
                        if (hundreds > 0 && amount != hundreds)
                        {
                            res.AltRestMoney = amount;
                        }
                        else if (frac == 0)
                        {
                            res.RealValue += share;
                        }

                        // Same treatment for the alternative value.
                        frac     = res.AltRealValue - ((long)res.AltRealValue);
                        hundreds = (int)(((frac * 100) + 0.0001));
                        if (hundreds > 0 && amount != hundreds)
                        {
                            res.AltRestMoney = amount;
                        }
                        else if (frac == 0)
                        {
                            res.AltRealValue += share;
                        }
                        res.EndToken = coin.EndToken;
                    }
                }
            }
            else if ((cursor is Pullenti.Ner.TextToken) && cursor.IsValue("НОЛЬ", null))
            {
                // "zero <small units>": nothing to add, only extend the token.
                Pullenti.Ner.Core.TerminToken coin = m_SmallMoney.TryParse(cursor.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (coin != null)
                {
                    res.EndToken = coin.EndToken;
                }
            }
            return res;
        }
예제 #13
0
        public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0       = t;
            string             isDollar = null;

            if (t.LengthChar == 1 && t.Next != null)
            {
                if ((((isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t)))) != null)
                {
                    t = t.Next;
                }
            }
            Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
            if (nt == null)
            {
                if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                {
                    Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                    {
                        Pullenti.Ner.NumberToken        nt0 = t.Next as Pullenti.Ner.NumberToken;
                        Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money)
                        {
                            AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph
                        };
                        return(_correctMoney(res, toks1.BeginToken));
                    }
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null || !tt.Morph.Class.IsAdjective)
                {
                    return(null);
                }
                string val = tt.Term;
                for (int i = 4; i < (val.Length - 5); i++)
                {
                    string v = val.Substring(0, i);
                    List <Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language);
                    if (li == null)
                    {
                        continue;
                    }
                    string vv = val.Substring(i);
                    List <Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language);
                    if (lii != null && lii.Count > 0)
                    {
                        Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag)
                        {
                            Morph = t.Morph
                        };
                        _correctExtTypes(re);
                        return(re);
                    }
                    break;
                }
                return(null);
            }
            if (t.Next == null && isDollar == null)
            {
                return(null);
            }
            double f = nt.RealValue;

            if (double.IsNaN(f))
            {
                return(null);
            }
            Pullenti.Ner.Token t1 = nt.Next;
            if (((t1 != null && t1.IsCharOf(",."))) || (((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3))))
            {
                double d;
                Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false);
                if (tt11 != null)
                {
                    t1 = tt11.EndToken.Next;
                    f  = tt11.RealValue;
                }
            }
            if (t1 == null)
            {
                if (isDollar == null)
                {
                    return(null);
                }
            }
            else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null))
            {
                f += 0.5;
                t1 = t1.Next.Next;
            }
            if (t1 != null && t1.IsHiphen && t1.Next != null)
            {
                t1 = t1.Next;
            }
            bool   det  = false;
            double altf = f;

            if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2)
            {
                t1 = t1.Next;
            }
            if ((t1 != null && t1.Next != null && t1.IsChar('(')) && (((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null))) && t1.Next.Next != null)
            {
                Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken;
                double val = (double)0;
                if (nt1 != null)
                {
                    val = nt1.RealValue;
                }
                if (Math.Floor(f) == Math.Floor(val))
                {
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                        if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0)
                        {
                            t1 = t1.Next;
                        }
                    }
                    else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')'))
                    {
                        int rest = GetDecimalRest100(f);
                        if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value)
                        {
                            t1  = ttt.Next.Next.Next.Next;
                            det = true;
                        }
                    }
                    else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')'))
                    {
                        Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken;
                        altf = num2.RealValue;
                        if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null))
                        {
                            altf /= 10;
                        }
                        else if (ttt.Next.Next.IsValue("СОТЫЙ", null))
                        {
                            altf /= 100;
                        }
                        else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null))
                        {
                            altf /= 1000;
                        }
                        else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null))
                        {
                            altf /= 10000;
                        }
                        else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null))
                        {
                            altf /= 100000;
                        }
                        else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null))
                        {
                            altf /= 1000000;
                        }
                        if (altf < 1)
                        {
                            altf += val;
                            t1    = ttt.Next.Next.Next.Next;
                            det   = true;
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (toks1 != null)
                        {
                            if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                            {
                                if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')'))
                                {
                                    Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph
                                    };
                                    return(_correctMoney(res, toks1.BeginToken));
                                }
                            }
                        }
                        Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next);
                        if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')'))
                        {
                            res2.BeginToken   = t;
                            res2.EndToken     = res2.EndToken.Next;
                            res2.AltRealValue = res2.RealValue;
                            res2.RealValue    = f;
                            _correctExtTypes(res2);
                            if (res2.WhitespacesAfterCount < 2)
                            {
                                Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                                if (toks2 != null)
                                {
                                    if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money)
                                    {
                                        res2.EndToken = toks2.EndToken;
                                    }
                                }
                            }
                            return(res2);
                        }
                    }
                }
                else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == Pullenti.Ner.NumberSpellingType.Digit)
                {
                    altf = nt1.RealValue;
                    Pullenti.Ner.Token ttt = t1.Next.Next;
                    if (ttt.IsChar(')'))
                    {
                        t1  = ttt.Next;
                        det = true;
                    }
                    if (!det)
                    {
                        altf = f;
                    }
                }
            }
            if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null))
            {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null)
                {
                    t1 = br.EndToken.Next;
                }
            }
            if (isDollar != null)
            {
                Pullenti.Ner.Token te = null;
                if (t1 != null)
                {
                    te = t1.Previous;
                }
                else
                {
                    for (t1 = t0; t1 != null; t1 = t1.Next)
                    {
                        if (t1.Next == null)
                        {
                            te = t1;
                        }
                    }
                }
                if (te == null)
                {
                    return(null);
                }
                if (te.IsHiphen && te.Next != null)
                {
                    if (te.Next.IsValue("МИЛЛИОННЫЙ", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken))
                {
                    if (te.Next.IsValue("M", null))
                    {
                        f    *= 1000000;
                        altf *= 1000000;
                        te    = te.Next;
                    }
                    else if (te.Next.IsValue("BN", null))
                    {
                        f    *= 1000000000;
                        altf *= 1000000000;
                        te    = te.Next;
                    }
                }
                return(new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money)
                {
                    RealValue = f, AltRealValue = altf, ExTypParam = isDollar
                });
            }
            if (t1 == null || ((t1.IsNewlineBefore && !det)))
            {
                return(null);
            }
            Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
            if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0")
            {
                toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (toks == null && t1.IsChar('р'))
            {
                int cou = 10;
                for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--)
                {
                    if (ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null))
                    {
                    }
                    else if (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY")
                    {
                    }
                    else
                    {
                        continue;
                    }
                    toks = new Pullenti.Ner.Core.TerminToken(t1, t1)
                    {
                        Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0]
                    };
                    if (t1.Next != null && t1.Next.IsChar('.'))
                    {
                        toks.EndToken = t1.Next;
                    }
                    Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                    return(new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty)
                    {
                        RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB"
                    });
                }
            }
            if (toks != null)
            {
                t1 = toks.EndToken;
                if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.'))
                {
                    if ((t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null))
                    {
                    }
                    else if (!t1.Chars.IsLetter)
                    {
                    }
                    else
                    {
                        t1 = t1.Next;
                    }
                }
                if (toks.Termin.CanonicText == "LTL")
                {
                    return(null);
                }
                if (toks.BeginToken == t1)
                {
                    if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)
                    {
                        if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter)
                        {
                            return(null);
                        }
                    }
                }
                Pullenti.Ner.Core.NumberExType  ty  = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag;
                Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                {
                    RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph
                };
                if (ty != Pullenti.Ner.Core.NumberExType.Money)
                {
                    _correctExtTypes(res);
                    return(res);
                }
                return(_correctMoney(res, toks.BeginToken));
            }
            Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1);
            if (pfx != null)
            {
                pfx.BeginToken   = t;
                pfx.Value        = nt.Value;
                pfx.Typ          = nt.Typ;
                pfx.RealValue    = f;
                pfx.AltRealValue = altf;
                return(pfx);
            }
            if (t1.Next != null && ((t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction)))
            {
                if (t1.IsValue("НА", null))
                {
                }
                else
                {
                    Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next);
                    if (nn != null)
                    {
                        return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp)
                               {
                                   RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam
                               }
                    }
                    ;
                }
            }
            if (!t1.IsWhitespaceAfter && (t1.Next is Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken))
            {
                string term = (t1 as Pullenti.Ner.TextToken).Term;
                Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined;
                if (term == "СМХ" || term == "CMX")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Santimeter;
                }
                else if (term == "MX" || term == "МХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Meter;
                }
                else if (term == "MMX" || term == "ММХ")
                {
                    ty = Pullenti.Ner.Core.NumberExType.Millimeter;
                }
                if (ty != Pullenti.Ner.Core.NumberExType.Undefined)
                {
                    return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty)
                           {
                               RealValue = f, AltRealValue = altf, MultAfter = true
                           }
                }
                ;
            }
            return(null);
        }
예제 #14
0
        /// <summary>
        /// Tries to build a verb phrase from the run of consecutive text tokens starting at
        /// <paramref name="t"/>. Each token is classified as a verb (ty == 1), an adverb (ty == 2)
        /// or an adjective/participle-like word (ty == 3); the first token that fits none of these
        /// ends the run.
        /// </summary>
        /// <param name="t">First token to examine.</param>
        /// <param name="canBePartition">
        /// When false, a verb-class token that also looks like a participle (adjective class or a
        /// defined case) stops the scan. When true, a leading preposition is also allowed and
        /// adjectives are examined. (NOTE(review): "partition" presumably means "participle" - confirm.)
        /// </param>
        /// <param name="canBeAdjPartition">When true, adjectives with known derivate groups are accepted as ty == 3.</param>
        /// <param name="forceParse">When true, a token that is both noun and verb in the dictionary is taken as a verb.</param>
        /// <returns>
        /// The collected phrase, or null when no verb/participle item (ty 1 or 3) was found, or when
        /// the case of the first item does not agree with a leading preposition.
        /// </returns>
        static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse)
        {
            VerbPhraseToken res = null;

            Pullenti.Ner.Token t0         = t;
            // Pending negation particle; attached to the next accepted item.
            Pullenti.Ner.Token not        = null;
            // True once at least one ty == 1 or ty == 3 item has been added.
            bool             hasVerb      = false;
            // True when the most recent verb/participle item was a "to be" verb (tt.IsVerbBe).
            bool             verbBeBefore = false;
            // Preposition found at the very first token (only when canBePartition).
            PrepositionToken prep         = null;

            for (; t != null; t = t.Next)
            {
                // Only plain text tokens can be part of the phrase.
                if (!(t is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                bool isParticiple         = false;
                // "НЕ" (Russian "not"): remember it and attach it to the following item.
                if (tt.Term == "НЕ")
                {
                    not = t;
                    continue;
                }
                // Token classification: 0 - not accepted, 1 - verb, 2 - adverb, 3 - adjective/participle.
                int    ty   = 0;
                string norm = null;
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (tt.Term == "НЕТ")
                {
                    // "НЕТ" is taken as a verb, but only if no verb has been collected yet.
                    if (hasVerb)
                    {
                        break;
                    }
                    ty = 1;
                }
                else if (tt.Term == "ДОПУСТИМО")
                {
                    ty = 3;
                }
                else if (mc.IsAdverb && !mc.IsVerb)
                {
                    ty = 2;
                }
                else if (tt.IsPureVerb || tt.IsVerbBe)
                {
                    ty = 1;
                    if (hasVerb)
                    {
                        // A second verb is accepted only if it carries the "инф." attribute
                        // (presumably the infinitive marker - confirm) or directly follows a "to be" verb.
                        if (!tt.Morph.ContainsAttr("инф.", null))
                        {
                            if (verbBeBefore)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }
                else if (mc.IsVerb)
                {
                    // The dictionary lists the word as a verb together with other classes; decide
                    // whether the verb reading is acceptable here.
                    if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun)
                    {
                    }
                    else if (mc.IsNoun)
                    {
                        // Noun/verb homonyms: a few hard-coded words are always taken as verbs.
                        if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ")
                        {
                            ty = 1;
                        }
                        else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt))
                        {
                            ty = 1;
                        }
                        else if (mc.IsAdjective && canBePartition)
                        {
                            ty = 1;
                        }
                        else if (forceParse)
                        {
                            ty = 1;
                        }
                    }
                    else if (mc.IsProper)
                    {
                        if (tt.Chars.IsAllLower)
                        {
                            ty = 1;
                        }
                    }
                    else
                    {
                        ty = 1;
                    }
                    // An adjective class or a defined case marks the token as participle-like.
                    if (mc.IsAdjective)
                    {
                        isParticiple = true;
                    }
                    if (!tt.Morph.Case.IsUndefined)
                    {
                        isParticiple = true;
                    }
                    if (!canBePartition && isParticiple)
                    {
                        break;
                    }
                    if (hasVerb)
                    {
                        // After a verb, only an "инф." form or a non-participle may continue the phrase.
                        if (tt.Morph.ContainsAttr("инф.", null))
                        {
                        }
                        else if (!isParticiple)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О")) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null)
                {
                    // Adjective with the "к.ф." attribute (presumably "short form" - confirm) ending
                    // in "О" that does not start a noun phrase: treated as an adverb.
                    ty = 2;
                }
                else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition)))
                {
                    if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition)
                    {
                        break;
                    }
                    norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    // Normal forms ending in "ЙШИЙ" are never accepted here
                    // (NOTE(review): looks like the superlative suffix - confirm).
                    if (norm.EndsWith("ЙШИЙ"))
                    {
                    }
                    else
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null);
                        if (grs != null && grs.Count > 0)
                        {
                            // hPart: some derivate word is both adjective and verb and is spelled like norm.
                            // hVerb: some derivate word is a verb without being an adjective.
                            bool hVerb = false;
                            bool hPart = false;
                            foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                            {
                                foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                {
                                    if (w.Class.IsAdjective && w.Class.IsVerb)
                                    {
                                        if (w.Spelling == norm)
                                        {
                                            hPart = true;
                                        }
                                    }
                                    else if (w.Class.IsVerb)
                                    {
                                        hVerb = true;
                                    }
                                }
                            }
                            if (hPart && hVerb)
                            {
                                ty = 3;
                            }
                            else if (canBeAdjPartition)
                            {
                                ty = 3;
                            }
                            // Second attempt: strip the first group's prefix from the normal form and
                            // repeat the same derivate check on the remainder.
                            if (ty != 3 && !string.IsNullOrEmpty(grs[0].Prefix) && norm.StartsWith(grs[0].Prefix))
                            {
                                hVerb = false;
                                hPart = false;
                                string norm1 = norm.Substring(grs[0].Prefix.Length);
                                grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null);
                                if (grs != null && grs.Count > 0)
                                {
                                    foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                                    {
                                        foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                        {
                                            if (w.Class.IsAdjective && w.Class.IsVerb)
                                            {
                                                if (w.Spelling == norm1)
                                                {
                                                    hPart = true;
                                                }
                                            }
                                            else if (w.Class.IsVerb)
                                            {
                                                hVerb = true;
                                            }
                                        }
                                    }
                                }
                                if (hPart && hVerb)
                                {
                                    ty = 3;
                                }
                            }
                        }
                    }
                }
                // An unclassified first token may be a preposition introducing the phrase:
                // skip to its end and keep scanning.
                if (ty == 0 && t == t0 && canBePartition)
                {
                    prep = PrepositionHelper.TryParse(t);
                    if (prep != null)
                    {
                        t = prep.EndToken;
                        continue;
                    }
                }
                if (ty == 0)
                {
                    break;
                }
                // The token is accepted: create the phrase on first use and extend it to this token.
                if (res == null)
                {
                    res = new VerbPhraseToken(t0, t);
                }
                res.EndToken = t;
                VerbPhraseItemToken it = new VerbPhraseItemToken(t, t)
                {
                    Morph = new Pullenti.Ner.MorphCollection(t.Morph)
                };
                // A pending "НЕ" becomes part of this item and marks it as negated.
                if (not != null)
                {
                    it.BeginToken = not;
                    it.Not        = true;
                    not           = null;
                }
                it.IsAdverb = ty == 2;
                // For the first item after a leading preposition the cases must intersect,
                // otherwise the whole parse is rejected.
                if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0)
                {
                    if (((prep.NextCase & t.Morph.Case)).IsUndefined)
                    {
                        return(null);
                    }
                    // NOTE(review): presumably restricts the item's morphology to the cases the
                    // preposition governs - confirm against MorphCollection.RemoveItems.
                    it.Morph.RemoveItems(prep.NextCase);
                    res.Preposition = prep;
                }
                // Compute the normal form unless the adjective branch above already did.
                if (norm == null)
                {
                    norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    if (ty == 1 && !tt.Morph.Case.IsUndefined)
                    {
                        // Verb token with a defined case: ask the morphology service for the
                        // nominative singular masculine form, reusing Misc from the token's
                        // first word form.
                        Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm()
                        {
                            Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine
                        };
                        foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items)
                        {
                            if (mit is Pullenti.Morph.MorphWordForm)
                            {
                                mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc;
                                break;
                            }
                        }
                        // The term is prefixed with "КК" before the lookup and the first two
                        // characters of the result are dropped again.
                        // NOTE(review): the reason for this artificial prefix is not visible here - confirm.
                        string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + (t as Pullenti.Ner.TextToken).Term, mi);
                        if (nnn != null)
                        {
                            norm = nnn.Substring(2);
                        }
                    }
                }
                it.Normal = norm;
                res.Items.Add(it);
                // The first verb/participle item supplies the morphology of the whole phrase.
                if (!hasVerb && ((ty == 1 || ty == 3)))
                {
                    res.Morph = it.Morph;
                    hasVerb   = true;
                }
                // Track whether the latest verb/participle item was a "to be" verb; adverbs do not change it.
                if (ty == 1 || ty == 3)
                {
                    if (ty == 1 && tt.IsVerbBe)
                    {
                        verbBeBefore = true;
                    }
                    else
                    {
                        verbBeBefore = false;
                    }
                }
            }
            if (!hasVerb)
            {
                return(null);
            }
            // Drop trailing adverbs (the first item is never removed) and pull the end of the
            // phrase back to the last remaining item.
            for (int i = res.Items.Count - 1; i > 0; i--)
            {
                if (res.Items[i].IsAdverb)
                {
                    res.Items.RemoveAt(i);
                    res.EndToken = res.Items[i - 1].EndToken;
                }
                else
                {
                    break;
                }
            }
            return(res);
        }
예제 #15
0
        /// <summary>
        /// Looks inside the sentence that starts at <paramref name="t"/> for a number immediately
        /// followed by a noun phrase and, if found, wraps the sentence into a
        /// <see cref="DefinitionWithNumericToken"/>.
        /// </summary>
        /// <param name="t">Token that must be able to start a sentence.</param>
        /// <returns>
        /// The filled token, or null when <paramref name="t"/> cannot start a sentence, when the next
        /// sentence begins before a suitable number is found, or when the number has no integer value.
        /// </returns>
        public static DefinitionWithNumericToken TryParse(Pullenti.Ner.Token t)
        {
            if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return null;
            }

            // Search the sentence for the first "number + noun phrase" pair.
            Pullenti.Ner.NumberToken number = null;
            Pullenti.Ner.Core.NounPhraseToken phrase = null;
            for (Pullenti.Ner.Token cur = t; cur != null; cur = cur.Next)
            {
                bool isFirst = cur == t;
                if (!isFirst && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
                {
                    // Reached the next sentence without a match.
                    return null;
                }

                Pullenti.Ner.NumberToken candidate = cur as Pullenti.Ner.NumberToken;
                if (candidate == null)
                {
                    continue;
                }
                // The number may not open the sentence and must be close to what follows it.
                if (cur.WhitespacesAfterCount > 2 || isFirst)
                {
                    continue;
                }
                if (cur.Morph.Class.IsAdjective)
                {
                    continue;
                }

                Pullenti.Ner.Core.NounPhraseToken following = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (following != null)
                {
                    number = candidate;
                    phrase = following;
                    break;
                }
            }

            if (number == null || number.IntValue == null)
            {
                return null;
            }

            Pullenti.Ner.Core.GetTextAttr textAttrs = Pullenti.Ner.Core.GetTextAttr.KeepQuotes | Pullenti.Ner.Core.GetTextAttr.KeepRegister;

            DefinitionWithNumericToken res = new DefinitionWithNumericToken(t, phrase.EndToken);
            res.Number          = number.IntValue.Value;
            res.NumberBeginChar = number.BeginChar;
            res.NumberEndChar   = number.EndChar;
            res.Noun            = phrase.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            res.NounsGenetive   = phrase.GetMorphVariant(Pullenti.Morph.MorphCase.Genitive, true) ?? res.Noun;

            // Text = part before the number, then the "number + noun phrase" fragment.
            string fullText = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, number.Previous, textAttrs);
            if (number.IsWhitespaceBefore)
            {
                fullText += " ";
            }
            string numberPart = Pullenti.Ner.Core.MiscHelper.GetTextValue(number, phrase.EndToken, textAttrs);
            res.NumberSubstring = numberPart;
            fullText += numberPart;

            // Extend the token up to (not including) the start of the next sentence.
            Pullenti.Ner.Token tail = phrase.EndToken;
            while (tail != null && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tail))
            {
                res.EndToken = tail;
                tail = tail.Next;
            }

            // Append whatever follows the noun phrase inside the sentence.
            if (res.EndToken != phrase.EndToken)
            {
                if (phrase.IsWhitespaceAfter)
                {
                    fullText += " ";
                }
                fullText += Pullenti.Ner.Core.MiscHelper.GetTextValue(phrase.EndToken.Next, res.EndToken, textAttrs);
            }
            res.Text = fullText;
            return res;
        }
예제 #16
0
        /// <summary>
        /// Parses up to 10 weapon items starting at <paramref name="begin"/> and returns the first
        /// referent token that can be attached from them.
        /// </summary>
        /// <param name="begin">Token at which item parsing starts.</param>
        /// <param name="end">Not used by this implementation.</param>
        /// <returns>The first attached referent token, or null when nothing was parsed or attached.</returns>
        public override Pullenti.Ner.ReferentToken ProcessReferent(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
        {
            List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(begin, 10);
            if (items == null)
            {
                return null;
            }

            List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, true);
            bool nothingAttached = attached == null || attached.Count == 0;
            return nothingAttached ? null : attached[0];
        }
예제 #17
0
 /// <summary>
 /// Builds the semantic object for the head noun of a noun phrase, adds it to the graph and
 /// attaches the phrase's adjectives and nested noun phrase to it through Detail links.
 /// </summary>
 /// <param name="gr">Graph that receives the created objects and links.</param>
 /// <param name="npt">Noun phrase to convert.</param>
 /// <returns>
 /// The created object, or null when the head token is a referent token that carries no referent.
 /// </returns>
 public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
 {
     Pullenti.Ner.Token head = npt.Noun.BeginToken;
     Pullenti.Semantic.SemObject obj = new Pullenti.Semantic.SemObject(gr);
     obj.Tokens.Add(npt.Noun);

     // Object type follows the morphological class of the head noun.
     if (npt.Noun.Morph.Class.IsPersonalPronoun)
     {
         obj.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
     }
     else if (npt.Noun.Morph.Class.IsPronoun)
     {
         obj.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
     }
     else
     {
         obj.Typ = Pullenti.Semantic.SemObjectType.Noun;
     }

     if (head != npt.Noun.EndToken)
     {
         // Head noun spans several tokens: take the morphology from the phrase as a whole.
         obj.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         obj.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         obj.Morph.Class      = Pullenti.Morph.MorphClass.Noun;
         obj.Morph.Number     = npt.Morph.Number;
         obj.Morph.Gender     = npt.Morph.Gender;
         obj.Morph.Case       = npt.Morph.Case;
     }
     else if (head is Pullenti.Ner.TextToken)
     {
         // Use the first word form of the token that agrees with the phrase morphology.
         foreach (Pullenti.Morph.MorphBaseInfo form in head.Morph.Items)
         {
             if (!form.CheckAccord(npt.Morph, false, false))
             {
                 continue;
             }
             Pullenti.Morph.MorphWordForm wordForm = form as Pullenti.Morph.MorphWordForm;
             if (wordForm == null)
             {
                 continue;
             }
             _setMorph(obj, wordForm);
             break;
         }
         // No form supplied a normal case: fall back to the token's own normal text.
         if (obj.Morph.NormalCase == null)
         {
             obj.Morph.NormalCase = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
             obj.Morph.NormalFull = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
         }
         List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(obj.Morph.NormalFull, true, null);
         if (groups != null && groups.Count > 0)
         {
             obj.Concept = groups[0];
         }
     }
     else if (head is Pullenti.Ner.ReferentToken)
     {
         Pullenti.Ner.Referent referent = (head as Pullenti.Ner.ReferentToken).Referent;
         if (referent == null)
         {
             return null;
         }
         string referentText = referent.ToString();
         obj.Morph.NormalCase = referentText;
         obj.Morph.NormalFull = referentText;
         obj.Concept          = referent;
     }
     else if (head is Pullenti.Ner.NumberToken)
     {
         Pullenti.Ner.NumberToken number = head as Pullenti.Ner.NumberToken;
         obj.Morph.Gender = head.Morph.Gender;
         obj.Morph.Number = head.Morph.Number;
         if (number.IntValue == null)
         {
             // No integer value: keep the upper-cased source text as both normal forms.
             string upperSource = head.GetSourceText().ToUpper();
             obj.Morph.NormalCase = upperSource;
             obj.Morph.NormalFull = upperSource;
         }
         else
         {
             obj.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, head.Morph.Gender, head.Morph.Number);
             obj.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
         }
     }

     head.Tag = obj;

     // Adjectives become Detail links; with MultiNouns only the first adjective is used.
     foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives)
     {
         if (npt.MultiNouns && adjective != npt.Adjectives[0])
         {
             break;
         }
         Pullenti.Semantic.SemObject adjectiveObj = CreateNptAdj(gr, npt, adjective);
         if (adjectiveObj != null)
         {
             gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, obj, adjectiveObj, "какой", false, null);
         }
     }

     // A nested noun phrase is converted recursively and linked the same way.
     if (npt.InternalNoun != null)
     {
         Pullenti.Semantic.SemObject innerObj = CreateNounGroup(gr, npt.InternalNoun);
         if (innerObj != null)
         {
             gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, obj, innerObj, null, false, null);
         }
     }

     gr.Objects.Add(obj);
     return obj;
 }
예제 #18
0
        /// <summary>
        /// Builds one weapon referent from a sequence of parsed weapon items and wraps it in a
        /// referent token spanning from the first item to the last item that was consumed.
        /// </summary>
        /// <remarks>
        /// Items are consumed left to right; consumption stops at the first item whose attribute
        /// is already filled with a different value (type, brand, model) or filled at all
        /// (name, number, caliber, date). When the sequence has no non-internal noun item, type
        /// and brand/model slots are inherited from a weapon referent found earlier in the text.
        /// </remarks>
        /// <param name="its">Parsed weapon items, in text order.</param>
        /// <param name="attach">Not read by this method.</param>
        /// <returns>
        /// A list with a single referent token, or null when the sequence is empty, consists of a
        /// lone noun item, consumed no item, or lacks sufficient evidence (no reliable noun, no
        /// model/undoubted brand, and no weapon-related word among the preceding tokens).
        /// </returns>
        List <Pullenti.Ner.ReferentToken> TryAttach(List <Pullenti.Ner.Weapon.Internal.WeaponItemToken> its, bool attach)
        {
            // Without at least one item there is no begin token to anchor the result on.
            if (its == null || its.Count == 0)
            {
                return(null);
            }
            WeaponReferent tr = new WeaponReferent();

            Pullenti.Ner.Token t1 = null;
            Pullenti.Ner.Weapon.Internal.WeaponItemToken noun  = null;
            Pullenti.Ner.Weapon.Internal.WeaponItemToken brand = null;
            Pullenti.Ner.Weapon.Internal.WeaponItemToken model = null;
            for (int i = 0; i < its.Count; i++)
            {
                Pullenti.Ner.Weapon.Internal.WeaponItemToken it = its[i];
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Noun)
                {
                    // A noun on its own is never enough.
                    if (its.Count == 1)
                    {
                        return(null);
                    }
                    // A second, different type value ends the sequence.
                    if (tr.FindSlot(WeaponReferent.ATTR_TYPE, null, true) != null && tr.FindSlot(WeaponReferent.ATTR_TYPE, it.Value, true) == null)
                    {
                        break;
                    }
                    if (!it.IsInternal)
                    {
                        noun = it;
                    }
                    tr.AddSlot(WeaponReferent.ATTR_TYPE, it.Value, false, 0);
                    if (it.AltValue != null)
                    {
                        tr.AddSlot(WeaponReferent.ATTR_TYPE, it.AltValue, false, 0);
                    }
                    t1 = it.EndToken;
                    continue;
                }
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_BRAND, null, true) != null && tr.FindSlot(WeaponReferent.ATTR_BRAND, it.Value, true) == null)
                    {
                        break;
                    }
                    // A non-internal brand removes the doubt from the noun seen so far.
                    if (!it.IsInternal && noun != null && noun.IsDoubt)
                    {
                        noun.IsDoubt = false;
                    }
                    brand = it;
                    tr.AddSlot(WeaponReferent.ATTR_BRAND, it.Value, false, 0);
                    t1 = it.EndToken;
                    continue;
                }
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Model)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_MODEL, null, true) != null && tr.FindSlot(WeaponReferent.ATTR_MODEL, it.Value, true) == null)
                    {
                        break;
                    }
                    model = it;
                    tr.AddSlot(WeaponReferent.ATTR_MODEL, it.Value, false, 0);
                    if (it.AltValue != null)
                    {
                        tr.AddSlot(WeaponReferent.ATTR_MODEL, it.AltValue, false, 0);
                    }
                    t1 = it.EndToken;
                    continue;
                }
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Name)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_NAME, null, true) != null)
                    {
                        break;
                    }
                    tr.AddSlot(WeaponReferent.ATTR_NAME, it.Value, false, 0);
                    if (it.AltValue != null)
                    {
                        tr.AddSlot(WeaponReferent.ATTR_NAME, it.AltValue, false, 0);
                    }
                    t1 = it.EndToken;
                    continue;
                }
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Number)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_NUMBER, null, true) != null)
                    {
                        break;
                    }
                    tr.AddSlot(WeaponReferent.ATTR_NUMBER, it.Value, false, 0);
                    if (it.AltValue != null)
                    {
                        tr.AddSlot(WeaponReferent.ATTR_NUMBER, it.AltValue, false, 0);
                    }
                    t1 = it.EndToken;
                    continue;
                }
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Caliber)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_CALIBER, null, true) != null)
                    {
                        break;
                    }
                    tr.AddSlot(WeaponReferent.ATTR_CALIBER, it.Value, false, 0);
                    if (it.AltValue != null)
                    {
                        tr.AddSlot(WeaponReferent.ATTR_CALIBER, it.AltValue, false, 0);
                    }
                    t1 = it.EndToken;
                    continue;
                }
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Developer)
                {
                    tr.AddSlot(WeaponReferent.ATTR_REF, it.Ref, false, 0);
                    t1 = it.EndToken;
                    continue;
                }
                if (it.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Date)
                {
                    if (tr.FindSlot(WeaponReferent.ATTR_DATE, null, true) != null)
                    {
                        break;
                    }
                    tr.AddSlot(WeaponReferent.ATTR_DATE, it.Ref, true, 0);
                    t1 = it.EndToken;
                    continue;
                }
            }
            bool hasGoodNoun = noun != null && !noun.IsDoubt;

            if (noun == null)
            {
                // Walk back over non-letter text tokens and conjunctions; a weapon referent
                // reached this way lends its type and, where missing, brand/model.
                for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null; tt = tt.Previous)
                {
                    WeaponReferent prev = tt.GetReferent() as WeaponReferent;
                    if (prev != null)
                    {
                        _inheritSlotsFromPrev(tr, prev, true);
                        hasGoodNoun = true;
                        break;
                    }
                    bool skippable = (tt is Pullenti.Ner.TextToken) && ((!tt.Chars.IsLetter || tt.Morph.Class.IsConjunction));
                    if (!skippable)
                    {
                        break;
                    }
                }
            }
            if (noun == null && model != null)
            {
                // Look further back (up to 100 tokens) for a weapon referent with the same
                // model value; any token kind may lie in between.
                int cou = 0;
                for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null && (cou < 100); tt = tt.Previous, cou++)
                {
                    WeaponReferent prev = tt.GetReferent() as WeaponReferent;
                    if (prev == null || prev.FindSlot(WeaponReferent.ATTR_MODEL, model.Value, true) == null)
                    {
                        continue;
                    }
                    _inheritSlotsFromPrev(tr, prev, false);
                    hasGoodNoun = true;
                    break;
                }
            }
            if (!hasGoodNoun)
            {
                if (noun != null)
                {
                    // A doubtful noun needs a model or an undoubted brand to back it up.
                    if (model == null && (brand == null || brand.IsDoubt))
                    {
                        return(null);
                    }
                }
                else
                {
                    if (model == null)
                    {
                        return(null);
                    }
                    // A bare model is accepted only if one of these words occurs within the
                    // 20 tokens before the end of the sequence (t1 is set because a model was consumed).
                    int  cou = 0;
                    bool ok  = false;
                    for (Pullenti.Ner.Token tt = t1.Previous; tt != null && (cou < 20); tt = tt.Previous, cou++)
                    {
                        if ((tt.IsValue("ОРУЖИЕ", null) || tt.IsValue("ВООРУЖЕНИЕ", null) || tt.IsValue("ВЫСТРЕЛ", null)) || tt.IsValue("ВЫСТРЕЛИТЬ", null))
                        {
                            ok = true;
                            break;
                        }
                    }
                    if (!ok)
                    {
                        return(null);
                    }
                }
            }
            // No item was consumed (none had a handled type), so there is no end token to span to.
            if (t1 == null)
            {
                return(null);
            }
            List <Pullenti.Ner.ReferentToken> res = new List <Pullenti.Ner.ReferentToken>();

            res.Add(new Pullenti.Ner.ReferentToken(tr, its[0].BeginToken, t1));
            return(res);
        }

        /// <summary>
        /// Copies every type slot of <paramref name="prev"/> into <paramref name="tr"/>, plus the
        /// brand slots (and model slots when <paramref name="withModel"/> is true) for attributes
        /// that <paramref name="tr"/> does not have yet.
        /// </summary>
        static void _inheritSlotsFromPrev(WeaponReferent tr, WeaponReferent prev, bool withModel)
        {
            // Brand/model slots are collected first and added afterwards, so the "tr has no such
            // attribute" test is made against tr's state before anything was inherited and
            // several values of one attribute can all be carried over.
            List <Pullenti.Ner.Slot> deferred = new List <Pullenti.Ner.Slot>();
            foreach (Pullenti.Ner.Slot s in prev.Slots)
            {
                if (s.TypeName == WeaponReferent.ATTR_TYPE)
                {
                    tr.AddSlot(s.TypeName, s.Value, false, 0);
                }
                else if (s.TypeName == WeaponReferent.ATTR_BRAND || (withModel && s.TypeName == WeaponReferent.ATTR_MODEL))
                {
                    if (tr.FindSlot(s.TypeName, null, true) == null)
                    {
                        deferred.Add(s);
                    }
                }
            }
            foreach (Pullenti.Ner.Slot s in deferred)
            {
                tr.AddSlot(s.TypeName, s.Value, false, 0);
            }
        }
예제 #19
0
        /// <summary>
        /// Splits the token chain starting at <paramref name="t0"/> into lines and returns them
        /// until a limit is reached or a keywords title / typed block title is met.
        /// </summary>
        /// <param name="t0">First token to process; null yields an empty list.</param>
        /// <param name="maxLines">Collection stops once this many lines have been gathered.</param>
        /// <param name="maxChars">Collection stops once the summed <c>CharsCount</c> of the lines reaches this value.</param>
        /// <param name="maxEndChar">When positive, a line whose first token begins after this position ends the collection.</param>
        /// <returns>The collected lines, possibly empty; never null.</returns>
        public static List <Line> Parse(Pullenti.Ner.Token t0, int maxLines, int maxChars, int maxEndChar)
        {
            List <Line> res        = new List <Line>();
            int         totalChars = 0;

            for (Pullenti.Ner.Token t = t0; t != null; t = t.Next)
            {
                if (maxEndChar > 0 && t.BeginChar > maxEndChar)
                {
                    break;
                }
                // Find the last token of the line. t is non-null here and the loop only advances
                // while a next token exists, so t1 is never null and t1.Next is never null
                // inside the body (the former null checks on them were unreachable).
                Pullenti.Ner.Token t1;
                for (t1 = t; t1.Next != null; t1 = t1.Next)
                {
                    // A newline ends the line only if the following token can start a sentence.
                    if (t1.IsNewlineAfter && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t1.Next))
                    {
                        break;
                    }
                    // A person referent at the very start of a line forms a line of its own
                    // when it is followed by a letter text token that is not all lower-case.
                    if (t1 == t && t.IsNewlineBefore && (t.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                    {
                        if ((t1.Next is Pullenti.Ner.TextToken) && t1.Next.Chars.IsLetter && !t1.Next.Chars.IsAllLower)
                        {
                            break;
                        }
                    }
                }
                TitleItemToken tit = TitleItemToken.TryAttach(t);
                if (tit != null && tit.Typ == TitleItemToken.Types.Keywords)
                {
                    break;
                }
                Pullenti.Ner.Core.Internal.BlockTitleToken bl = Pullenti.Ner.Core.Internal.BlockTitleToken.TryAttach(t, false, null);
                if (bl != null && bl.Typ != Pullenti.Ner.Core.Internal.BlkTyps.Undefined)
                {
                    break;
                }
                Line l = new Line(t, t1);
                res.Add(l);
                totalChars += l.CharsCount;
                if (res.Count >= maxLines || totalChars >= maxChars)
                {
                    break;
                }
                // Continue after the line's last token (the loop header moves to t1.Next).
                t = t1;
            }
            return(res);
        }
예제 #20
0
        /// <summary>
        /// Runs weapon recognition over the whole token chain in two passes.
        /// </summary>
        /// <remarks>
        /// Pass 1 parses weapon item sequences, registers the resulting referents, embeds their
        /// tokens and indexes each referent by its model values (also prefixed with the brand)
        /// and by its names. Pass 2 re-scans the text and embeds further referent tokens where
        /// an indexed name stands inside brackets or an indexed model/name occurs on its own.
        /// </remarks>
        /// <param name="kit">Analysis container supplying the token chain and analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData     ad     = kit.GetAnalyzerData(this);
            Pullenti.Ner.Core.TerminCollection models = new Pullenti.Ner.Core.TerminCollection();
            Dictionary <string, List <Pullenti.Ner.Referent> > objsByModel = new Dictionary <string, List <Pullenti.Ner.Referent> >();

            Pullenti.Ner.Core.TerminCollection objByNames = new Pullenti.Ner.Core.TerminCollection();
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                List <Pullenti.Ner.Weapon.Internal.WeaponItemToken> its = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(t, 10);
                if (its == null)
                {
                    continue;
                }
                List <Pullenti.Ner.ReferentToken> rts = this.TryAttach(its, false);
                if (rts != null)
                {
                    foreach (Pullenti.Ner.ReferentToken rt in rts)
                    {
                        rt.Referent = ad.RegisterReferent(rt.Referent);
                        kit.EmbedToken(rt);
                        t = rt;
                        foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
                        {
                            if (s.TypeName == WeaponReferent.ATTR_MODEL)
                            {
                                // A missing or empty model value cannot be indexed (and mod[0]
                                // below would throw on an empty string), so skip the slot.
                                string mod = (s.Value == null ? null : s.Value.ToString());
                                if (string.IsNullOrEmpty(mod))
                                {
                                    continue;
                                }
                                // k == 0 indexes the bare model, k == 1 the "brand model" combination.
                                for (int k = 0; k < 2; k++)
                                {
                                    // Keys starting with a digit are not indexed.
                                    if (!char.IsDigit(mod[0]))
                                    {
                                        List <Pullenti.Ner.Referent> li;
                                        if (!objsByModel.TryGetValue(mod, out li))
                                        {
                                            objsByModel.Add(mod, (li = new List <Pullenti.Ner.Referent>()));
                                        }
                                        if (!li.Contains(rt.Referent))
                                        {
                                            li.Add(rt.Referent);
                                        }
                                        models.AddString(mod, li, null, false);
                                    }
                                    if (k > 0)
                                    {
                                        break;
                                    }
                                    string brand = rt.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                                    if (brand == null)
                                    {
                                        break;
                                    }
                                    mod = string.Format("{0} {1}", brand, mod);
                                }
                            }
                            else if (s.TypeName == WeaponReferent.ATTR_NAME)
                            {
                                // A name slot without a value has nothing to index.
                                if (s.Value == null)
                                {
                                    continue;
                                }
                                objByNames.Add(new Pullenti.Ner.Core.Termin(s.Value.ToString())
                                {
                                    Tag = rt.Referent
                                });
                            }
                        }
                    }
                }
            }
            // Nothing was indexed, so the second pass has nothing to look for.
            if (objsByModel.Count == 0 && objByNames.Termins.Count == 0)
            {
                return;
            }
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                // A known name that fills a bracket sequence becomes a referent token spanning the brackets.
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
                if (br != null)
                {
                    Pullenti.Ner.Core.TerminToken toks = objByNames.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks != null && toks.EndToken.Next == br.EndToken)
                    {
                        Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(toks.Termin.Tag as Pullenti.Ner.Referent, br.BeginToken, br.EndToken);
                        kit.EmbedToken(rt0);
                        t = rt0;
                        continue;
                    }
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (!t.Chars.IsLetter)
                {
                    continue;
                }
                // Models are tried first; names only for tokens that are not all lower-case.
                Pullenti.Ner.Core.TerminToken tok = models.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok == null)
                {
                    if (!t.Chars.IsAllLower)
                    {
                        tok = objByNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                    }
                    if (tok == null)
                    {
                        continue;
                    }
                }
                // Without whitespace after the match, only one of ",.)" or a bracket may follow.
                if (!tok.IsWhitespaceAfter)
                {
                    if (tok.EndToken.Next == null || !tok.EndToken.Next.IsCharOf(",.)"))
                    {
                        if (!Pullenti.Ner.Core.BracketHelper.IsBracket(tok.EndToken.Next, false))
                        {
                            continue;
                        }
                    }
                }
                // A model key resolves only when exactly one referent is registered under it;
                // name termins carry the referent directly in their tag.
                Pullenti.Ner.Referent        tr = null;
                List <Pullenti.Ner.Referent> li = tok.Termin.Tag as List <Pullenti.Ner.Referent>;
                if (li != null && li.Count == 1)
                {
                    tr = li[0];
                }
                else
                {
                    tr = tok.Termin.Tag as Pullenti.Ner.Referent;
                }
                if (tr != null)
                {
                    // A brand item right before the match is added to the referent and included in the span.
                    Pullenti.Ner.Weapon.Internal.WeaponItemToken tit = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(tok.BeginToken.Previous, null, false, true);
                    if (tit != null && tit.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
                    {
                        tr.AddSlot(WeaponReferent.ATTR_BRAND, tit.Value, false, 0);
                        tok.BeginToken = tit.BeginToken;
                    }
                    Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(tr, tok.BeginToken, tok.EndToken);
                    kit.EmbedToken(rt0);
                    t = rt0;
                    continue;
                }
            }
        }
예제 #21
0
        /// <summary>
        /// Tokenizes the source text and prepares the token chain for analysis.
        /// </summary>
        /// <remarks>
        /// Steps, in order: morphological tokenization; optional replacement of tokens through
        /// the source's correction dictionary; chaining of the tokens; the optional clean-up and
        /// correction passes selected by flags on <paramref name="sofa"/>; letter merging and
        /// base-language detection; optional number-token creation. Unless
        /// <paramref name="onlyTokenizing"/> is set, morphology of out-of-dictionary Cyrillic
        /// words is then narrowed using a nearby word with the same ending, and statistics are built.
        /// </remarks>
        /// <param name="sofa">Text source; when null the constructor returns immediately and leaves the kit empty.</param>
        /// <param name="onlyTokenizing">When true, stop after the token chain (and number tokens) is built.</param>
        /// <param name="lang">Language passed to the morphology service.</param>
        /// <param name="progress">Progress handler passed to the initial morphology run.</param>
        public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
        {
            if (sofa == null)
            {
                return;
            }
            m_Sofa    = sofa;
            StartDate = DateTime.Now;
            List <Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);

            Pullenti.Ner.Token t0 = null;
            if (tokens != null)
            {
                for (int ii = 0; ii < tokens.Count; ii++)
                {
                    Pullenti.Morph.MorphToken mt = tokens[ii];
                    Pullenti.Ner.TextToken    tt = new Pullenti.Ner.TextToken(mt, this);
                    if (sofa.CorrectionDict != null)
                    {
                        string corw;
                        if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                        {
                            // The replacement is applied only if it tokenizes into exactly one token.
                            List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                            if (ccc != null && ccc.Count == 1)
                            {
                                // The corrected token keeps the original position, chars info and
                                // original term; the original source text is recorded per token.
                                Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar)
                                {
                                    Term0 = tt.Term
                                };
                                tt1.Chars = tt.Chars;
                                tt        = tt1;
                                if (CorrectedTokens == null)
                                {
                                    CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
                                }
                                CorrectedTokens.Add(tt, tt.GetSourceText());
                            }
                        }
                    }
                    // Append the token to the chain.
                    if (t0 == null)
                    {
                        FirstToken = tt;
                    }
                    else
                    {
                        t0.Next = tt;
                    }
                    t0 = tt;
                }
            }
            if (sofa.ClearDust)
            {
                this.ClearDust();
            }
            if (sofa.DoWordsMergingByMorph)
            {
                this.CorrectWordsByMerging(lang);
            }
            if (sofa.DoWordCorrectionByMorph)
            {
                this.CorrectWordsByMorph(lang);
            }
            this.MergeLetters();
            this.DefineBaseLanguage();
            if (sofa.CreateNumberTokens)
            {
                for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
                {
                    Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
                    if (nt == null)
                    {
                        continue;
                    }
                    this.EmbedToken(nt);
                    t = nt;
                }
            }
            if (onlyTokenizing)
            {
                return;
            }
            for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
            {
                if (t.Morph.Class.IsPreposition)
                {
                    continue;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsUndefined && t.Chars.IsCyrillicLetter && t.LengthChar > 4)
                {
                    // Out-of-dictionary Cyrillic word: look for a dictionary word next to it
                    // (previous side first, then next side) ending in the same two characters.
                    string             tail = sofa.Text.Substring(t.EndChar - 1, 2);
                    Pullenti.Ner.Token tte  = _findSameTailNeighbour(t, sofa.Text, tail, false);
                    if (tte == null)
                    {
                        tte = _findSameTailNeighbour(t, sofa.Text, tail, true);
                    }
                    if (tte != null)
                    {
                        t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
                    }
                }
            }
            this.CreateStatistics();
        }

        /// <summary>
        /// Returns the token next to <paramref name="t"/> in the given direction whose last two
        /// source characters equal <paramref name="tail"/>, or null if it does not qualify.
        /// </summary>
        /// <remarks>
        /// One intervening token flagged <c>IsCommaAnd</c>, or classed as a preposition or
        /// conjunction, is skipped. The candidate must have a defined dictionary class, share at
        /// least one morph-class bit with <paramref name="t"/> and be longer than 4 characters.
        /// </remarks>
        static Pullenti.Ner.Token _findSameTailNeighbour(Pullenti.Ner.Token t, string text, string tail, bool forward)
        {
            Pullenti.Ner.Token tt = (forward ? t.Next : t.Previous);
            if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)))
            {
                tt = (forward ? tt.Next : tt.Previous);
            }
            if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined && ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4)
            {
                if (text.Substring(tt.EndChar - 1, 2) == tail)
                {
                    return(tt);
                }
            }
            return(null);
        }
예제 #22
0
        static BookLinkToken _tryParse(Pullenti.Ner.Token t, int lev)
        {
            if (t == null || lev > 3)
            {
                return(null);
            }
            if (t.IsChar('['))
            {
                BookLinkToken re = _tryParse(t.Next, lev + 1);
                if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
                {
                    re.BeginToken = t;
                    re.EndToken   = re.EndToken.Next;
                    return(re);
                }
                if (re != null && re.EndToken.IsChar(']'))
                {
                    re.BeginToken = t;
                    return(re);
                }
                if (re != null)
                {
                    if (re.Typ == BookLinkTyp.Sostavitel || re.Typ == BookLinkTyp.Editors)
                    {
                        return(re);
                    }
                }
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null)
                {
                    if ((br.EndToken.Previous is Pullenti.Ner.NumberToken) && (br.LengthChar < 30))
                    {
                        return new BookLinkToken(t, br.EndToken)
                               {
                                   Typ = BookLinkTyp.Number, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No)
                               }
                    }
                    ;
                }
            }
            Pullenti.Ner.Token t0 = t;
            if (t is Pullenti.Ner.ReferentToken)
            {
                if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent)
                {
                    return(TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined));
                }
                if (t.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                {
                    return new BookLinkToken(t, t)
                           {
                               Typ = BookLinkTyp.Geo, Ref = t.GetReferent()
                           }
                }
                ;
                if (t.GetReferent() is Pullenti.Ner.Date.DateReferent)
                {
                    Pullenti.Ner.Date.DateReferent dr = t.GetReferent() as Pullenti.Ner.Date.DateReferent;

                    if (dr.Slots.Count == 1 && dr.Year > 0)
                    {
                        return new BookLinkToken(t, t)
                               {
                                   Typ = BookLinkTyp.Year, Value = dr.Year.ToString()
                               }
                    }
                    ;
                    if (dr.Year > 0 && t.Previous != null && t.Previous.IsComma)
                    {
                        return new BookLinkToken(t, t)
                               {
                                   Typ = BookLinkTyp.Year, Value = dr.Year.ToString()
                               }
                    }
                    ;
                }
                if (t.GetReferent() is Pullenti.Ner.Org.OrganizationReferent)
                {
                    Pullenti.Ner.Org.OrganizationReferent org = t.GetReferent() as Pullenti.Ner.Org.OrganizationReferent;
                    if (org.Kind == Pullenti.Ner.Org.OrganizationKind.Press)
                    {
                        return new BookLinkToken(t, t)
                               {
                                   Typ = BookLinkTyp.Press, Ref = org
                               }
                    }
                    ;
                }
                if (t.GetReferent() is Pullenti.Ner.Uri.UriReferent)
                {
                    Pullenti.Ner.Uri.UriReferent uri = t.GetReferent() as Pullenti.Ner.Uri.UriReferent;
                    if ((uri.Scheme == "http" || uri.Scheme == "https" || uri.Scheme == "ftp") || uri.Scheme == null)
                    {
                        return new BookLinkToken(t, t)
                               {
                                   Typ = BookLinkTyp.Url, Ref = uri
                               }
                    }
                    ;
                }
            }
            Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                BookLinkTyp typ = (BookLinkTyp)tok.Termin.Tag;
                bool        ok  = true;
                if (typ == BookLinkTyp.Type || typ == BookLinkTyp.NameTail || typ == BookLinkTyp.ElectronRes)
                {
                    if (t.Previous != null && ((t.Previous.IsCharOf(".:[") || t.Previous.IsHiphen)))
                    {
                    }
                    else
                    {
                        ok = false;
                    }
                }
                if (ok)
                {
                    return new BookLinkToken(t, tok.EndToken)
                           {
                               Typ = typ, Value = tok.Termin.CanonicText
                           }
                }
                ;
                if (typ == BookLinkTyp.ElectronRes)
                {
                    for (Pullenti.Ner.Token tt = tok.EndToken.Next; tt != null; tt = tt.Next)
                    {
                        if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter)
                        {
                            continue;
                        }
                        if (tt.GetReferent() is Pullenti.Ner.Uri.UriReferent)
                        {
                            return new BookLinkToken(t, tt)
                                   {
                                       Typ = BookLinkTyp.ElectronRes, Ref = tt.GetReferent()
                                   }
                        }
                        ;
                        break;
                    }
                }
            }
            if (t.IsChar('/'))
            {
                BookLinkToken res = new BookLinkToken(t, t)
                {
                    Typ = BookLinkTyp.Delimeter, Value = "/"
                };
                if (t.Next != null && t.Next.IsChar('/'))
                {
                    res.EndToken = t.Next;

                    res.Value = "//";
                }
                if (!t.IsWhitespaceBefore && !t.IsWhitespaceAfter)
                {
                    int  coo = 3;
                    bool no  = true;
                    for (Pullenti.Ner.Token tt = t.Next; tt != null && coo > 0; tt = tt.Next, coo--)
                    {
                        BookLinkToken vvv = TryParse(tt, lev + 1);
                        if (vvv != null && vvv.Typ != BookLinkTyp.Number)
                        {
                            no = false;

                            break;
                        }
                    }
                    if (no)
                    {
                        return(null);
                    }
                }
                return(res);
            }
            if ((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit)
            {
                BookLinkToken res = new BookLinkToken(t, t)
                {
                    Typ = BookLinkTyp.Number, Value = (t as Pullenti.Ner.NumberToken).Value.ToString()
                };
                int val = (t as Pullenti.Ner.NumberToken).IntValue.Value;
                if (val >= 1930 && (val < 2030))
                {
                    res.Typ = BookLinkTyp.Year;
                }
                if (t.Next != null && t.Next.IsChar('.'))
                {
                    res.EndToken = t.Next;
                }
                else if ((t.Next != null && t.Next.LengthChar == 1 && !t.Next.Chars.IsLetter) && t.Next.IsWhitespaceAfter)
                {
                    res.EndToken = t.Next;
                }
                else if (t.Next is Pullenti.Ner.TextToken)
                {
                    string term = (t.Next as Pullenti.Ner.TextToken).Term;
                    if (((term == "СТР" || term == "C" || term == "С") || term == "P" || term == "S") || term == "PAGES")
                    {
                        res.EndToken = t.Next;
                        res.Typ      = BookLinkTyp.Pages;
                        res.Value    = (t as Pullenti.Ner.NumberToken).Value.ToString();
                    }
                }
                return(res);
            }
            if (t is Pullenti.Ner.TextToken)
            {
                string term = (t as Pullenti.Ner.TextToken).Term;
                if (((((((term == "СТР" || term == "C" || term == "С") || term == "ТОМ" || term == "T") || term == "Т" || term == "P") || term == "PP" || term == "V") || term == "VOL" || term == "S") || term == "СТОР" || t.IsValue("PAGE", null)) || t.IsValue("СТРАНИЦА", "СТОРІНКА"))
                {
                    Pullenti.Ner.Token tt = t.Next;
                    while (tt != null)
                    {
                        if (tt.IsCharOf(".:~"))
                        {
                            tt = tt.Next;
                        }
                        else
                        {
                            break;
                        }
                    }
                    if (tt is Pullenti.Ner.NumberToken)
                    {
                        BookLinkToken res = new BookLinkToken(t, tt)
                        {
                            Typ = BookLinkTyp.PageRange
                        };
                        Pullenti.Ner.Token tt0 = tt;
                        Pullenti.Ner.Token tt1 = tt;
                        for (tt = tt.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsCharOf(",") || tt.IsHiphen)
                            {
                                if (tt.Next is Pullenti.Ner.NumberToken)
                                {
                                    tt           = tt.Next;
                                    res.EndToken = tt;
                                    tt1          = tt;
                                    continue;
                                }
                            }
                            break;
                        }
                        res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt0, tt1, Pullenti.Ner.Core.GetTextAttr.No);
                        return(res);
                    }
                }
                if ((term == "M" || term == "М" || term == "СПБ") || term == "K" || term == "К")
                {
                    if (t.Next != null && t.Next.IsCharOf(":;"))
                    {
                        BookLinkToken re = new BookLinkToken(t, t.Next)
                        {
                            Typ = BookLinkTyp.Geo
                        };
                        return(re);
                    }
                    if (t.Next != null && t.Next.IsCharOf("."))
                    {
                        BookLinkToken res = new BookLinkToken(t, t.Next)
                        {
                            Typ = BookLinkTyp.Geo
                        };
                        if (t.Next.Next != null && t.Next.Next.IsCharOf(":;"))
                        {
                            res.EndToken = t.Next.Next;
                        }
                        else if (t.Next.Next != null && (t.Next.Next is Pullenti.Ner.NumberToken))
                        {
                        }
                        else if (t.Next.Next != null && t.Next.Next.IsComma && (t.Next.Next.Next is Pullenti.Ner.NumberToken))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                        return(res);
                    }
                }
                if (term == "ПЕР" || term == "ПЕРЕВ" || term == "ПЕРЕВОД")
                {
                    Pullenti.Ner.Token tt = t;
                    if (tt.Next != null && tt.Next.IsChar('.'))
                    {
                        tt = tt.Next;
                    }
                    if (tt.Next != null && ((tt.Next.IsValue("C", null) || tt.Next.IsValue("С", null))))
                    {
                        tt = tt.Next;
                        if (tt.Next == null || tt.WhitespacesAfterCount > 2)
                        {
                            return(null);
                        }
                        BookLinkToken re = new BookLinkToken(t, tt.Next)
                        {
                            Typ = BookLinkTyp.Translate
                        };
                        return(re);
                    }
                }
                if (term == "ТАМ" || term == "ТАМЖЕ")
                {
                    BookLinkToken res = new BookLinkToken(t, t)
                    {
                        Typ = BookLinkTyp.Tamze
                    };
                    if (t.Next != null && t.Next.IsValue("ЖЕ", null))
                    {
                        res.EndToken = t.Next;
                    }
                    return(res);
                }
                if (((term == "СМ" || term == "CM" || term == "НАПР") || term == "НАПРИМЕР" || term == "SEE") || term == "ПОДРОБНЕЕ" || term == "ПОДРОБНО")
                {
                    BookLinkToken res = new BookLinkToken(t, t)
                    {
                        Typ = BookLinkTyp.See
                    };
                    for (t = t.Next; t != null; t = t.Next)
                    {
                        if (t.IsCharOf(".:") || t.IsValue("ALSO", null))
                        {
                            res.EndToken = t;
                            continue;
                        }
                        if (t.IsValue("В", null) || t.IsValue("IN", null))
                        {
                            res.EndToken = t;
                            continue;
                        }
                        BookLinkToken vvv = _tryParse(t, lev + 1);
                        if (vvv != null && vvv.Typ == BookLinkTyp.See)
                        {
                            res.EndToken = vvv.EndToken;
                            break;
                        }
                        break;
                    }
                    return(res);
                }
                if (term == "БОЛЕЕ")
                {
                    BookLinkToken vvv = _tryParse(t.Next, lev + 1);
                    if (vvv != null && vvv.Typ == BookLinkTyp.See)
                    {
                        vvv.BeginToken = t;
                        return(vvv);
                    }
                }
                Pullenti.Ner.Token no = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
                if (no is Pullenti.Ner.NumberToken)
                {
                    return new BookLinkToken(t, no)
                           {
                               Typ = BookLinkTyp.N
                           }
                }
                ;
                if (((term == "B" || term == "В")) && (t.Next is Pullenti.Ner.NumberToken) && (t.Next.Next is Pullenti.Ner.TextToken))
                {
                    string term2 = (t.Next.Next as Pullenti.Ner.TextToken).Term;

                    if (((term2 == "Т" || term2 == "T" || term2.StartsWith("ТОМ")) || term2 == "TT" || term2 == "ТТ") || term2 == "КН" || term2.StartsWith("КНИГ"))
                    {
                        return new BookLinkToken(t, t.Next.Next)
                               {
                                   Typ = BookLinkTyp.Volume
                               }
                    }
                    ;
                }
            }
            if (t.IsChar('('))
            {
                if (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).IntValue != null && t.Next.Next != null) && t.Next.Next.IsChar(')'))
                {
                    int num = (t.Next as Pullenti.Ner.NumberToken).IntValue.Value;
                    if (num > 1900 && num <= 2040)
                    {
                        if (num <= DateTime.Now.Year)
                        {
                            return new BookLinkToken(t, t.Next.Next)
                                   {
                                       Typ = BookLinkTyp.Year, Value = num.ToString()
                                   }
                        }
                        ;
                    }
                }
                if (((t.Next is Pullenti.Ner.ReferentToken) && (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent) && t.Next.Next != null) && t.Next.Next.IsChar(')'))
                {
                    int num = (t.Next.GetReferent() as Pullenti.Ner.Date.DateReferent).Year;
                    if (num > 0)
                    {
                        return new BookLinkToken(t, t.Next.Next)
                               {
                                   Typ = BookLinkTyp.Year, Value = num.ToString()
                               }
                    }
                    ;
                }
            }
            return(null);
        }
예제 #23
0
 /// <summary>
 /// Walks the token chain starting at FirstToken and swaps text tokens that are
 /// unknown to the dictionary for a token built from their corrected spelling.
 /// Every substituted token is recorded in CorrectedTokens together with the
 /// value of its GetSourceText().
 /// </summary>
 /// <param name="lang">
 /// Language used for the correction when the token's own morph language is
 /// undefined, and always used for re-processing the corrected word.
 /// </param>
 void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
     {
         // Only plain text tokens are candidates; tokens carrying the "прдктв." morph attribute are left alone.
         Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
         if (word == null || cur.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }

         // Skip words the dictionary already knows, words shorter than 4 characters,
         // non-lowercase proper surnames and all-uppercase words.
         Pullenti.Morph.MorphClass dictClass = cur.GetMorphClassInDictionary();
         if (!dictClass.IsUndefined || (cur.LengthChar < 4) || (cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower) || cur.Chars.IsAllUpper)
         {
             continue;
         }

         string fixedTerm = Pullenti.Morph.MorphologyService.CorrectWord(word.Term, (cur.Morph.Language.IsUndefined ? lang : cur.Morph.Language));
         if (fixedTerm == null)
         {
             continue;
         }

         // The corrected text must come back as exactly one morph token.
         List <Pullenti.Morph.MorphToken> parsed = Pullenti.Morph.MorphologyService.Process(fixedTerm, lang, null);
         if (parsed == null || parsed.Count != 1)
         {
             continue;
         }

         // The replacement covers the same character span and keeps the original term in Term0.
         Pullenti.Ner.TextToken replacement = new Pullenti.Ner.TextToken(parsed[0], this, cur.BeginChar, cur.EndChar);
         replacement.Chars = cur.Chars;
         replacement.Term0 = word.Term;

         // A correction that the dictionary classifies as a proper surname is discarded.
         if (replacement.GetMorphClassInDictionary().IsProperSurname)
         {
             continue;
         }

         // Splice the replacement into the chain in place of the current token.
         if (cur == FirstToken)
         {
             FirstToken = replacement;
         }
         else
         {
             cur.Previous.Next = replacement;
         }
         replacement.Next = cur.Next;
         cur = replacement;

         if (CorrectedTokens == null)
         {
             CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
         }
         CorrectedTokens.Add(cur, cur.GetSourceText());
     }
 }
예제 #24
0
 /// <summary>
 /// Creates a book-link token spanning the tokens <paramref name="b"/> to <paramref name="e"/>.
 /// Both are forwarded to the base constructor, with null as its third argument.
 /// </summary>
 /// <param name="b">First token of the span.</param>
 /// <param name="e">Last token of the span.</param>
 public BookLinkToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
     : base(b, e, null)
 {
     // No additional initialisation.
 }
예제 #25
0
 /// <summary>
 /// Creates a named-item token spanning the tokens <paramref name="b"/> to <paramref name="e"/>.
 /// Both are forwarded to the base constructor, with null as its third argument.
 /// </summary>
 /// <param name="b">First token of the span.</param>
 /// <param name="e">Last token of the span.</param>
 public NamedItemToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
     : base(b, e, null)
 {
     // No additional initialisation.
 }
예제 #26
0
 /// <summary>
 /// Tries to recognise an author item of a bibliographic reference starting at <paramref name="t"/>.
 /// Three forms are handled: a person found by PersonItemToken.TryParsePerson, the same
 /// wrapped in square brackets, and an "and others" marker ("И ДРУГИЕ", "И ДР", "ET AL").
 /// </summary>
 /// <param name="t">Token to start parsing from; null yields null.</param>
 /// <param name="prevPersTemplate">Template passed through to PersonItemToken.TryParsePerson.</param>
 /// <returns>
 /// A token of type BookLinkTyp.Person or BookLinkTyp.AndOthers, or null when nothing matched.
 /// </returns>
 public static BookLinkToken TryParseAuthor(Pullenti.Ner.Token t, Pullenti.Ner.Person.Internal.FioTemplateType prevPersTemplate = Pullenti.Ner.Person.Internal.FioTemplateType.Undefined)
 {
     if (t == null)
     {
         return(null);
     }
     Pullenti.Ner.ReferentToken rtp = Pullenti.Ner.Person.Internal.PersonItemToken.TryParsePerson(t, prevPersTemplate);
     if (rtp != null)
     {
         BookLinkToken re;
         if (rtp.Data == null)
         {
             // No attached data: keep the referent itself.
             re = new BookLinkToken(t, (rtp == t ? t : rtp.EndToken))
             {
                 Typ = BookLinkTyp.Person, Ref = rtp.Referent
             };
         }
         else
         {
             // Attached data present: keep the whole referent token instead.
             re = new BookLinkToken(t, rtp.EndToken)
             {
                 Typ = BookLinkTyp.Person, Tok = rtp
             };
         }
         re.PersonTemplate = (Pullenti.Ner.Person.Internal.FioTemplateType)rtp.MiscAttrs;
         // Look at the first person-property token inside the person span.
         for (Pullenti.Ner.Token tt = rtp.BeginToken; tt != null && tt.EndChar <= rtp.EndChar; tt = tt.Next)
         {
             if (!(tt.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent))
             {
                 continue;
             }
             Pullenti.Ner.ReferentToken rt = tt as Pullenti.Ner.ReferentToken;
             if (rt.BeginToken.Chars.IsCapitalUpper && tt != rtp.BeginToken)
             {
                 // A capitalised property that is not the first token is stored as the start of the name.
                 re.StartOfName = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(rt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                 break;
             }
             // Any other person property rejects the whole match.
             return(null);
         }
         return(re);
     }
     if (t.IsChar('['))
     {
         // Bracketed author: parse the content and require the closing bracket right after it.
         BookLinkToken re = TryParseAuthor(t.Next, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
         if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']'))
         {
             re.BeginToken = t;
             re.EndToken   = re.EndToken.Next;
             return(re);
         }
     }
     if (((t.IsValue("И", null) || t.IsValue("ET", null))) && t.Next != null)
     {
         if (t.Next.IsValue("ДРУГИЕ", null) || t.Next.IsValue("ДР", null) || t.Next.IsValue("AL", null))
         {
             BookLinkToken res = new BookLinkToken(t, t.Next)
             {
                 Typ = BookLinkTyp.AndOthers
             };
             // Include the trailing dot of the abbreviation when present.
             if (t.Next.Next != null && t.Next.Next.IsChar('.'))
             {
                 res.EndToken = res.EndToken.Next;
             }
             return(res);
         }
     }
     return(null);
 }
예제 #27
0
 /// <summary>
 /// Sets the outward links of a meta-token's boundary tokens and repeats the
 /// operation for every meta-token found inside its span.
 /// </summary>
 /// <param name="mt">Meta-token whose first and last inner tokens are re-linked.</param>
 /// <param name="prev">Value stored in m_Previous of the first inner token.</param>
 /// <param name="next">Value stored in m_Next of the last inner token.</param>
 static void _corrPrevNext(Pullenti.Ner.MetaToken mt, Pullenti.Ner.Token prev, Pullenti.Ner.Token next)
 {
     mt.BeginToken.m_Previous = prev;
     mt.EndToken.m_Next       = next;

     // Walk the inner tokens that end within the meta-token's span.
     Pullenti.Ner.Token inner = mt.BeginToken;
     while (inner != null && inner.EndChar <= mt.EndChar)
     {
         Pullenti.Ner.MetaToken nested = inner as Pullenti.Ner.MetaToken;
         if (nested != null)
         {
             // A nested meta-token gets its own neighbours as the outer links.
             _corrPrevNext(nested, inner.Previous, inner.Next);
         }
         inner = inner.Next;
     }
 }
예제 #28
0
 /// <summary>
 /// Creates a token spanning the tokens <paramref name="b"/> to <paramref name="e"/>.
 /// Both are forwarded to the base constructor, with null as its third argument.
 /// </summary>
 /// <param name="b">First token of the span.</param>
 /// <param name="e">Last token of the span.</param>
 public DefinitionWithNumericToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
     : base(b, e, null)
 {
     // No additional initialisation.
 }
예제 #29
0
 /// <summary>
 /// Creates a token spanning the tokens <paramref name="begin"/> to <paramref name="end"/>.
 /// Both are forwarded to the base constructor, with null as its third argument.
 /// The constructor is private, so instances can only be created from inside the class.
 /// </summary>
 /// <param name="begin">First token of the span.</param>
 /// <param name="end">Last token of the span.</param>
 private TitleNameToken(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
     : base(begin, end, null)
 {
     // No additional initialisation.
 }
예제 #30
0
 /// <summary>
 /// Creates a conjunction token spanning the tokens <paramref name="b"/> to <paramref name="e"/>.
 /// Both are forwarded to the base constructor, with null as its third argument.
 /// </summary>
 /// <param name="b">First token of the span.</param>
 /// <param name="e">Last token of the span.</param>
 public ConjunctionToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
     : base(b, e, null)
 {
     // No additional initialisation.
 }