Ejemplo n.º 1
0
        /// <summary>
        /// Walks the tokens from BeginToken through EndToken, accumulating a heuristic score in Rank
        /// and recording TypeValue / Speciality when matching title items are met.
        /// On success it also sets BeginNameToken and EndNameToken.
        /// (Presumably this scores a text fragment as a candidate document title - confirm with the caller.)
        /// </summary>
        /// <param name="minNewlinesCount">Newline threshold: a non-first token preceded by more newlines than this lowers Rank by the excess.</param>
        /// <returns>false when the span is rejected by one of the checks below; true otherwise.</returns>
        bool CalcRankAndValue(int minNewlinesCount)
        {
            Rank = 0;
            // A span whose first token is all-lowercase starts with a penalty.
            if (BeginToken.Chars.IsAllLower)
            {
                Rank -= 30;
            }
            // words: word-like tokens; upWords: those written in all-uppercase;
            // notwords: non-word tokens; lineNumber: line breaks seen inside the span.
            int words      = 0;
            int upWords    = 0;
            int notwords   = 0;
            int lineNumber = 0;

            // tstart / tend become BeginNameToken / EndNameToken on success.
            Pullenti.Ner.Token tstart = BeginToken;
            Pullenti.Ner.Token tend   = EndToken;
            for (Pullenti.Ner.Token t = BeginToken; t != EndToken.Next && t != null && t.EndChar <= EndToken.EndChar; t = t.Next)
            {
                // NOTE(review): empty branch with no effect - possibly a leftover; confirm before removing.
                if (t.IsNewlineBefore)
                {
                }
                // 1. Title items (theme marker, type, speciality, ...) starting at this token.
                TitleItemToken tit = TitleItemToken.TryAttach(t);
                if (tit != null)
                {
                    if (tit.Typ == TitleItemToken.Types.Theme || tit.Typ == TitleItemToken.Types.TypAndTheme)
                    {
                        if (t != BeginToken)
                        {
                            // A theme marker in the middle of the span is only accepted on the first line.
                            if (lineNumber > 0)
                            {
                                return(false);
                            }
                            // Everything counted before the marker is discarded.
                            words  = (upWords = (notwords = 0));
                            // NOTE(review): this value is overwritten by "tstart = t.Next" below.
                            tstart = tit.EndToken.Next;
                        }
                        t = tit.EndToken;
                        // Nothing follows the marker - no name to extract.
                        if (t.Next == null)
                        {
                            return(false);
                        }
                        // Smaller bonus when the text after the marker starts with a lowercase letter.
                        if (t.Next.Chars.IsLetter && t.Next.Chars.IsAllLower)
                        {
                            Rank += 20;
                        }
                        else
                        {
                            Rank += 100;
                        }
                        // The name starts right after the theme marker.
                        tstart = t.Next;
                        if (tit.Typ == TitleItemToken.Types.TypAndTheme)
                        {
                            TypeValue = tit.Value;
                        }
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Typ)
                    {
                        if (t == BeginToken)
                        {
                            // A type item that occupies the end of the first line is remembered as the type,
                            // and the name starts after it.
                            if (tit.EndToken.IsNewlineAfter)
                            {
                                TypeValue = tit.Value;
                                Rank     += 5;
                                tstart    = tit.EndToken.Next;
                            }
                        }
                        t = tit.EndToken;
                        // Counted as one word, or two when the item spans more than one token.
                        words++;
                        if (tit.BeginToken != tit.EndToken)
                        {
                            words++;
                        }
                        if (tit.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Dust || tit.Typ == TitleItemToken.Types.Speciality)
                    {
                        // Such an item at the very start rejects the span; elsewhere it is a penalty.
                        if (t == BeginToken)
                        {
                            return(false);
                        }
                        Rank -= 20;
                        if (tit.Typ == TitleItemToken.Types.Speciality)
                        {
                            Speciality = tit.Value;
                        }
                        t = tit.EndToken;
                        continue;
                    }
                    if (tit.Typ == TitleItemToken.Types.Consultant || tit.Typ == TitleItemToken.Types.Boss || tit.Typ == TitleItemToken.Types.Editor)
                    {
                        t = tit.EndToken;
                        // Larger penalty when the item is followed by ':', a hyphen or a whitespace gap wider than 4.
                        if (t.Next != null && ((t.Next.IsCharOf(":") || t.Next.IsHiphen || t.WhitespacesAfterCount > 4)))
                        {
                            Rank -= 10;
                        }
                        else
                        {
                            Rank -= 2;
                        }
                        continue;
                    }
                    // Any other title item type rejects the span.
                    return(false);
                }
                // 2. Book-link fragments lower the score.
                Pullenti.Ner.Booklink.Internal.BookLinkToken blt = Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParse(t, 0);
                if (blt != null)
                {
                    if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Misc || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Pages)
                    {
                        Rank -= 10;
                    }
                    // NOTE(review): BookLinkTyp.N is already caught by the branch above, so only PageRange can match here.
                    else if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.PageRange)
                    {
                        Rank -= 20;
                    }
                }
                // A span whose first token parses as a book-link author is penalized.
                if (t == BeginToken && Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined) != null)
                {
                    Rank -= 20;
                }
                // 3. Line breaks inside the span.
                if (t.IsNewlineBefore && t != BeginToken)
                {
                    lineNumber++;
                    // More than four line breaks inside the span rejects it.
                    if (lineNumber > 4)
                    {
                        return(false);
                    }
                    if (t.Chars.IsAllLower)
                    {
                        Rank += 10;
                    }
                    else if (t.Previous.IsChar('.'))
                    {
                        Rank -= 10;
                    }
                    else if (t.Previous.IsCharOf(",-"))
                    {
                        Rank += 10;
                    }
                    else
                    {
                        // Bonus when a noun phrase starting at the previous token reaches at least to the end of t,
                        // i.e. the phrase crosses the line break.
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Previous, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.EndChar >= t.EndChar)
                        {
                            Rank += 10;
                        }
                    }
                }
                // Penalize by the number of newlines in excess of the threshold.
                if (t != BeginToken && t.NewlinesBeforeCount > minNewlinesCount)
                {
                    Rank -= (t.NewlinesBeforeCount - minNewlinesCount);
                }
                // 4. A quoted sequence that ends inside the span, met before any word has been counted.
                Pullenti.Ner.Core.BracketSequenceToken bst = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (bst != null && bst.IsQuoteType && bst.EndToken.EndChar <= EndToken.EndChar)
                {
                    if (words == 0)
                    {
                        // The name starts at the opening quote.
                        tstart = bst.BeginToken;
                        Rank  += 10;
                        if (bst.EndToken == EndToken)
                        {
                            // NOTE(review): tend is never changed elsewhere, so this assignment appears to be a no-op.
                            tend  = EndToken;
                            Rank += 10;
                        }
                    }
                }
                // 5. Tokens that carry referents (recognized entities).
                List <Pullenti.Ner.Referent> rli = t.GetReferents();
                if (rli != null)
                {
                    foreach (Pullenti.Ner.Referent r in rli)
                    {
                        if (r is Pullenti.Ner.Org.OrganizationReferent)
                        {
                            if (t.IsNewlineBefore)
                            {
                                Rank -= 10;
                            }
                            else
                            {
                                Rank -= 4;
                            }
                            continue;
                        }
                        if ((r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Person.PersonReferent))
                        {
                            // Geo / person referents are penalized only when they start a line.
                            if (t.IsNewlineBefore)
                            {
                                Rank -= 5;
                                if (t.IsNewlineAfter || t.Next == null)
                                {
                                    Rank -= 20;
                                }
                                else if (t.Next.IsHiphen || (t.Next is Pullenti.Ner.NumberToken) || (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent))
                                {
                                    Rank -= 20;
                                }
                                else if (t != BeginToken)
                                {
                                    Rank -= 20;
                                }
                            }
                            continue;
                        }
                        // NOTE(review): GeoReferent is already handled above, so only DenominationReferent can match here.
                        if ((r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Denomination.DenominationReferent))
                        {
                            continue;
                        }
                        // A URI or phone number rejects the span.
                        if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Phone.PhoneReferent))
                        {
                            return(false);
                        }
                        // Any other referent type: small penalty, larger at a line start.
                        if (t.IsNewlineBefore)
                        {
                            Rank -= 4;
                        }
                        else
                        {
                            Rank -= 2;
                        }
                        if (t == BeginToken && (EndToken.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                        {
                            Rank -= 10;
                        }
                    }
                    // The referent-bearing token is counted as a single word.
                    words++;
                    if (t.Chars.IsAllUpper)
                    {
                        upWords++;
                    }
                    if (t == BeginToken)
                    {
                        // Penalty when the first token stands alone on its line (optionally followed by a '.').
                        if (t.IsNewlineAfter)
                        {
                            Rank -= 10;
                        }
                        else if (t.Next != null && t.Next.IsChar('.') && t.Next.IsNewlineAfter)
                        {
                            Rank -= 10;
                        }
                    }
                    continue;
                }
                // 6. Numbers: spelled-out numbers count as words, all others as non-words.
                if (t is Pullenti.Ner.NumberToken)
                {
                    if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words)
                    {
                        words++;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                    }
                    else
                    {
                        notwords++;
                    }
                    continue;
                }
                // 7. Person attributes.
                Pullenti.Ner.Person.Internal.PersonAttrToken pat = Pullenti.Ner.Person.Internal.PersonAttrToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonAttrToken.PersonAttrAttachAttrs.No);
                if (pat != null)
                {
                    if (t.IsNewlineBefore)
                    {
                        // No penalty when the attribute is in a defined non-nominative case...
                        if (!pat.Morph.Case.IsUndefined && !pat.Morph.Case.IsNominative)
                        {
                        }
                        // ...or is written in all-uppercase.
                        else if (pat.Chars.IsAllUpper)
                        {
                        }
                        else
                        {
                            Rank -= 20;
                        }
                    }
                    else if (t.Chars.IsAllLower)
                    {
                        Rank--;
                    }
                    // Count every token of the attribute as a word and leave t on its end token.
                    // NOTE(review): if pat.EndToken is never reached, t ends up null and the outer
                    // loop increment (t = t.Next) would dereference it - confirm this cannot happen.
                    for (; t != null; t = t.Next)
                    {
                        words++;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                        if (t == pat.EndToken)
                        {
                            break;
                        }
                    }
                    continue;
                }
                // 8. Organization type words.
                Pullenti.Ner.Org.Internal.OrgItemTypeToken oitt = Pullenti.Ner.Org.Internal.OrgItemTypeToken.TryAttach(t, true, null);
                if (oitt != null)
                {
                    if (oitt.Morph.Number != Pullenti.Morph.MorphNumber.Plural && !oitt.IsDoubtRootWord)
                    {
                        // Non-plural, non-doubtful type word: counted as a word only in a defined non-nominative case,
                        // otherwise penalized (more so at the very start).
                        if (!oitt.Morph.Case.IsUndefined && !oitt.Morph.Case.IsNominative)
                        {
                            words++;
                            if (t.Chars.IsAllUpper)
                            {
                                upWords++;
                            }
                        }
                        else
                        {
                            Rank -= 4;
                            if (t == BeginToken)
                            {
                                Rank -= 5;
                            }
                        }
                    }
                    else
                    {
                        words += 1;
                        if (t.Chars.IsAllUpper)
                        {
                            upWords++;
                        }
                    }
                    t = oitt.EndToken;
                    continue;
                }
                // 9. Plain text tokens.
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (tt.IsChar('©'))
                    {
                        Rank -= 10;
                    }
                    if (tt.IsChar('_'))
                    {
                        Rank--;
                    }
                    if (tt.Chars.IsLetter)
                    {
                        // Only letter tokens longer than two characters are counted as words.
                        if (tt.LengthChar > 2)
                        {
                            words++;
                            if (t.Chars.IsAllUpper)
                            {
                                upWords++;
                            }
                        }
                    }
                    else if (!tt.IsChar(','))
                    {
                        // Any non-letter token except a comma counts as a non-word.
                        notwords++;
                    }
                    // A pure verb is penalized and stops the scan of the span.
                    if (tt.IsPureVerb)
                    {
                        {
                            Rank -= 30;
                            words--;
                        }
                        break;
                    }
                    if (tt == EndToken)
                    {
                        // Ending on a preposition or conjunction is penalized; ending on '.' is rewarded.
                        if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)
                        {
                            Rank -= 10;
                        }
                        else if (tt.IsChar('.'))
                        {
                            Rank += 5;
                        }
                    }
                    else if (tt.IsCharOf("._"))
                    {
                        // '.' or '_' before the end of the span is penalized.
                        Rank -= 5;
                    }
                }
            }
            // Fold the token counts into the score.
            Rank += words;
            Rank -= notwords;
            // Reject a span without counted words unless the score already reached 50.
            if ((words < 1) && (Rank < 50))
            {
                return(false);
            }
            if (tstart == null || tend == null)
            {
                return(false);
            }
            // Reject when the computed name start lies beyond the name end.
            if (tstart.EndChar > tend.EndChar)
            {
                return(false);
            }
            // A type or speciality item immediately after the span raises the score.
            TitleItemToken tit1 = TitleItemToken.TryAttach(EndToken.Next);

            if (tit1 != null && ((tit1.Typ == TitleItemToken.Types.Typ || tit1.Typ == TitleItemToken.Types.Speciality)))
            {
                if (tit1.EndToken.IsNewlineAfter)
                {
                    Rank += 15;
                }
                else
                {
                    Rank += 10;
                }
                if (tit1.Typ == TitleItemToken.Types.Speciality)
                {
                    Speciality = tit1.Value;
                }
            }
            // More than four uppercase words making up more than 80% of all words, directly preceded
            // by a person referent, earns an extra bonus.
            if (upWords > 4 && upWords > ((int)((0.8 * words))))
            {
                if (tstart.Previous != null && (tstart.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                {
                    Rank += (5 + upWords);
                }
            }
            BeginNameToken = tstart;
            EndNameToken   = tend;
            return(true);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Tries to recognize a title item that starts at the given token.
        /// Checks, in order: fixed keywords on a text token (theme, translation, section, speciality),
        /// a speciality via TryAttachSpeciality, a noun phrase ending in a known termin of type Typ,
        /// a bare termin, and a termin enclosed in brackets.
        /// </summary>
        /// <param name="t">Token at which recognition starts.</param>
        /// <returns>The recognized item, or null when nothing matches.</returns>
        public static TitleItemToken TryAttach(Pullenti.Ner.Token t)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt != null)
            {
                Pullenti.Ner.Token t1 = (Pullenti.Ner.Token)tt;
                // "ТЕМА" optionally followed by a type item and/or a colon.
                if (tt.Term == "ТЕМА")
                {
                    TitleItemToken tit = TryAttach(tt.Next);
                    if (tit != null && tit.Typ == Types.Typ)
                    {
                        t1 = tit.EndToken;
                        if (t1.Next != null && t1.Next.IsChar(':'))
                        {
                            t1 = t1.Next;
                        }
                        return(new TitleItemToken(t, t1, Types.TypAndTheme)
                        {
                            Value = tit.Value
                        });
                    }
                    if (tt.Next != null && tt.Next.IsChar(':'))
                    {
                        t1 = tt.Next;
                    }
                    return(new TitleItemToken(tt, t1, Types.Theme));
                }
                // "ПО ТЕМА" / "НА ТЕМА" (any word form matched by IsValue), optionally followed by a colon.
                if (tt.Term == "ПО" || tt.Term == "НА")
                {
                    if (tt.Next != null && tt.Next.IsValue("ТЕМА", null))
                    {
                        t1 = tt.Next;
                        if (t1.Next != null && t1.Next.IsChar(':'))
                        {
                            t1 = t1.Next;
                        }
                        return(new TitleItemToken(tt, t1, Types.Theme));
                    }
                }
                // Translation marker: "ПЕРЕВОД" / "ПЕР" [.] "С" <text token>.
                if (tt.Term == "ПЕРЕВОД" || tt.Term == "ПЕР")
                {
                    Pullenti.Ner.Token tt2 = tt.Next;
                    if (tt2 != null && tt2.IsChar('.'))
                    {
                        tt2 = tt2.Next;
                    }
                    if (tt2 is Pullenti.Ner.TextToken)
                    {
                        // Both literals are compared as written in the original code.
                        if ((tt2 as Pullenti.Ner.TextToken).Term == "C" || (tt2 as Pullenti.Ner.TextToken).Term == "С")
                        {
                            tt2 = tt2.Next;
                            if (tt2 is Pullenti.Ner.TextToken)
                            {
                                return(new TitleItemToken(t, tt2, Types.Translate));
                            }
                        }
                    }
                }
                // Section header: everything up to the closing bracket or the end of the line is "dust".
                if (tt.Term == "СЕКЦИЯ" || tt.Term == "SECTION" || tt.Term == "СЕКЦІЯ")
                {
                    t1 = tt.Next;
                    if (t1 != null && t1.IsChar(':'))
                    {
                        t1 = t1.Next;
                    }
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        t1 = br.EndToken;
                    }
                    else if (t1 != tt.Next)
                    {
                        // A colon was skipped: extend to the last token of the current line.
                        for (; t1 != null; t1 = t1.Next)
                        {
                            if (t1.IsNewlineAfter)
                            {
                                break;
                            }
                        }
                        if (t1 == null)
                        {
                            return(null);
                        }
                    }
                    if (t1 != tt.Next)
                    {
                        return(new TitleItemToken(tt, t1, Types.Dust));
                    }
                }
                // Speciality introduced by a keyword, a preposition + keyword, or a '/' at a line start.
                t1 = null;
                if (tt.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
                {
                    t1 = tt.Next;
                }
                else if (tt.Morph.Class.IsPreposition && tt.Next != null && tt.Next.IsValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"))
                {
                    t1 = tt.Next.Next;
                }
                else if (tt.IsChar('/') && tt.IsNewlineBefore)
                {
                    t1 = tt.Next;
                }
                if (t1 != null)
                {
                    if (t1.IsCharOf(":") || t1.IsHiphen)
                    {
                        t1 = t1.Next;
                    }
                    TitleItemToken spec = TryAttachSpeciality(t1, true);
                    if (spec != null)
                    {
                        // The item covers the introducing keyword as well.
                        spec.BeginToken = t;
                        return(spec);
                    }
                }
            }
            // Speciality without an introducing keyword.
            TitleItemToken sss = TryAttachSpeciality(t, false);

            if (sss != null)
            {
                return(sss);
            }
            if (t is Pullenti.Ner.ReferentToken)
            {
                return(null);
            }
            // Noun phrase whose last token is a known termin.
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt != null)
            {
                string s = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(npt.EndToken, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    Types ty = (Types)tok.Termin.Tag;
                    if (ty == Types.Typ)
                    {
                        // A type phrase directly followed by a theme marker forms a combined item.
                        TitleItemToken tit = TryAttach(tok.EndToken.Next);
                        if (tit != null && tit.Typ == Types.Theme)
                        {
                            // Fixed: the terminating ';' used to sit after the closing brace of this
                            // 'if', leaving the return statement unterminated (a compile error).
                            return(new TitleItemToken(npt.BeginToken, tit.EndToken, Types.TypAndTheme)
                            {
                                Value = s
                            });
                        }
                        // These bare words alone are not accepted as a type.
                        if (s == "РАБОТА" || s == "РОБОТА" || s == "ПРОЕКТ")
                        {
                            return(null);
                        }
                        Pullenti.Ner.Token t1 = tok.EndToken;
                        if (s == "ДИССЕРТАЦИЯ" || s == "ДИСЕРТАЦІЯ")
                        {
                            // Look ahead for the degree (doctor / candidate / master) to refine the type value;
                            // give up after more than three tokens that match none of the patterns.
                            int err = 0;
                            for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                            {
                                if (ttt.Morph.Class.IsPreposition)
                                {
                                    continue;
                                }
                                if (ttt.IsValue("СОИСКАНИЕ", ""))
                                {
                                    continue;
                                }
                                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt1 != null && npt1.Noun.IsValue("СТЕПЕНЬ", "СТУПІНЬ"))
                                {
                                    t1 = (ttt = npt1.EndToken);
                                    continue;
                                }
                                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", ttt);
                                if (rt != null && (rt.Referent is Pullenti.Ner.Person.PersonPropertyReferent))
                                {
                                    Pullenti.Ner.Person.PersonPropertyReferent ppr = rt.Referent as Pullenti.Ner.Person.PersonPropertyReferent;
                                    if (ppr.Name == "доктор наук")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                    else if (ppr.Name == "кандидат наук")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                    else if (ppr.Name == "магистр")
                                    {
                                        t1 = rt.EndToken;
                                        s  = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ";
                                        break;
                                    }
                                }
                                if (ttt.IsValue("ДОКТОР", null) || ttt.IsValue("КАНДИДАТ", null) || ttt.IsValue("МАГИСТР", "МАГІСТР"))
                                {
                                    t1   = ttt;
                                    npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(ttt.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                    if (npt1 != null && npt1.EndToken.IsValue("НАУК", null))
                                    {
                                        t1 = npt1.EndToken;
                                    }
                                    s = (ttt.IsValue("МАГИСТР", "МАГІСТР") ? "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" : (ttt.IsValue("ДОКТОР", null) ? "ДОКТОРСКАЯ ДИССЕРТАЦИЯ" : "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"));
                                    break;
                                }
                                if ((++err) > 3)
                                {
                                    break;
                                }
                            }
                        }
                        // Absorb a trailing period.
                        if (t1.Next != null && t1.Next.IsChar('.'))
                        {
                            t1 = t1.Next;
                        }
                        // A report ("... ОТЧЕТ") followed by "О" + prepositional noun phrase: include that phrase.
                        if (s.EndsWith("ОТЧЕТ") && t1.Next != null && t1.Next.IsValue("О", null))
                        {
                            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                            if (npt1 != null && npt1.Morph.Case.IsPrepositional)
                            {
                                t1 = npt1.EndToken;
                            }
                        }
                        return(new TitleItemToken(npt.BeginToken, t1, ty)
                        {
                            Value = s
                        });
                    }
                }
            }
            // A termin starting directly at the token.
            Pullenti.Ner.Core.TerminToken tok1 = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok1 != null)
            {
                Pullenti.Ner.Token t1 = tok1.EndToken;
                TitleItemToken     re = new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag);
                return(re);
            }
            // A termin enclosed in brackets; the item ends on the closing bracket.
            if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t, false, false))
            {
                tok1 = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok1 != null && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(tok1.EndToken.Next, false, null, false))
                {
                    Pullenti.Ner.Token t1 = tok1.EndToken.Next;
                    return(new TitleItemToken(tok1.BeginToken, t1, (Types)tok1.Termin.Tag));
                }
            }
            return(null);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Cuts the token chain starting at <paramref name="t0"/> into consecutive <c>Line</c> objects.
        /// A line normally ends at a token followed by a newline when the next token can start a sentence.
        /// Collection stops at a Keywords title item, at a block title of a defined type,
        /// or when one of the limits is reached.
        /// </summary>
        /// <param name="t0">First token to process; null yields an empty list.</param>
        /// <param name="maxLines">Stop once this many lines have been collected (checked after each added line).</param>
        /// <param name="maxChars">Stop once the summed CharsCount of the lines reaches this value (checked after each added line).</param>
        /// <param name="maxEndChar">When positive, stop at the first line start whose BeginChar exceeds it.</param>
        /// <returns>The collected lines, in text order.</returns>
        public static List <Line> Parse(Pullenti.Ner.Token t0, int maxLines, int maxChars, int maxEndChar)
        {
            List<Line> lines = new List<Line>();
            int collectedChars = 0;
            Pullenti.Ner.Token lineStart = t0;

            while (lineStart != null)
            {
                if (maxEndChar > 0 && lineStart.BeginChar > maxEndChar)
                {
                    break;
                }

                // Find the last token of the line that begins at lineStart.
                Pullenti.Ner.Token lineEnd = lineStart;
                while (lineEnd.Next != null)
                {
                    Pullenti.Ner.Token following = lineEnd.Next;
                    if (lineEnd.IsNewlineAfter && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(following))
                    {
                        break;
                    }
                    // A person referent at the start of a line forms a line of its own
                    // when the next token is a text token made of letters that is not all-lowercase.
                    if (lineEnd == lineStart && lineStart.IsNewlineBefore && (lineStart.GetReferent() is Pullenti.Ner.Person.PersonReferent))
                    {
                        if ((following is Pullenti.Ner.TextToken) && following.Chars.IsLetter && !following.Chars.IsAllLower)
                        {
                            break;
                        }
                    }
                    lineEnd = following;
                }

                // A keywords item ends the collection.
                TitleItemToken titleItem = TitleItemToken.TryAttach(lineStart);
                if (titleItem != null && titleItem.Typ == TitleItemToken.Types.Keywords)
                {
                    break;
                }

                // So does a block title of a defined type.
                Pullenti.Ner.Core.Internal.BlockTitleToken blockTitle = Pullenti.Ner.Core.Internal.BlockTitleToken.TryAttach(lineStart, false, null);
                if (blockTitle != null && blockTitle.Typ != Pullenti.Ner.Core.Internal.BlkTyps.Undefined)
                {
                    break;
                }

                Line line = new Line(lineStart, lineEnd);
                lines.Add(line);
                collectedChars += line.CharsCount;
                if (lines.Count >= maxLines || collectedChars >= maxChars)
                {
                    break;
                }

                lineStart = lineEnd.Next;
            }
            return lines;
        }