Пример #1
0
        /// <summary>
        /// Попытаться выделить предлог с указанного токена
        /// </summary>
        /// <param name="t">начальный токен</param>
        /// <return>результат или null</return>
        public static PrepositionToken TryParse(Pullenti.Ner.Token t)
        {
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            TerminToken tok = m_Ontology.TryParse(t, TerminParseAttr.No);

            if (tok != null)
            {
                return new PrepositionToken(t, tok.EndToken)
                       {
                           Normal = tok.Termin.CanonicText, NextCase = (Pullenti.Morph.MorphCase)tok.Termin.Tag
                       }
            }
            ;
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (!mc.IsPreposition)
            {
                return(null);
            }
            PrepositionToken res = new PrepositionToken(t, t);

            res.Normal   = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Preposition, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            res.NextCase = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition(res.Normal);
            if ((t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) && (t.Next.Next is Pullenti.Ner.TextToken) && t.Next.Next.GetMorphClassInDictionary().IsPreposition)
            {
                res.EndToken = t.Next.Next;
            }
            return(res);
        }
Пример #2
0
 public string GetWordform(string word, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphGender gender, Pullenti.Morph.MorphCase cas, Pullenti.Morph.MorphNumber num, Pullenti.Morph.MorphLang lang, Pullenti.Morph.MorphWordForm addInfo)
 {
     if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(word[0]))
     {
         if (m_EngineRu.Language.IsRu && lang.IsRu)
         {
             return(m_EngineRu.GetWordform(word, cla, gender, cas, num, addInfo));
         }
         if (m_EngineUa.Language.IsUa && lang.IsUa)
         {
             return(m_EngineUa.GetWordform(word, cla, gender, cas, num, addInfo));
         }
         if (m_EngineBy.Language.IsBy && lang.IsBy)
         {
             return(m_EngineBy.GetWordform(word, cla, gender, cas, num, addInfo));
         }
         if (m_EngineKz.Language.IsKz && lang.IsKz)
         {
             return(m_EngineKz.GetWordform(word, cla, gender, cas, num, addInfo));
         }
         return(m_EngineRu.GetWordform(word, cla, gender, cas, num, addInfo));
     }
     else
     {
         return(m_EngineEn.GetWordform(word, cla, gender, cas, num, addInfo));
     }
 }
Пример #3
0
        internal bool Deserialize(ByteArrayWrapper str, ref int pos)
        {
            int id = str.DeserializeShort(ref pos);

            if (id <= 0)
            {
                return(false);
            }
            MiscInfoId = (short)id;
            int iii = str.DeserializeShort(ref pos);

            Pullenti.Morph.MorphClass mc = new Pullenti.Morph.MorphClass();
            mc.Value = (short)iii;
            if (mc.IsMisc && mc.IsProper)
            {
                mc.IsMisc = false;
            }
            Class = mc;
            byte bbb;

            bbb    = str.DeserializeByte(ref pos);
            Gender = (Pullenti.Morph.MorphGender)bbb;
            bbb    = str.DeserializeByte(ref pos);
            Number = (Pullenti.Morph.MorphNumber)bbb;
            bbb    = str.DeserializeByte(ref pos);
            Pullenti.Morph.MorphCase mca = new Pullenti.Morph.MorphCase();
            mca.Value = bbb;
            Case      = mca;
            string s = str.DeserializeString(ref pos);

            NormalTail     = s;
            s              = str.DeserializeString(ref pos);
            FullNormalTail = s;
            return(true);
        }
Пример #4
0
 /// <summary>
 /// Удалить элементы, не соответствующие классу
 /// </summary>
 public void RemoveItems(Pullenti.Morph.MorphClass cl, bool eq = false)
 {
     if (m_Items == null)
     {
         return;
     }
     for (int i = m_Items.Count - 1; i >= 0; i--)
     {
         bool ok = false;
         if (((m_Items[i].Class.Value & cl.Value)) == 0)
         {
             ok = true;
         }
         else if (eq && m_Items[i].Class.Value != cl.Value)
         {
             ok = true;
         }
         if (ok)
         {
             m_Items.RemoveAt(i);
             m_NeedRecalc = true;
         }
     }
     m_NeedRecalc = true;
 }
Пример #5
0
        internal void Deserialize(Stream stream)
        {
            m_Class = new Pullenti.Morph.MorphClass()
            {
                Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream)
            };
            m_Case = new Pullenti.Morph.MorphCase()
            {
                Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream)
            };
            m_Gender   = (Pullenti.Morph.MorphGender)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
            m_Number   = (Pullenti.Morph.MorphNumber)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
            m_Voice    = (Pullenti.Morph.MorphVoice)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
            m_Language = new Pullenti.Morph.MorphLang()
            {
                Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream)
            };
            int cou = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);

            m_Items = new List <Pullenti.Morph.MorphBaseInfo>();
            for (int i = 0; i < cou; i++)
            {
                Pullenti.Morph.MorphBaseInfo it = this.DeserializeItem(stream);
                if (it != null)
                {
                    m_Items.Add(it);
                }
            }
            m_NeedRecalc = false;
        }
Пример #6
0
 public override Pullenti.Morph.MorphClass GetMorphClassInDictionary()
 {
     Pullenti.Morph.MorphClass res = new Pullenti.Morph.MorphClass();
     foreach (Pullenti.Morph.MorphBaseInfo wf in Morph.Items)
     {
         if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
         {
             res |= wf.Class;
         }
     }
     return(res);
 }
Пример #7
0
 public override bool ContainsAttr(string attrValue, Pullenti.Morph.MorphClass cla = null)
 {
     foreach (Pullenti.Morph.MorphBaseInfo it in Items)
     {
         if (cla != null && cla.Value != 0 && ((it.Class.Value & cla.Value)) == 0)
         {
             continue;
         }
         if (it.ContainsAttr(attrValue, cla))
         {
             return(true);
         }
     }
     return(false);
 }
Пример #8
0
        static AutoannoSentToken TryParse(Pullenti.Ner.Token t)
        {
            if (t == null || !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return(null);
            }
            AutoannoSentToken res = new AutoannoSentToken(t, t);
            bool hasVerb          = false;

            for (; t != null; t = t.Next)
            {
                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t) && t != res.BeginToken)
                {
                    break;
                }
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r is Pullenti.Ner.Keyword.KeywordReferent)
                {
                    res.Rank += (r as Pullenti.Ner.Keyword.KeywordReferent).Rank;
                    if ((r as Pullenti.Ner.Keyword.KeywordReferent).Typ == Pullenti.Ner.Keyword.KeywordType.Predicate)
                    {
                        hasVerb = true;
                    }
                }
                else if (t is Pullenti.Ner.TextToken)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsPronoun || mc.IsPersonalPronoun)
                    {
                        res.Rank -= 1;
                    }
                    else if (t.LengthChar > 1)
                    {
                        res.Rank -= 0.1;
                    }
                }
                res.EndToken = t;
            }
            if (!hasVerb)
            {
                res.Rank /= 3;
            }
            res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(res, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
            return(res);
        }
Пример #9
0
        public override string GetNormalCaseText(Pullenti.Morph.MorphClass mc = null, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool keepChars = false)
        {
            GetTextAttr attr = GetTextAttr.No;

            if (num == Pullenti.Morph.MorphNumber.Singular)
            {
                attr |= GetTextAttr.FirstNounGroupToNominativeSingle;
            }
            else
            {
                attr |= GetTextAttr.FirstNounGroupToNominative;
            }
            if (keepChars)
            {
                attr |= GetTextAttr.KeepRegister;
            }
            return(MiscHelper.GetTextValue(BeginToken, EndToken, attr));
        }
Пример #10
0
 public MorphCollection(MorphCollection source = null)
 {
     if (source == null)
     {
         return;
     }
     foreach (Pullenti.Morph.MorphBaseInfo it in source.Items)
     {
         Pullenti.Morph.MorphBaseInfo mi = null;
         if (it is Pullenti.Morph.MorphWordForm)
         {
             Pullenti.Morph.MorphWordForm wf = new Pullenti.Morph.MorphWordForm();
             wf.CopyFromWordForm(it as Pullenti.Morph.MorphWordForm);
             mi = wf;
         }
         else
         {
             mi = new Pullenti.Morph.MorphBaseInfo();
             mi.CopyFrom(it);
         }
         if (m_Items == null)
         {
             m_Items = new List <Pullenti.Morph.MorphBaseInfo>();
         }
         m_Items.Add(mi);
     }
     m_Class = new Pullenti.Morph.MorphClass()
     {
         Value = source.m_Class.Value
     };
     m_Gender = source.m_Gender;
     m_Case   = new Pullenti.Morph.MorphCase()
     {
         Value = source.m_Case.Value
     };
     m_Number   = source.m_Number;
     m_Language = new Pullenti.Morph.MorphLang()
     {
         Value = source.m_Language.Value
     };
     m_Voice      = source.m_Voice;
     m_NeedRecalc = false;
 }
Пример #11
0
        public override string GetNormalCaseText(Pullenti.Morph.MorphClass mc = null, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool keepChars = false)
        {
            string res = Normal;

            if (keepChars)
            {
                if (Chars.IsAllLower)
                {
                    res = res.ToLower();
                }
                else if (Chars.IsAllUpper)
                {
                }
                else if (Chars.IsCapitalUpper)
                {
                    res = MiscHelper.ConvertFirstCharUpperAndOtherLower(res);
                }
            }
            return(res);
        }
Пример #12
0
        /// <summary>
        /// Получить слова однокоренное слово заданной части речи.
        /// Например, для существительного "ГЛАГОЛ" вариант прилагательного: "ГЛАГОЛЬНЫЙ"
        /// </summary>
        /// <param name="word">слово в верхнем регистре и нормальной форме</param>
        /// <param name="cla">нужная часть речи</param>
        /// <param name="lang">возможный язык</param>
        /// <return>вариант или null при ненахождении</return>
        public static string GetWordClassVar(string word, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphLang lang = null)
        {
            List <DerivateGroup> grs = m_DerRu.Find(word, false, lang);

            if (grs == null)
            {
                return(null);
            }
            foreach (DerivateGroup g in grs)
            {
                foreach (DerivateWord w in g.Words)
                {
                    if (w.Class == cla)
                    {
                        return(w.Spelling);
                    }
                }
            }
            return(null);
        }
Пример #13
0
        public override string GetNormalCaseText(Pullenti.Morph.MorphClass mc = null, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool keepChars = false)
        {
            if (gender == Pullenti.Morph.MorphGender.Undefined)
            {
                gender = Source.Morph.Gender;
            }
            StringBuilder res = new StringBuilder();

            for (int k = AdjIndex1; k <= AdjIndex2; k++)
            {
                string adj = Source.Adjectives[k].GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun, num, gender, keepChars);
                if (adj == null || adj == "?")
                {
                    adj = MiscHelper.GetTextValueOfMetaToken(Source.Adjectives[k], (keepChars ? GetTextAttr.KeepRegister : GetTextAttr.No));
                }
                res.AppendFormat("{0} ", adj ?? "?");
            }
            string noun = null;

            if ((Source.Noun.BeginToken is Pullenti.Ner.ReferentToken) && Source.BeginToken == Source.Noun.EndToken)
            {
                noun = Source.Noun.BeginToken.GetNormalCaseText(null, num, gender, keepChars);
            }
            else
            {
                Pullenti.Morph.MorphClass cas = Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun;
                if (mc != null && !mc.IsUndefined)
                {
                    cas = mc;
                }
                noun = Source.Noun.GetNormalCaseText(cas, num, gender, keepChars);
            }
            if (noun == null || noun == "?")
            {
                noun = Source.Noun.GetNormalCaseText(null, num, Pullenti.Morph.MorphGender.Undefined, false);
            }
            res.Append(noun ?? "?");
            return(res.ToString());
        }
Пример #14
0
        public static List <SentItem> ParseNearItems(Pullenti.Ner.Token t, Pullenti.Ner.Token t1, int lev, List <SentItem> prev)
        {
            if (lev > 100)
            {
                return(null);
            }
            if (t == null || t.BeginChar > t1.EndChar)
            {
                return(null);
            }
            List <SentItem> res = new List <SentItem>();

            if (t is Pullenti.Ner.ReferentToken)
            {
                res.Add(new SentItem(t as Pullenti.Ner.MetaToken));
                return(res);
            }
            DelimToken delim = DelimToken.TryParse(t);

            if (delim != null)
            {
                res.Add(new SentItem(delim));
                return(res);
            }
            Pullenti.Ner.Core.ConjunctionToken conj = Pullenti.Ner.Core.ConjunctionHelper.TryParse(t);
            if (conj != null)
            {
                res.Add(new SentItem(conj));
                return(res);
            }
            Pullenti.Ner.Core.PrepositionToken prep = Pullenti.Ner.Core.PrepositionHelper.TryParse(t);
            Pullenti.Ner.Token t111 = (prep == null ? t : prep.EndToken.Next);
            if ((t111 is Pullenti.Ner.NumberToken) && ((t111.Morph.Class.IsAdjective && !t111.Morph.Class.IsNoun)))
            {
                t111 = null;
            }
            Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = (t111 == null ? null : Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t111, null, false, false, false, false));
            if (num != null)
            {
                if (num.Units.Count == 0)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(num.EndToken.Next, m_NptAttrs, 0, null);
                    if (npt1 == null && num.EndToken.Next != null && num.EndToken.Next.IsValue("РАЗ", null))
                    {
                        npt1      = new Pullenti.Ner.Core.NounPhraseToken(num.EndToken.Next, num.EndToken.Next);
                        npt1.Noun = new Pullenti.Ner.MetaToken(num.EndToken.Next, num.EndToken.Next);
                    }
                    if (npt1 != null && prep != null)
                    {
                        if (npt1.Noun.EndToken.IsValue("РАЗ", null))
                        {
                            npt1.Morph.RemoveItems(prep.NextCase);
                        }
                        else if (((npt1.Morph.Case & prep.NextCase)).IsUndefined)
                        {
                            npt1 = null;
                        }
                        else
                        {
                            npt1.Morph.RemoveItems(prep.NextCase);
                        }
                    }
                    if ((npt1 != null && npt1.EndToken.IsValue("ОНИ", null) && npt1.Preposition != null) && npt1.Preposition.Normal == "ИЗ")
                    {
                        npt1.Morph       = new Pullenti.Ner.MorphCollection(num.EndToken.Morph);
                        npt1.Preposition = null;
                        string   nn  = num.ToString();
                        SentItem si1 = new SentItem(npt1);
                        if (nn == "1" && (num.EndToken is Pullenti.Ner.NumberToken) && (num.EndToken as Pullenti.Ner.NumberToken).EndToken.IsValue("ОДИН", null))
                        {
                            Pullenti.Semantic.SemAttribute a = new Pullenti.Semantic.SemAttribute()
                            {
                                Typ = Pullenti.Semantic.SemAttributeType.OneOf, Spelling = (num.EndToken as Pullenti.Ner.NumberToken).EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false)
                            };
                            SemAttributeEx aex = new SemAttributeEx(num)
                            {
                                Attr = a
                            };
                            si1.Attrs = new List <SemAttributeEx>();
                            si1.Attrs.Add(aex);
                        }
                        else
                        {
                            si1.Quant = new Pullenti.Semantic.SemQuantity(nn, num.BeginToken, num.EndToken);
                        }
                        if (prep != null)
                        {
                            si1.Prep = prep.Normal;
                        }
                        res.Add(si1);
                        return(res);
                    }
                    if (npt1 != null)
                    {
                        SentItem si1 = new SentItem(npt1)
                        {
                            Quant = new Pullenti.Semantic.SemQuantity(num.ToString(), num.BeginToken, num.EndToken)
                        };
                        if (prep != null)
                        {
                            si1.Prep = prep.Normal;
                        }
                        if (npt1.EndToken.IsValue("РАЗ", null))
                        {
                            si1.Typ = SentItemType.Formula;
                        }
                        if (((npt1.Morph.Number & Pullenti.Morph.MorphNumber.Plural)) == Pullenti.Morph.MorphNumber.Undefined && si1.Quant.Spelling != "1")
                        {
                            bool ok = false;
                            if (si1.Quant.Spelling.EndsWith("1"))
                            {
                                ok = true;
                            }
                            else if (si1.Typ == SentItemType.Formula)
                            {
                                ok = true;
                            }
                            else if (si1.Quant.Spelling.EndsWith("2") && npt1.Morph.Case.IsGenitive)
                            {
                                ok = true;
                            }
                            else if (si1.Quant.Spelling.EndsWith("3") && npt1.Morph.Case.IsGenitive)
                            {
                                ok = true;
                            }
                            else if (si1.Quant.Spelling.EndsWith("4") && npt1.Morph.Case.IsGenitive)
                            {
                                ok = true;
                            }
                            if (ok)
                            {
                                npt1.Morph        = new Pullenti.Ner.MorphCollection();
                                npt1.Morph.Number = Pullenti.Morph.MorphNumber.Plural;
                            }
                        }
                        res.Add(si1);
                        return(res);
                    }
                }
                num.BeginToken = t;
                num.Morph      = new Pullenti.Ner.MorphCollection(num.EndToken.Morph);
                SentItem si = new SentItem(num);
                if (prep != null)
                {
                    si.Prep = prep.Normal;
                }
                res.Add(si);
                if (si.Prep == "НА")
                {
                    AdverbToken aa = AdverbToken.TryParse(si.EndToken.Next);
                    if (aa != null && ((aa.Typ == Pullenti.Semantic.SemAttributeType.Less || aa.Typ == Pullenti.Semantic.SemAttributeType.Great)))
                    {
                        si.AddAttr(aa);
                        si.EndToken = aa.EndToken;
                    }
                }
                return(res);
            }
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            AdverbToken adv = AdverbToken.TryParse(t);

            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, m_NptAttrs, 0, null);
            if (npt != null && (npt.EndToken is Pullenti.Ner.TextToken) && (npt.EndToken as Pullenti.Ner.TextToken).Term == "БЫЛИ")
            {
                npt = null;
            }
            if (npt != null && adv != null)
            {
                if (adv.EndChar > npt.EndChar)
                {
                    npt = null;
                }
                else if (adv.EndChar == npt.EndChar)
                {
                    res.Add(new SentItem(npt));
                    res.Add(new SentItem(adv));
                    return(res);
                }
            }
            if (npt != null && npt.Adjectives.Count == 0)
            {
                if (npt.EndToken.IsValue("КОТОРЫЙ", null) && t.Previous != null && t.Previous.IsCommaAnd)
                {
                    List <SentItem> res1 = ParseSubsent(npt, t1, lev + 1, prev);
                    if (res1 != null)
                    {
                        return(res1);
                    }
                }
                if (npt.EndToken.IsValue("СКОЛЬКО", null))
                {
                    Pullenti.Ner.Token tt1 = npt.EndToken.Next;
                    if (tt1 != null && tt1.IsValue("ВСЕГО", null))
                    {
                        tt1 = tt1.Next;
                    }
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null && !npt1.Morph.Case.IsUndefined && prep != null)
                    {
                        if (((prep.NextCase & npt1.Morph.Case)).IsUndefined)
                        {
                            npt1 = null;
                        }
                        else
                        {
                            npt1.Morph.RemoveItems(prep.NextCase);
                        }
                    }
                    if (npt1 != null)
                    {
                        npt1.BeginToken  = npt.BeginToken;
                        npt1.Preposition = npt.Preposition;
                        npt1.Adjectives.Add(new Pullenti.Ner.MetaToken(npt.EndToken, npt.EndToken));
                        npt = npt1;
                    }
                }
                if (npt.EndToken.Morph.Class.IsAdjective)
                {
                    if (Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false) != null)
                    {
                        npt = null;
                    }
                }
            }
            Pullenti.Ner.Core.VerbPhraseToken vrb = null;
            if (npt != null && npt.Adjectives.Count > 0)
            {
                vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false);
                if (vrb != null && vrb.FirstVerb.IsParticiple)
                {
                    npt = null;
                }
            }
            else if (adv == null || npt != null)
            {
                vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, true, false, false);
            }
            if (npt != null)
            {
                res.Add(new SentItem(npt));
            }
            if (vrb != null && !vrb.FirstVerb.IsParticiple && !vrb.FirstVerb.IsDeeParticiple)
            {
                List <Pullenti.Morph.MorphWordForm> vars = new List <Pullenti.Morph.MorphWordForm>();
                foreach (Pullenti.Morph.MorphBaseInfo wf in vrb.FirstVerb.Morph.Items)
                {
                    if (wf.Class.IsVerb && (wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                    {
                        vars.Add(wf as Pullenti.Morph.MorphWordForm);
                    }
                }
                if (vars.Count < 2)
                {
                    res.Add(new SentItem(vrb));
                }
                else
                {
                    vrb.FirstVerb.VerbMorph = vars[0];
                    res.Add(new SentItem(vrb));
                    for (int i = 1; i < vars.Count; i++)
                    {
                        vrb = Pullenti.Ner.Core.VerbPhraseHelper.TryParse(t, false, false, false);
                        if (vrb == null)
                        {
                            break;
                        }
                        vrb.FirstVerb.VerbMorph = vars[i];
                        res.Add(new SentItem(vrb));
                    }
                    if (vars[0].Misc.Mood == Pullenti.Morph.MorphMood.Imperative && vars[1].Misc.Mood != Pullenti.Morph.MorphMood.Imperative)
                    {
                        SentItem rr = res[0];
                        res[0] = res[1];
                        res[1] = rr;
                    }
                }
                return(res);
            }
            if (vrb != null)
            {
                List <SentItem> res1 = ParseParticiples(vrb, t1, lev + 1);
                if (res1 != null)
                {
                    res.AddRange(res1);
                }
            }
            if (res.Count > 0)
            {
                return(res);
            }
            if (adv != null)
            {
                if (adv.Typ == Pullenti.Semantic.SemAttributeType.Other)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adv.EndToken.Next, m_NptAttrs, 0, null);
                    if (npt1 != null && npt1.EndToken.IsValue("ОНИ", null) && npt1.Preposition != null)
                    {
                        SentItem si1 = new SentItem(npt1);
                        Pullenti.Semantic.SemAttribute a = new Pullenti.Semantic.SemAttribute()
                        {
                            Typ = Pullenti.Semantic.SemAttributeType.Other, Spelling = adv.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                        };
                        SemAttributeEx aex = new SemAttributeEx(num)
                        {
                            Attr = a
                        };
                        si1.Attrs = new List <SemAttributeEx>();
                        si1.Attrs.Add(aex);
                        if (prep != null)
                        {
                            si1.Prep = prep.Normal;
                        }
                        res.Add(si1);
                        return(res);
                    }
                    for (int i = prev.Count - 1; i >= 0; i--)
                    {
                        if (prev[i].Attrs != null)
                        {
                            foreach (SemAttributeEx a in prev[i].Attrs)
                            {
                                if (a.Attr.Typ == Pullenti.Semantic.SemAttributeType.OneOf)
                                {
                                    SentItem si1 = new SentItem(prev[i].Source);
                                    Pullenti.Semantic.SemAttribute aa = new Pullenti.Semantic.SemAttribute()
                                    {
                                        Typ = Pullenti.Semantic.SemAttributeType.Other, Spelling = adv.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false)
                                    };
                                    SemAttributeEx aex = new SemAttributeEx(adv)
                                    {
                                        Attr = aa
                                    };
                                    si1.Attrs = new List <SemAttributeEx>();
                                    si1.Attrs.Add(aex);
                                    if (prep != null)
                                    {
                                        si1.Prep = prep.Normal;
                                    }
                                    si1.BeginToken = adv.BeginToken;
                                    si1.EndToken   = adv.EndToken;
                                    res.Add(si1);
                                    return(res);
                                }
                            }
                        }
                    }
                }
                res.Add(new SentItem(adv));
                return(res);
            }
            if (mc.IsAdjective)
            {
                npt = new Pullenti.Ner.Core.NounPhraseToken(t, t)
                {
                    Morph = new Pullenti.Ner.MorphCollection(t.Morph)
                };
                npt.Noun = new Pullenti.Ner.MetaToken(t, t);
                res.Add(new SentItem(npt));
                return(res);
            }
            return(null);
        }
Пример #15
0
        public static MeasureToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool canBeSet = true, bool canUnitsAbsent = false, bool isResctriction = false, bool isSubval = false)
        {
            if (!(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (t.IsTableControlChar)
            {
                return(null);
            }
            Pullenti.Ner.Token     t0  = t;
            Pullenti.Ner.MetaToken whd = null;
            int minmax = 0;

            Pullenti.Ner.Token tt = NumbersWithUnitToken._isMinOrMax(t0, ref minmax);
            if (tt != null)
            {
                t = tt.Next;
            }
            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets, 0, null);
            if (npt == null)
            {
                whd = NumbersWithUnitToken._tryParseWHL(t);
                if (whd != null)
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, whd.EndToken);
                }
                else if (t0.IsValue("КПД", null))
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                }
                else if ((t0 is Pullenti.Ner.TextToken) && t0.LengthChar > 3 && t0.GetMorphClassInDictionary().IsUndefined)
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                }
                else if (t0.IsValue("T", null) && t0.Chars.IsAllLower)
                {
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                    t   = t0;
                    if (t.Next != null && t.Next.IsChar('='))
                    {
                        npt.EndToken = t.Next;
                    }
                }
                else if ((t0 is Pullenti.Ner.TextToken) && t0.Chars.IsLetter && isSubval)
                {
                    if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null)
                    {
                        return(null);
                    }
                    npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0);
                    for (t = t0.Next; t != null; t = t.Next)
                    {
                        if (t.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        else if (!(t is Pullenti.Ner.TextToken))
                        {
                            break;
                        }
                        else if (!t.Chars.IsLetter)
                        {
                            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                            if (br != null)
                            {
                                npt.EndToken = (t = br.EndToken);
                            }
                            else
                            {
                                break;
                            }
                        }
                        else if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null)
                        {
                            break;
                        }
                        else
                        {
                            npt.EndToken = t;
                        }
                    }
                }
                else
                {
                    return(null);
                }
            }
            else if (Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false) != null)
            {
                return(null);
            }
            else
            {
                Pullenti.Ner.Date.Internal.DateItemToken dtok = Pullenti.Ner.Date.Internal.DateItemToken.TryAttach(t, null, false);
                if (dtok != null)
                {
                    return(null);
                }
            }
            Pullenti.Ner.Token t1 = npt.EndToken;
            t = npt.EndToken;
            Pullenti.Ner.MetaToken name = new Pullenti.Ner.MetaToken(npt.BeginToken, npt.EndToken)
            {
                Morph = npt.Morph
            };
            List <UnitToken>    units     = null;
            List <UnitToken>    units2    = null;
            List <MeasureToken> internals = new List <MeasureToken>();
            bool not = false;

            for (tt = t1.Next; tt != null; tt = tt.Next)
            {
                if (tt.IsNewlineBefore)
                {
                    break;
                }
                if (tt.IsTableControlChar)
                {
                    break;
                }
                Pullenti.Ner.Token tt2 = NumbersWithUnitToken._isMinOrMax(tt, ref minmax);
                if (tt2 != null)
                {
                    t1 = (t = (tt = tt2));
                    continue;
                }
                if ((tt.IsValue("БЫТЬ", null) || tt.IsValue("ДОЛЖЕН", null) || tt.IsValue("ДОЛЖНЫЙ", null)) || tt.IsValue("МОЖЕТ", null) || ((tt.IsValue("СОСТАВЛЯТЬ", null) && !tt.GetMorphClassInDictionary().IsAdjective)))
                {
                    t1 = (t = tt);
                    if (tt.Previous.IsValue("НЕ", null))
                    {
                        not = true;
                    }
                    continue;
                }
                Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(tt);
                if (www != null)
                {
                    whd = www;
                    t1  = (t = (tt = www.EndToken));
                    continue;
                }
                if (tt.IsValue("ПРИ", null))
                {
                    MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false);
                    if (mt1 != null)
                    {
                        internals.Add(mt1);
                        t1 = (t = (tt = mt1.EndToken));
                        continue;
                    }
                    NumbersWithUnitToken n1 = NumbersWithUnitToken.TryParse(tt.Next, addUnits, false, false, false, false);
                    if (n1 != null && n1.Units.Count > 0)
                    {
                        mt1 = new MeasureToken(n1.BeginToken, n1.EndToken)
                        {
                            Nums = n1
                        };
                        internals.Add(mt1);
                        t1 = (t = (tt = mt1.EndToken));
                        continue;
                    }
                }
                if (tt.IsValue("ПО", null) && tt.Next != null && tt.Next.IsValue("U", null))
                {
                    t1 = (t = (tt = tt.Next));
                    continue;
                }
                if (internals.Count > 0)
                {
                    if (tt.IsChar(':'))
                    {
                        break;
                    }
                    MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false);
                    if (mt1 != null && mt1.Reliable)
                    {
                        internals.Add(mt1);
                        t1 = (t = (tt = mt1.EndToken));
                        continue;
                    }
                }
                if ((tt is Pullenti.Ner.NumberToken) && (tt as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt3 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective, 0, null);
                    if (npt3 != null)
                    {
                        t1 = (tt = npt3.EndToken);
                        if (internals.Count == 0)
                        {
                            name.EndToken = t1;
                        }
                        continue;
                    }
                }
                if (((tt.IsHiphen && !tt.IsWhitespaceBefore && !tt.IsWhitespaceAfter) && (tt.Next is Pullenti.Ner.NumberToken) && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper)
                {
                    t1 = (tt = (t = tt.Next));
                    if (internals.Count == 0)
                    {
                        name.EndToken = t1;
                    }
                    continue;
                }
                if (((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceBefore && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper)
                {
                    t1 = (t = tt);
                    if (internals.Count == 0)
                    {
                        name.EndToken = t1;
                    }
                    continue;
                }
                if ((((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceAfter && tt.Next.IsHiphen) && !tt.Next.IsWhitespaceAfter && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt.Next.Next.LengthChar > 2)
                {
                    t1 = (t = (tt = tt.Next.Next));
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt1 != null && npt1.EndChar > tt.EndChar)
                    {
                        t1 = (t = (tt = npt1.EndToken));
                    }
                    if (internals.Count == 0)
                    {
                        name.EndToken = t1;
                    }
                    continue;
                }
                if ((tt is Pullenti.Ner.NumberToken) && tt.Previous != null)
                {
                    if (tt.Previous.IsValue("USB", null))
                    {
                        t1 = (t = tt);
                        if (internals.Count == 0)
                        {
                            name.EndToken = t1;
                        }
                        for (Pullenti.Ner.Token ttt = tt.Next; ttt != null; ttt = ttt.Next)
                        {
                            if (ttt.IsWhitespaceBefore)
                            {
                                break;
                            }
                            if (ttt.IsCharOf(",:"))
                            {
                                break;
                            }
                            t1 = (t = (tt = ttt));
                            if (internals.Count == 0)
                            {
                                name.EndToken = t1;
                            }
                        }
                        continue;
                    }
                }
                NumbersWithUnitToken mt0 = NumbersWithUnitToken.TryParse(tt, addUnits, false, false, false, false);
                if (mt0 != null)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                    if (npt1 != null && npt1.EndChar > mt0.EndChar)
                    {
                        t1 = (t = (tt = npt1.EndToken));
                        if (internals.Count == 0)
                        {
                            name.EndToken = t1;
                        }
                        continue;
                    }
                    break;
                }
                if (((tt.IsComma || tt.IsChar('('))) && tt.Next != null)
                {
                    www = NumbersWithUnitToken._tryParseWHL(tt.Next);
                    if (www != null)
                    {
                        whd = www;
                        t1  = (t = (tt = www.EndToken));
                        if (tt.Next != null && tt.Next.IsComma)
                        {
                            t1 = (tt = tt.Next);
                        }
                        if (tt.Next != null && tt.Next.IsChar(')'))
                        {
                            t1 = (tt = tt.Next);
                            continue;
                        }
                    }
                    List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false);
                    if (uu != null)
                    {
                        t1    = (t = uu[uu.Count - 1].EndToken);
                        units = uu;
                        if (tt.IsChar('(') && t1.Next != null && t1.Next.IsChar(')'))
                        {
                            t1 = (t = (tt = t1.Next));
                            continue;
                        }
                        else if (t1.Next != null && t1.Next.IsChar('('))
                        {
                            uu = UnitToken.TryParseList(t1.Next.Next, addUnits, false);
                            if (uu != null && uu[uu.Count - 1].EndToken.Next != null && uu[uu.Count - 1].EndToken.Next.IsChar(')'))
                            {
                                units2 = uu;
                                t1     = (t = (tt = uu[uu.Count - 1].EndToken.Next));
                                continue;
                            }
                            www = NumbersWithUnitToken._tryParseWHL(t1.Next);
                            if (www != null)
                            {
                                whd = www;
                                t1  = (t = (tt = www.EndToken));
                                continue;
                            }
                        }
                        if (uu != null && uu.Count > 0 && !uu[0].IsDoubt)
                        {
                            break;
                        }
                        if (t1.Next != null)
                        {
                            if (t1.Next.IsTableControlChar || t1.IsNewlineAfter)
                            {
                                break;
                            }
                        }
                        units = null;
                    }
                }
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false) && !(tt.Next is Pullenti.Ner.NumberToken))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        t1 = (t = (tt = br.EndToken));
                        continue;
                    }
                }
                if (tt.IsValue("НЕ", null) && tt.Next != null)
                {
                    Pullenti.Morph.MorphClass mc = tt.Next.GetMorphClassInDictionary();
                    if (mc.IsAdverb || mc.IsMisc)
                    {
                        break;
                    }
                    continue;
                }
                if (tt.IsValue("ЯМЗ", null))
                {
                }
                Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                if (npt2 == null)
                {
                    if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)
                    {
                        Pullenti.Ner.Core.TerminToken to = NumbersWithUnitToken.m_Termins.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (to != null)
                        {
                            if ((to.EndToken.Next is Pullenti.Ner.TextToken) && to.EndToken.Next.IsLetters)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                        t1 = tt;
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if (((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter && tt.LengthChar > 1) && (((tt.Chars.IsAllUpper || mc.IsAdverb || mc.IsUndefined) || mc.IsAdjective)))
                    {
                        List <UnitToken> uu = UnitToken.TryParseList(tt, addUnits, false);
                        if (uu != null)
                        {
                            if (uu[0].LengthChar > 1 || uu.Count > 1)
                            {
                                units = uu;
                                t1    = (t = uu[uu.Count - 1].EndToken);
                                break;
                            }
                        }
                        t1 = (t = tt);
                        if (internals.Count == 0)
                        {
                            name.EndToken = tt;
                        }
                        continue;
                    }
                    if (tt.IsComma)
                    {
                        continue;
                    }
                    if (tt.IsChar('.'))
                    {
                        if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next))
                        {
                            continue;
                        }
                        List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false);
                        if (uu != null)
                        {
                            if (uu[0].LengthChar > 2 || uu.Count > 1)
                            {
                                units = uu;
                                t1    = (t = uu[uu.Count - 1].EndToken);
                                break;
                            }
                        }
                    }
                    break;
                }
                t1 = (t = (tt = npt2.EndToken));
                if (internals.Count > 0)
                {
                }
                else if (t.IsValue("ПРЕДЕЛ", null) || t.IsValue("ГРАНИЦА", null) || t.IsValue("ДИАПАЗОН", null))
                {
                }
                else if (t.Chars.IsLetter)
                {
                    name.EndToken = t1;
                }
            }
            Pullenti.Ner.Token t11 = t1;
            for (t1 = t1.Next; t1 != null; t1 = t1.Next)
            {
                if (t1.IsTableControlChar)
                {
                }
                else if (t1.IsCharOf(":,_"))
                {
                    if (isResctriction)
                    {
                        return(null);
                    }
                    Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(t1.Next);
                    if (www != null)
                    {
                        whd = www;
                        t1  = (t = www.EndToken);
                        continue;
                    }
                    List <UnitToken> uu = UnitToken.TryParseList(t1.Next, addUnits, false);
                    if (uu != null)
                    {
                        if (uu[0].LengthChar > 1 || uu.Count > 1)
                        {
                            units = uu;
                            t1    = (t = uu[uu.Count - 1].EndToken);
                            continue;
                        }
                    }
                    if (t1.IsChar(':'))
                    {
                        List <MeasureToken> li = new List <MeasureToken>();
                        for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next)
                        {
                            if (ttt.IsHiphen || ttt.IsTableControlChar)
                            {
                                continue;
                            }
                            if ((ttt is Pullenti.Ner.TextToken) && !ttt.Chars.IsLetter)
                            {
                                continue;
                            }
                            MeasureToken mt1 = TryParse(ttt, addUnits, true, true, false, true);
                            if (mt1 == null)
                            {
                                break;
                            }
                            li.Add(mt1);
                            ttt = mt1.EndToken;
                            if (ttt.Next != null && ttt.Next.IsChar(';'))
                            {
                                ttt = ttt.Next;
                            }
                            if (ttt.IsChar(';'))
                            {
                            }
                            else if (ttt.IsNewlineAfter && mt1.IsNewlineBefore)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                        if (li.Count > 1)
                        {
                            MeasureToken res0 = new MeasureToken(t0, li[li.Count - 1].EndToken)
                            {
                                Internals = li, IsEmpty = true
                            };
                            if (internals != null && internals.Count > 0)
                            {
                                res0.InternalEx = internals[0];
                            }
                            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                            li[0].BeginToken = t0;
                            foreach (MeasureToken v in li)
                            {
                                v.Name = string.Format("{0} ({1})", nam, v.Name ?? "").Trim();
                                if (v.Nums != null && v.Nums.Units.Count == 0 && units != null)
                                {
                                    v.Nums.Units = units;
                                }
                            }
                            return(res0);
                        }
                    }
                }
                else if (t1.IsHiphen && t1.IsWhitespaceAfter && t1.IsWhitespaceBefore)
                {
                }
                else if (t1.IsHiphen && t1.Next != null && t1.Next.IsChar('('))
                {
                }
                else
                {
                    break;
                }
            }
            if (t1 == null)
            {
                return(null);
            }
            List <NumbersWithUnitToken> mts = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, not, true, isResctriction);

            if (mts == null)
            {
                if (units != null && units.Count > 0)
                {
                    if (t1 == null || t1.Previous.IsChar(':'))
                    {
                        mts = new List <NumbersWithUnitToken>();
                        if (t1 == null)
                        {
                            for (t1 = t11; t1 != null && t1.Next != null; t1 = t1.Next)
                            {
                            }
                        }
                        else
                        {
                            t1 = t1.Previous;
                        }
                        mts.Add(new NumbersWithUnitToken(t0, t1)
                        {
                            SingleVal = double.NaN
                        });
                    }
                }
                if (mts == null)
                {
                    return(null);
                }
            }
            NumbersWithUnitToken mt = mts[0];

            if (mt.BeginToken == mt.EndToken && !(mt.BeginToken is Pullenti.Ner.NumberToken))
            {
                return(null);
            }
            if (!isSubval && name.BeginToken.Morph.Class.IsPreposition)
            {
                name.BeginToken = name.BeginToken.Next;
            }
            if (mt.WHL != null)
            {
                whd = mt.WHL;
            }
            for (int kk = 0; kk < 10; kk++)
            {
                if (whd != null && whd.EndToken == name.EndToken)
                {
                    name.EndToken = whd.BeginToken.Previous;
                    continue;
                }
                if (units != null)
                {
                    if (units[units.Count - 1].EndToken == name.EndToken)
                    {
                        name.EndToken = units[0].BeginToken.Previous;
                        continue;
                    }
                }
                break;
            }
            if (mts.Count > 1 && internals.Count == 0)
            {
                if (mt.Units.Count == 0)
                {
                    if (units != null)
                    {
                        foreach (NumbersWithUnitToken m in mts)
                        {
                            m.Units = units;
                        }
                    }
                }
                MeasureToken res1 = new MeasureToken(t0, mts[mts.Count - 1].EndToken)
                {
                    Morph = name.Morph, Reliable = true
                };
                res1.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                for (int k = 0; k < mts.Count; k++)
                {
                    MeasureToken ttt = new MeasureToken(mts[k].BeginToken, mts[k].EndToken)
                    {
                        Nums = mts[k]
                    };
                    if (whd != null)
                    {
                        List <string> nams = whd.Tag as List <string>;
                        if (k < nams.Count)
                        {
                            ttt.Name = nams[k];
                        }
                    }
                    res1.Internals.Add(ttt);
                }
                Pullenti.Ner.Token tt1 = res1.EndToken.Next;
                if (tt1 != null && tt1.IsChar('±'))
                {
                    NumbersWithUnitToken nn = NumbersWithUnitToken._tryParse(tt1, addUnits, true, false, false);
                    if (nn != null && nn.PlusMinusPercent)
                    {
                        res1.EndToken = nn.EndToken;
                        res1.Nums     = nn;
                        if (nn.Units.Count > 0 && units == null && mt.Units.Count == 0)
                        {
                            foreach (NumbersWithUnitToken m in mts)
                            {
                                m.Units = nn.Units;
                            }
                        }
                    }
                }
                return(res1);
            }
            if (!mt.IsWhitespaceBefore)
            {
                if (mt.BeginToken.Previous == null)
                {
                    return(null);
                }
                if (mt.BeginToken.Previous.IsCharOf(":),") || mt.BeginToken.Previous.IsTableControlChar || mt.BeginToken.Previous.IsValue("IP", null))
                {
                }
                else if (mt.BeginToken.IsHiphen && mt.Units.Count > 0 && !mt.Units[0].IsDoubt)
                {
                }
                else
                {
                    return(null);
                }
            }
            if (mt.Units.Count == 0 && units != null)
            {
                mt.Units = units;
                if (mt.DivNum != null && units.Count > 1 && mt.DivNum.Units.Count == 0)
                {
                    for (int i = 1; i < units.Count; i++)
                    {
                        if (units[i].Pow == -1)
                        {
                            for (int j = i; j < units.Count; j++)
                            {
                                mt.DivNum.Units.Add(units[j]);
                                units[j].Pow = -units[j].Pow;
                            }
                            mt.Units.RemoveRange(i, units.Count - i);
                            break;
                        }
                    }
                }
            }
            if ((minmax < 0) && mt.SingleVal != null)
            {
                mt.FromVal     = mt.SingleVal;
                mt.FromInclude = true;
                mt.SingleVal   = null;
            }
            if (minmax > 0 && mt.SingleVal != null)
            {
                mt.ToVal     = mt.SingleVal;
                mt.ToInclude = true;
                mt.SingleVal = null;
            }
            if (mt.Units.Count == 0)
            {
                units = UnitToken.TryParseList(mt.EndToken.Next, addUnits, true);
                if (units == null)
                {
                    if (canUnitsAbsent)
                    {
                    }
                    else
                    {
                        return(null);
                    }
                }
                else
                {
                    mt.Units = units;
                }
            }
            MeasureToken res = new MeasureToken(t0, mt.EndToken)
            {
                Morph = name.Morph, Internals = internals
            };

            if (((!t0.IsWhitespaceBefore && t0.Previous != null && t0 == name.BeginToken) && t0.Previous.IsHiphen && !t0.Previous.IsWhitespaceBefore) && (t0.Previous.Previous is Pullenti.Ner.TextToken))
            {
                name.BeginToken = (res.BeginToken = name.BeginToken.Previous.Previous);
            }
            res.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, (!isSubval ? Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative : Pullenti.Ner.Core.GetTextAttr.No));
            res.Nums = mt;
            foreach (UnitToken u in res.Nums.Units)
            {
                if (u.Keyword != null)
                {
                    if (u.Keyword.BeginChar >= res.BeginChar)
                    {
                        res.Reliable = true;
                    }
                }
            }
            res._parseInternals(addUnits);
            if (res.Internals.Count > 0 || !canBeSet)
            {
                return(res);
            }
            t1 = res.EndToken.Next;
            if (t1 != null && t1.IsCommaAnd)
            {
                t1 = t1.Next;
            }
            List <NumbersWithUnitToken> mts1 = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, false, false, false);

            if ((mts1 != null && mts1.Count == 1 && (t1.WhitespacesBeforeCount < 3)) && mts1[0].Units.Count > 0 && !UnitToken.CanBeEquals(mts[0].Units, mts1[0].Units))
            {
                res.IsSet = true;
                res.Nums  = null;
                res.Internals.Add(new MeasureToken(mt.BeginToken, mt.EndToken)
                {
                    Nums = mt
                });
                res.Internals.Add(new MeasureToken(mts1[0].BeginToken, mts1[0].EndToken)
                {
                    Nums = mts1[0]
                });
                res.EndToken = mts1[0].EndToken;
            }
            return(res);
        }
Пример #16
0
        public static Pullenti.Ner.ReferentToken TryAttachOrg(Pullenti.Ner.Token t, bool canBeCyr = false)
        {
            if (t == null)
            {
                return(null);
            }
            bool br = false;

            if (t.IsChar('(') && t.Next != null)
            {
                t  = t.Next;
                br = true;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && t.Morph.Class.IsAdjective && t.Chars.IsCapitalUpper)
                {
                }
                else
                {
                    return(null);
                }
            }
            else
            {
                if (t.Chars.IsAllLower)
                {
                    return(null);
                }
                if ((t.LengthChar < 3) && !t.Chars.IsLetter)
                {
                    return(null);
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                }
            }
            Pullenti.Ner.Token t0 = t;
            Pullenti.Ner.Token t1 = t0;
            int            namWo  = 0;
            OrgItemEngItem tok    = null;

            Pullenti.Ner.Geo.GeoReferent geo    = null;
            OrgItemTypeToken             addTyp = null;

            for (; t != null; t = t.Next)
            {
                if (t != t0 && t.WhitespacesBeforeCount > 1)
                {
                    break;
                }
                if (t.IsChar(')'))
                {
                    break;
                }
                if (t.IsChar('(') && t.Next != null)
                {
                    if ((t.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent) && t.Next.Next != null && t.Next.Next.IsChar(')'))
                    {
                        geo = t.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        t   = t.Next.Next;
                        continue;
                    }
                    OrgItemTypeToken typ = OrgItemTypeToken.TryAttach(t.Next, true, null);
                    if ((typ != null && typ.EndToken.Next != null && typ.EndToken.Next.IsChar(')')) && typ.Chars.IsLatinLetter)
                    {
                        addTyp = typ;
                        t      = typ.EndToken.Next;
                        continue;
                    }
                    if (((t.Next is Pullenti.Ner.TextToken) && t.Next.Next != null && t.Next.Next.IsChar(')')) && t.Next.Chars.IsCapitalUpper)
                    {
                        t1 = (t = t.Next.Next);
                        continue;
                    }
                    break;
                }
                tok = TryAttach(t, canBeCyr);
                if (tok == null && t.IsCharOf(".,") && t.Next != null)
                {
                    tok = TryAttach(t.Next, canBeCyr);
                    if (tok == null && t.Next.IsCharOf(",."))
                    {
                        tok = TryAttach(t.Next.Next, canBeCyr);
                    }
                }
                if (tok != null)
                {
                    if (tok.LengthChar == 1 && t0.Chars.IsCyrillicLetter)
                    {
                        return(null);
                    }
                    break;
                }
                if (t.IsHiphen && !t.IsWhitespaceAfter && !t.IsWhitespaceBefore)
                {
                    continue;
                }
                if (t.IsCharOf("&+") || t.IsAnd)
                {
                    continue;
                }
                if (t.IsChar('.'))
                {
                    if (t.Previous != null && t.Previous.LengthChar == 1)
                    {
                        continue;
                    }
                    else if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t.Next))
                    {
                        break;
                    }
                }
                if (!t.Chars.IsLatinLetter)
                {
                    if (!canBeCyr || !t.Chars.IsCyrillicLetter)
                    {
                        break;
                    }
                }
                if (t.Chars.IsAllLower)
                {
                    if (t.Morph.Class.IsPreposition || t.Morph.Class.IsConjunction)
                    {
                        continue;
                    }
                    if (br)
                    {
                        continue;
                    }
                    break;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsVerb)
                {
                    if (t.Next != null && t.Next.Morph.Class.IsPreposition)
                    {
                        break;
                    }
                }
                if (t.Next != null && t.Next.IsValue("OF", null))
                {
                    break;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    namWo++;
                }
                t1 = t;
            }
            if (tok == null)
            {
                return(null);
            }
            if (t0 == tok.BeginToken)
            {
                Pullenti.Ner.Core.BracketSequenceToken br2 = Pullenti.Ner.Core.BracketHelper.TryParse(tok.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br2 != null)
                {
                    Pullenti.Ner.Org.OrganizationReferent org1 = new Pullenti.Ner.Org.OrganizationReferent();
                    if (tok.ShortValue != null)
                    {
                        org1.AddTypeStr(tok.ShortValue);
                    }
                    org1.AddTypeStr(tok.FullValue);
                    string nam1 = Pullenti.Ner.Core.MiscHelper.GetTextValue(br2.BeginToken, br2.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (nam1 != null)
                    {
                        org1.AddName(nam1, true, null);
                        return(new Pullenti.Ner.ReferentToken(org1, t0, br2.EndToken));
                    }
                }
                return(null);
            }
            Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
            Pullenti.Ner.Token te = tok.EndToken;
            if (tok.IsBank)
            {
                t1 = tok.EndToken;
            }
            if (tok.FullValue == "company" && (tok.WhitespacesAfterCount < 3))
            {
                OrgItemEngItem tok1 = TryAttach(tok.EndToken.Next, canBeCyr);
                if (tok1 != null)
                {
                    t1  = tok.EndToken;
                    tok = tok1;
                    te  = tok.EndToken;
                }
            }
            if (tok.FullValue == "company")
            {
                if (namWo == 0)
                {
                    return(null);
                }
            }
            string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, t1, Pullenti.Ner.Core.GetTextAttr.IgnoreArticles);

            if (nam == "STOCK" && tok.FullValue == "company")
            {
                return(null);
            }
            string altNam = null;

            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            if (nam.IndexOf('(') > 0)
            {
                int i1 = nam.IndexOf('(');
                int i2 = nam.IndexOf(')');
                if (i1 < i2)
                {
                    altNam = nam;
                    string tai = null;
                    if ((i2 + 1) < nam.Length)
                    {
                        tai = nam.Substring(i2).Trim();
                    }
                    nam = nam.Substring(0, i1).Trim();
                    if (tai != null)
                    {
                        nam = string.Format("{0} {1}", nam, tai);
                    }
                }
            }
            if (tok.IsBank)
            {
                org.AddTypeStr((tok.Kit.BaseLanguage.IsEn ? "bank" : "банк"));
                org.AddProfile(Pullenti.Ner.Org.OrgProfile.Finance);
                if ((t1.Next != null && t1.Next.IsValue("OF", null) && t1.Next.Next != null) && t1.Next.Next.Chars.IsLatinLetter)
                {
                    OrgItemNameToken nam0 = OrgItemNameToken.TryAttach(t1.Next, null, false, false);
                    if (nam0 != null)
                    {
                        te = nam0.EndToken;
                    }
                    else
                    {
                        te = t1.Next.Next;
                    }
                    nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, te, Pullenti.Ner.Core.GetTextAttr.No);
                    if (te.GetReferent() is Pullenti.Ner.Geo.GeoReferent)
                    {
                        org.AddGeoObject(te.GetReferent() as Pullenti.Ner.Geo.GeoReferent);
                    }
                }
                else if (t0 == t1)
                {
                    return(null);
                }
            }
            else
            {
                if (tok.ShortValue != null)
                {
                    org.AddTypeStr(tok.ShortValue);
                }
                org.AddTypeStr(tok.FullValue);
            }
            if (string.IsNullOrEmpty(nam))
            {
                return(null);
            }
            org.AddName(nam, true, null);
            if (altNam != null)
            {
                org.AddName(altNam, true, null);
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(org, t0, te);
            t = te;
            while (t.Next != null)
            {
                if (t.Next.IsCharOf(",."))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (t.WhitespacesAfterCount < 2)
            {
                tok = TryAttach(t.Next, canBeCyr);
                if (tok != null)
                {
                    if (tok.ShortValue != null)
                    {
                        org.AddTypeStr(tok.ShortValue);
                    }
                    org.AddTypeStr(tok.FullValue);
                    res.EndToken = tok.EndToken;
                }
            }
            if (geo != null)
            {
                org.AddGeoObject(geo);
            }
            if (addTyp != null)
            {
                org.AddType(addTyp, false);
            }
            if (!br)
            {
                return(res);
            }
            t = res.EndToken;
            if (t.Next == null || t.Next.IsChar(')'))
            {
                res.EndToken = t.Next;
            }
            else
            {
                return(null);
            }
            return(res);
        }
Пример #17
0
        static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
        {
            if (t == null)
            {
                return(null);
            }
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true))
            {
                WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
                if (wit != null)
                {
                    if (wit.EndToken.Next == null)
                    {
                        wit.BeginToken = t;
                        return(wit);
                    }
                    if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true))
                    {
                        wit.BeginToken = t;
                        wit.EndToken   = wit.EndToken.Next;
                        return(wit);
                    }
                }
            }
            Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
                res.Typ = (Typs)tok.Termin.Tag;
                if (res.Typ == Typs.Noun)
                {
                    res.Value = tok.Termin.CanonicText;
                    if (tok.Termin.Tag2 != null)
                    {
                        res.IsDoubt = true;
                    }
                    for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                    {
                        if (tt.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        WeaponItemToken wit = _TryParse(tt, null, false, false);
                        if (wit != null)
                        {
                            if (wit.Typ == Typs.Brand)
                            {
                                res.InnerTokens.Add(wit);
                                res.EndToken = (tt = wit.EndToken);
                                continue;
                            }
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            break;
                        }
                        Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                        if (mc == Pullenti.Morph.MorphClass.Adjective)
                        {
                            if (res.AltValue == null)
                            {
                                res.AltValue = res.Value;
                            }
                            if (res.AltValue.EndsWith(res.Value))
                            {
                                res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                            }
                            res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                            res.EndToken = tt;
                            continue;
                        }
                        break;
                    }
                    return(res);
                }
                if (res.Typ == Typs.Brand || res.Typ == Typs.Name)
                {
                    res.Value = tok.Termin.CanonicText;
                    return(res);
                }
                if (res.Typ == Typs.Model)
                {
                    res.Value = tok.Termin.CanonicText;
                    if (tok.Termin.Tag2 is List <Pullenti.Ner.Core.Termin> )
                    {
                        List <Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List <Pullenti.Ner.Core.Termin>;
                        foreach (Pullenti.Ner.Core.Termin to in li)
                        {
                            WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken)
                            {
                                Typ = (Typs)to.Tag, Value = to.CanonicText, IsInternal = tok.BeginToken == tok.EndToken
                            };
                            res.InnerTokens.Add(wit);
                            if (to.AdditionalVars != null && to.AdditionalVars.Count > 0)
                            {
                                wit.AltValue = to.AdditionalVars[0].CanonicText;
                            }
                        }
                    }
                    res._correctModel();
                    return(res);
                }
            }
            Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
            if (nnn != null)
            {
                Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
                if (tit != null)
                {
                    WeaponItemToken res = new WeaponItemToken(t, tit.EndToken)
                    {
                        Typ = Typs.Number
                    };
                    res.Value    = tit.Value;
                    res.AltValue = tit.AltValue;
                    return(res);
                }
            }
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4))
            {
                if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken))
                {
                    WeaponItemToken res = new WeaponItemToken(t, t.Next)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return(res);
                }
                if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return(res);
                }
                if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken))
                {
                    WeaponItemToken pp = _TryParse(t.Next, null, false, false);
                    if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand)))
                    {
                        WeaponItemToken res = new WeaponItemToken(t, t)
                        {
                            Typ = Typs.Noun
                        };
                        res.Value    = "ПИСТОЛЕТ";
                        res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                        return(res);
                    }
                }
            }
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
            {
                bool ok = false;
                if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand)))
                {
                    ok = true;
                }
                else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd)
                {
                    ok = true;
                }
                if (ok)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Name, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars)
                    {
                        res.Value    = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                        res.EndToken = t.Next.Next;
                    }
                    if (prev != null && prev.Typ == Typs.Noun)
                    {
                        res.Typ = Typs.Brand;
                    }
                    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    return(res);
                }
            }
            if (t.IsValue("МАРКА", null))
            {
                WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
                if (res != null && res.Typ == Typs.Brand)
                {
                    res.BeginToken = t;
                    return(res);
                }
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        return new WeaponItemToken(t, br.EndToken)
                               {
                                   Typ = Typs.Brand, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No)
                               }
                    }
                    ;
                }
                if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower)
                {
                    return new WeaponItemToken(t, t.Next)
                           {
                               Typ = Typs.Brand, Value = (t as Pullenti.Ner.TextToken).Term
                           }
                }
                ;
            }
            if (t.IsValue("КАЛИБР", "КАЛІБР"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    return new WeaponItemToken(t, num.EndToken)
                           {
                               Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                           }
                }
                ;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм")
                    {
                        return new WeaponItemToken(t, num.EndToken)
                               {
                                   Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                               }
                    }
                    ;

                    if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР"))
                    {
                        return new WeaponItemToken(t, num.EndToken.Next)
                               {
                                   Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                               }
                    }
                    ;
                }
            }
            if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                if (tt1 is Pullenti.Ner.ReferentToken)
                {
                    if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                    {
                        return new WeaponItemToken(t, tt1)
                               {
                                   Typ = Typs.Developer, Ref = tt1.GetReferent()
                               }
                    }
                    ;
                }
            }
            return(null);
        }

        void _correctModel()
        {
            Pullenti.Ner.Token tt = EndToken.Next;
            if (tt == null || tt.WhitespacesBeforeCount > 2)
            {
                return;
            }
            if (tt.IsValue(":\\/.", null) || tt.IsHiphen)
            {
                tt = tt.Next;
            }
            if (tt is Pullenti.Ner.NumberToken)
            {
                StringBuilder tmp = new StringBuilder();
                tmp.Append((tt as Pullenti.Ner.NumberToken).Value);
                bool isLat = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
                EndToken = tt;
                for (tt = tt.Next; tt != null; tt = tt.Next)
                {
                    if ((tt is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Chars.IsLetter)
                    {
                        if (!tt.IsWhitespaceBefore || ((tt.Previous != null && tt.Previous.IsHiphen)))
                        {
                            char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                            EndToken = tt;
                            char ch2 = (char)0;
                            if (Pullenti.Morph.LanguageHelper.IsLatinChar(ch) && !isLat)
                            {
                                ch2 = Pullenti.Morph.LanguageHelper.GetCyrForLat(ch);
                                if (ch2 != ((char)0))
                                {
                                    ch = ch2;
                                }
                            }
                            else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(ch) && isLat)
                            {
                                ch2 = Pullenti.Morph.LanguageHelper.GetLatForCyr(ch);
                                if (ch2 != ((char)0))
                                {
                                    ch = ch2;
                                }
                            }
                            tmp.Append(ch);
                            continue;
                        }
                    }
                    break;
                }
                Value    = string.Format("{0}-{1}", Value, tmp.ToString());
                AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
            }
            if (!EndToken.IsWhitespaceAfter && EndToken.Next != null && ((EndToken.Next.IsHiphen || EndToken.Next.IsCharOf("\\/"))))
            {
                if (!EndToken.Next.IsWhitespaceAfter && (EndToken.Next.Next is Pullenti.Ner.NumberToken))
                {
                    EndToken = EndToken.Next.Next;
                    Value    = string.Format("{0}-{1}", Value, (EndToken as Pullenti.Ner.NumberToken).Value);
                    if (AltValue != null)
                    {
                        AltValue = string.Format("{0}-{1}", AltValue, (EndToken as Pullenti.Ner.NumberToken).Value);
                    }
                }
            }
        }
Пример #18
0
 void CorrectWordsByMerging(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token t = FirstToken; t != null && t.Next != null; t = t.Next)
     {
         if (!t.Chars.IsLetter || (t.LengthChar < 2))
         {
             continue;
         }
         Pullenti.Morph.MorphClass mc0 = t.GetMorphClassInDictionary();
         if (t.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }
         Pullenti.Ner.Token t1 = t.Next;
         if (t1.IsHiphen && t1.Next != null && !t1.IsNewlineAfter)
         {
             t1 = t1.Next;
         }
         if (t1.LengthChar == 1)
         {
             continue;
         }
         if (!t1.Chars.IsLetter || !t.Chars.IsLetter || t1.Chars.IsLatinLetter != t.Chars.IsLatinLetter)
         {
             continue;
         }
         if (t1.Chars.IsAllUpper && !t.Chars.IsAllUpper)
         {
             continue;
         }
         else if (!t1.Chars.IsAllLower)
         {
             continue;
         }
         else if (t.Chars.IsAllUpper)
         {
             continue;
         }
         if (t1.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }
         Pullenti.Morph.MorphClass mc1 = t1.GetMorphClassInDictionary();
         if (!mc1.IsUndefined && !mc0.IsUndefined)
         {
             continue;
         }
         if (((t as Pullenti.Ner.TextToken).Term.Length + (t1 as Pullenti.Ner.TextToken).Term.Length) < 6)
         {
             continue;
         }
         string corw = (t as Pullenti.Ner.TextToken).Term + (t1 as Pullenti.Ner.TextToken).Term;
         List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
         if (ccc == null || ccc.Count != 1)
         {
             continue;
         }
         if (corw == "ПОСТ" || corw == "ВРЕД")
         {
             continue;
         }
         Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(ccc[0], this, t.BeginChar, t1.EndChar);
         if (tt.GetMorphClassInDictionary().IsUndefined)
         {
             continue;
         }
         tt.Chars = t.Chars;
         if (t == FirstToken)
         {
             FirstToken = tt;
         }
         else
         {
             t.Previous.Next = tt;
         }
         if (t1.Next != null)
         {
             tt.Next = t1.Next;
         }
         t = tt;
     }
 }
Пример #19
0
        public override string GetNormalCaseText(Pullenti.Morph.MorphClass mc = null, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool keepChars = false)
        {
            StringBuilder res = new StringBuilder();

            if (gender == Pullenti.Morph.MorphGender.Undefined)
            {
                gender = Morph.Gender;
            }
            if (Adverbs != null && Adverbs.Count > 0)
            {
                int i = 0;
                if (Adjectives.Count > 0)
                {
                    for (int j = 0; j < Adjectives.Count; j++)
                    {
                        for (; i < Adverbs.Count; i++)
                        {
                            if (Adverbs[i].BeginChar < Adjectives[j].BeginChar)
                            {
                                res.AppendFormat("{0} ", Adverbs[i].GetNormalCaseText(Pullenti.Morph.MorphClass.Adverb, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                            }
                            else
                            {
                                break;
                            }
                        }
                        string s = Adjectives[j].GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun, num, gender, keepChars);
                        res.AppendFormat("{0} ", s ?? "?");
                    }
                }
                for (; i < Adverbs.Count; i++)
                {
                    res.AppendFormat("{0} ", Adverbs[i].GetNormalCaseText(Pullenti.Morph.MorphClass.Adverb, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                }
            }
            else
            {
                foreach (Pullenti.Ner.MetaToken t in Adjectives)
                {
                    string s = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun, num, gender, keepChars);
                    res.AppendFormat("{0} ", s ?? "?");
                }
            }
            string r = null;

            if ((Noun.BeginToken is Pullenti.Ner.ReferentToken) && Noun.BeginToken == Noun.EndToken)
            {
                r = Noun.BeginToken.GetNormalCaseText(null, num, gender, keepChars);
            }
            else
            {
                Pullenti.Morph.MorphClass cas = Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun;
                if (mc != null && !mc.IsUndefined)
                {
                    cas = mc;
                }
                r = Noun.GetNormalCaseText(cas, num, gender, keepChars);
            }
            if (r == null || r == "?")
            {
                r = Noun.GetNormalCaseText(null, num, Pullenti.Morph.MorphGender.Undefined, false);
            }
            res.Append(r ?? Noun.ToString());
            return(res.ToString());
        }
Пример #20
0
            public static List <PersonItemToken> TryAttach(Pullenti.Ner.Token t)
            {
                List <PersonItemToken> res = new List <PersonItemToken>();

                for (; t != null; t = t.Next)
                {
                    if (t.IsNewlineBefore && res.Count > 0)
                    {
                        break;
                    }
                    Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                    if (tt == null)
                    {
                        break;
                    }
                    string s = tt.Term;
                    if (!char.IsLetter(s[0]))
                    {
                        break;
                    }
                    if (((s.Length == 1 || s == "ДЖ")) && !tt.Chars.IsAllLower)
                    {
                        Pullenti.Ner.Token t1 = t;
                        if (t1.Next != null && t1.Next.IsChar('.'))
                        {
                            t1 = t1.Next;
                        }
                        res.Add(new PersonItemToken(t, t1)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Initial, Value = s
                        });
                        t = t1;
                        continue;
                    }
                    if (tt.IsAnd)
                    {
                        res.Add(new PersonItemToken(t, t)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.And
                        });
                        continue;
                    }
                    if (tt.Morph.Class.IsPronoun || tt.Morph.Class.IsPersonalPronoun)
                    {
                        break;
                    }
                    if (tt.Chars.IsAllLower)
                    {
                        Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                        if (mc.IsPreposition || mc.IsVerb || mc.IsAdverb)
                        {
                            break;
                        }
                        Pullenti.Ner.Token t1 = t;
                        if (t1.Next != null && !t1.IsWhitespaceAfter && t1.Next.IsChar('.'))
                        {
                            t1 = t1.Next;
                        }
                        res.Add(new PersonItemToken(t, t1)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.LocaseWord, Value = s
                        });
                        t = t1;
                        continue;
                    }
                    if (tt.Morph.Class.IsProperName)
                    {
                        res.Add(new PersonItemToken(t, t)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Name, Value = s
                        });
                    }
                    else if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && !t.Next.IsWhitespaceAfter)
                    {
                        res.Add(new PersonItemToken(t, t.Next.Next)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Surname, Value = string.Format("{0}-{1}", s, (t.Next.Next as Pullenti.Ner.TextToken).Term)
                        });
                        t = t.Next.Next;
                    }
                    else
                    {
                        res.Add(new PersonItemToken(t, t)
                        {
                            Typ = Pullenti.Ner.Org.Internal.OrgItemEponymToken.PersonItemType.Surname, Value = s
                        });
                    }
                }
                return(res.Count > 0 ? res : null);
            }
Пример #21
0
        void _recalc()
        {
            m_NeedRecalc = false;
            if (m_Items == null || m_Items.Count == 0)
            {
                return;
            }
            m_Class  = new Pullenti.Morph.MorphClass();
            m_Gender = Pullenti.Morph.MorphGender.Undefined;
            bool g = m_Gender == Pullenti.Morph.MorphGender.Undefined;

            m_Number = Pullenti.Morph.MorphNumber.Undefined;
            bool n = m_Number == Pullenti.Morph.MorphNumber.Undefined;

            m_Case = new Pullenti.Morph.MorphCase();
            bool ca = m_Case.IsUndefined;
            bool la = m_Language == null || m_Language.IsUndefined;

            m_Voice = Pullenti.Morph.MorphVoice.Undefined;
            bool verbHasUndef = false;

            if (m_Items != null)
            {
                foreach (Pullenti.Morph.MorphBaseInfo it in m_Items)
                {
                    m_Class.Value |= it.Class.Value;
                    if (g)
                    {
                        m_Gender |= it.Gender;
                    }
                    if (ca)
                    {
                        m_Case |= it.Case;
                    }
                    if (n)
                    {
                        m_Number |= it.Number;
                    }
                    if (la)
                    {
                        m_Language.Value |= it.Language.Value;
                    }
                    if (it.Class.IsVerb)
                    {
                        if (it is Pullenti.Morph.MorphWordForm)
                        {
                            Pullenti.Morph.MorphVoice v = (it as Pullenti.Morph.MorphWordForm).Misc.Voice;
                            if (v == Pullenti.Morph.MorphVoice.Undefined)
                            {
                                verbHasUndef = true;
                            }
                            else
                            {
                                m_Voice |= v;
                            }
                        }
                    }
                }
            }
            if (verbHasUndef)
            {
                m_Voice = Pullenti.Morph.MorphVoice.Undefined;
            }
        }
Пример #22
0
 /// <summary>
 /// Получить связанный с токеном текст в именительном падеже
 /// </summary>
 /// <param name="mc">желательная часть речи</param>
 /// <param name="num">желательное число</param>
 /// <param name="gender">желательный пол</param>
 /// <param name="keepChars">сохранять регистр символов (по умолчанию, всё в верхний)</param>
 /// <return>строка текста</return>
 public virtual string GetNormalCaseText(Pullenti.Morph.MorphClass mc = null, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool keepChars = false)
 {
     return(this.ToString());
 }
Пример #23
0
 void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
 {
     for (Pullenti.Ner.Token tt = FirstToken; tt != null; tt = tt.Next)
     {
         if (!(tt is Pullenti.Ner.TextToken))
         {
             continue;
         }
         if (tt.Morph.ContainsAttr("прдктв.", null))
         {
             continue;
         }
         Pullenti.Morph.MorphClass dd = tt.GetMorphClassInDictionary();
         if (!dd.IsUndefined || (tt.LengthChar < 4))
         {
             continue;
         }
         if (tt.Morph.Class.IsProperSurname && !tt.Chars.IsAllLower)
         {
             continue;
         }
         if (tt.Chars.IsAllUpper)
         {
             continue;
         }
         string corw = Pullenti.Morph.MorphologyService.CorrectWord((tt as Pullenti.Ner.TextToken).Term, (tt.Morph.Language.IsUndefined ? lang : tt.Morph.Language));
         if (corw == null)
         {
             continue;
         }
         List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
         if (ccc == null || ccc.Count != 1)
         {
             continue;
         }
         Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar)
         {
             Chars = tt.Chars, Term0 = (tt as Pullenti.Ner.TextToken).Term
         };
         Pullenti.Morph.MorphClass mc = tt1.GetMorphClassInDictionary();
         if (mc.IsProperSurname)
         {
             continue;
         }
         if (tt == FirstToken)
         {
             FirstToken = tt1;
         }
         else
         {
             tt.Previous.Next = tt1;
         }
         tt1.Next = tt.Next;
         tt       = tt1;
         if (CorrectedTokens == null)
         {
             CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
         }
         CorrectedTokens.Add(tt, tt.GetSourceText());
     }
 }
Пример #24
0
 /// <summary>
 /// Удалить элементы, не соответствующие заданным параметрам
 /// </summary>
 public void RemoveItemsListCla(ICollection <Pullenti.Morph.MorphBaseInfo> bis, Pullenti.Morph.MorphClass cla)
 {
     if (m_Items == null)
     {
         return;
     }
     for (int i = m_Items.Count - 1; i >= 0; i--)
     {
         if (cla != null && !cla.IsUndefined)
         {
             if (((m_Items[i].Class.Value & cla.Value)) == 0)
             {
                 if (((m_Items[i].Class.IsProper || m_Items[i].Class.IsNoun)) && ((cla.IsProper || cla.IsNoun)))
                 {
                 }
                 else
                 {
                     m_Items.RemoveAt(i);
                     m_NeedRecalc = true;
                     continue;
                 }
             }
         }
         bool ok = false;
         foreach (Pullenti.Morph.MorphBaseInfo it in bis)
         {
             if (!it.Case.IsUndefined && !m_Items[i].Case.IsUndefined)
             {
                 if (((m_Items[i].Case & it.Case)).IsUndefined)
                 {
                     continue;
                 }
             }
             if (it.Gender != Pullenti.Morph.MorphGender.Undefined && m_Items[i].Gender != Pullenti.Morph.MorphGender.Undefined)
             {
                 if (((it.Gender & m_Items[i].Gender)) == Pullenti.Morph.MorphGender.Undefined)
                 {
                     continue;
                 }
             }
             if (it.Number != Pullenti.Morph.MorphNumber.Undefined && m_Items[i].Number != Pullenti.Morph.MorphNumber.Undefined)
             {
                 if (((it.Number & m_Items[i].Number)) == Pullenti.Morph.MorphNumber.Undefined)
                 {
                     continue;
                 }
             }
             ok = true;
             break;
         }
         if (!ok)
         {
             m_Items.RemoveAt(i);
             m_NeedRecalc = true;
         }
     }
 }
Пример #25
0
        public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names)
        {
            if (t == null)
            {
                return(null);
            }
            BlockLine res = new BlockLine(t, t);

            for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
            {
                if (tt != t && tt.IsNewlineBefore)
                {
                    break;
                }
                else
                {
                    res.EndToken = tt;
                }
            }
            int nums = 0;

            while (t != null && t.Next != null && t.EndChar <= res.EndChar)
            {
                if (t is Pullenti.Ner.NumberToken)
                {
                }
                else
                {
                    Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
                    if (rom != null && rom.EndToken.Next != null)
                    {
                        t = rom.EndToken;
                    }
                    else
                    {
                        break;
                    }
                }
                if (t.Next.IsChar('.'))
                {
                }
                else if ((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower)
                {
                }
                else
                {
                    break;
                }
                res.NumberEnd = t;
                t             = t.Next;
                if (t.IsChar('.') && t.Next != null)
                {
                    res.NumberEnd = t;
                    t             = t.Next;
                }
                if (t.IsNewlineBefore)
                {
                    return(res);
                }
                nums++;
            }
            Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok == null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt1 != null && npt1.EndToken != npt1.BeginToken)
                {
                    tok = m_Ontology.TryParse(npt1.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No);
                }
            }
            if (tok != null)
            {
                if (t.Previous != null && t.Previous.IsChar(':'))
                {
                    tok = null;
                }
            }
            if (tok != null)
            {
                BlkTyps typ = (BlkTyps)tok.Termin.Tag;
                if (typ == BlkTyps.Conslusion)
                {
                    if (t.IsNewlineAfter)
                    {
                    }
                    else if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null)
                    {
                        Pullenti.Ner.Core.TerminToken tok2 = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                        if (tok2 != null && ((BlkTyps)tok2.Termin.Tag) == BlkTyps.Chapter)
                        {
                        }
                        else
                        {
                            tok = null;
                        }
                    }
                    else
                    {
                        tok = null;
                    }
                }
                if (t.Kit.BaseLanguage != t.Morph.Language)
                {
                    tok = null;
                }
                if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null))
                {
                    if (!t.IsNewlineAfter && t.Next != null)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.IsNewlineAfter && npt.Morph.Case.IsGenitive)
                        {
                            tok = null;
                        }
                        else if (npt == null)
                        {
                            tok = null;
                        }
                    }
                }
                if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null))
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.Morph.Case.IsGenitive)
                    {
                        tok = null;
                    }
                }
                if (tok != null)
                {
                    if (res.NumberEnd == null)
                    {
                        res.NumberEnd = tok.EndToken;
                        if (res.NumberEnd.EndChar > res.EndChar)
                        {
                            res.EndToken = res.NumberEnd;
                        }
                    }
                    res.Typ = typ;
                    t       = tok.EndToken;
                    if (t.Next != null && t.Next.IsCharOf(":."))
                    {
                        t            = t.Next;
                        res.EndToken = t;
                    }
                    if (t.IsNewlineAfter || t.Next == null)
                    {
                        return(res);
                    }
                    t = t.Next;
                }
            }
            if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken))
            {
                res.Typ       = BlkTyps.Chapter;
                res.NumberEnd = t;
                t             = t.Next;
            }
            if (names != null)
            {
                Pullenti.Ner.Core.TerminToken tok2 = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok2 != null && tok2.EndToken.IsNewlineAfter)
                {
                    res.EndToken    = tok2.EndToken;
                    res.IsExistName = true;
                    if (res.Typ == BlkTyps.Undefined)
                    {
                        BlockLine li2 = Create((res.NumberEnd == null ? null : res.NumberEnd.Next), null);
                        if (li2 != null && ((li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion)))
                        {
                            res.Typ = li2.Typ;
                        }
                        else
                        {
                            res.Typ = BlkTyps.Chapter;
                        }
                    }
                    return(res);
                }
            }
            Pullenti.Ner.Token t1 = res.EndToken;
            if ((((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.'))) && t1.Previous != null)
            {
                t1 = t1.Previous;
                if (t1.IsChar('.'))
                {
                    res.HasContentItemTail = true;
                    for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous)
                    {
                        if (!t1.IsChar('.'))
                        {
                            break;
                        }
                    }
                }
            }
            res.IsAllUpper = true;
            for (; t != null && t.EndChar <= t1.EndChar; t = t.Next)
            {
                if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
                {
                    res.NotWords++;
                }
                else
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsUndefined)
                    {
                        res.NotWords++;
                    }
                    else if (t.LengthChar > 2)
                    {
                        res.Words++;
                    }
                    if (!t.Chars.IsAllUpper)
                    {
                        res.IsAllUpper = false;
                    }
                    if ((t as Pullenti.Ner.TextToken).IsPureVerb)
                    {
                        if (!(t as Pullenti.Ner.TextToken).Term.EndsWith("ING"))
                        {
                            res.HasVerb = true;
                        }
                    }
                }
            }
            if (res.Typ == BlkTyps.Undefined)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ"))
                    {
                        bool ok = true;
                        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                        {
                            if (tt.IsChar('.'))
                            {
                                continue;
                            }
                            Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt2 == null || !npt2.Morph.Case.IsGenitive)
                            {
                                ok = false;
                                break;
                            }
                            tt = npt2.EndToken;
                            if (tt.EndChar > res.EndChar)
                            {
                                res.EndToken = tt;
                                if (!tt.IsNewlineAfter)
                                {
                                    for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                    {
                                        if (res.EndToken.IsNewlineAfter)
                                        {
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                        if (ok)
                        {
                            res.Typ         = BlkTyps.Intro;
                            res.IsExistName = true;
                        }
                    }
                    else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                    {
                        bool ok = true;
                        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                        {
                            if (tt.IsCharOf(",.") || tt.IsAnd)
                            {
                                continue;
                            }
                            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt1 != null)
                            {
                                if (npt1.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") || npt1.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") || npt1.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                                {
                                    tt = npt1.EndToken;
                                    if (tt.EndChar > res.EndChar)
                                    {
                                        res.EndToken = tt;
                                        if (!tt.IsNewlineAfter)
                                        {
                                            for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                            {
                                                if (res.EndToken.IsNewlineAfter)
                                                {
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    continue;
                                }
                            }
                            ok = false;
                            break;
                        }
                        if (ok)
                        {
                            res.Typ         = BlkTyps.Conslusion;
                            res.IsExistName = true;
                        }
                    }
                    if (res.Typ == BlkTyps.Undefined && npt != null && npt.EndChar <= res.EndChar)
                    {
                        bool ok   = false;
                        int  publ = 0;
                        if (_isPub(npt))
                        {
                            ok   = true;
                            publ = 1;
                        }
                        else if ((npt.Noun.IsValue("СПИСОК", null) || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") || npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
                        {
                            if (npt.EndChar == res.EndChar)
                            {
                                return(null);
                            }
                            ok = true;
                        }
                        if (ok)
                        {
                            if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar)
                            {
                                ok = false;
                            }
                            for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
                            {
                                if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition)
                                {
                                    continue;
                                }
                                if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ"))
                                {
                                    continue;
                                }
                                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                                if (npt == null)
                                {
                                    ok = false;
                                    break;
                                }
                                if (((_isPub(npt) || npt.Noun.IsValue("РАБОТА", "РОБОТА") || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) || npt.Noun.IsValue("АВТОР", null) || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")) || npt.Noun.IsValue("ТЕМА", null) || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ"))
                                {
                                    tt = npt.EndToken;
                                    if (_isPub(npt))
                                    {
                                        publ++;
                                    }
                                    if (tt.EndChar > res.EndChar)
                                    {
                                        res.EndToken = tt;
                                        if (!tt.IsNewlineAfter)
                                        {
                                            for (; res.EndToken.Next != null; res.EndToken = res.EndToken.Next)
                                            {
                                                if (res.EndToken.IsNewlineAfter)
                                                {
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    continue;
                                }
                                ok = false;
                                break;
                            }
                            if (ok)
                            {
                                res.Typ         = BlkTyps.Literature;
                                res.IsExistName = true;
                                if (publ == 0 && (res.EndChar < (((res.Kit.Sofa.Text.Length * 2) / 3))))
                                {
                                    if (res.NumberEnd != null)
                                    {
                                        res.Typ = BlkTyps.Misc;
                                    }
                                    else
                                    {
                                        res.Typ = BlkTyps.Undefined;
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(res);
        }
Пример #26
0
 /// <summary>
 /// Удалить элементы, не соответствующие другой морфологической коллекции
 /// </summary>
 public void RemoveItemsEx(MorphCollection col, Pullenti.Morph.MorphClass cla)
 {
     this.RemoveItemsListCla(col.Items, cla);
 }
Пример #27
0
        public static OrgItemEponymToken TryAttach(Pullenti.Ner.Token t, bool mustHasPrefix = false)
        {
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                if (t == null)
                {
                    return(null);
                }
                Pullenti.Ner.Referent r1 = t.GetReferent();
                if (r1 != null && r1.TypeName == "DATE")
                {
                    string str = r1.ToString().ToUpper();
                    if ((str == "1 МАЯ" || str == "7 ОКТЯБРЯ" || str == "9 МАЯ") || str == "8 МАРТА")
                    {
                        OrgItemEponymToken dt = new OrgItemEponymToken(t, t)
                        {
                            Eponyms = new List <string>()
                        };
                        dt.Eponyms.Add(str);
                        return(dt);
                    }
                }
                Pullenti.Ner.NumberToken age = Pullenti.Ner.Core.NumberHelper.TryParseAge(t);
                if ((age != null && (((age.EndToken.Next is Pullenti.Ner.TextToken) || (age.EndToken.Next is Pullenti.Ner.ReferentToken))) && (age.WhitespacesAfterCount < 3)) && !age.EndToken.Next.Chars.IsAllLower && age.EndToken.Next.Chars.IsCyrillicLetter)
                {
                    OrgItemEponymToken dt = new OrgItemEponymToken(t, age.EndToken.Next)
                    {
                        Eponyms = new List <string>()
                    };
                    dt.Eponyms.Add(string.Format("{0} {1}", age.Value, dt.EndToken.GetSourceText().ToUpper()));
                    return(dt);
                }
                return(null);
            }
            Pullenti.Ner.Token t1 = null;
            bool full             = false;
            bool hasName          = false;

            if (tt.Term == "ИМЕНИ" || tt.Term == "ІМЕНІ")
            {
                t1      = t.Next;
                full    = true;
                hasName = true;
            }
            else if (((tt.Term == "ИМ" || tt.Term == "ІМ")) && tt.Next != null)
            {
                if (tt.Next.IsChar('.'))
                {
                    t1   = tt.Next.Next;
                    full = true;
                }
                else if ((tt.Next is Pullenti.Ner.TextToken) && tt.Chars.IsAllLower && !tt.Next.Chars.IsAllLower)
                {
                    t1 = tt.Next;
                }
                hasName = true;
            }
            else if (tt.Previous != null && ((tt.Previous.IsValue("ФОНД", null) || tt.Previous.IsValue("ХРАМ", null) || tt.Previous.IsValue("ЦЕРКОВЬ", "ЦЕРКВА"))))
            {
                if ((!tt.Chars.IsCyrillicLetter || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) || !tt.Chars.IsLetter)
                {
                    return(null);
                }
                if (tt.WhitespacesBeforeCount != 1)
                {
                    return(null);
                }
                if (tt.Chars.IsAllLower)
                {
                    return(null);
                }
                if (tt.Morph.Class.IsAdjective)
                {
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && npt.BeginToken != npt.EndToken)
                    {
                        return(null);
                    }
                }
                OrgItemNameToken na = OrgItemNameToken.TryAttach(tt, null, false, true);
                if (na != null)
                {
                    if (na.IsEmptyWord || na.IsStdName || na.IsStdTail)
                    {
                        return(null);
                    }
                }
                t1 = tt;
            }
            if (t1 == null || ((t1.IsNewlineBefore && !full)))
            {
                return(null);
            }
            if (tt.Previous != null && tt.Previous.Morph.Class.IsPreposition)
            {
                return(null);
            }
            if (mustHasPrefix && !hasName)
            {
                return(null);
            }
            Pullenti.Ner.Referent r = t1.GetReferent();
            if ((r != null && r.TypeName == "DATE" && full) && r.FindSlot("DAY", null, true) != null && r.FindSlot("YEAR", null, true) == null)
            {
                OrgItemEponymToken dt = new OrgItemEponymToken(t, t1)
                {
                    Eponyms = new List <string>()
                };
                dt.Eponyms.Add(r.ToString().ToUpper());
                return(dt);
            }
            bool holy = false;

            if ((t1.IsValue("СВЯТОЙ", null) || t1.IsValue("СВЯТИЙ", null) || t1.IsValue("СВ", null)) || t1.IsValue("СВЯТ", null))
            {
                t1   = t1.Next;
                holy = true;
                if (t1 != null && t1.IsChar('.'))
                {
                    t1 = t1.Next;
                }
            }
            if (t1 == null)
            {
                return(null);
            }
            Pullenti.Morph.MorphClass cl = t1.GetMorphClassInDictionary();
            if (cl.IsNoun || cl.IsAdjective)
            {
                Pullenti.Ner.ReferentToken rt = t1.Kit.ProcessReferent("PERSON", t1);
                if (rt != null && rt.Referent.TypeName == "PERSON" && rt.BeginToken != rt.EndToken)
                {
                    string e = rt.Referent.GetStringValue("LASTNAME");
                    if (e != null)
                    {
                        if (rt.EndToken.IsValue(e, null))
                        {
                            OrgItemEponymToken re = new OrgItemEponymToken(t, rt.EndToken);
                            re.Eponyms.Add(rt.EndToken.GetSourceText());
                            return(re);
                        }
                    }
                }
            }
            Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseAnniversary(t1);
            if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Age)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(nt.EndToken.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null)
                {
                    string             s   = string.Format("{0}-{1} {2}", nt.Value, (t.Kit.BaseLanguage.IsUa ? "РОКІВ" : "ЛЕТ"), Pullenti.Ner.Core.MiscHelper.GetTextValue(npt.BeginToken, npt.EndToken, Pullenti.Ner.Core.GetTextAttr.No));
                    OrgItemEponymToken res = new OrgItemEponymToken(t, npt.EndToken);
                    res.Eponyms.Add(s);
                    return(res);
                }
            }
            List <PersonItemToken> its = PersonItemToken.TryAttach(t1);

            if (its == null)
            {
                if ((t1 is Pullenti.Ner.ReferentToken) && (t1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    string             s  = Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No);
                    OrgItemEponymToken re = new OrgItemEponymToken(t, t1);
                    re.Eponyms.Add(s);
                    return(re);
                }
                return(null);
            }
            List <string> eponims = new List <string>();
            int           i       = 0;
            int           j;

            if (its[i].Typ == PersonItemType.LocaseWord)
            {
                i++;
            }
            if (i >= its.Count)
            {
                return(null);
            }
            if (!full)
            {
                if (its[i].BeginToken.Morph.Class.IsAdjective && !its[i].BeginToken.Morph.Class.IsProperSurname)
                {
                    return(null);
                }
            }
            if (its[i].Typ == PersonItemType.Initial)
            {
                i++;
                while (true)
                {
                    if ((i < its.Count) && its[i].Typ == PersonItemType.Initial)
                    {
                        i++;
                    }
                    if (i >= its.Count || ((its[i].Typ != PersonItemType.Surname && its[i].Typ != PersonItemType.Name)))
                    {
                        break;
                    }
                    eponims.Add(its[i].Value);
                    t1 = its[i].EndToken;
                    if ((i + 2) >= its.Count || its[i + 1].Typ != PersonItemType.And || its[i + 2].Typ != PersonItemType.Initial)
                    {
                        break;
                    }
                    i += 3;
                }
            }
            else if (((i + 1) < its.Count) && its[i].Typ == PersonItemType.Name && its[i + 1].Typ == PersonItemType.Surname)
            {
                eponims.Add(its[i + 1].Value);
                t1 = its[i + 1].EndToken;
                i += 2;
                if ((((i + 2) < its.Count) && its[i].Typ == PersonItemType.And && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
                {
                    eponims.Add(its[i + 2].Value);
                    t1 = its[i + 2].EndToken;
                }
            }
            else if (its[i].Typ == PersonItemType.Surname)
            {
                if (its.Count == (i + 2) && its[i].Chars == its[i + 1].Chars)
                {
                    its[i].Value   += (" " + its[i + 1].Value);
                    its[i].EndToken = its[i + 1].EndToken;
                    its.RemoveAt(i + 1);
                }
                eponims.Add(its[i].Value);
                if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Name)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ != PersonItemType.Surname)
                    {
                        i++;
                    }
                }
                else if (((i + 1) < its.Count) && its[i + 1].Typ == PersonItemType.Initial)
                {
                    if ((i + 2) == its.Count)
                    {
                        i++;
                    }
                    else if (its[i + 2].Typ == PersonItemType.Initial && (i + 3) == its.Count)
                    {
                        i += 2;
                    }
                }
                else if (((i + 2) < its.Count) && its[i + 1].Typ == PersonItemType.And && its[i + 2].Typ == PersonItemType.Surname)
                {
                    bool ok = true;
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(its[i + 2].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                    if (npt != null && !npt.Morph.Case.IsGenitive && !npt.Morph.Case.IsUndefined)
                    {
                        ok = false;
                    }
                    if (ok)
                    {
                        eponims.Add(its[i + 2].Value);
                        i += 2;
                    }
                }
                t1 = its[i].EndToken;
            }
            else if (its[i].Typ == PersonItemType.Name && holy)
            {
                t1 = its[i].EndToken;
                bool sec = false;
                if (((i + 1) < its.Count) && its[i].Chars == its[i + 1].Chars && its[i + 1].Typ != PersonItemType.Initial)
                {
                    sec = true;
                    t1  = its[i + 1].EndToken;
                }
                if (sec)
                {
                    eponims.Add(string.Format("СВЯТ.{0} {1}", its[i].Value, its[i + 1].Value));
                }
                else
                {
                    eponims.Add(string.Format("СВЯТ.{0}", its[i].Value));
                }
            }
            else if (full && (i + 1) == its.Count && ((its[i].Typ == PersonItemType.Name || its[i].Typ == PersonItemType.Surname)))
            {
                t1 = its[i].EndToken;
                eponims.Add(its[i].Value);
            }
            else if ((its[i].Typ == PersonItemType.Name && its.Count == 3 && its[i + 1].Typ == PersonItemType.Name) && its[i + 2].Typ == PersonItemType.Surname)
            {
                t1 = its[i + 2].EndToken;
                eponims.Add(string.Format("{0} {1} {2}", its[i].Value, its[i + 1].Value, its[i + 2].Value));
                i += 2;
            }
            if (eponims.Count == 0)
            {
                return(null);
            }
            return(new OrgItemEponymToken(t, t1)
            {
                Eponyms = eponims
            });
        }
Пример #28
0
        public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
        {
            if (sofa == null)
            {
                return;
            }
            m_Sofa    = sofa;
            StartDate = DateTime.Now;
            List <Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);

            Pullenti.Ner.Token t0 = null;
            if (tokens != null)
            {
                for (int ii = 0; ii < tokens.Count; ii++)
                {
                    Pullenti.Morph.MorphToken mt = tokens[ii];
                    if (mt.BeginChar == 733860)
                    {
                    }
                    Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(mt, this);
                    if (sofa.CorrectionDict != null)
                    {
                        string corw;
                        if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                        {
                            List <Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                            if (ccc != null && ccc.Count == 1)
                            {
                                Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar)
                                {
                                    Term0 = tt.Term
                                };
                                tt1.Chars = tt.Chars;
                                tt        = tt1;
                                if (CorrectedTokens == null)
                                {
                                    CorrectedTokens = new Dictionary <Pullenti.Ner.Token, string>();
                                }
                                CorrectedTokens.Add(tt, tt.GetSourceText());
                            }
                        }
                    }
                    if (t0 == null)
                    {
                        FirstToken = tt;
                    }
                    else
                    {
                        t0.Next = tt;
                    }
                    t0 = tt;
                }
            }
            if (sofa.ClearDust)
            {
                this.ClearDust();
            }
            if (sofa.DoWordsMergingByMorph)
            {
                this.CorrectWordsByMerging(lang);
            }
            if (sofa.DoWordCorrectionByMorph)
            {
                this.CorrectWordsByMorph(lang);
            }
            this.MergeLetters();
            this.DefineBaseLanguage();
            if (sofa.CreateNumberTokens)
            {
                for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
                {
                    Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
                    if (nt == null)
                    {
                        continue;
                    }
                    this.EmbedToken(nt);
                    t = nt;
                }
            }
            if (onlyTokenizing)
            {
                return;
            }
            for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
            {
                if (t.Morph.Class.IsPreposition)
                {
                    continue;
                }
                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                if (mc.IsUndefined && t.Chars.IsCyrillicLetter && t.LengthChar > 4)
                {
                    string             tail = sofa.Text.Substring(t.EndChar - 1, 2);
                    Pullenti.Ner.Token tte  = null;
                    Pullenti.Ner.Token tt   = t.Previous;
                    if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)))
                    {
                        tt = tt.Previous;
                    }
                    if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined&& ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4)
                    {
                        string tail2 = sofa.Text.Substring(tt.EndChar - 1, 2);
                        if (tail2 == tail)
                        {
                            tte = tt;
                        }
                    }
                    if (tte == null)
                    {
                        tt = t.Next;
                        if (tt != null && ((tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction)))
                        {
                            tt = tt.Next;
                        }
                        if ((tt != null && !tt.GetMorphClassInDictionary().IsUndefined&& ((tt.Morph.Class.Value & t.Morph.Class.Value)) != 0) && tt.LengthChar > 4)
                        {
                            string tail2 = sofa.Text.Substring(tt.EndChar - 1, 2);
                            if (tail2 == tail)
                            {
                                tte = tt;
                            }
                        }
                    }
                    if (tte != null)
                    {
                        t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
                    }
                }
                continue;
            }
            this.CreateStatistics();
        }
Пример #29
0
        public static UnitToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, UnitToken prev, bool parseUnknownUnits = false)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            int  pow   = 1;
            bool isNeg = false;

            if ((t.IsCharOf("\\/") || t.IsValue("НА", null) || t.IsValue("OF", null)) || t.IsValue("PER", null))
            {
                isNeg = true;
                t     = t.Next;
            }
            else if (t.IsValue("В", null) && prev != null)
            {
                isNeg = true;
                t     = t.Next;
            }
            else if (MeasureHelper.IsMultChar(t))
            {
                t = t.Next;
            }
            Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
            if (tt == null)
            {
                return(null);
            }
            if (tt.Term == "КВ" || tt.Term == "КВАДР" || tt.IsValue("КВАДРАТНЫЙ", null))
            {
                pow = 2;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "КУБ" || tt.Term == "КУБИЧ" || tt.IsValue("КУБИЧЕСКИЙ", null))
            {
                pow = 3;
                tt  = tt.Next as Pullenti.Ner.TextToken;
                if (tt != null && tt.IsChar('.'))
                {
                    tt = tt.Next as Pullenti.Ner.TextToken;
                }
                if (tt == null)
                {
                    return(null);
                }
            }
            else if (tt.Term == "µ")
            {
                UnitToken res = TryParse(tt.Next, addUnits, prev, false);
                if (res != null)
                {
                    foreach (Unit u in UnitsHelper.Units)
                    {
                        if (u.Factor == UnitsFactors.Micro && string.Compare("мк" + u.NameCyr, res.Unit.NameCyr, true) == 0)
                        {
                            res.Unit       = u;
                            res.BeginToken = tt;
                            res.Pow        = pow;
                            if (isNeg)
                            {
                                res.Pow = -pow;
                            }
                            return(res);
                        }
                    }
                }
            }
            List <Pullenti.Ner.Core.TerminToken> toks = UnitsHelper.Termins.TryParseAll(tt, Pullenti.Ner.Core.TerminParseAttr.No);

            if (toks != null)
            {
                if ((prev != null && tt == t0 && toks.Count == 1) && t.IsWhitespaceBefore)
                {
                    return(null);
                }
                if (toks[0].BeginToken == toks[0].EndToken && tt.Morph.Class.IsPreposition && (tt.WhitespacesAfterCount < 3))
                {
                    if (Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null) != null)
                    {
                        return(null);
                    }
                    if (tt.Next is Pullenti.Ner.NumberToken)
                    {
                        if ((tt.Next as Pullenti.Ner.NumberToken).Typ != Pullenti.Ner.NumberSpellingType.Digit)
                        {
                            return(null);
                        }
                    }
                    UnitToken nex = TryParse(tt.Next, addUnits, null, false);
                    if (nex != null)
                    {
                        return(null);
                    }
                }
                if (toks[0].BeginToken == toks[0].EndToken && ((toks[0].BeginToken.IsValue("М", null) || toks[0].BeginToken.IsValue("M", null))) && toks[0].BeginToken.Chars.IsAllLower)
                {
                    if (prev != null && prev.Unit != null && prev.Unit.Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        UnitToken res = new UnitToken(t0, toks[0].EndToken)
                        {
                            Unit = UnitsHelper.uMinute
                        };
                        res.Pow = pow;
                        if (isNeg)
                        {
                            res.Pow = -pow;
                        }
                        return(res);
                    }
                }
                List <UnitToken> uts = new List <UnitToken>();
                foreach (Pullenti.Ner.Core.TerminToken tok in toks)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        Unit = tok.Termin.Tag as Unit
                    };
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    if (res.Unit.BaseMultiplier == 1000000 && (t0 is Pullenti.Ner.TextToken) && char.IsLower((t0 as Pullenti.Ner.TextToken).GetSourceText()[0]))
                    {
                        foreach (Unit u in UnitsHelper.Units)
                        {
                            if (u.Factor == UnitsFactors.Milli && string.Compare(u.NameCyr, res.Unit.NameCyr, true) == 0)
                            {
                                res.Unit = u;
                                break;
                            }
                        }
                    }
                    res._correct();
                    res._checkDoubt();
                    uts.Add(res);
                }
                int       max  = 0;
                UnitToken best = null;
                foreach (UnitToken ut in uts)
                {
                    if (ut.Keyword != null)
                    {
                        if (ut.Keyword.BeginChar >= max)
                        {
                            max  = ut.Keyword.BeginChar;
                            best = ut;
                        }
                    }
                }
                if (best != null)
                {
                    return(best);
                }
                foreach (UnitToken ut in uts)
                {
                    if (!ut.IsDoubt)
                    {
                        return(ut);
                    }
                }
                return(uts[0]);
            }
            Pullenti.Ner.Token t1 = null;
            if (t.IsCharOf("º°"))
            {
                t1 = t;
            }
            else if ((t.IsChar('<') && t.Next != null && t.Next.Next != null) && t.Next.Next.IsChar('>') && ((t.Next.IsValue("О", null) || t.Next.IsValue("O", null) || (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).Value == "0")))))
            {
                t1 = t.Next.Next;
            }
            if (t1 != null)
            {
                UnitToken res = new UnitToken(t0, t1)
                {
                    Unit = UnitsHelper.uGradus
                };
                res._checkDoubt();
                t = t1.Next;
                if (t != null && t.IsComma)
                {
                    t = t.Next;
                }
                if (t != null && t.IsValue("ПО", null))
                {
                    t = t.Next;
                }
                if (t is Pullenti.Ner.TextToken)
                {
                    string vv = (t as Pullenti.Ner.TextToken).Term;
                    if (vv == "C" || vv == "С" || vv.StartsWith("ЦЕЛЬС"))
                    {
                        res.Unit     = UnitsHelper.uGradusC;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                    if (vv == "F" || vv.StartsWith("ФАР"))
                    {
                        res.Unit     = UnitsHelper.uGradusF;
                        res.IsDoubt  = false;
                        res.EndToken = t;
                    }
                }
                return(res);
            }
            if ((t is Pullenti.Ner.TextToken) && ((t.IsValue("ОС", null) || t.IsValue("OC", null))))
            {
                string str = t.GetSourceText();
                if (str == "оС" || str == "oC")
                {
                    UnitToken res = new UnitToken(t, t)
                    {
                        Unit = UnitsHelper.uGradusC, IsDoubt = false
                    };
                    return(res);
                }
            }
            if (t.IsChar('%'))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && tt1.IsChar('('))
                {
                    tt1 = tt1.Next;
                }
                if ((tt1 is Pullenti.Ner.TextToken) && (tt1 as Pullenti.Ner.TextToken).Term.StartsWith("ОБ"))
                {
                    UnitToken re = new UnitToken(t, tt1)
                    {
                        Unit = UnitsHelper.uAlco
                    };
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar('.'))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    if (re.EndToken.Next != null && re.EndToken.Next.IsChar(')') && t.Next.IsChar('('))
                    {
                        re.EndToken = re.EndToken.Next;
                    }
                    return(re);
                }
                return(new UnitToken(t, t)
                {
                    Unit = UnitsHelper.uPercent
                });
            }
            if (addUnits != null)
            {
                Pullenti.Ner.Core.TerminToken tok = addUnits.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok != null)
                {
                    UnitToken res = new UnitToken(t0, tok.EndToken)
                    {
                        ExtOnto = tok.Termin.Tag as Pullenti.Ner.Measure.UnitReferent
                    };
                    if (tok.EndToken.Next != null && tok.EndToken.Next.IsChar('.'))
                    {
                        tok.EndToken = tok.EndToken.Next;
                    }
                    res.Pow = pow;
                    if (isNeg)
                    {
                        res.Pow = -pow;
                    }
                    res._correct();
                    return(res);
                }
            }
            if (!parseUnknownUnits)
            {
                return(null);
            }
            if ((t.WhitespacesBeforeCount > 2 || !t.Chars.IsLetter || t.LengthChar > 5) || !(t is Pullenti.Ner.TextToken))
            {
                return(null);
            }
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return(null);
            }
            t1 = t;
            if (t.Next != null && t.Next.IsChar('.'))
            {
                t1 = t;
            }
            bool ok = false;

            if (t1.Next == null || t1.WhitespacesAfterCount > 2)
            {
                ok = true;
            }
            else if (t1.Next.IsComma || t1.Next.IsCharOf("\\/") || t1.Next.IsTableControlChar)
            {
                ok = true;
            }
            else if (MeasureHelper.IsMultChar(t1.Next))
            {
                ok = true;
            }
            if (!ok)
            {
                return(null);
            }
            Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
            if (mc.IsUndefined)
            {
            }
            else if (t.LengthChar > 7)
            {
                return(null);
            }
            UnitToken res1 = new UnitToken(t0, t1)
            {
                Pow = pow, IsDoubt = true
            };

            res1.UnknownName = (t as Pullenti.Ner.TextToken).GetSourceText();
            res1._correct();
            return(res1);
        }
Пример #30
0
 public bool Check(Pullenti.Morph.MorphClass cl)
 {
     return(((Class.Value & cl.Value)) != 0);
 }