/// <summary>
        /// Writes a single morphological item to <paramref name="stream"/>.
        /// The record starts with a type byte (1 for a <c>MorphWordForm</c>, 0 otherwise),
        /// followed by class, case, gender, number and language as shorts. Word forms
        /// additionally carry both normal forms, the undefined coefficient and the list of
        /// misc attributes prefixed by its length.
        /// </summary>
        /// <param name="stream">Destination stream.</param>
        /// <param name="bi">Item to write.</param>
        void SerializeItem(Stream stream, Pullenti.Morph.MorphBaseInfo bi)
        {
            // The cast result decides both the type marker and whether the word-form tail is written.
            Pullenti.Morph.MorphWordForm wordForm = bi as Pullenti.Morph.MorphWordForm;
            stream.WriteByte(wordForm != null ? (byte)1 : (byte)0);

            // Part shared by every item, in the order DeserializeItem reads it back.
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, bi.Class.Value);
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, bi.Case.Value);
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, (short)bi.Gender);
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, (short)bi.Number);
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, bi.Language.Value);
            if (wordForm == null)
            {
                return;
            }

            // Word-form specific tail.
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeString(stream, wordForm.NormalCase);
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeString(stream, wordForm.NormalFull);
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeShort(stream, wordForm.UndefCoef);
            if (wordForm.Misc == null)
            {
                // No misc info: an attribute count of zero is written and nothing follows.
                Pullenti.Ner.Core.Internal.SerializerHelper.SerializeInt(stream, 0);
                return;
            }
            Pullenti.Ner.Core.Internal.SerializerHelper.SerializeInt(stream, wordForm.Misc.Attrs.Count);
            foreach (string attr in wordForm.Misc.Attrs)
            {
                Pullenti.Ner.Core.Internal.SerializerHelper.SerializeString(stream, attr);
            }
        }
示例#2
0
 /// <summary>
 /// Creates a text variant, optionally initialised from morphological info and/or a token.
 /// </summary>
 /// <param name="src">Morphological info copied into this instance; when it is a word form,
 /// its normal forms and undefined coefficient are taken as well.</param>
 /// <param name="t">Token used to compute the normal value when <paramref name="src"/> is not a word form.</param>
 public NounPhraseItemTextVar(Pullenti.Morph.MorphBaseInfo src = null, Pullenti.Ner.Token t = null) : base()
 {
     Pullenti.Morph.MorphWordForm wordForm = src as Pullenti.Morph.MorphWordForm;
     if (src != null)
     {
         this.CopyFrom(src);
     }

     if (wordForm == null)
     {
         // No word form available: fall back to the token's own normal-case text.
         if (t != null)
         {
             NormalValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         }
     }
     else
     {
         NormalValue = wordForm.NormalCase;
         bool isPlural = wordForm.Number == Pullenti.Morph.MorphNumber.Plural;
         if (isPlural && wordForm.NormalFull != null)
         {
             SingleNumberValue = wordForm.NormalFull;
         }
         UndefCoef = wordForm.UndefCoef;
     }

     // An item with no case that carries the "неизм." attribute (presumably "indeclinable" -
     // TODO confirm) is treated as matching every case.
     if (Case.IsUndefined && src != null && src.ContainsAttr("неизм.", null))
     {
         Case = Pullenti.Morph.MorphCase.AllCases;
     }
 }
示例#3
0
 /// <summary>
 /// Copies the normal forms, number, gender and misc info of a word form into
 /// <c>obj.Morph</c>. Does nothing when <paramref name="wf"/> is null.
 /// </summary>
 /// <param name="obj">Semantic object whose morphology is filled in.</param>
 /// <param name="wf">Source word form; may be null.</param>
 public static void _setMorph(Pullenti.Semantic.SemObject obj, Pullenti.Morph.MorphWordForm wf)
 {
     if (wf != null)
     {
         string normalCase = wf.NormalCase;
         obj.Morph.NormalCase = normalCase;
         // The full normal form falls back to the case-normal form when it is absent.
         obj.Morph.NormalFull = wf.NormalFull ?? normalCase;
         obj.Morph.Number = wf.Number;
         obj.Morph.Gender = wf.Gender;
         obj.Morph.Misc = wf.Misc;
     }
 }
示例#4
0
 /// <summary>
 /// Checks whether a verb word form is marked as, or looks like, a reflexive verb
 /// (the method name and the "возвр." attribute suggest "reflexive" - confirm against the dictionary).
 /// </summary>
 /// <param name="vf">Word form to inspect; null yields false.</param>
 /// <returns>
 /// true when the misc attributes contain "возвр." or the normal form ends with "СЯ" or "СЬ";
 /// false otherwise.
 /// </returns>
 static bool _isRevVerb(Pullenti.Morph.MorphWordForm vf)
 {
     if (vf == null)
     {
         return false;
     }
     // Misc is optional on a word form, so it must be null-checked before looking at its attributes.
     if (vf.Misc != null && vf.Misc.Attrs.Contains("возвр."))
     {
         return true;
     }
     string normal = vf.NormalCase;
     if (normal == null)
     {
         return false;
     }
     // Ordinal comparison: these are fixed suffixes of normalised upper-case terms,
     // so the result must not depend on the current culture.
     return normal.EndsWith("СЯ", System.StringComparison.Ordinal)
         || normal.EndsWith("СЬ", System.StringComparison.Ordinal);
 }
示例#5
0
        /// <summary>
        /// Computes the coefficient of this link treated as a verb actant, stores it in
        /// <c>Coef</c> (except on the missing-verb-morphology exit) and returns it.
        /// </summary>
        /// <returns>
        /// -1 when the link can be a participle or the verb has no morphology; 0 when there is
        /// no preposition value or the source has no case; the configured <c>NextModel</c> value
        /// (reduced for an empty preposition) when a derivate group allows the preposition with a
        /// compatible case; 0.1 otherwise.
        /// </returns>
        double _calcActant()
        {
            if (CanBeParticiple)
            {
                Coef = -1;
                return -1;
            }
            Pullenti.Morph.MorphWordForm verbForm = ToVerb.LastVerb.VerbMorph;
            if (verbForm == null)
            {
                // NOTE(review): unlike every other exit, this one leaves Coef untouched - confirm that is intended.
                return -1;
            }
            if (FromPrep == null)
            {
                Coef = 0;
                return 0;
            }

            Pullenti.Ner.MorphCollection sourceMorph = From.Source.Source.Morph;
            string verbNormal = verbForm.NormalFull ?? verbForm.NormalCase;
            List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(verbNormal, true, null);
            if (groups == null)
            {
                Coef = 0.1;
                return 0.1;
            }

            foreach (Pullenti.Semantic.Utils.DerivateGroup group in groups)
            {
                // Only groups whose control model lists this preposition are of interest.
                bool listsPreposition = group.Cm.Nexts != null && group.Cm.Nexts.ContainsKey(FromPrep);
                if (!listsPreposition)
                {
                    continue;
                }
                Pullenti.Morph.MorphCase allowedCase = group.Cm.Nexts[FromPrep];
                if (((allowedCase & sourceMorph.Case)).IsUndefined)
                {
                    // Cases do not intersect; with no case on the source at all the answer is a plain zero.
                    if (sourceMorph.Case.IsUndefined)
                    {
                        Coef = 0;
                        return 0;
                    }
                    continue;
                }

                Coef = Pullenti.Semantic.SemanticService.Params.NextModel;
                if (string.IsNullOrEmpty(FromPrep))
                {
                    // Without a real preposition the value is halved, and halved once more for nominative.
                    if (sourceMorph.Case.IsNominative)
                    {
                        Coef /= 2;
                    }
                    Coef /= 2;
                }
                return Coef;
            }

            Coef = 0.1;
            return 0.1;
        }
        /// <summary>
        /// Finds an item of the collection compatible with the requested case, number and gender.
        /// </summary>
        /// <param name="cas">Required case; an undefined case disables this filter.</param>
        /// <param name="num">Required number; <c>Undefined</c> disables this filter.</param>
        /// <param name="gen">Required gender; <c>Undefined</c> disables this filter.</param>
        /// <returns>
        /// The first matching item that is not a word form with a positive <c>UndefCoef</c>;
        /// if every match is such a word form, the one with the largest <c>UndefCoef</c>;
        /// null when nothing matches or the collection has no item list.
        /// </returns>
        public Pullenti.Morph.MorphBaseInfo FindItem(Pullenti.Morph.MorphCase cas, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gen = Pullenti.Morph.MorphGender.Undefined)
        {
            if (m_Items == null)
            {
                return null;
            }

            Pullenti.Morph.MorphBaseInfo fallback = null;
            int fallbackCoef = 0;
            foreach (Pullenti.Morph.MorphBaseInfo candidate in m_Items)
            {
                // Each filter applies only when the caller actually asked for that feature.
                if (!cas.IsUndefined && ((candidate.Case & cas)).IsUndefined)
                {
                    continue;
                }
                if (num != Pullenti.Morph.MorphNumber.Undefined && ((num & candidate.Number)) == Pullenti.Morph.MorphNumber.Undefined)
                {
                    continue;
                }
                if (gen != Pullenti.Morph.MorphGender.Undefined && ((gen & candidate.Gender)) == Pullenti.Morph.MorphGender.Undefined)
                {
                    continue;
                }

                Pullenti.Morph.MorphWordForm wordForm = candidate as Pullenti.Morph.MorphWordForm;
                if (wordForm == null || wordForm.UndefCoef <= 0)
                {
                    // Anything without a positive UndefCoef wins immediately.
                    return candidate;
                }
                if (wordForm.UndefCoef > fallbackCoef)
                {
                    fallbackCoef = wordForm.UndefCoef;
                    fallback = candidate;
                }
            }
            return fallback;
        }
 /// <summary>
 /// Creates a collection, optionally as a copy of <paramref name="source"/>: every item is
 /// cloned (word forms as word forms, everything else as plain base info) and the cached
 /// class, gender, case, number, language and voice are copied over.
 /// </summary>
 /// <param name="source">Collection to copy from; null produces an empty collection.</param>
 public MorphCollection(MorphCollection source = null)
 {
     if (source != null)
     {
         foreach (Pullenti.Morph.MorphBaseInfo original in source.Items)
         {
             // The item list is only allocated once there is something to put in it.
             if (m_Items == null)
             {
                 m_Items = new List<Pullenti.Morph.MorphBaseInfo>();
             }
             Pullenti.Morph.MorphWordForm originalWordForm = original as Pullenti.Morph.MorphWordForm;
             if (originalWordForm != null)
             {
                 Pullenti.Morph.MorphWordForm wordFormCopy = new Pullenti.Morph.MorphWordForm();
                 wordFormCopy.CopyFromWordForm(originalWordForm);
                 m_Items.Add(wordFormCopy);
             }
             else
             {
                 Pullenti.Morph.MorphBaseInfo baseCopy = new Pullenti.Morph.MorphBaseInfo();
                 baseCopy.CopyFrom(original);
                 m_Items.Add(baseCopy);
             }
         }

         // Class, case and language get fresh wrapper objects; only their Value is copied from the source.
         m_Class = new Pullenti.Morph.MorphClass() { Value = source.m_Class.Value };
         m_Gender = source.m_Gender;
         m_Case = new Pullenti.Morph.MorphCase() { Value = source.m_Case.Value };
         m_Number = source.m_Number;
         m_Language = new Pullenti.Morph.MorphLang() { Value = source.m_Language.Value };
         m_Voice = source.m_Voice;
         // The cached values were copied as they are, so the recalculation flag is cleared.
         m_NeedRecalc = false;
     }
 }
        /// <summary>
        /// Reads one morphological item from <paramref name="stream"/>.
        /// A leading type byte of 0 produces a plain <c>MorphBaseInfo</c>; any other value produces
        /// a <c>MorphWordForm</c>, for which the normal forms, the undefined coefficient and the
        /// misc attributes are read after the common fields.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of an item record.</param>
        /// <returns>The reconstructed item.</returns>
        Pullenti.Morph.MorphBaseInfo DeserializeItem(Stream stream)
        {
            int kind = stream.ReadByte();

            Pullenti.Morph.MorphWordForm wordForm = null;
            Pullenti.Morph.MorphBaseInfo res;
            if (kind == 0)
            {
                res = new Pullenti.Morph.MorphBaseInfo();
            }
            else
            {
                wordForm = new Pullenti.Morph.MorphWordForm();
                res = wordForm;
            }

            // Common fields: class, case, gender, number, language - each stored as a short.
            Pullenti.Morph.MorphClass cls = new Pullenti.Morph.MorphClass();
            cls.Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
            res.Class = cls;

            Pullenti.Morph.MorphCase cas = new Pullenti.Morph.MorphCase();
            cas.Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
            res.Case = cas;

            res.Gender = (Pullenti.Morph.MorphGender)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
            res.Number = (Pullenti.Morph.MorphNumber)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);

            Pullenti.Morph.MorphLang lang = new Pullenti.Morph.MorphLang();
            lang.Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
            res.Language = lang;

            if (wordForm == null)
            {
                return res;
            }

            // Word-form tail.
            wordForm.NormalCase = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
            wordForm.NormalFull = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
            wordForm.UndefCoef = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);

            int attrCount = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
            // The misc container is created only when at least one attribute follows.
            if (attrCount > 0 && wordForm.Misc == null)
            {
                wordForm.Misc = new Pullenti.Morph.MorphMiscInfo();
            }
            for (int read = 0; read < attrCount; read++)
            {
                wordForm.Misc.Attrs.Add(Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream));
            }
            return res;
        }
 /// <summary>
 /// Removes the items that do not match the given case. Items that match only partially are
 /// replaced by copies whose case is narrowed to the common part.
 /// </summary>
 /// <param name="cas">Case mask the remaining items must intersect with.</param>
 public void RemoveItems(Pullenti.Morph.MorphCase cas)
 {
     if (m_Items == null)
     {
         return;
     }
     if (m_Items.Count == 0)
     {
         // Nothing to filter: only the collection's own cached case is narrowed.
         m_Case &= cas;
     }

     // Walk backwards so that removals do not shift the indices still to be visited.
     int index = m_Items.Count;
     while (--index >= 0)
     {
         Pullenti.Morph.MorphBaseInfo current = m_Items[index];
         Pullenti.Morph.MorphCase common = current.Case & cas;
         if (common.IsUndefined)
         {
             m_Items.RemoveAt(index);
             m_NeedRecalc = true;
             continue;
         }
         if (common != current.Case)
         {
             // The stored item is left untouched; a narrowed copy takes its slot.
             Pullenti.Morph.MorphWordForm currentWordForm = current as Pullenti.Morph.MorphWordForm;
             if (currentWordForm != null)
             {
                 Pullenti.Morph.MorphWordForm narrowedWordForm = new Pullenti.Morph.MorphWordForm();
                 narrowedWordForm.CopyFromWordForm(currentWordForm);
                 narrowedWordForm.Case &= cas;
                 m_Items[index] = narrowedWordForm;
             }
             else
             {
                 Pullenti.Morph.MorphBaseInfo narrowedInfo = new Pullenti.Morph.MorphBaseInfo();
                 narrowedInfo.CopyFrom(current);
                 narrowedInfo.Case &= cas;
                 m_Items[index] = narrowedInfo;
             }
             m_NeedRecalc = true;
         }
     }
     // The flag is raised unconditionally once the pass is over.
     m_NeedRecalc = true;
 }
示例#10
0
        /// <summary>
        /// Tries to bind this token to a dictionary entry: first by <c>Term</c>, then by the
        /// <c>NormalCase</c> and <c>NormalFull</c> of each morphological word form, in that order.
        /// </summary>
        /// <param name="dict">Dictionary to look in; null yields null.</param>
        /// <returns>The value of the first key found, or null when no key matches.</returns>
        public object CheckValue(Dictionary<string, object> dict)
        {
            if (dict == null)
            {
                return null;
            }

            object found;
            if (dict.TryGetValue(Term, out found))
            {
                return found;
            }
            if (Morph == null)
            {
                return null;
            }

            foreach (Pullenti.Morph.MorphBaseInfo item in Morph.Items)
            {
                Pullenti.Morph.MorphWordForm wordForm = item as Pullenti.Morph.MorphWordForm;
                if (wordForm == null)
                {
                    continue;
                }
                string normalCase = wordForm.NormalCase;
                if (normalCase != null && dict.TryGetValue(normalCase, out found))
                {
                    return found;
                }
                // The full form is only worth a second lookup when it differs from the one just tried.
                string normalFull = wordForm.NormalFull;
                if (normalFull != null && normalCase != normalFull && dict.TryGetValue(normalFull, out found))
                {
                    return found;
                }
            }
            return null;
        }
示例#11
0
        /// <summary>
        /// Registers one occurrence of a text token in the word statistics.
        /// All spelling variants of the token (its term, its absolute normal value and the normal
        /// forms of its word forms) are mapped to a single shared <c>StatisticWordInfo</c>, which
        /// is created on first sight with the token's lemma as its normal form.
        /// </summary>
        /// <param name="tt">Token being counted.</param>
        /// <returns>The statistics record the token was counted in.</returns>
        StatisticWordInfo AddToken(Pullenti.Ner.TextToken tt)
        {
            // Collect the distinct variants; order matters because the first known variant wins below.
            List<string> vars = new List<string>();
            vars.Add(tt.Term);
            string s = MiscHelper.GetAbsoluteNormalValue(tt.Term, false);
            if (s != null && !vars.Contains(s))
            {
                vars.Add(s);
            }
            foreach (Pullenti.Morph.MorphBaseInfo wff in tt.Morph.Items)
            {
                Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                if (wf == null)
                {
                    continue;
                }
                if (wf.NormalCase != null && !vars.Contains(wf.NormalCase))
                {
                    vars.Add(wf.NormalCase);
                }
                if (wf.NormalFull != null && !vars.Contains(wf.NormalFull))
                {
                    vars.Add(wf.NormalFull);
                }
            }

            // Reuse the record of the first variant already known; TryGetValue leaves res null on a miss.
            StatisticWordInfo res = null;
            foreach (string v in vars)
            {
                if (m_Items.TryGetValue(v, out res))
                {
                    break;
                }
            }
            if (res == null)
            {
                // The assignment is terminated inside the block (the previous layout left the
                // statement without its semicolon, which does not compile).
                res = new StatisticWordInfo()
                {
                    Normal = tt.Lemma
                };
            }

            // Make every variant resolve to the same record.
            foreach (string v in vars)
            {
                if (!m_Items.ContainsKey(v))
                {
                    m_Items.Add(v, res);
                }
            }
            res.TotalCount++;

            // Gender of a lower-case Cyrillic verb right after the word.
            if ((tt.Next is Pullenti.Ner.TextToken) && tt.Next.Chars.IsAllLower)
            {
                if (tt.Next.Chars.IsCyrillicLetter && tt.Next.GetMorphClassInDictionary().IsVerb)
                {
                    Pullenti.Morph.MorphGender g = tt.Next.Morph.Gender;
                    if (g == Pullenti.Morph.MorphGender.Feminie)
                    {
                        res.FemaleVerbsAfterCount++;
                    }
                    else if (((g & Pullenti.Morph.MorphGender.Masculine)) != Pullenti.Morph.MorphGender.Undefined)
                    {
                        res.MaleVerbsAfterCount++;
                    }
                }
            }

            // Count occurrences that are NOT preceded by a letter token with some upper-case characters.
            if (tt.Previous != null)
            {
                bool capitalizedWordBefore = (tt.Previous is Pullenti.Ner.TextToken) && tt.Previous.Chars.IsLetter && !tt.Previous.Chars.IsAllLower;
                if (!capitalizedWordBefore)
                {
                    res.NotCapitalBeforeCount++;
                }
            }
            return res;
        }

        // Word statistics keyed by spelling variant; AddToken maps every variant of a word
        // (term, absolute normal value, normal forms) to the same StatisticWordInfo instance.
        Dictionary <string, StatisticWordInfo> m_Items = new Dictionary <string, StatisticWordInfo>();
        /// <summary>
        /// Looks up the statistics record of a token: by its term, then (optionally) by its
        /// absolute normal value, then by the normal forms of its morphological word forms.
        /// </summary>
        /// <param name="tt">Token to look up; null yields null.</param>
        /// <param name="doAbsolute">When true, the absolute normal value of the term is tried as a key as well.</param>
        /// <returns>The record found, or null when no variant is known.</returns>
        StatisticWordInfo FindItem(Pullenti.Ner.TextToken tt, bool doAbsolute = true)
        {
            if (tt == null)
            {
                return null;
            }

            StatisticWordInfo found;
            if (m_Items.TryGetValue(tt.Term, out found))
            {
                return found;
            }

            string absolute = doAbsolute ? MiscHelper.GetAbsoluteNormalValue(tt.Term, false) : null;
            if (absolute != null && m_Items.TryGetValue(absolute, out found))
            {
                return found;
            }

            foreach (Pullenti.Morph.MorphBaseInfo item in tt.Morph.Items)
            {
                Pullenti.Morph.MorphWordForm wordForm = item as Pullenti.Morph.MorphWordForm;
                if (wordForm == null)
                {
                    continue;
                }
                // A missing NormalCase is looked up as the empty string.
                string normalCase = wordForm.NormalCase ?? "";
                if (m_Items.TryGetValue(normalCase, out found))
                {
                    return found;
                }
                string normalFull = wordForm.NormalFull;
                if (normalFull != null && m_Items.TryGetValue(normalFull, out found))
                {
                    return found;
                }
            }
            return null;
        }

        /// <summary>
        /// Counts one occurrence of the ordered word pair (<paramref name="b1"/>, <paramref name="b2"/>).
        /// The pair is recorded in the forward table under the first word's normal form and in the
        /// reverse table under the second word's normal form.
        /// </summary>
        /// <param name="b1">First word of the pair.</param>
        /// <param name="b2">Second word of the pair.</param>
        void AddBigramm(StatisticWordInfo b1, StatisticWordInfo b2)
        {
            int seen;

            // Forward direction: first word -> (second word -> count).
            string first = b1.Normal;
            Dictionary<string, int> followers;
            if (!m_Bigramms.TryGetValue(first, out followers))
            {
                followers = new Dictionary<string, int>();
                m_Bigramms.Add(first, followers);
            }
            string second = b2.Normal;
            // A miss leaves the counter at zero, so both the new and the existing case end up as "previous + 1".
            followers.TryGetValue(second, out seen);
            followers[second] = seen + 1;

            // Reverse direction: second word -> (first word -> count).
            Dictionary<string, int> predecessors;
            if (!m_BigrammsRev.TryGetValue(second, out predecessors))
            {
                predecessors = new Dictionary<string, int>();
                m_BigrammsRev.Add(second, predecessors);
            }
            predecessors.TryGetValue(first, out seen);
            predecessors[first] = seen + 1;
        }

        // Pair counters filled by AddBigramm: first word's Normal -> (second word's Normal -> count).
        Dictionary <string, Dictionary <string, int> > m_Bigramms    = new Dictionary <string, Dictionary <string, int> >();
        // Same pairs indexed the other way round: second word's Normal -> (first word's Normal -> count).
        Dictionary <string, Dictionary <string, int> > m_BigrammsRev = new Dictionary <string, Dictionary <string, int> >();
        // NOTE(review): not used in the visible code; presumably the analogous counters for initials - confirm.
        Dictionary <string, Dictionary <string, int> > m_Initials    = new Dictionary <string, Dictionary <string, int> >();
        // NOTE(review): not used in the visible code; presumably the reverse index of m_Initials - confirm.
        Dictionary <string, Dictionary <string, int> > m_InitialsRev = new Dictionary <string, Dictionary <string, int> >();
示例#12
0
        /// <summary>
        /// Tries to extract a verb phrase starting at token <paramref name="t"/>.
        /// Consecutive text tokens are classified one by one; each accepted token becomes a
        /// <c>VerbPhraseItemToken</c> of the result. Scanning stops at the first token that is
        /// not a text token or cannot be classified.
        /// </summary>
        /// <param name="t">First token of the candidate phrase.</param>
        /// <param name="canBePartition">Allows participle-like tokens to be accepted, and allows a
        /// leading preposition to be parsed when the very first token is not classified.</param>
        /// <param name="canBeAdjPartition">Allows adjectives to be accepted as phrase items.</param>
        /// <param name="forceParse">Accepts a token that can be both noun and verb even when no
        /// other rule for such tokens applies.</param>
        /// <returns>
        /// The verb phrase, or null when no verb-like item was accepted or when the leading
        /// preposition's case does not intersect with the case of the first item.
        /// </returns>
        static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse)
        {
            VerbPhraseToken res = null;

            Pullenti.Ner.Token t0         = t;
            // Pending negation token ("НЕ"); attached to the next accepted item.
            Pullenti.Ner.Token not        = null;
            // Set once an item of kind 1 or 3 has been accepted.
            bool             hasVerb      = false;
            // True when the most recently accepted item of kind 1 or 3 was a kind-1 token with IsVerbBe.
            bool             verbBeBefore = false;
            PrepositionToken prep         = null;

            for (; t != null; t = t.Next)
            {
                if (!(t is Pullenti.Ner.TextToken))
                {
                    break;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                bool isParticiple         = false;
                // The negation particle is not an item by itself; remember it and move on.
                if (tt.Term == "НЕ")
                {
                    not = t;
                    continue;
                }
                // Kind of the current token: 0 - not accepted, 1 - normalised as a verb,
                // 2 - adverb, 3 - normalised as an adjective (see the GetNormalCaseText call below).
                int    ty   = 0;
                string norm = null;
                Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                if (tt.Term == "НЕТ")
                {
                    if (hasVerb)
                    {
                        break;
                    }
                    ty = 1;
                }
                else if (tt.Term == "ДОПУСТИМО")
                {
                    ty = 3;
                }
                else if (mc.IsAdverb && !mc.IsVerb)
                {
                    ty = 2;
                }
                else if (tt.IsPureVerb || tt.IsVerbBe)
                {
                    ty = 1;
                    // A second verb is accepted only if it carries the "инф." attribute
                    // (presumably infinitive - confirm) or directly follows a verb-be item.
                    if (hasVerb)
                    {
                        if (!tt.Morph.ContainsAttr("инф.", null))
                        {
                            if (verbBeBefore)
                            {
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }
                else if (mc.IsVerb)
                {
                    // The token can be a verb but also belongs to other classes; decide by the other classes.
                    if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun)
                    {
                    }
                    else if (mc.IsNoun)
                    {
                        // A few fixed noun/verb homonyms are always taken as verbs.
                        if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ")
                        {
                            ty = 1;
                        }
                        else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt))
                        {
                            ty = 1;
                        }
                        else if (mc.IsAdjective && canBePartition)
                        {
                            ty = 1;
                        }
                        else if (forceParse)
                        {
                            ty = 1;
                        }
                    }
                    else if (mc.IsProper)
                    {
                        if (tt.Chars.IsAllLower)
                        {
                            ty = 1;
                        }
                    }
                    else
                    {
                        ty = 1;
                    }
                    // A token that is also an adjective, or that has a case, is treated as a participle.
                    if (mc.IsAdjective)
                    {
                        isParticiple = true;
                    }
                    if (!tt.Morph.Case.IsUndefined)
                    {
                        isParticiple = true;
                    }
                    if (!canBePartition && isParticiple)
                    {
                        break;
                    }
                    // After a verb, a participle is accepted only when it has the "инф." attribute.
                    if (hasVerb)
                    {
                        if (tt.Morph.ContainsAttr("инф.", null))
                        {
                        }
                        else if (!isParticiple)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                // Adjective with the "к.ф." attribute (presumably short form - confirm) ending in "О"
                // that does not start a noun phrase: accepted as an adverb.
                else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О")) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null)
                {
                    ty = 2;
                }
                else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition)))
                {
                    if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition)
                    {
                        break;
                    }
                    norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    // Normal forms ending in "ЙШИЙ" are never accepted here (ty stays 0).
                    if (norm.EndsWith("ЙШИЙ"))
                    {
                    }
                    else
                    {
                        // Accept the adjective when its derivate groups contain both this very spelling
                        // as an adjective+verb word (hPart) and some other verb (hVerb).
                        List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null);
                        if (grs != null && grs.Count > 0)
                        {
                            bool hVerb = false;
                            bool hPart = false;
                            foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                            {
                                foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                {
                                    if (w.Class.IsAdjective && w.Class.IsVerb)
                                    {
                                        if (w.Spelling == norm)
                                        {
                                            hPart = true;
                                        }
                                    }
                                    else if (w.Class.IsVerb)
                                    {
                                        hVerb = true;
                                    }
                                }
                            }
                            if (hPart && hVerb)
                            {
                                ty = 3;
                            }
                            else if (canBeAdjPartition)
                            {
                                ty = 3;
                            }
                            // Second attempt: strip the first group's prefix and repeat the same check
                            // on the remainder of the normal form.
                            if (ty != 3 && !string.IsNullOrEmpty(grs[0].Prefix) && norm.StartsWith(grs[0].Prefix))
                            {
                                hVerb = false;
                                hPart = false;
                                string norm1 = norm.Substring(grs[0].Prefix.Length);
                                grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null);
                                if (grs != null && grs.Count > 0)
                                {
                                    foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                                    {
                                        foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words)
                                        {
                                            if (w.Class.IsAdjective && w.Class.IsVerb)
                                            {
                                                if (w.Spelling == norm1)
                                                {
                                                    hPart = true;
                                                }
                                            }
                                            else if (w.Class.IsVerb)
                                            {
                                                hVerb = true;
                                            }
                                        }
                                    }
                                }
                                if (hPart && hVerb)
                                {
                                    ty = 3;
                                }
                            }
                        }
                    }
                }
                // An unclassified first token may be a preposition; if so, skip it and continue after it.
                if (ty == 0 && t == t0 && canBePartition)
                {
                    prep = PrepositionHelper.TryParse(t);
                    if (prep != null)
                    {
                        t = prep.EndToken;
                        continue;
                    }
                }
                if (ty == 0)
                {
                    break;
                }
                // The token is accepted: extend the phrase and build its item.
                if (res == null)
                {
                    res = new VerbPhraseToken(t0, t);
                }
                res.EndToken = t;
                VerbPhraseItemToken it = new VerbPhraseItemToken(t, t)
                {
                    Morph = new Pullenti.Ner.MorphCollection(t.Morph)
                };
                // A pending negation becomes part of this item and is then consumed.
                if (not != null)
                {
                    it.BeginToken = not;
                    it.Not        = true;
                    not           = null;
                }
                it.IsAdverb = ty == 2;
                // For the first item after a leading preposition the cases must agree;
                // otherwise the whole parse is abandoned.
                if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0)
                {
                    if (((prep.NextCase & t.Morph.Case)).IsUndefined)
                    {
                        return(null);
                    }
                    it.Morph.RemoveItems(prep.NextCase);
                    res.Preposition = prep;
                }
                if (norm == null)
                {
                    norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
                    // A kind-1 token that has a case: ask the morphology service for the
                    // nominative singular masculine form instead. The term is passed with a
                    // two-character "КК" prefix that is cut off the result again.
                    // NOTE(review): the purpose of the "КК" prefix is not visible here - confirm.
                    if (ty == 1 && !tt.Morph.Case.IsUndefined)
                    {
                        Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm()
                        {
                            Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine
                        };
                        // Misc info is borrowed from the first word form of the token.
                        foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items)
                        {
                            if (mit is Pullenti.Morph.MorphWordForm)
                            {
                                mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc;
                                break;
                            }
                        }
                        string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + (t as Pullenti.Ner.TextToken).Term, mi);
                        if (nnn != null)
                        {
                            norm = nnn.Substring(2);
                        }
                    }
                }
                it.Normal = norm;
                res.Items.Add(it);
                // The first item of kind 1 or 3 supplies the morphology of the whole phrase.
                if (!hasVerb && ((ty == 1 || ty == 3)))
                {
                    res.Morph = it.Morph;
                    hasVerb   = true;
                }
                if (ty == 1 || ty == 3)
                {
                    if (ty == 1 && tt.IsVerbBe)
                    {
                        verbBeBefore = true;
                    }
                    else
                    {
                        verbBeBefore = false;
                    }
                }
            }
            if (!hasVerb)
            {
                return(null);
            }
            // Drop trailing adverbs (never the first item) and pull the phrase end back accordingly.
            for (int i = res.Items.Count - 1; i > 0; i--)
            {
                if (res.Items[i].IsAdverb)
                {
                    res.Items.RemoveAt(i);
                    res.EndToken = res.Items[i - 1].EndToken;
                }
                else
                {
                    break;
                }
            }
            return(res);
        }
示例#13
0
        List <TerminToken> _TryAttachAll_(Pullenti.Ner.Token token, TerminParseAttr pars = TerminParseAttr.No, bool mainRoot = false)
        {
            if (Termins.Count == 0 || token == null)
            {
                return(null);
            }
            string s = null;

            Pullenti.Ner.TextToken tt = token as Pullenti.Ner.TextToken;
            if (tt == null && (token is Pullenti.Ner.ReferentToken))
            {
                tt = (token as Pullenti.Ner.ReferentToken).BeginToken as Pullenti.Ner.TextToken;
            }
            List <TerminToken> res = null;
            bool     wasVars       = false;
            CharNode root          = (mainRoot ? m_Root : this._getRoot(token.Morph.Language, token.Chars.IsLatinLetter));

            if (tt != null)
            {
                s = tt.Term;
                CharNode nod    = root;
                bool     noVars = false;
                int      len0   = 0;
                if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No)
                {
                }
                else if (tt.InvariantPrefixLengthOfMorphVars <= s.Length)
                {
                    len0 = tt.InvariantPrefixLengthOfMorphVars;
                    for (int i = 0; i < tt.InvariantPrefixLengthOfMorphVars; i++)
                    {
                        short ch = (short)s[i];
                        if (nod.Children == null)
                        {
                            noVars = true;
                            break;
                        }
                        CharNode nn;
                        if (!nod.Children.TryGetValue(ch, out nn))
                        {
                            noVars = true;
                            break;
                        }
                        nod = nn;
                    }
                }
                if (!noVars)
                {
                    if (this._manageVar(token, pars, s, nod, len0, ref res))
                    {
                        wasVars = true;
                    }
                    for (int i = 0; i < tt.Morph.ItemsCount; i++)
                    {
                        if (((pars & TerminParseAttr.TermOnly)) != TerminParseAttr.No)
                        {
                            continue;
                        }
                        Pullenti.Morph.MorphWordForm wf = tt.Morph[i] as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (((pars & TerminParseAttr.InDictionaryOnly)) != TerminParseAttr.No)
                        {
                            if (!wf.IsInDictionary)
                            {
                                continue;
                            }
                        }
                        int  j;
                        bool ok = true;
                        if (wf.NormalCase == null || wf.NormalCase == s)
                        {
                            ok = false;
                        }
                        else
                        {
                            for (j = 0; j < i; j++)
                            {
                                Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                                if (wf2 != null)
                                {
                                    if (wf2.NormalCase == wf.NormalCase || wf2.NormalFull == wf.NormalCase)
                                    {
                                        break;
                                    }
                                }
                            }
                            if (j < i)
                            {
                                ok = false;
                            }
                        }
                        if (ok)
                        {
                            if (this._manageVar(token, pars, wf.NormalCase, nod, tt.InvariantPrefixLengthOfMorphVars, ref res))
                            {
                                wasVars = true;
                            }
                        }
                        if (wf.NormalFull == null || wf.NormalFull == wf.NormalCase || wf.NormalFull == s)
                        {
                            continue;
                        }
                        for (j = 0; j < i; j++)
                        {
                            Pullenti.Morph.MorphWordForm wf2 = tt.Morph[j] as Pullenti.Morph.MorphWordForm;
                            if (wf2 != null && wf2.NormalFull == wf.NormalFull)
                            {
                                break;
                            }
                        }
                        if (j < i)
                        {
                            continue;
                        }
                        if (this._manageVar(token, pars, wf.NormalFull, nod, tt.InvariantPrefixLengthOfMorphVars, ref res))
                        {
                            wasVars = true;
                        }
                    }
                }
            }
            else if (token is Pullenti.Ner.NumberToken)
            {
                if (this._manageVar(token, pars, (token as Pullenti.Ner.NumberToken).Value.ToString(), root, 0, ref res))
                {
                    wasVars = true;
                }
            }
            else
            {
                return(null);
            }
            if (!wasVars && s != null && s.Length == 1)
            {
                List <Termin> vars;
                if (m_Hash1.TryGetValue((short)s[0], out vars))
                {
                    foreach (Termin t in vars)
                    {
                        if (!t.Lang.IsUndefined)
                        {
                            if (!token.Morph.Language.IsUndefined)
                            {
                                if (((token.Morph.Language & t.Lang)).IsUndefined)
                                {
                                    continue;
                                }
                            }
                        }
                        TerminToken ar = t.TryParse(tt, TerminParseAttr.No);
                        if (ar == null)
                        {
                            continue;
                        }
                        ar.Termin = t;
                        if (res == null)
                        {
                            res = new List <TerminToken>();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount > res[0].TokensCount)
                        {
                            res.Clear();
                            res.Add(ar);
                        }
                        else if (ar.TokensCount == res[0].TokensCount)
                        {
                            res.Add(ar);
                        }
                    }
                }
            }
            if (res != null)
            {
                int ii  = 0;
                int max = 0;
                for (int i = 0; i < res.Count; i++)
                {
                    if (res[i].LengthChar > max)
                    {
                        max = res[i].LengthChar;
                        ii  = i;
                    }
                }
                if (ii > 0)
                {
                    TerminToken v = res[ii];
                    res.RemoveAt(ii);
                    res.Insert(0, v);
                }
            }
            return(res);
        }
示例#14
0
 /// <summary>
 /// Produces a word form by delegating to one of the language-specific engines.
 /// The engine is chosen from the first character of <paramref name="word"/> and from
 /// <paramref name="lang"/>: a non-Cyrillic first character goes to the English engine;
 /// a Cyrillic one goes to the first of the Russian, Ukrainian, Belarusian, Kazakh engines
 /// for which both the engine's own Language flag and the requested language flag are set,
 /// and to the Russian engine when none matches.
 /// </summary>
 /// <param name="word">Source word. A null or empty value is returned unchanged.</param>
 /// <param name="cla">Passed through unchanged to the selected engine.</param>
 /// <param name="gender">Passed through unchanged to the selected engine.</param>
 /// <param name="cas">Passed through unchanged to the selected engine.</param>
 /// <param name="num">Passed through unchanged to the selected engine.</param>
 /// <param name="lang">Requested language; used only to pick the engine for Cyrillic words.
 /// A null reference is treated like an unmatched language (Russian engine).</param>
 /// <param name="addInfo">Passed through unchanged to the selected engine.</param>
 /// <returns>Whatever the selected engine's GetWordform returns, or
 /// <paramref name="word"/> itself when it is null or empty.</returns>
 public string GetWordform(string word, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphGender gender, Pullenti.Morph.MorphCase cas, Pullenti.Morph.MorphNumber num, Pullenti.Morph.MorphLang lang, Pullenti.Morph.MorphWordForm addInfo)
 {
     // There is no first character to inspect; indexing word[0] would throw.
     if (string.IsNullOrEmpty(word))
     {
         return(word);
     }
     if (!Pullenti.Morph.LanguageHelper.IsCyrillicChar(word[0]))
     {
         return(m_EngineEn.GetWordform(word, cla, gender, cas, num, addInfo));
     }
     // Plain reference check so the test does not depend on any user-defined
     // equality operator the language type may declare.
     if (!object.ReferenceEquals(lang, null))
     {
         if (m_EngineRu.Language.IsRu && lang.IsRu)
         {
             return(m_EngineRu.GetWordform(word, cla, gender, cas, num, addInfo));
         }
         if (m_EngineUa.Language.IsUa && lang.IsUa)
         {
             return(m_EngineUa.GetWordform(word, cla, gender, cas, num, addInfo));
         }
         if (m_EngineBy.Language.IsBy && lang.IsBy)
         {
             return(m_EngineBy.GetWordform(word, cla, gender, cas, num, addInfo));
         }
         if (m_EngineKz.Language.IsKz && lang.IsKz)
         {
             return(m_EngineKz.GetWordform(word, cla, gender, cas, num, addInfo));
         }
     }
     // No engine matched the requested language: fall back to the Russian engine.
     return(m_EngineRu.GetWordform(word, cla, gender, cas, num, addInfo));
 }
示例#15
0
        public List <Pullenti.Morph.MorphToken> Run(string text, bool onlyTokenizing, Pullenti.Morph.MorphLang dlang, bool goodText, ProgressChangedEventHandler progress)
        {
            if (string.IsNullOrEmpty(text))
            {
                return(null);
            }
            TextWrapper twr = new TextWrapper(text, goodText);

            TextWrapper.CharsList            twrch  = twr.Chars;
            List <Pullenti.Morph.MorphToken> res    = new List <Pullenti.Morph.MorphToken>(text.Length / 6);
            Dictionary <string, UniLexWrap>  uniLex = new Dictionary <string, UniLexWrap>();
            int    i;
            int    j;
            string term0        = null;
            int    pureRusWords = 0;
            int    pureUkrWords = 0;
            int    pureByWords  = 0;
            int    pureKzWords  = 0;
            int    totRusWords  = 0;
            int    totUkrWords  = 0;
            int    totByWords   = 0;
            int    totKzWords   = 0;

            for (i = 0; i < twr.Length; i++)
            {
                int ty = this.GetCharTyp(twrch[i]);
                if (ty == 0)
                {
                    continue;
                }
                if (ty > 2)
                {
                    j = i + 1;
                }
                else
                {
                    for (j = i + 1; j < twr.Length; j++)
                    {
                        if (this.GetCharTyp(twrch[j]) != ty)
                        {
                            break;
                        }
                    }
                }
                string wstr = text.Substring(i, j - i);
                string term = null;
                if (goodText)
                {
                    term = wstr;
                }
                else
                {
                    string trstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(wstr, term0, false);
                    term = Pullenti.Morph.LanguageHelper.CorrectWord(trstr);
                }
                if (string.IsNullOrEmpty(term))
                {
                    i = j - 1;
                    continue;
                }
                Pullenti.Morph.MorphLang lang = Pullenti.Morph.LanguageHelper.GetWordLang(term);
                if (lang == Pullenti.Morph.MorphLang.UA)
                {
                    pureUkrWords++;
                }
                else if (lang == Pullenti.Morph.MorphLang.RU)
                {
                    pureRusWords++;
                }
                else if (lang == Pullenti.Morph.MorphLang.BY)
                {
                    pureByWords++;
                }
                else if (lang == Pullenti.Morph.MorphLang.KZ)
                {
                    pureKzWords++;
                }
                if (((lang & Pullenti.Morph.MorphLang.RU)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totRusWords++;
                }
                if (((lang & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totUkrWords++;
                }
                if (((lang & Pullenti.Morph.MorphLang.BY)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totByWords++;
                }
                if (((lang & Pullenti.Morph.MorphLang.KZ)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totKzWords++;
                }
                if (ty == 1)
                {
                    term0 = term;
                }
                UniLexWrap lemmas = null;
                if (ty == 1 && !onlyTokenizing)
                {
                    if (!uniLex.TryGetValue(term, out lemmas))
                    {
                        UniLexWrap nuni = new UniLexWrap(lang);
                        uniLex.Add(term, nuni);
                        lemmas = nuni;
                    }
                }
                Pullenti.Morph.MorphToken tok = new Pullenti.Morph.MorphToken();
                tok.Term      = term;
                tok.BeginChar = i;
                if (i == 733860)
                {
                }
                tok.EndChar = j - 1;
                tok.Tag     = lemmas;
                res.Add(tok);
                i = j - 1;
            }
            Pullenti.Morph.MorphLang defLang = new Pullenti.Morph.MorphLang();
            if (dlang != null)
            {
                defLang.Value = dlang.Value;
            }
            if (pureRusWords > pureUkrWords && pureRusWords > pureByWords && pureRusWords > pureKzWords)
            {
                defLang = Pullenti.Morph.MorphLang.RU;
            }
            else if (totRusWords > totUkrWords && totRusWords > totByWords && totRusWords > totKzWords)
            {
                defLang = Pullenti.Morph.MorphLang.RU;
            }
            else if (pureUkrWords > pureRusWords && pureUkrWords > pureByWords && pureUkrWords > pureKzWords)
            {
                defLang = Pullenti.Morph.MorphLang.UA;
            }
            else if (totUkrWords > totRusWords && totUkrWords > totByWords && totUkrWords > totKzWords)
            {
                defLang = Pullenti.Morph.MorphLang.UA;
            }
            else if (pureKzWords > pureRusWords && pureKzWords > pureUkrWords && pureKzWords > pureByWords)
            {
                defLang = Pullenti.Morph.MorphLang.KZ;
            }
            else if (totKzWords > totRusWords && totKzWords > totUkrWords && totKzWords > totByWords)
            {
                defLang = Pullenti.Morph.MorphLang.KZ;
            }
            else if (pureByWords > pureRusWords && pureByWords > pureUkrWords && pureByWords > pureKzWords)
            {
                defLang = Pullenti.Morph.MorphLang.BY;
            }
            else if (totByWords > totRusWords && totByWords > totUkrWords && totByWords > totKzWords)
            {
                if (totRusWords > 10 && totByWords > (totRusWords + 20))
                {
                    defLang = Pullenti.Morph.MorphLang.BY;
                }
                else if (totRusWords == 0 || totByWords >= (totRusWords * 2))
                {
                    defLang = Pullenti.Morph.MorphLang.BY;
                }
            }
            if (((defLang.IsUndefined || defLang.IsUa)) && totRusWords > 0)
            {
                if (((totUkrWords > totRusWords && m_EngineUa.Language.IsUa)) || ((totByWords > totRusWords && m_EngineBy.Language.IsBy)) || ((totKzWords > totRusWords && m_EngineKz.Language.IsKz)))
                {
                    int cou0 = 0;
                    totRusWords = (totByWords = (totUkrWords = (totKzWords = 0)));
                    foreach (KeyValuePair <string, UniLexWrap> kp in uniLex)
                    {
                        Pullenti.Morph.MorphLang lang = new Pullenti.Morph.MorphLang();
                        kp.Value.WordForms = this.ProcessOneWord(kp.Key, ref lang);
                        if (kp.Value.WordForms != null)
                        {
                            foreach (Pullenti.Morph.MorphWordForm wf in kp.Value.WordForms)
                            {
                                lang |= wf.Language;
                            }
                        }
                        kp.Value.Lang = lang;
                        if (lang.IsRu)
                        {
                            totRusWords++;
                        }
                        if (lang.IsUa)
                        {
                            totUkrWords++;
                        }
                        if (lang.IsBy)
                        {
                            totByWords++;
                        }
                        if (lang.IsKz)
                        {
                            totKzWords++;
                        }
                        if (lang.IsCyrillic)
                        {
                            cou0++;
                        }
                        if (cou0 >= 100)
                        {
                            break;
                        }
                    }
                    if (totRusWords > ((totByWords / 2)) && totRusWords > ((totUkrWords / 2)))
                    {
                        defLang = Pullenti.Morph.MorphLang.RU;
                    }
                    else if (totUkrWords > ((totRusWords / 2)) && totUkrWords > ((totByWords / 2)))
                    {
                        defLang = Pullenti.Morph.MorphLang.UA;
                    }
                    else if (totByWords > ((totRusWords / 2)) && totByWords > ((totUkrWords / 2)))
                    {
                        defLang = Pullenti.Morph.MorphLang.BY;
                    }
                }
                else if (defLang.IsUndefined)
                {
                    defLang = Pullenti.Morph.MorphLang.RU;
                }
            }
            int cou = 0;

            totRusWords = (totByWords = (totUkrWords = (totKzWords = 0)));
            foreach (KeyValuePair <string, UniLexWrap> kp in uniLex)
            {
                Pullenti.Morph.MorphLang lang = defLang;
                if (lang.IsUndefined)
                {
                    if (totRusWords > totByWords && totRusWords > totUkrWords && totRusWords > totKzWords)
                    {
                        lang = Pullenti.Morph.MorphLang.RU;
                    }
                    else if (totUkrWords > totRusWords && totUkrWords > totByWords && totUkrWords > totKzWords)
                    {
                        lang = Pullenti.Morph.MorphLang.UA;
                    }
                    else if (totByWords > totRusWords && totByWords > totUkrWords && totByWords > totKzWords)
                    {
                        lang = Pullenti.Morph.MorphLang.BY;
                    }
                    else if (totKzWords > totRusWords && totKzWords > totUkrWords && totKzWords > totByWords)
                    {
                        lang = Pullenti.Morph.MorphLang.KZ;
                    }
                }
                kp.Value.WordForms = this.ProcessOneWord(kp.Key, ref lang);
                kp.Value.Lang      = lang;
                if (((lang & Pullenti.Morph.MorphLang.RU)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totRusWords++;
                }
                if (((lang & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totUkrWords++;
                }
                if (((lang & Pullenti.Morph.MorphLang.BY)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totByWords++;
                }
                if (((lang & Pullenti.Morph.MorphLang.KZ)) != Pullenti.Morph.MorphLang.Unknown)
                {
                    totKzWords++;
                }
                if (progress != null)
                {
                    this.OnProgress(cou, uniLex.Count, progress);
                }
                cou++;
            }
            List <Pullenti.Morph.MorphWordForm> emptyList = null;

            foreach (Pullenti.Morph.MorphToken r in res)
            {
                UniLexWrap uni = r.Tag as UniLexWrap;
                r.Tag = null;
                if (uni == null || uni.WordForms == null || uni.WordForms.Count == 0)
                {
                    if (emptyList == null)
                    {
                        emptyList = new List <Pullenti.Morph.MorphWordForm>();
                    }
                    r.WordForms = emptyList;
                    if (uni != null)
                    {
                        r.Language = uni.Lang;
                    }
                }
                else
                {
                    r.WordForms = uni.WordForms;
                }
            }
            if (!goodText)
            {
                for (i = 0; i < (res.Count - 2); i++)
                {
                    UnicodeInfo ui0 = twrch[res[i].BeginChar];
                    UnicodeInfo ui1 = twrch[res[i + 1].BeginChar];
                    UnicodeInfo ui2 = twrch[res[i + 2].BeginChar];
                    if (ui1.IsQuot)
                    {
                        int p = res[i + 1].BeginChar;
                        if ((p >= 2 && "БбТт".IndexOf(text[p - 1]) >= 0 && ((p + 3) < text.Length)) && "ЕеЯяЁё".IndexOf(text[p + 1]) >= 0)
                        {
                            string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(string.Format("{0}Ъ{1}", res[i].GetSourceText(text), res[i + 2].GetSourceText(text))), null, false);
                            List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr);
                            if (li != null && li.Count > 0 && li[0].IsInDictionary)
                            {
                                res[i].EndChar   = res[i + 2].EndChar;
                                res[i].Term      = wstr;
                                res[i].WordForms = li;
                                res.RemoveRange(i + 1, 2);
                            }
                        }
                        else if ((ui1.IsApos && p > 0 && char.IsLetter(text[p - 1])) && ((p + 1) < text.Length) && char.IsLetter(text[p + 1]))
                        {
                            if (defLang == Pullenti.Morph.MorphLang.UA || ((res[i].Language & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown || ((res[i + 2].Language & Pullenti.Morph.MorphLang.UA)) != Pullenti.Morph.MorphLang.Unknown)
                            {
                                string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(string.Format("{0}{1}", res[i].GetSourceText(text), res[i + 2].GetSourceText(text))), null, false);
                                List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr);
                                bool okk = true;
                                if (okk)
                                {
                                    res[i].EndChar = res[i + 2].EndChar;
                                    res[i].Term    = wstr;
                                    if (li == null)
                                    {
                                        li = new List <Pullenti.Morph.MorphWordForm>();
                                    }
                                    if (li != null && li.Count > 0)
                                    {
                                        res[i].Language = li[0].Language;
                                    }
                                    res[i].WordForms = li;
                                    res.RemoveRange(i + 1, 2);
                                }
                            }
                        }
                    }
                    else if (((ui1.UniChar == '3' || ui1.UniChar == '4')) && res[i + 1].Length == 1)
                    {
                        string src = (ui1.UniChar == '3' ? "З" : "Ч");
                        int    i0  = i + 1;
                        if ((res[i].EndChar + 1) == res[i + 1].BeginChar && ui0.IsCyrillic)
                        {
                            i0--;
                            src = res[i0].GetSourceText(text) + src;
                        }
                        int i1 = i + 1;
                        if ((res[i + 1].EndChar + 1) == res[i + 2].BeginChar && ui2.IsCyrillic)
                        {
                            i1++;
                            src += res[i1].GetSourceText(text);
                        }
                        if (src.Length > 2)
                        {
                            string wstr = Pullenti.Morph.LanguageHelper.TransliteralCorrection(Pullenti.Morph.LanguageHelper.CorrectWord(src), null, false);
                            List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr);
                            if (li != null && li.Count > 0 && li[0].IsInDictionary)
                            {
                                res[i0].EndChar   = res[i1].EndChar;
                                res[i0].Term      = wstr;
                                res[i0].WordForms = li;
                                res.RemoveRange(i0 + 1, i1 - i0);
                            }
                        }
                    }
                    else if ((ui1.IsHiphen && ui0.IsLetter && ui2.IsLetter) && res[i].EndChar > res[i].BeginChar && res[i + 2].EndChar > res[i + 2].BeginChar)
                    {
                        bool newline = false;
                        int  sps     = 0;
                        for (j = res[i + 1].EndChar + 1; j < res[i + 2].BeginChar; j++)
                        {
                            if (text[j] == '\r' || text[j] == '\n')
                            {
                                newline = true;
                                sps++;
                            }
                            else if (!char.IsWhiteSpace(text[j]))
                            {
                                break;
                            }
                            else
                            {
                                sps++;
                            }
                        }
                        string fullWord = Pullenti.Morph.LanguageHelper.CorrectWord(res[i].GetSourceText(text) + res[i + 2].GetSourceText(text));
                        if (!newline)
                        {
                            if (uniLex.ContainsKey(fullWord) || fullWord == "ИЗЗА")
                            {
                                newline = true;
                            }
                            else if (text[res[i + 1].BeginChar] == ((char)0x00AD))
                            {
                                newline = true;
                            }
                            else if (Pullenti.Morph.LanguageHelper.EndsWithEx(res[i].GetSourceText(text), "О", "о", null, null) && res[i + 2].WordForms.Count > 0 && res[i + 2].WordForms[0].IsInDictionary)
                            {
                                if (text[res[i + 1].BeginChar] == '¬')
                                {
                                    List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord);
                                    if (li != null && li.Count > 0 && li[0].IsInDictionary)
                                    {
                                        newline = true;
                                    }
                                }
                            }
                            else if ((res[i].EndChar + 2) == res[i + 2].BeginChar)
                            {
                                if (!char.IsUpper(text[res[i + 2].BeginChar]) && (sps < 2) && fullWord.Length > 4)
                                {
                                    newline = true;
                                    if ((i + 3) < res.Count)
                                    {
                                        UnicodeInfo ui3 = twrch[res[i + 3].BeginChar];
                                        if (ui3.IsHiphen)
                                        {
                                            newline = false;
                                        }
                                    }
                                }
                            }
                            else if (((res[i].EndChar + 1) == res[i + 1].BeginChar && sps > 0 && (sps < 3)) && fullWord.Length > 4)
                            {
                                newline = true;
                            }
                        }
                        if (newline)
                        {
                            List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord);
                            if (li != null && li.Count > 0 && ((li[0].IsInDictionary || uniLex.ContainsKey(fullWord))))
                            {
                                res[i].EndChar   = res[i + 2].EndChar;
                                res[i].Term      = fullWord;
                                res[i].WordForms = li;
                                res.RemoveRange(i + 1, 2);
                            }
                        }
                        else
                        {
                        }
                    }
                    else if ((ui1.IsLetter && ui0.IsLetter && res[i].Length > 2) && res[i + 1].Length > 1)
                    {
                        if (ui0.IsUpper != ui1.IsUpper)
                        {
                            continue;
                        }
                        if (!ui0.IsCyrillic || !ui1.IsCyrillic)
                        {
                            continue;
                        }
                        bool newline = false;
                        for (j = res[i].EndChar + 1; j < res[i + 1].BeginChar; j++)
                        {
                            if (twrch[j].Code == 0xD || twrch[j].Code == 0xA)
                            {
                                newline = true;
                                break;
                            }
                        }
                        if (!newline)
                        {
                            continue;
                        }
                        string fullWord = Pullenti.Morph.LanguageHelper.CorrectWord(res[i].GetSourceText(text) + res[i + 1].GetSourceText(text));
                        if (!uniLex.ContainsKey(fullWord))
                        {
                            continue;
                        }
                        List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(fullWord);
                        if (li != null && li.Count > 0 && li[0].IsInDictionary)
                        {
                            res[i].EndChar   = res[i + 1].EndChar;
                            res[i].Term      = fullWord;
                            res[i].WordForms = li;
                            res.RemoveAt(i + 1);
                        }
                    }
                }
            }
            for (i = 0; i < res.Count; i++)
            {
                Pullenti.Morph.MorphToken mt = res[i];
                mt.CharInfo = new Pullenti.Morph.CharsInfo();
                UnicodeInfo ui0  = twrch[mt.BeginChar];
                UnicodeInfo ui00 = UnicodeInfo.AllChars[(int)(mt.Term[0])];
                for (j = mt.BeginChar + 1; j <= mt.EndChar; j++)
                {
                    if (ui0.IsLetter)
                    {
                        break;
                    }
                    ui0 = twrch[j];
                }
                if (ui0.IsLetter)
                {
                    mt.CharInfo.IsLetter = true;
                    if (ui00.IsLatin)
                    {
                        mt.CharInfo.IsLatinLetter = true;
                    }
                    else if (ui00.IsCyrillic)
                    {
                        mt.CharInfo.IsCyrillicLetter = true;
                    }
                    if (mt.Language == Pullenti.Morph.MorphLang.Unknown)
                    {
                        if (Pullenti.Morph.LanguageHelper.IsCyrillic(mt.Term))
                        {
                            mt.Language = (defLang.IsUndefined ? Pullenti.Morph.MorphLang.RU : defLang);
                        }
                    }
                    if (goodText)
                    {
                        continue;
                    }
                    bool allUp = true;
                    bool allLo = true;
                    for (j = mt.BeginChar; j <= mt.EndChar; j++)
                    {
                        if (twrch[j].IsUpper || twrch[j].IsDigit)
                        {
                            allLo = false;
                        }
                        else
                        {
                            allUp = false;
                        }
                    }
                    if (allUp)
                    {
                        mt.CharInfo.IsAllUpper = true;
                    }
                    else if (allLo)
                    {
                        mt.CharInfo.IsAllLower = true;
                    }
                    else if (((ui0.IsUpper || twrch[mt.BeginChar].IsDigit)) && mt.EndChar > mt.BeginChar)
                    {
                        allLo = true;
                        for (j = mt.BeginChar + 1; j <= mt.EndChar; j++)
                        {
                            if (twrch[j].IsUpper || twrch[j].IsDigit)
                            {
                                allLo = false;
                                break;
                            }
                        }
                        if (allLo)
                        {
                            mt.CharInfo.IsCapitalUpper = true;
                        }
                        else if (twrch[mt.EndChar].IsLower && (mt.EndChar - mt.BeginChar) > 1)
                        {
                            allUp = true;
                            for (j = mt.BeginChar; j < mt.EndChar; j++)
                            {
                                if (twrch[j].IsLower)
                                {
                                    allUp = false;
                                    break;
                                }
                            }
                            if (allUp)
                            {
                                mt.CharInfo.IsLastLower = true;
                            }
                        }
                    }
                }
                if (mt.CharInfo.IsLastLower && mt.Length > 2 && mt.CharInfo.IsCyrillicLetter)
                {
                    string pref = text.Substring(mt.BeginChar, mt.EndChar - mt.BeginChar);
                    bool   ok   = false;
                    foreach (Pullenti.Morph.MorphWordForm wf in mt.WordForms)
                    {
                        if (wf.NormalCase == pref || wf.NormalFull == pref)
                        {
                            ok = true;
                            break;
                        }
                    }
                    if (!ok)
                    {
                        Pullenti.Morph.MorphWordForm wf0 = new Pullenti.Morph.MorphWordForm()
                        {
                            NormalCase = pref, Class = Pullenti.Morph.MorphClass.Noun, UndefCoef = 1
                        };
                        mt.WordForms = new List <Pullenti.Morph.MorphWordForm>(mt.WordForms);
                        mt.WordForms.Insert(0, wf0);
                    }
                }
            }
            if (goodText || onlyTokenizing)
            {
                return(res);
            }
            for (i = 0; i < res.Count; i++)
            {
                if (res[i].Length == 1 && res[i].CharInfo.IsLatinLetter)
                {
                    char ch = res[i].Term[0];
                    if (ch == 'C' || ch == 'A' || ch == 'P')
                    {
                    }
                    else
                    {
                        continue;
                    }
                    bool isRus = false;
                    for (int ii = i - 1; ii >= 0; ii--)
                    {
                        if ((res[ii].EndChar + 1) != res[ii + 1].BeginChar)
                        {
                            break;
                        }
                        else if (res[ii].CharInfo.IsLetter)
                        {
                            isRus = res[ii].CharInfo.IsCyrillicLetter;
                            break;
                        }
                    }
                    if (!isRus)
                    {
                        for (int ii = i + 1; ii < res.Count; ii++)
                        {
                            if ((res[ii - 1].EndChar + 1) != res[ii].BeginChar)
                            {
                                break;
                            }
                            else if (res[ii].CharInfo.IsLetter)
                            {
                                isRus = res[ii].CharInfo.IsCyrillicLetter;
                                break;
                            }
                        }
                    }
                    if (isRus)
                    {
                        res[i].Term = Pullenti.Morph.LanguageHelper.TransliteralCorrection(res[i].Term, null, true);
                        res[i].CharInfo.IsCyrillicLetter = true;
                        res[i].CharInfo.IsLatinLetter    = true;
                    }
                }
            }
            foreach (Pullenti.Morph.MorphToken r in res)
            {
                if (r.CharInfo.IsAllUpper || r.CharInfo.IsCapitalUpper)
                {
                    if (r.Language.IsCyrillic)
                    {
                        bool ok = false;
                        foreach (Pullenti.Morph.MorphWordForm wf in r.WordForms)
                        {
                            if (wf.Class.IsProperSurname)
                            {
                                ok = true;
                                break;
                            }
                        }
                        if (!ok)
                        {
                            r.WordForms = new List <Pullenti.Morph.MorphWordForm>(r.WordForms);
                            m_EngineRu.ProcessSurnameVariants(r.Term, r.WordForms);
                        }
                    }
                }
            }
            foreach (Pullenti.Morph.MorphToken r in res)
            {
                foreach (Pullenti.Morph.MorphWordForm mv in r.WordForms)
                {
                    if (mv.NormalCase == null)
                    {
                        mv.NormalCase = r.Term;
                    }
                }
            }
            for (i = 0; i < (res.Count - 2); i++)
            {
                if (res[i].CharInfo.IsLatinLetter && res[i].CharInfo.IsAllUpper && res[i].Length == 1)
                {
                    if (twrch[res[i + 1].BeginChar].IsQuot && res[i + 2].CharInfo.IsLatinLetter && res[i + 2].Length > 2)
                    {
                        if ((res[i].EndChar + 1) == res[i + 1].BeginChar && (res[i + 1].EndChar + 1) == res[i + 2].BeginChar)
                        {
                            string wstr = string.Format("{0}{1}", res[i].Term, res[i + 2].Term);
                            List <Pullenti.Morph.MorphWordForm> li = this.ProcessOneWord0(wstr);
                            if (li != null)
                            {
                                res[i].WordForms = li;
                            }
                            res[i].EndChar = res[i + 2].EndChar;
                            res[i].Term    = wstr;
                            if (res[i + 2].CharInfo.IsAllLower)
                            {
                                res[i].CharInfo.IsAllUpper     = false;
                                res[i].CharInfo.IsCapitalUpper = true;
                            }
                            else if (!res[i + 2].CharInfo.IsAllUpper)
                            {
                                res[i].CharInfo.IsAllUpper = false;
                            }
                            res.RemoveRange(i + 1, 2);
                        }
                    }
                }
            }
            for (i = 0; i < (res.Count - 1); i++)
            {
                if (!res[i].CharInfo.IsLetter && !res[i + 1].CharInfo.IsLetter && (res[i].EndChar + 1) == res[i + 1].BeginChar)
                {
                    if (twrch[res[i].BeginChar].IsHiphen && twrch[res[i + 1].BeginChar].IsHiphen)
                    {
                        if (i == 0 || !twrch[res[i - 1].BeginChar].IsHiphen)
                        {
                        }
                        else
                        {
                            continue;
                        }
                        if ((i + 2) == res.Count || !twrch[res[i + 2].BeginChar].IsHiphen)
                        {
                        }
                        else
                        {
                            continue;
                        }
                        res[i].EndChar = res[i + 1].EndChar;
                        res.RemoveAt(i + 1);
                    }
                }
            }
            return(res);
        }
示例#16
0
        /// <summary>
        /// Tries to build one noun-phrase item (a candidate adjective and/or noun) starting at token <paramref name="t"/>.
        /// </summary>
        /// <remarks>
        /// A referent token is always accepted as a noun. For a text token a series of veto checks runs first
        /// (each of them returns <c>null</c>), then every morphological variant of the token is examined and,
        /// when it agrees with the already collected <paramref name="items"/> (via <c>TryAccordVariant</c>),
        /// is stored in <c>AdjMorph</c> and/or <c>NounMorph</c> of the result.
        /// The returned item may have neither <c>CanBeNoun</c> nor <c>CanBeAdj</c> set, so callers must inspect these flags.
        /// </remarks>
        /// <param name="t">First token of the candidate item; <c>null</c> yields <c>null</c>.</param>
        /// <param name="items">Items already collected for the current noun phrase; may be <c>null</c> or empty.</param>
        /// <param name="attrs">Parsing flags; this method reads IgnoreParticiples, ReferentCanBeNoun, ParseNumericAsAdjective, ParsePronouns and MultiNouns, and forwards the value to nested noun-phrase parsing.</param>
        /// <returns>The parsed item, or <c>null</c> when one of the veto rules rejects the token.</returns>
        public static NounPhraseItem TryParse(Pullenti.Ner.Token t, List <NounPhraseItem> items, Pullenti.Ner.Core.NounPhraseParseAttr attrs)
        {
            if (t == null)
            {
                return(null);
            }
            Pullenti.Ner.Token t0 = t;
            bool _canBeSurname    = false;
            bool _isDoubtAdj      = false;

            Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
            // A referent that wraps exactly one text token: parse the inner token and re-span the result onto the referent.
            if (rt != null && rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
            {
                NounPhraseItem res = TryParse(rt.BeginToken, items, attrs);
                if (res != null)
                {
                    res.BeginToken = (res.EndToken = t);
                    res.CanBeNoun  = true;
                    return(res);
                }
            }
            // Any other referent token (or one whose inner token failed to parse) is a noun
            // whose normal value is the referent's string representation.
            if (rt != null)
            {
                NounPhraseItem res = new NounPhraseItem(t, t);
                foreach (Pullenti.Morph.MorphBaseInfo m in t.Morph.Items)
                {
                    NounPhraseItemTextVar v = new NounPhraseItemTextVar(m, null);
                    v.NormalValue = t.GetReferent().ToString();
                    res.NounMorph.Add(v);
                }
                res.CanBeNoun = true;
                return(res);
            }
            // Set when a dictionary verb form was seen and tolerated; later lets an adjective variant also count as a noun.
            bool hasLegalVerb = false;

            if (t is Pullenti.Ner.TextToken)
            {
                // Veto rules for text tokens: any of them rejects the token by returning null.
                if (!t.Chars.IsLetter)
                {
                    return(null);
                }
                string str = (t as Pullenti.Ner.TextToken).Term;
                // Dictionary-form screening is applied only to terms ending in 'А' or 'О'.
                if (str[str.Length - 1] == 'А' || str[str.Length - 1] == 'О')
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if ((wf is Pullenti.Morph.MorphWordForm) && (wf as Pullenti.Morph.MorphWordForm).IsInDictionary)
                        {
                            if (wf.Class.IsVerb)
                            {
                                // A dictionary verb that is not also a dictionary noun is rejected,
                                // unless IgnoreParticiples is set or the term ends in "ОГО"/"ЕГО".
                                Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                                if (!mc.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    if (!Pullenti.Morph.LanguageHelper.EndsWithEx(str, "ОГО", "ЕГО", null, null))
                                    {
                                        return(null);
                                    }
                                }
                                hasLegalVerb = true;
                            }
                            if (wf.Class.IsAdverb)
                            {
                                // A dictionary adverb not followed by a hyphen is rejected, except for the listed words.
                                if (t.Next == null || !t.Next.IsHiphen)
                                {
                                    if ((str == "ВСЕГО" || str == "ДОМА" || str == "НЕСКОЛЬКО") || str == "МНОГО" || str == "ПОРЯДКА")
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (wf.Class.IsAdjective)
                            {
                                // NOTE(review): "к.ф." is presumably the short-form adjective marker - confirm against the morphology dictionary.
                                if (wf.ContainsAttr("к.ф.", null))
                                {
                                    if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Adjective)
                                    {
                                    }
                                    else
                                    {
                                        // The word is not exclusively an adjective in the dictionary, so the adjective reading is doubtful.
                                        _isDoubtAdj = true;
                                    }
                                }
                            }
                        }
                    }
                }
                Pullenti.Morph.MorphClass mc0 = t.Morph.Class;
                // Possible surname written with at least one upper-case letter.
                if (mc0.IsProperSurname && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsProperSurname && wf.Number != Pullenti.Morph.MorphNumber.Plural)
                        {
                            Pullenti.Morph.MorphWordForm wff = wf as Pullenti.Morph.MorphWordForm;
                            if (wff == null)
                            {
                                continue;
                            }
                            string s = ((wff.NormalFull ?? wff.NormalCase)) ?? "";
                            if (Pullenti.Morph.LanguageHelper.EndsWithEx(s, "ИН", "ЕН", "ЫН", null))
                            {
                                // Non-dictionary form with such an ending only marks the item as a possible surname;
                                // a dictionary form rejects the token altogether.
                                if (!wff.IsInDictionary)
                                {
                                    _canBeSurname = true;
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (wff.IsInDictionary && Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ"))
                            {
                                _canBeSurname = true;
                            }
                        }
                    }
                }
                // Possible first name written with at least one upper-case letter.
                if (mc0.IsProperName && !t.Chars.IsAllLower)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wff in t.Morph.Items)
                    {
                        Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                        if (wf == null)
                        {
                            continue;
                        }
                        if (wf.NormalCase == "ГОР")
                        {
                            continue;
                        }
                        if (wf.Class.IsProperName && wf.IsInDictionary)
                        {
                            if (wf.NormalCase == null || !wf.NormalCase.StartsWith("ЛЮБ"))
                            {
                                if (mc0.IsAdjective && t.Morph.ContainsAttr("неизм.", null))
                                {
                                }
                                else if (((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)) == Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun)
                                {
                                }
                                else
                                {
                                    // A dictionary first name is accepted only after a standard adjective at the head of the phrase.
                                    if (items == null || (items.Count < 1))
                                    {
                                        return(null);
                                    }
                                    if (!items[0].IsStdAdjective)
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                    }
                }
                if (mc0.IsAdjective && t.Morph.ItemsCount == 1)
                {
                    // NOTE(review): "в.ср.ст." looks like a comparative-degree marker - confirm.
                    if (t.Morph[0].ContainsAttr("в.ср.ст.", null))
                    {
                        return(null);
                    }
                }
                Pullenti.Morph.MorphClass mc1 = t.GetMorphClassInDictionary();
                // A word that is only a verb in the dictionary and carries no case cannot take part in a noun phrase.
                if (mc1 == Pullenti.Morph.MorphClass.Verb && t.Morph.Case.IsUndefined)
                {
                    return(null);
                }
                if ((((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples)) == Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreParticiples && t.Morph.Class.IsVerb && !t.Morph.Class.IsNoun) && !t.Morph.Class.IsProper)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo wf in t.Morph.Items)
                    {
                        if (wf.Class.IsVerb)
                        {
                            // With IgnoreParticiples set, a verb variant carrying "дейст.з." is rejected unless the term ends in "СЯ".
                            if (wf.ContainsAttr("дейст.з.", null))
                            {
                                if (Pullenti.Morph.LanguageHelper.EndsWith((t as Pullenti.Ner.TextToken).Term, "СЯ"))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                }
            }
            // Up to two attempts. Pass 0 analyses t0, possibly extended over a hyphen to t0.Next.Next.
            // Pass 1 runs only if pass 0 produced neither a noun nor an adjective: either t0 alone is analysed
            // (after a failed hyphenated attempt) or t1 (the token after "БИЗНЕС") is analysed as the item's last token.
            Pullenti.Ner.Token t1 = null;
            for (int k = 0; k < 2; k++)
            {
                t = t1 ?? t0;
                if (k == 0)
                {
                    if (((t0 is Pullenti.Ner.TextToken) && t0.Next != null && t0.Next.IsHiphen) && t0.Next.Next != null)
                    {
                        if (!t0.IsWhitespaceAfter && !t0.Morph.Class.IsPronoun && !(t0.Next.Next is Pullenti.Ner.NumberToken))
                        {
                            // "X-Y" written without a space after the hyphen, or "Xо- y" (first part ends in "О",
                            // second part is lower-case): take the morphology from the part after the hyphen.
                            if (!t0.Next.IsWhitespaceAfter)
                            {
                                t = t0.Next.Next;
                            }
                            else if (t0.Next.Next.Chars.IsAllLower && Pullenti.Morph.LanguageHelper.EndsWith((t0 as Pullenti.Ner.TextToken).Term, "О"))
                            {
                                t = t0.Next.Next;
                            }
                        }
                    }
                }
                NounPhraseItem it = new NounPhraseItem(t0, t)
                {
                    CanBeSurname = _canBeSurname
                };
                // NOTE(review): referent tokens already returned above, so this branch looks unreachable here - confirm before removing.
                if (t0 == t && (t0 is Pullenti.Ner.ReferentToken))
                {
                    it.CanBeNoun = true;
                    it.Morph     = new Pullenti.Ner.MorphCollection(t0.Morph);
                }
                bool canBePrepos = false;
                foreach (Pullenti.Morph.MorphBaseInfo v in t.Morph.Items)
                {
                    Pullenti.Morph.MorphWordForm wf = v as Pullenti.Morph.MorphWordForm;
                    // A verb variant that carries a case is taken as an adjective without an agreement check.
                    if (v.Class.IsVerb && !v.Case.IsUndefined)
                    {
                        it.CanBeAdj = true;
                        it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                        continue;
                    }
                    if (v.Class.IsPreposition)
                    {
                        canBePrepos = true;
                    }
                    // Adjective candidates: adjectives, non-personal declinable pronouns, and noun variants of number tokens.
                    if (v.Class.IsAdjective || ((v.Class.IsPronoun && !v.Class.IsPersonalPronoun && !v.ContainsAttr("неизм.", null))) || ((v.Class.IsNoun && (t is Pullenti.Ner.NumberToken))))
                    {
                        if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                        {
                            if (v.ContainsAttr("к.ф.", null))
                            {
                                continue;
                            }
                            if (v.ContainsAttr("собир.", null) && !(t is Pullenti.Ner.NumberToken))
                            {
                                if (wf != null && wf.IsInDictionary)
                                {
                                    return(null);
                                }
                                continue;
                            }
                            if (v.ContainsAttr("сравн.", null))
                            {
                                continue;
                            }
                            bool ok = true;
                            if (t is Pullenti.Ner.TextToken)
                            {
                                string s = (t as Pullenti.Ner.TextToken).Term;
                                if (s == "ПРАВО" || s == "ПРАВА")
                                {
                                    ok = false;
                                }
                                else if (Pullenti.Morph.LanguageHelper.EndsWith(s, "ОВ") && t.GetMorphClassInDictionary().IsNoun)
                                {
                                    ok = false;
                                }
                            }
                            else if (t is Pullenti.Ner.NumberToken)
                            {
                                if (v.Class.IsNoun && t.Morph.Class.IsAdjective)
                                {
                                    ok = false;
                                }
                                else if (t.Morph.Class.IsNoun && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                                {
                                    ok = false;
                                }
                            }
                            if (ok)
                            {
                                it.AdjMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeAdj = true;
                                if (_isDoubtAdj && t0 == t)
                                {
                                    it.IsDoubtAdjective = true;
                                }
                                if (hasLegalVerb && wf != null && wf.IsInDictionary)
                                {
                                    it.CanBeNoun = true;
                                }
                                // A pronoun used as an adjective may also stand alone as the noun of the phrase.
                                if (wf != null && wf.Class.IsPronoun)
                                {
                                    it.CanBeNoun = true;
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                }
                            }
                        }
                    }
                    // Decide whether this variant may act as the noun of the phrase.
                    bool canBeNoun = false;
                    if (t is Pullenti.Ner.NumberToken)
                    {
                    }
                    else if (v.Class.IsNoun || ((wf != null && wf.NormalCase == "САМ")))
                    {
                        canBeNoun = true;
                    }
                    else if (v.Class.IsPersonalPronoun)
                    {
                        if (items == null || items.Count == 0)
                        {
                            canBeNoun = true;
                        }
                        else
                        {
                            foreach (NounPhraseItem it1 in items)
                            {
                                if (it1.IsVerb)
                                {
                                    // After a verb item a personal pronoun is allowed only as the single, non-nominative follower.
                                    if (items.Count == 1 && !v.Case.IsNominative)
                                    {
                                        canBeNoun = true;
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                            if (items.Count == 1)
                            {
                                if (items[0].CanBeAdjForPersonalPronoun)
                                {
                                    canBeNoun = true;
                                }
                            }
                        }
                    }
                    else if ((v.Class.IsPronoun && ((items == null || items.Count == 0 || ((items.Count == 1 && items[0].CanBeAdjForPersonalPronoun)))) && wf != null) && (((((wf.NormalCase == "ТОТ" || wf.NormalFull == "ТО" || wf.NormalCase == "ТО") || wf.NormalCase == "ЭТО" || wf.NormalCase == "ВСЕ") || wf.NormalCase == "ЧТО" || wf.NormalCase == "КТО") || wf.NormalFull == "КОТОРЫЙ" || wf.NormalCase == "КОТОРЫЙ")))
                    {
                        if (wf.NormalCase == "ВСЕ")
                        {
                            // "ВСЕ" directly followed by "РАВНО" is rejected.
                            if (t.Next != null && t.Next.IsValue("РАВНО", null))
                            {
                                return(null);
                            }
                        }
                        canBeNoun = true;
                    }
                    else if (wf != null && ((wf.NormalFull ?? wf.NormalCase)) == "КОТОРЫЙ" && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns)) == Pullenti.Ner.Core.NounPhraseParseAttr.No)
                    {
                        return(null);
                    }
                    else if (v.Class.IsProper && (t is Pullenti.Ner.TextToken))
                    {
                        if (t.LengthChar > 4 || v.Class.IsProperName)
                        {
                            canBeNoun = true;
                        }
                    }
                    if (canBeNoun)
                    {
                        bool added = false;
                        // MultiNouns mode: every item after the first must be preceded by a conjunction.
                        if (items != null && items.Count > 1 && ((attrs & Pullenti.Ner.Core.NounPhraseParseAttr.MultiNouns)) != Pullenti.Ner.Core.NounPhraseParseAttr.No)
                        {
                            bool ok1 = true;
                            for (int ii = 1; ii < items.Count; ii++)
                            {
                                if (!items[ii].ConjBefore)
                                {
                                    ok1 = false;
                                    break;
                                }
                            }
                            if (ok1)
                            {
                                if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, true))
                                {
                                    it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                    it.CanBeNoun  = true;
                                    it.MultiNouns = true;
                                    added         = true;
                                }
                            }
                        }
                        if (!added)
                        {
                            if (TryAccordVariant(items, (items == null ? 0 : items.Count), v, false))
                            {
                                it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                it.CanBeNoun = true;
                                // A personal pronoun on a token marked "неизм." is additionally registered
                                // as an adjective matching every case and any number.
                                if (v.Class.IsPersonalPronoun && t.Morph.ContainsAttr("неизм.", null) && !it.CanBeAdj)
                                {
                                    NounPhraseItemTextVar itt = new NounPhraseItemTextVar(v, t);
                                    itt.Case   = Pullenti.Morph.MorphCase.AllCases;
                                    itt.Number = Pullenti.Morph.MorphNumber.Undefined;
                                    it.AdjMorph.Add(itt);
                                    it.CanBeAdj = true;
                                }
                            }
                            // Fallback: the first item is a plural non-verb adjective sharing a case with this variant, and
                            // ", <noun phrase>" / "and <noun phrase>" follows - accept the noun as one of several homogeneous nouns.
                            // items is nullable throughout this method, hence the explicit null guard before dereferencing it.
                            else if ((items != null && items.Count > 0 && items[0].AdjMorph.Count > 0 && items[0].AdjMorph[0].Number == Pullenti.Morph.MorphNumber.Plural) && !((items[0].AdjMorph[0].Case & v.Case)).IsUndefined && !items[0].AdjMorph[0].Class.IsVerb)
                            {
                                if (t.Next != null && t.Next.IsCommaAnd && (t.Next.Next is Pullenti.Ner.TextToken))
                                {
                                    Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next.Next, attrs, 0, null);
                                    if (npt2 != null && npt2.Preposition == null && !((npt2.Morph.Case & v.Case & items[0].AdjMorph[0].Case)).IsUndefined)
                                    {
                                        it.NounMorph.Add(new NounPhraseItemTextVar(v, t));
                                        it.CanBeNoun = true;
                                    }
                                }
                            }
                        }
                    }
                }
                // The item spans more than the analysed token: let every variant account for the leading token t0.
                if (t0 != t)
                {
                    foreach (NounPhraseItemTextVar v in it.AdjMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, false);
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        v.CorrectPrefix(t0 as Pullenti.Ner.TextToken, true);
                    }
                }
                if (k == 1 && it.CanBeNoun && !it.CanBeAdj)
                {
                    if (t1 != null)
                    {
                        it.EndToken = t1;
                    }
                    else
                    {
                        // Pass 1 without t1 is reached only after a hyphenated attempt, where t0.Next.Next was checked to be non-null.
                        it.EndToken = t0.Next.Next;
                    }
                    foreach (NounPhraseItemTextVar v in it.NounMorph)
                    {
                        // Append the normalized last token unless the value already contains a hyphen.
                        if (v.NormalValue != null && (v.NormalValue.IndexOf('-') < 0))
                        {
                            v.NormalValue = string.Format("{0}-{1}", v.NormalValue, it.EndToken.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
                        }
                    }
                }
                if (it.CanBeAdj)
                {
                    if (m_StdAdjectives.TryParse(it.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                    {
                        it.IsStdAdjective = true;
                    }
                }
                // The token may also be a preposition: reject the noun reading when the preposition reading
                // yields a longer noun phrase (items present) or governs the following noun phrase (no items yet).
                if (canBePrepos && it.CanBeNoun)
                {
                    if (items != null && items.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null && npt1.EndChar > t.EndChar)
                        {
                            return(null);
                        }
                    }
                    else
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns | Pullenti.Ner.Core.NounPhraseParseAttr.ParseVerbs, 0, null);
                        if (npt1 != null)
                        {
                            Pullenti.Morph.MorphCase mc = Pullenti.Morph.LanguageHelper.GetCaseAfterPreposition((t as Pullenti.Ner.TextToken).Lemma);
                            if (!((mc & npt1.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                    }
                }
                // Pass 1 always returns its item, even when it is neither a noun nor an adjective.
                if (it.CanBeNoun || it.CanBeAdj || k == 1)
                {
                    if (it.BeginToken.Morph.Class.IsPronoun)
                    {
                        // Attach a trailing particle to a pronoun item, optionally skipping a tight hyphen before it.
                        Pullenti.Ner.Token tt2 = it.EndToken.Next;
                        if ((tt2 != null && tt2.IsHiphen && !tt2.IsWhitespaceAfter) && !tt2.IsWhitespaceBefore)
                        {
                            tt2 = tt2.Next;
                        }
                        if (tt2 is Pullenti.Ner.TextToken)
                        {
                            string ss = (tt2 as Pullenti.Ner.TextToken).Term;
                            if ((ss == "ЖЕ" || ss == "БЫ" || ss == "ЛИ") || ss == "Ж")
                            {
                                // These particles only extend the span; normal values stay untouched.
                                it.EndToken = tt2;
                            }
                            else if (ss == "НИБУДЬ" || ss == "ЛИБО" || (((ss == "ТО" && tt2.Previous.IsHiphen)) && it.CanBeAdj))
                            {
                                // These suffixes become part of the adjective's normalized values.
                                it.EndToken = tt2;
                                foreach (NounPhraseItemTextVar m in it.AdjMorph)
                                {
                                    m.NormalValue = string.Format("{0}-{1}", m.NormalValue, ss);
                                    if (m.SingleNumberValue != null)
                                    {
                                        m.SingleNumberValue = string.Format("{0}-{1}", m.SingleNumberValue, ss);
                                    }
                                }
                            }
                        }
                    }
                    return(it);
                }
                if (t0 == t)
                {
                    // "БИЗНЕС" followed by a token with identical character info: retry with that next token as the item's end.
                    if (t0.IsValue("БИЗНЕС", null) && t0.Next != null && t0.Next.Chars == t0.Chars)
                    {
                        t1 = t0.Next;
                        continue;
                    }
                    return(it);
                }
            }
            // Pass 1 always returns inside the loop, so this is only a formal fall-through.
            return(null);
        }
示例#17
0
        /// <summary>
        /// Returns a normalized text for this token, optionally restricted by part of speech,
        /// number and gender.
        /// </summary>
        /// <param name="mc">Part-of-speech filter. When it is a preposition the result is
        /// <c>LanguageHelper.NormalizePreposition(Term)</c>; when null or undefined no class filtering is done.</param>
        /// <param name="num">Requested number. With <c>Singular</c>, a plural word form that has a
        /// <c>NormalFull</c> value is returned in that form (see the exception in the body).</param>
        /// <param name="gender">Requested gender; variants whose gender does not intersect it are
        /// skipped, except for the masculine special cases handled in the body.</param>
        /// <param name="keepChars">When true, the result is re-cased to follow this token's
        /// <c>Chars</c> (all-lower or capitalized).</param>
        /// <returns>
        /// The normal form of the first accepted word form; <c>null</c> when no word form was accepted
        /// but at least one accepted variant had a defined case; otherwise a form produced by
        /// <c>MorphologyService.GetWordform</c> or, failing that, <c>Term</c>.
        /// </returns>
        public override string GetNormalCaseText(Pullenti.Morph.MorphClass mc = null, Pullenti.Morph.MorphNumber num = Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool keepChars = false)
        {
            if (mc != null && mc.IsPreposition)
            {
                return Pullenti.Morph.LanguageHelper.NormalizePreposition(Term);
            }

            // Stays true until an accepted variant with a defined case is seen.
            bool empty = true;

            foreach (Pullenti.Morph.MorphBaseInfo it in Morph.Items)
            {
                if (mc != null && !mc.IsUndefined)
                {
                    Pullenti.Morph.MorphClass cc = it.Class & mc;
                    if (cc.IsUndefined)
                    {
                        continue;
                    }
                    if (cc.IsMisc && !cc.IsProper && mc != it.Class)
                    {
                        continue;
                    }
                }
                Pullenti.Morph.MorphWordForm wf = it as Pullenti.Morph.MorphWordForm;
                // Set when the variant does not match the requested (masculine) gender but
                // carries a NormalFull value that can be returned instead.
                bool normalFull = false;
                if (gender != Pullenti.Morph.MorphGender.Undefined)
                {
                    if (((it.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
                    {
                        if ((gender == Pullenti.Morph.MorphGender.Masculine && ((it.Gender != Pullenti.Morph.MorphGender.Undefined || it.Number == Pullenti.Morph.MorphNumber.Plural)) && wf != null) && wf.NormalFull != null)
                        {
                            normalFull = true;
                        }
                        else if (gender == Pullenti.Morph.MorphGender.Masculine && it.Class.IsPersonalPronoun)
                        {
                            // Personal pronouns are accepted for a masculine request as they are.
                        }
                        else
                        {
                            continue;
                        }
                    }
                }
                if (!it.Case.IsUndefined)
                {
                    empty = false;
                }
                if (wf == null)
                {
                    continue;
                }

                string res;
                if (num == Pullenti.Morph.MorphNumber.Singular && it.Number == Pullenti.Morph.MorphNumber.Plural && wf.NormalFull != null)
                {
                    // NormalCase may be null (the other branch already falls back to Term for it),
                    // so it must not be dereferenced unconditionally here.
                    string nc = wf.NormalCase;
                    // Keep NormalCase only when it is NormalFull plus a two-letter "СЯ" tail
                    // (presumably the reflexive suffix - confirm); otherwise use NormalFull.
                    if (nc != null && nc.Length == (wf.NormalFull.Length + 2) && nc.Length > 4 && nc[nc.Length - 2] == 'С' && nc[nc.Length - 1] == 'Я')
                    {
                        res = nc;
                    }
                    else
                    {
                        // The original chose between NormalFull and NormalFull; both arms were identical.
                        res = wf.NormalFull;
                    }
                }
                else
                {
                    res = (normalFull ? wf.NormalFull : (wf.NormalCase ?? Term));
                }
                if (num == Pullenti.Morph.MorphNumber.Singular && mc != null && mc == Pullenti.Morph.MorphClass.Noun)
                {
                    // Hard-coded replacement for a plural noun whose singular is a different word.
                    if (res == "ДЕТИ")
                    {
                        res = "РЕБЕНОК";
                    }
                }
                if (keepChars)
                {
                    if (Chars.IsAllLower)
                    {
                        res = res.ToLower();
                    }
                    else if (Chars.IsCapitalUpper)
                    {
                        res = Pullenti.Ner.Core.MiscHelper.ConvertFirstCharUpperAndOtherLower(res);
                    }
                }
                return res;
            }
            if (!empty)
            {
                return null;
            }
            string te = null;

            if (num == Pullenti.Morph.MorphNumber.Singular && mc != null)
            {
                // No word form was usable: ask the morphology service for a singular form of Term.
                Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo()
                {
                    Class = new Pullenti.Morph.MorphClass()
                    {
                        Value = mc.Value
                    }, Gender = gender, Number = Pullenti.Morph.MorphNumber.Singular, Language = Morph.Language
                };
                string vars = Pullenti.Morph.MorphologyService.GetWordform(Term, bi);
                if (vars != null)
                {
                    te = vars;
                }
            }
            if (te == null)
            {
                te = Term;
            }
            if (keepChars)
            {
                if (Chars.IsAllLower)
                {
                    return te.ToLower();
                }
                else if (Chars.IsCapitalUpper)
                {
                    return Pullenti.Ner.Core.MiscHelper.ConvertFirstCharUpperAndOtherLower(te);
                }
            }
            return te;
        }
示例#18
0
 /// <summary>
 /// Creates a text token from a morphological token.
 /// </summary>
 /// <param name="source">Morphological token to copy from; when null only the base
 /// constructor runs (with 0 for any position not given explicitly).</param>
 /// <param name="kit">Analysis kit passed to the base constructor.</param>
 /// <param name="bchar">Begin position; a negative value means "take it from <paramref name="source"/>".</param>
 /// <param name="echar">End position; a negative value means "take it from <paramref name="source"/>".</param>
 public TextToken(Pullenti.Morph.MorphToken source, Pullenti.Ner.Core.AnalysisKit kit, int bchar = -1, int echar = -1) : base(kit, (bchar >= 0 ? bchar : (source == null ? 0 : source.BeginChar)), (echar >= 0 ? echar : (source == null ? 0 : source.EndChar)))
 {
     if (source == null)
     {
         return;
     }

     Chars = source.CharInfo;
     Term = source.Term;
     Lemma = source.GetLemma() ?? Term;
     Morph = new MorphCollection();

     // Longest of Term and every NormalCase / NormalFull of the word forms.
     MaxLengthOfMorphVars = (short)Term.Length;
     if (source.WordForms != null)
     {
         foreach (Pullenti.Morph.MorphWordForm form in source.WordForms)
         {
             Morph.AddItem(form);
             if (form.NormalCase != null && (MaxLengthOfMorphVars < form.NormalCase.Length))
             {
                 MaxLengthOfMorphVars = (short)form.NormalCase.Length;
             }
             if (form.NormalFull != null && (MaxLengthOfMorphVars < form.NormalFull.Length))
             {
                 MaxLengthOfMorphVars = (short)form.NormalFull.Length;
             }
         }
     }

     // Length of the leading part of Term that every non-null NormalCase / NormalFull
     // of the collected variants shares character by character.
     for (int pos = 0; pos < Term.Length; pos++)
     {
         char letter = Term[pos];
         bool sharedByAll = true;
         for (int k = 0; k < Morph.ItemsCount; k++)
         {
             Pullenti.Morph.MorphWordForm form = Morph[k] as Pullenti.Morph.MorphWordForm;
             string normalCase = form.NormalCase;
             if (normalCase != null && (pos >= normalCase.Length || normalCase[pos] != letter))
             {
                 sharedByAll = false;
                 break;
             }
             string normalFull = form.NormalFull;
             if (normalFull != null && (pos >= normalFull.Length || normalFull[pos] != letter))
             {
                 sharedByAll = false;
                 break;
             }
         }
         if (!sharedByAll)
         {
             break;
         }
         InvariantPrefixLengthOfMorphVars = (short)(pos + 1);
     }

     if (Morph.Language.IsUndefined && !source.Language.IsUndefined)
     {
         Morph.Language = source.Language;
     }
 }
示例#19
0
        /// <summary>
        /// Builds a single string from the tokens between <paramref name="begin"/> and
        /// <paramref name="end"/> (inclusive), optionally replacing each text token with the
        /// <c>NormalCase</c> of a word form that matches the requested class, case and gender.
        /// </summary>
        /// <param name="begin">First token of the range.</param>
        /// <param name="end">Last token of the range.</param>
        /// <param name="cla">Class filter for word forms; a zero <c>Value</c> disables it.</param>
        /// <param name="mc">Case filter for word forms; an undefined case disables it.</param>
        /// <param name="gender">Gender filter for word forms; <c>Undefined</c> disables it.</param>
        /// <param name="ignoreBracketsAndHiphens">When true, brackets and whitespace-adjacent hyphens get
        /// the special handling below instead of being emitted as ordinary text.</param>
        /// <param name="ignoreGeoReferent">When true, meta tokens (other than <paramref name="begin"/>)
        /// whose referent type name is "GEO" are skipped.</param>
        /// <returns>The assembled text, or <c>null</c> when either bound is null, when
        /// <paramref name="begin"/> ends after <paramref name="end"/> starts (and they differ), or when
        /// nothing was appended.</returns>
        public static string GetNameEx(Pullenti.Ner.Token begin, Pullenti.Ner.Token end, Pullenti.Morph.MorphClass cla, Pullenti.Morph.MorphCase mc, Pullenti.Morph.MorphGender gender = Pullenti.Morph.MorphGender.Undefined, bool ignoreBracketsAndHiphens = false, bool ignoreGeoReferent = false)
        {
            if (end == null || begin == null)
            {
                return(null);
            }
            if (begin.EndChar > end.BeginChar && begin != end)
            {
                return(null);
            }
            StringBuilder res    = new StringBuilder();
            // Accumulates the left parts of "word-word" sequences until the last part is reached.
            string        prefix = null;

            for (Pullenti.Ner.Token t = begin; t != null && t.EndChar <= end.EndChar; t = t.Next)
            {
                // Stop collecting once the result has grown past 1000 characters.
                if (res.Length > 1000)
                {
                    break;
                }
                if (t.IsTableControlChar)
                {
                    continue;
                }
                if (ignoreBracketsAndHiphens)
                {
                    if (BracketHelper.IsBracket(t, false))
                    {
                        if (t == end)
                        {
                            break;
                        }
                        if (t.IsCharOf("(<["))
                        {
                            BracketSequenceToken br = BracketHelper.TryParse(t, BracketParseAttr.No, 100);
                            if (br != null && br.EndChar <= end.EndChar)
                            {
                                // Render the bracket content recursively, without class/case/gender filters.
                                string tmp = GetNameEx(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Morph.MorphClass.Undefined, Pullenti.Morph.MorphCase.Undefined, Pullenti.Morph.MorphGender.Undefined, ignoreBracketsAndHiphens, false);
                                if (tmp != null)
                                {
                                    // A trailing bracket pair holding one non-letter, non-referent token is dropped.
                                    if ((br.EndChar == end.EndChar && br.BeginToken.Next == br.EndToken.Previous && !br.BeginToken.Next.Chars.IsLetter) && !(br.BeginToken.Next is Pullenti.Ner.ReferentToken))
                                    {
                                    }
                                    else
                                    {
                                        res.AppendFormat(" {0}{1}{2}", t.GetSourceText(), tmp, br.EndToken.GetSourceText());
                                    }
                                }
                                // Jump to the closing bracket; the loop increment moves past it.
                                t = br.EndToken;
                            }
                        }
                        continue;
                    }
                    if (t.IsHiphen)
                    {
                        if (t == end)
                        {
                            break;
                        }
                        else if (t.IsWhitespaceBefore || t.IsWhitespaceAfter)
                        {
                            // A hyphen with whitespace on either side is skipped; otherwise it falls through.
                            continue;
                        }
                    }
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt != null)
                {
                    if (!ignoreBracketsAndHiphens)
                    {
                        // "text - text" inside the range: remember this term as a prefix and skip the hyphen.
                        if ((tt.Next != null && tt.Next.IsHiphen && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt != end && tt.Next != end)
                        {
                            if (prefix == null)
                            {
                                prefix = tt.Term;
                            }
                            else
                            {
                                prefix = string.Format("{0}-{1}", prefix, tt.Term);
                            }
                            t = tt.Next;
                            // NOTE(review): the enclosing condition already requires tt.Next != end,
                            // so this comparison looks like it can never be true here.
                            if (t == end)
                            {
                                break;
                            }
                            else
                            {
                                continue;
                            }
                        }
                    }
                    string s = null;
                    if (cla.Value != 0 || !mc.IsUndefined || gender != Pullenti.Morph.MorphGender.Undefined)
                    {
                        // First pass: pick a word form satisfying every active filter. The first match
                        // wins unless a later match has NormalCase equal to the token's Term.
                        foreach (Pullenti.Morph.MorphBaseInfo wff in tt.Morph.Items)
                        {
                            Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                            if (wf == null)
                            {
                                continue;
                            }
                            if (cla.Value != 0)
                            {
                                if (((wf.Class.Value & cla.Value)) == 0)
                                {
                                    continue;
                                }
                            }
                            if (!mc.IsUndefined)
                            {
                                if (((wf.Case & mc)).IsUndefined)
                                {
                                    continue;
                                }
                            }
                            if (gender != Pullenti.Morph.MorphGender.Undefined)
                            {
                                if (((wf.Gender & gender)) == Pullenti.Morph.MorphGender.Undefined)
                                {
                                    continue;
                                }
                            }
                            if (s == null || wf.NormalCase == tt.Term)
                            {
                                s = wf.NormalCase;
                            }
                        }
                        // Second pass: same selection with the gender filter dropped.
                        if (s == null && gender != Pullenti.Morph.MorphGender.Undefined)
                        {
                            foreach (Pullenti.Morph.MorphBaseInfo wff in tt.Morph.Items)
                            {
                                Pullenti.Morph.MorphWordForm wf = wff as Pullenti.Morph.MorphWordForm;
                                if (wf == null)
                                {
                                    continue;
                                }
                                if (cla.Value != 0)
                                {
                                    if (((wf.Class.Value & cla.Value)) == 0)
                                    {
                                        continue;
                                    }
                                }
                                if (!mc.IsUndefined)
                                {
                                    if (((wf.Case & mc)).IsUndefined)
                                    {
                                        continue;
                                    }
                                }
                                if (s == null || wf.NormalCase == tt.Term)
                                {
                                    s = wf.NormalCase;
                                }
                            }
                        }
                    }
                    if (s == null)
                    {
                        // No word form selected: fall back to the token's Term.
                        s = tt.Term;
                        if (tt.Chars.IsLastLower && tt.LengthChar > 2)
                        {
                            // Use the source text cut right after its last upper-case character.
                            s = tt.GetSourceText();
                            for (int i = s.Length - 1; i >= 0; i--)
                            {
                                if (char.IsUpper(s[i]))
                                {
                                    s = s.Substring(0, i + 1);
                                    break;
                                }
                            }
                        }
                    }
                    if (prefix != null)
                    {
                        // NOTE(review): prefix is only assigned when ignoreBracketsAndHiphens is false,
                        // so within one call the " " delimiter appears unreachable.
                        string delim = "-";
                        if (ignoreBracketsAndHiphens)
                        {
                            delim = " ";
                        }
                        s = string.Format("{0}{1}{2}", prefix, delim, s);
                    }
                    prefix = null;
                    if (res.Length > 0 && s.Length > 0)
                    {
                        if (char.IsLetterOrDigit(s[0]))
                        {
                            // Separate words with a space unless the text so far ends with '-'.
                            char ch0 = res[res.Length - 1];
                            if (ch0 == '-')
                            {
                            }
                            else
                            {
                                res.Append(' ');
                            }
                        }
                        else if (!ignoreBracketsAndHiphens && BracketHelper.CanBeStartOfSequence(tt, false, false))
                        {
                            res.Append(' ');
                        }
                    }
                    res.Append(s);
                }
                else if (t is Pullenti.Ner.NumberToken)
                {
                    if (res.Length > 0)
                    {
                        // No space when the number directly follows a '-' already in the result.
                        if (!t.IsWhitespaceBefore && res[res.Length - 1] == '-')
                        {
                        }
                        else
                        {
                            res.Append(' ');
                        }
                    }
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    // A single-token number spelled in words with adjective morphology is emitted
                    // as its Term; any other number is emitted as its Value.
                    if ((t.Morph.Class.IsAdjective && nt.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.BeginToken == nt.EndToken) && (nt.BeginToken is Pullenti.Ner.TextToken))
                    {
                        res.Append((nt.BeginToken as Pullenti.Ner.TextToken).Term);
                    }
                    else
                    {
                        res.Append(nt.Value);
                    }
                }
                else if (t is Pullenti.Ner.MetaToken)
                {
                    if ((ignoreGeoReferent && t != begin && t.GetReferent() != null) && t.GetReferent().TypeName == "GEO")
                    {
                        continue;
                    }
                    // Render the inner tokens of the meta token with the same settings.
                    string s = GetNameEx((t as Pullenti.Ner.MetaToken).BeginToken, (t as Pullenti.Ner.MetaToken).EndToken, cla, mc, gender, ignoreBracketsAndHiphens, ignoreGeoReferent);
                    if (!string.IsNullOrEmpty(s))
                    {
                        if (res.Length > 0)
                        {
                            if (!t.IsWhitespaceBefore && res[res.Length - 1] == '-')
                            {
                            }
                            else
                            {
                                res.Append(' ');
                            }
                        }
                        res.Append(s);
                    }
                }
                if (t == end)
                {
                    break;
                }
            }
            if (res.Length == 0)
            {
                return(null);
            }
            return(res.ToString());
        }
示例#20
0
 /// <summary>
 /// Computes the coefficient for treating the source item as the agent of the target verb
 /// group. The value is stored in <c>Coef</c> and returned: -1 whenever a check fails,
 /// 0 in the undefined-case situations handled below, otherwise one of the configured
 /// <c>SemanticService.Params</c> coefficients (possibly reduced).
 /// </summary>
 /// <param name="noplural">When true and the first verb is plural, extra plural-agreement
 /// checks are applied to the source morphology unless <c>FromIsPlural</c> is set.</param>
 /// <returns>The same value that was assigned to <c>Coef</c>.</returns>
 double _calcAgent(bool noplural)
 {
     // A source introduced by a preposition is rejected.
     if (!string.IsNullOrEmpty(FromPrep))
     {
         return Coef = -1;
     }
     Pullenti.Morph.MorphWordForm firstForm = ToVerb.FirstVerb.VerbMorph;
     if (firstForm == null)
     {
         return Coef = -1;
     }
     Pullenti.Morph.MorphWordForm lastForm = ToVerb.LastVerb.VerbMorph;
     if (lastForm == null)
     {
         return Coef = -1;
     }
     if (firstForm.Misc.Mood == Pullenti.Morph.MorphMood.Imperative)
     {
         return Coef = -1;
     }

     Pullenti.Ner.MorphCollection srcMorph = FromMorph;

     // Passive voice: only an instrumental source is accepted.
     if (lastForm.Misc.Voice == Pullenti.Morph.MorphVoice.Passive || ToVerb.LastVerb.Morph.ContainsAttr("страд.з.", null))
     {
         if (srcMorph.Case.IsUndefined)
         {
             return Coef = 0;
         }
         if (!srcMorph.Case.IsInstrumental)
         {
             return Coef = -1;
         }
         Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
         if (lastForm.Case.IsInstrumental)
         {
             Coef /= 2;
         }
         return Coef;
     }

     if (firstForm.Misc.Attrs.Contains("инф."))
     {
         return Coef = -1;
     }

     if (_isRevVerb(lastForm))
     {
         // Agent case is taken from the first derivate group that declares one for CmRev.
         Pullenti.Morph.MorphCase agentCase = Pullenti.Morph.MorphCase.Undefined;
         List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(lastForm.NormalFull ?? lastForm.NormalCase, true, null);
         if (groups != null)
         {
             foreach (Pullenti.Semantic.Utils.DerivateGroup g in groups)
             {
                 if (g.CmRev.Agent != null)
                 {
                     agentCase = g.CmRev.Agent.Case;
                     break;
                 }
             }
         }
         if (srcMorph.Case.IsUndefined)
         {
             return Coef = 0;
         }
         if (agentCase.IsDative)
         {
             if (!srcMorph.Case.IsDative)
             {
                 return Coef = -1;
             }
             Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
             if (srcMorph.Case.IsGenitive)
             {
                 Coef /= 2;
             }
             return Coef;
         }
         if (agentCase.IsInstrumental)
         {
             if (!srcMorph.Case.IsInstrumental)
             {
                 return Coef = -1;
             }
             if (srcMorph.Case.IsNominative)
             {
                 return Coef = 0;
             }
             return Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
         }
         if (!srcMorph.Case.IsNominative)
         {
             return Coef = -1;
         }
     }

     if (firstForm.Number != Pullenti.Morph.MorphNumber.Plural)
     {
         if (firstForm.Number == Pullenti.Morph.MorphNumber.Singular)
         {
             Plural = 0;
             if (FromIsPlural)
             {
                 return Coef = -1;
             }
         }
         if (!_checkMorphAccord(srcMorph, false, firstForm))
         {
             return Coef = -1;
         }
         // A non-nominative source is tolerated only when the first verb is a participle.
         if (!srcMorph.Case.IsUndefined && !srcMorph.Case.IsNominative && !ToVerb.FirstVerb.IsParticiple)
         {
             return Coef = -1;
         }
         Pullenti.Morph.MorphPerson person = firstForm.Misc.Person;
         if (person != Pullenti.Morph.MorphPerson.Undefined && (person & Pullenti.Morph.MorphPerson.Third) == Pullenti.Morph.MorphPerson.Undefined)
         {
             // Verb is first/second person only: the source must carry the matching attribute.
             if ((person & Pullenti.Morph.MorphPerson.First) == Pullenti.Morph.MorphPerson.First && !srcMorph.ContainsAttr("1 л.", null))
             {
                 return Coef = -1;
             }
             if ((person & Pullenti.Morph.MorphPerson.Second) == Pullenti.Morph.MorphPerson.Second && !srcMorph.ContainsAttr("2 л.", null))
             {
                 return Coef = -1;
             }
         }
         Coef = Pullenti.Semantic.SemanticService.Params.MorphAccord;
         if (srcMorph.Case.IsUndefined)
         {
             Coef /= 4;
         }
         return Coef;
     }

     // Plural verb from here on.
     if (!srcMorph.Case.IsUndefined)
     {
         if (firstForm.Case.IsUndefined)
         {
             if (!srcMorph.Case.IsNominative)
             {
                 return Coef = -1;
             }
         }
         else if ((firstForm.Case & srcMorph.Case).IsUndefined)
         {
             return Coef = -1;
         }
     }
     if (noplural && !FromIsPlural)
     {
         if ((srcMorph.Number & Pullenti.Morph.MorphNumber.Plural) == Pullenti.Morph.MorphNumber.Undefined)
         {
             return Coef = -1;
         }
         if (!_checkMorphAccord(srcMorph, false, firstForm))
         {
             return Coef = -1;
         }
         if (srcMorph.Items.Count > 0 && !firstForm.Case.IsUndefined)
         {
             // Require at least one plural variant whose case is undefined or compatible with the verb's.
             bool found = false;
             foreach (Pullenti.Morph.MorphBaseInfo variant in srcMorph.Items)
             {
                 if ((variant.Number & Pullenti.Morph.MorphNumber.Plural) != Pullenti.Morph.MorphNumber.Plural)
                 {
                     continue;
                 }
                 if (!variant.Case.IsUndefined && (variant.Case & firstForm.Case).IsUndefined)
                 {
                     continue;
                 }
                 found = true;
                 break;
             }
             if (!found)
             {
                 return Coef = -1;
             }
         }
     }
     Plural = 1;
     Coef = Pullenti.Semantic.SemanticService.Params.VerbPlural;
     if (lastForm.NormalCase == "БЫТЬ" && srcMorph.Case.IsUndefined && From.Source.BeginToken.BeginChar > ToVerb.EndChar)
     {
         Coef /= 2;
     }
     return Coef;
 }
示例#21
0
 /// <summary>
 /// Creates a semantic object for a pronoun or adjective item <paramref name="a"/> of the
 /// noun phrase <paramref name="npt"/> and registers it in <paramref name="gr"/>.
 /// </summary>
 /// <param name="gr">Graph that receives the new object.</param>
 /// <param name="npt">Noun phrase whose morphology is used to pick the matching word form.</param>
 /// <param name="a">The item to convert.</param>
 /// <returns>The created object, or <c>null</c> when the item is a verb and not a pronoun
 /// (nothing is added to the graph in that case).</returns>
 public static Pullenti.Semantic.SemObject CreateNptAdj(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt, Pullenti.Ner.MetaToken a)
 {
     bool isPronoun = a.Morph.Class.IsPronoun;
     if (!isPronoun && a.Morph.Class.IsVerb)
     {
         return null;
     }

     Pullenti.Semantic.SemObject node = new Pullenti.Semantic.SemObject(gr);
     gr.Objects.Add(node);
     node.Tokens.Add(a);

     if (isPronoun)
     {
         if (a.BeginToken.Morph.Class.IsPersonalPronoun)
         {
             node.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
         }
         else
         {
             node.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
         }
         // Take the first word form whose case is compatible with the noun phrase.
         foreach (Pullenti.Morph.MorphBaseInfo info in a.BeginToken.Morph.Items)
         {
             Pullenti.Morph.MorphWordForm form = info as Pullenti.Morph.MorphWordForm;
             if (form == null || (!npt.Morph.Case.IsUndefined && (npt.Morph.Case & form.Case).IsUndefined))
             {
                 continue;
             }
             _setMorph(node, form);
             if (node.Morph.NormalFull == "КАКОВ")
             {
                 node.Morph.NormalFull = "КАКОЙ";
             }
             break;
         }
         if (node.Morph.NormalFull == null)
         {
             // Nothing matched: use the token's own normalized text for both forms.
             string text = a.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
             node.Morph.NormalCase = text;
             node.Morph.NormalFull = text;
         }
         return node;
     }

     node.Typ = Pullenti.Semantic.SemObjectType.Adjective;
     // Take the first adjective word form that agrees with the noun phrase.
     foreach (Pullenti.Morph.MorphBaseInfo info in a.BeginToken.Morph.Items)
     {
         if (!info.CheckAccord(npt.Morph, false, false) || !info.Class.IsAdjective)
         {
             continue;
         }
         Pullenti.Morph.MorphWordForm form = info as Pullenti.Morph.MorphWordForm;
         if (form == null)
         {
             continue;
         }
         _setMorph(node, form);
         break;
     }
     if (node.Morph.NormalCase == null)
     {
         node.Morph.NormalCase = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         node.Morph.NormalFull = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
         _setMorph0(node, a.BeginToken.Morph);
     }

     // Attach the first derivate group found for the full normal form, if any.
     List<Pullenti.Semantic.Utils.DerivateGroup> found = Pullenti.Semantic.Utils.DerivateService.FindDerivates(node.Morph.NormalFull, true, null);
     if (found != null && found.Count > 0)
     {
         node.Concept = found[0];
     }
     return node;
 }
示例#22
0
        double _calcPacient(bool noplural)
        {
            if (!string.IsNullOrEmpty(FromPrep))
            {
                return(Coef = -1);
            }
            Pullenti.Morph.MorphWordForm vf = ToVerb.FirstVerb.VerbMorph;
            if (vf == null)
            {
                return(-1);
            }
            Pullenti.Morph.MorphWordForm vf2 = ToVerb.LastVerb.VerbMorph;
            if (vf2 == null)
            {
                return(-1);
            }
            Pullenti.Ner.MorphCollection morph = FromMorph;
            if (vf2.Misc.Voice == Pullenti.Morph.MorphVoice.Passive || ToVerb.LastVerb.Morph.ContainsAttr("страд.з.", null))
            {
                if (vf.Number == Pullenti.Morph.MorphNumber.Plural)
                {
                    if (noplural)
                    {
                        if (FromIsPlural)
                        {
                        }
                        else if (!_checkMorphAccord(morph, false, vf))
                        {
                            return(-1);
                        }
                        else if (morph.Items.Count > 0 && !vf.Case.IsUndefined)
                        {
                            bool ok = false;
                            foreach (Pullenti.Morph.MorphBaseInfo it in morph.Items)
                            {
                                if (((it.Number & Pullenti.Morph.MorphNumber.Plural)) == Pullenti.Morph.MorphNumber.Plural)
                                {
                                    if (!it.Case.IsUndefined && ((it.Case & vf.Case)).IsUndefined)
                                    {
                                        continue;
                                    }
                                    ok = true;
                                    break;
                                }
                            }
                            if (!ok)
                            {
                                return(Coef = -1);
                            }
                        }
                    }
                    Coef   = Pullenti.Semantic.SemanticService.Params.VerbPlural;
                    Plural = 1;
                }
                else
                {
                    if (vf.Number == Pullenti.Morph.MorphNumber.Singular)
                    {
                        Plural = 0;
                        if (FromIsPlural)
                        {
                            return(-1);
                        }
                    }
                    if (!_checkMorphAccord(morph, false, vf))
                    {
                        return(-1);
                    }
                    Coef = Pullenti.Semantic.SemanticService.Params.MorphAccord;
                }
                return(Coef);
            }
            bool isTrans     = false;
            bool isRefDative = false;
            List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(vf2.NormalFull ?? vf2.NormalCase, true, null);

            if (grs != null)
            {
                foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                {
                    if (gr.Cm.Transitive)
                    {
                        isTrans = true;
                    }
                    if (gr.CmRev.Agent != null && !gr.CmRev.Agent.Case.IsNominative)
                    {
                        isRefDative = true;
                    }
                }
            }
            if (_isRevVerb(vf2))
            {
                if (!string.IsNullOrEmpty(FromPrep))
                {
                    return(-1);
                }
                if (!morph.Case.IsUndefined)
                {
                    if (isRefDative)
                    {
                        if (morph.Case.IsNominative)
                        {
                            return(Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
                        }
                    }
                    else if (morph.Case.IsInstrumental)
                    {
                        return(Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
                    }
                    return(-1);
                }
                return(Coef = 0);
            }
            if (vf2 != vf && !isTrans)
            {
                grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(vf.NormalFull ?? vf.NormalCase, true, null);
                if (grs != null)
                {
                    foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs)
                    {
                        if (gr.Cm.Transitive)
                        {
                            isTrans = true;
                        }
                    }
                }
            }
            if (isTrans)
            {
                if (!string.IsNullOrEmpty(FromPrep))
                {
                    return(-1);
                }
                if (!morph.Case.IsUndefined)
                {
                    if (morph.Case.IsAccusative)
                    {
                        Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef;
                        if (morph.Case.IsDative)
                        {
                            Coef /= 2;
                        }
                        if (morph.Case.IsGenitive)
                        {
                            Coef /= 2;
                        }
                        if (morph.Case.IsInstrumental)
                        {
                            Coef /= 2;
                        }
                        return(Coef);
                    }
                    else
                    {
                        return(-1);
                    }
                }
            }
            if (vf2.NormalCase == "БЫТЬ")
            {
                if (!string.IsNullOrEmpty(FromPrep))
                {
                    return(-1);
                }
                if (morph.Case.IsInstrumental)
                {
                    return(Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
                }
                if (morph.Case.IsNominative)
                {
                    if (From.Source.BeginToken.BeginChar > ToVerb.EndChar)
                    {
                        return(Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef);
                    }
                    else
                    {
                        return(Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef / 2);
                    }
                }
                if (morph.Case.IsUndefined)
                {
                    return(Coef = Pullenti.Semantic.SemanticService.Params.TransitiveCoef / 2);
                }
            }
            return(-1);
        }
示例#23
0
        // Builds semantic objects for the items of a verb phrase, registers them in the
        // graph and returns the first verb object (or null when no verb object was created).
        //
        // gr  - graph that receives the new objects (gr.Objects) and links (gr.AddLink).
        // vpt - verb phrase whose Items are walked from first to last.
        //
        // Adverb items never become verb objects: a parsed adverb with a defined attribute
        // type is kept as a pending SemAttribute, any other parsed adverb becomes a pending
        // adverb object. Pending attributes/adverbs are attached to the next created object,
        // or to the last verb object when the items run out.
        public static Pullenti.Semantic.SemObject CreateVerbGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.VerbPhraseToken vpt)
        {
            List<Pullenti.Semantic.SemObject> verbObjects = new List<Pullenti.Semantic.SemObject>();
            List<Pullenti.Semantic.SemAttribute> pendingAttrs = new List<Pullenti.Semantic.SemAttribute>();
            List<Pullenti.Semantic.SemObject> pendingAdverbs = new List<Pullenti.Semantic.SemObject>();

            for (int idx = 0; idx < vpt.Items.Count; idx++)
            {
                Pullenti.Ner.Core.VerbPhraseItemToken item = vpt.Items[idx];

                if (item.IsAdverb)
                {
                    AdverbToken parsed = AdverbToken.TryParse(item.BeginToken);
                    if (parsed == null)
                    {
                        continue;
                    }
                    if (parsed.Typ == Pullenti.Semantic.SemAttributeType.Undefined)
                    {
                        // Plain adverb: it gets its own object and swallows the attributes collected so far.
                        Pullenti.Semantic.SemObject adverbObj = CreateAdverb(gr, parsed);
                        if (pendingAttrs.Count > 0)
                        {
                            adverbObj.Attrs.AddRange(pendingAttrs);
                            pendingAttrs.Clear();
                        }
                        pendingAdverbs.Add(adverbObj);
                    }
                    else
                    {
                        // Adverb with a defined attribute type: remember it as an attribute only.
                        Pullenti.Semantic.SemAttribute attr = new Pullenti.Semantic.SemAttribute();
                        attr.Not = parsed.Not;
                        attr.Typ = parsed.Typ;
                        attr.Spelling = parsed.Spelling;
                        pendingAttrs.Add(attr);
                    }
                    continue;
                }

                if (item.Normal == "БЫТЬ")
                {
                    // Skip "БЫТЬ" when some later item is not an adverb.
                    int next = idx + 1;
                    while (next < vpt.Items.Count && vpt.Items[next].IsAdverb)
                    {
                        next++;
                    }
                    if (next < vpt.Items.Count)
                    {
                        continue;
                    }
                }

                Pullenti.Semantic.SemObject verbObj = new Pullenti.Semantic.SemObject(gr);
                gr.Objects.Add(verbObj);
                verbObj.Tokens.Add(item);
                item.Tag = verbObj;
                _setMorph(verbObj, item.VerbMorph);
                string normal = item.Normal;
                verbObj.Morph.NormalFull = normal;
                verbObj.Morph.NormalCase = normal;

                if (!item.IsParticiple && !item.IsDeeParticiple)
                {
                    verbObj.Typ = Pullenti.Semantic.SemObjectType.Verb;
                }
                else
                {
                    verbObj.Typ = Pullenti.Semantic.SemObjectType.Participle;
                    string verbText = item.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
                    verbObj.Morph.NormalFull = verbText ?? verbObj.Morph.NormalCase;
                    verbObj.Morph.NormalCase = item.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);

                    // Both normal forms coincide: try to tell the verb form and the adjective form apart.
                    if (verbObj.Morph.NormalCase == verbObj.Morph.NormalFull)
                    {
                        if (item.Normal.EndsWith("Й"))
                        {
                            List<Pullenti.Semantic.Utils.DerivateGroup> verbGroups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(item.Normal, true, null);
                            if (verbGroups != null)
                            {
                                // Every group is scanned; in each one the first verb-but-not-adjective
                                // word of the token's language overwrites NormalFull.
                                foreach (Pullenti.Semantic.Utils.DerivateGroup g in verbGroups)
                                {
                                    foreach (Pullenti.Semantic.Utils.DerivateWord w in g.Words)
                                    {
                                        bool pureVerb = w.Lang == item.EndToken.Morph.Language && w.Class.IsVerb && !w.Class.IsAdjective;
                                        if (!pureVerb)
                                        {
                                            continue;
                                        }
                                        verbObj.Morph.NormalFull = w.Spelling;
                                        break;
                                    }
                                }
                            }
                        }
                        else if (item.IsParticiple && verbObj.Morph.NormalFull.EndsWith("Ь"))
                        {
                            foreach (Pullenti.Morph.MorphBaseInfo info in item.EndToken.Morph.Items)
                            {
                                Pullenti.Morph.MorphWordForm form = info as Pullenti.Morph.MorphWordForm;
                                if (form != null && (form.NormalCase.EndsWith("Й") || (form.NormalFull != null && form.NormalFull.EndsWith("Й"))))
                                {
                                    verbObj.Morph.NormalCase = form.NormalFull ?? form.NormalCase;
                                    break;
                                }
                            }
                            if (verbObj.Morph.NormalCase == verbObj.Morph.NormalFull)
                            {
                                List<Pullenti.Semantic.Utils.DerivateGroup> adjGroups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(verbObj.Morph.NormalCase, true, null);
                                // Only the first group is inspected here.
                                if (adjGroups != null && adjGroups.Count > 0)
                                {
                                    foreach (Pullenti.Semantic.Utils.DerivateWord w in adjGroups[0].Words)
                                    {
                                        if (w.Lang == item.EndToken.Morph.Language && w.Class.IsVerb && w.Class.IsAdjective)
                                        {
                                            verbObj.Morph.NormalCase = w.Spelling;
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                if (item.VerbMorph != null && item.VerbMorph.ContainsAttr("возвр.", null))
                {
                    // Verb morphology carries the "возвр." attribute: drop a trailing "СЯ"/"СЬ" from NormalFull.
                    string full = verbObj.Morph.NormalFull;
                    if (full.EndsWith("СЯ") || full.EndsWith("СЬ"))
                    {
                        verbObj.Morph.NormalFull = full.Substring(0, full.Length - 2);
                    }
                }

                List<Pullenti.Semantic.Utils.DerivateGroup> concepts = Pullenti.Semantic.Utils.DerivateService.FindDerivates(verbObj.Morph.NormalFull, true, null);
                if (concepts != null && concepts.Count > 0)
                {
                    Pullenti.Semantic.Utils.DerivateGroup primary = concepts[0];
                    verbObj.Concept = primary;
                    if (item.VerbMorph != null && item.VerbMorph.Misc.Aspect == Pullenti.Morph.MorphAspect.Imperfective)
                    {
                        // Imperfective verb: NormalFull is replaced by the first perfective pure verb of the group.
                        foreach (Pullenti.Semantic.Utils.DerivateWord w in primary.Words)
                        {
                            if (w.Class.IsVerb && !w.Class.IsAdjective && w.Aspect == Pullenti.Morph.MorphAspect.Perfective)
                            {
                                verbObj.Morph.NormalFull = w.Spelling;
                                break;
                            }
                        }
                    }
                }

                verbObj.Not = item.Not;
                verbObjects.Add(verbObj);
                if (pendingAttrs.Count > 0)
                {
                    verbObj.Attrs.AddRange(pendingAttrs);
                    pendingAttrs.Clear();
                }
                foreach (Pullenti.Semantic.SemObject adverbObj in pendingAdverbs)
                {
                    gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, verbObj, adverbObj, "как", false, null);
                }
                pendingAdverbs.Clear();
            }

            if (verbObjects.Count == 0)
            {
                return null;
            }

            // Whatever is still pending after the last item goes to the last verb object.
            Pullenti.Semantic.SemObject tail = verbObjects[verbObjects.Count - 1];
            if (pendingAttrs.Count > 0)
            {
                tail.Attrs.AddRange(pendingAttrs);
            }
            foreach (Pullenti.Semantic.SemObject adverbObj in pendingAdverbs)
            {
                gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, tail, adverbObj, "как", false, null);
            }

            // Chain the verbs: each object is linked to its successor, last pair first.
            for (int k = verbObjects.Count - 1; k >= 1; k--)
            {
                gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, verbObjects[k - 1], verbObjects[k], "что делать", false, null);
            }
            return verbObjects[0];
        }
示例#24
0
        // Builds the candidate semantic links between a verb phrase (master) and a slave token.
        //
        // vpt1  - the verb phrase acting as master.
        // slave - the dependent token; a VerbPhraseToken is delegated to _tryCreateInf,
        //         anything that is not a SemanticAbstractSlave yields an empty list.
        // gr    - derivate group used to look up the control model item of the verb (may be null).
        //
        // Returns the list of candidate links (possibly empty). Links are produced by, in order:
        //   1. the nominative-without-preposition check against the first verb,
        //   2. the instrumental-without-preposition check for reflexive/passive verbs,
        //   3. a fallback dative link when the control item has no BaseDative entry,
        //   4. _createRoles for the control item,
        //   5. a boost (or an idiom link) when the slave matches one of gr.Model.Pacients.
        static List <SemanticLink> _tryCreateVerb(Pullenti.Ner.Core.VerbPhraseToken vpt1, Pullenti.Ner.MetaToken slave, Pullenti.Semantic.Utils.DerivateGroup gr)
        {
            if (slave is Pullenti.Ner.Core.VerbPhraseToken)
            {
                return(_tryCreateInf(vpt1, slave as Pullenti.Ner.Core.VerbPhraseToken, gr));
            }
            SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;
            List <SemanticLink>   res  = new List <SemanticLink>();

            if (sla2 == null)
            {
                return(res);
            }
            Pullenti.Semantic.Utils.ControlModelItem cit = FindControlItem(vpt1.LastVerb, gr);
            string prep = sla2.Preposition;

            Pullenti.Morph.MorphBaseInfo morph = (Pullenti.Morph.MorphBaseInfo)sla2.Morph;
            bool isRev1  = vpt1.LastVerb.IsVerbReversive || vpt1.LastVerb.IsVerbPassive;
            // Set once the nominative / instrumental case has been handled here; both flags are passed to _createRoles.
            bool noNomin = false;
            bool noInstr = false;

            // --- 1. Nominative slave without preposition; the first verb must not be a participle.
            if (prep == null && morph.Case.IsNominative && !vpt1.FirstVerb.IsParticiple)
            {
                bool ok = true;
                Pullenti.Morph.MorphWordForm vm = vpt1.FirstVerb.VerbMorph;
                if (vm == null)
                {
                    return(res);
                }
                // Singular verb with a plural slave is rejected unless the verb is an infinitive.
                if (vm.Number == Pullenti.Morph.MorphNumber.Singular)
                {
                    if (morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        if (!vpt1.FirstVerb.IsVerbInfinitive)
                        {
                            ok = false;
                        }
                    }
                }
                if (!CheckMorphAccord(morph, false, vm, false))
                {
                    if (!vpt1.FirstVerb.IsVerbInfinitive)
                    {
                        ok = false;
                    }
                }
                else if (vm.Misc.Person != Pullenti.Morph.MorphPerson.Undefined)
                {
                    // Verb person is known and does not include the third person:
                    // the slave must carry the matching "1 л." / "2 л." attribute.
                    if (((vm.Misc.Person & Pullenti.Morph.MorphPerson.Third)) == Pullenti.Morph.MorphPerson.Undefined)
                    {
                        if (((vm.Misc.Person & Pullenti.Morph.MorphPerson.First)) == Pullenti.Morph.MorphPerson.First)
                        {
                            if (!morph.ContainsAttr("1 л.", null))
                            {
                                ok = false;
                            }
                        }
                        if (((vm.Misc.Person & Pullenti.Morph.MorphPerson.Second)) == Pullenti.Morph.MorphPerson.Second)
                        {
                            if (!morph.ContainsAttr("2 л.", null))
                            {
                                ok = false;
                            }
                        }
                    }
                }
                noNomin = true;
                if (ok)
                {
                    Pullenti.Semantic.Utils.ControlModelItem cit00 = cit;
                    bool isRev0 = isRev1;
                    // When the phrase has several verbs and the first one is reflexive, passive or "ИМЕТЬ",
                    // the control item is looked up for the first verb instead of the last one.
                    if (vpt1.FirstVerb != vpt1.LastVerb && ((vpt1.FirstVerb.IsVerbReversive || vpt1.FirstVerb.IsVerbPassive || vpt1.FirstVerb.Normal == "ИМЕТЬ")))
                    {
                        cit00  = null;
                        isRev0 = true;
                        List <Pullenti.Semantic.Utils.DerivateGroup> grs = FindDerivates(vpt1.FirstVerb);
                        if (grs != null)
                        {
                            foreach (Pullenti.Semantic.Utils.DerivateGroup gg in grs)
                            {
                                if ((((cit00 = FindControlItem(vpt1.FirstVerb, gg)))) != null)
                                {
                                    break;
                                }
                            }
                        }
                    }
                    SemanticLink sl       = null;
                    bool         addagent = false;
                    if (cit00 == null)
                    {
                        // No control item: fall back to a modelled nominative link.
                        sl = new SemanticLink()
                        {
                            Modelled = true, Role = (isRev0 ? SemanticRole.Pacient : SemanticRole.Agent), Rank = 1, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseNominative, IsPassive = isRev0
                        };
                    }
                    else
                    {
                        // Take the first control-model question that accepts the nominative case.
                        foreach (KeyValuePair <Pullenti.Semantic.Utils.ControlModelQuestion, SemanticRole> kp in cit00.Links)
                        {
                            Pullenti.Semantic.Utils.ControlModelQuestion q = kp.Key;
                            if (q.Check(null, Pullenti.Morph.MorphCase.Nominative))
                            {
                                sl = new SemanticLink()
                                {
                                    Role = kp.Value, Rank = 2, Question = q, IsPassive = isRev0
                                };
                                if (sl.Role == SemanticRole.Agent)
                                {
                                    sl.IsPassive = false;
                                }
                                else if (sl.Role == SemanticRole.Pacient && cit00.NominativeCanBeAgentAndPacient && vpt1.LastVerb.IsVerbReversive)
                                {
                                    addagent = true;
                                }
                                break;
                            }
                        }
                    }
                    if (sl != null)
                    {
                        // Rank penalties: ambiguous case readings and a slave that starts after the verb phrase.
                        if (cit00 == null && morph.Case.IsInstrumental && isRev0)
                        {
                            sl.Rank -= 0.5;
                        }
                        if (morph.Case.IsAccusative)
                        {
                            sl.Rank -= 0.5;
                        }
                        if (sla2.BeginChar > vpt1.BeginChar)
                        {
                            sl.Rank -= 0.5;
                        }
                        res.Add(sl);
                        if (addagent)
                        {
                            res.Add(new SemanticLink()
                            {
                                Role = SemanticRole.Agent, Rank = sl.Rank, Question = sl.Question
                            });
                        }
                    }
                }
            }
            // --- 2. Instrumental slave without preposition for a reflexive/passive last verb.
            if (prep == null && isRev1 && morph.Case.IsInstrumental)
            {
                noInstr = true;
                Pullenti.Semantic.Utils.ControlModelItem cit00 = cit;
                SemanticLink sl = null;
                if (cit00 == null)
                {
                    // No control item: fall back to a modelled passive agent link.
                    sl = new SemanticLink()
                    {
                        Modelled = true, Role = SemanticRole.Agent, Rank = 1, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental, IsPassive = true
                    };
                }
                else
                {
                    // Take the first control-model question that accepts the instrumental case.
                    foreach (KeyValuePair <Pullenti.Semantic.Utils.ControlModelQuestion, SemanticRole> kp in cit00.Links)
                    {
                        Pullenti.Semantic.Utils.ControlModelQuestion q = kp.Key;
                        if (q.Check(null, Pullenti.Morph.MorphCase.Instrumental))
                        {
                            sl = new SemanticLink()
                            {
                                Role = kp.Value, Rank = 2, Question = q
                            };
                            if (sl.Role == SemanticRole.Agent)
                            {
                                sl.IsPassive = true;
                            }
                            break;
                        }
                    }
                }
                if (sl != null)
                {
                    if (cit00 == null && morph.Case.IsNominative)
                    {
                        sl.Rank -= 0.5;
                    }
                    if (morph.Case.IsAccusative)
                    {
                        sl.Rank -= 0.5;
                    }
                    if (sla2.BeginChar < vpt1.BeginChar)
                    {
                        sl.Rank -= 0.5;
                    }
                    res.Add(sl);
                    // If the group's own verb model item defines a BaseInstrumental role, that role is put
                    // first with rank 1 and the link found above is kept with rank 0.
                    if ((gr != null && gr.Model.Items.Count > 0 && gr.Model.Items[0].Typ == Pullenti.Semantic.Utils.ControlModelItemType.Verb) && gr.Model.Items[0].Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental))
                    {
                        sl.Rank = 0;
                        SemanticLink sl0 = new SemanticLink()
                        {
                            Question = sl.Question, Rank = 1, Role = gr.Model.Items[0].Links[Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental]
                        };
                        res.Insert(0, sl0);
                    }
                }
            }
            // --- 3. Dative slave without preposition when the control item does not list BaseDative.
            if (prep == null && morph.Case.IsDative && ((cit == null || !cit.Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseDative))))
            {
                SemanticLink sl = new SemanticLink()
                {
                    Modelled = cit == null, Role = SemanticRole.Strong, Rank = 1, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseDative
                };
                if (morph.Case.IsAccusative || morph.Case.IsNominative)
                {
                    sl.Rank -= 0.5;
                }
                if (vpt1.EndToken.Next != sla2.BeginToken)
                {
                    sl.Rank -= 0.5;
                }
                if (cit != null)
                {
                    sl.Rank -= 0.5;
                }
                res.Add(sl);
            }
            // --- 4. Links derived from the control item.
            _createRoles(cit, prep, morph.Case, res, noNomin, noInstr);
            // --- 5. The slave's last token matches one of the words listed in gr.Model.Pacients.
            if (gr != null && gr.Model.Pacients.Count > 0)
            {
                bool ok = false;
                foreach (string n in gr.Model.Pacients)
                {
                    if (sla2.Source != null)
                    {
                        if (sla2.Source.EndToken.IsValue(n, null))
                        {
                            ok = true;
                            break;
                        }
                    }
                    else if (sla2.EndToken.IsValue(n, null))
                    {
                        ok = true;
                        break;
                    }
                }
                if (ok)
                {
                    if (res.Count == 0)
                    {
                        // Nothing found so far: add an idiom Pacient link only for the expected case
                        // (nominative for reflexive/passive verbs, accusative otherwise).
                        ok = false;
                        if (prep == null && isRev1 && morph.Case.IsNominative)
                        {
                            ok = true;
                        }
                        else if (prep == null && !isRev1 && morph.Case.IsAccusative)
                        {
                            ok = true;
                        }
                        if (ok)
                        {
                            res.Add(new SemanticLink()
                            {
                                Role = SemanticRole.Pacient, Question = (isRev1 ? Pullenti.Semantic.Utils.ControlModelQuestion.BaseNominative : Pullenti.Semantic.Utils.ControlModelQuestion.BaseAccusative), Idiom = true
                            });
                        }
                    }
                    else
                    {
                        // Boost every existing link and mark it as an idiom.
                        foreach (SemanticLink r in res)
                        {
                            r.Rank += 4;
                            if (r.Role == SemanticRole.Common)
                            {
                                r.Role = SemanticRole.Strong;
                            }
                            if (vpt1.EndToken.Next == sla2.BeginToken)
                            {
                                r.Rank += 2;
                            }
                            r.Idiom = true;
                        }
                    }
                }
            }
            return(res);
        }
示例#25
0
        internal static NumbersWithUnitToken _tryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool second, bool canOmitNumber, bool canBeNan)
        {
            if (t == null)
            {
                return(null);
            }
            while (t != null)
            {
                if (t.IsCommaAnd || t.IsValue("НО", null))
                {
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            Pullenti.Ner.Token t0 = t;
            bool about            = false;
            bool hasKeyw          = false;
            bool isDiapKeyw       = false;
            int  minMax           = 0;

            Pullenti.Ner.Token ttt = _isMinOrMax(t, ref minMax);
            if (ttt != null)
            {
                t = ttt.Next;
                if (t == null)
                {
                    return(null);
                }
            }
            if (t == null)
            {
                return(null);
            }
            if (t.IsChar('~') || t.IsValue("ОКОЛО", null) || t.IsValue("ПРИМЕРНО", null))
            {
                t       = t.Next;
                about   = true;
                hasKeyw = true;
                if (t == null)
                {
                    return(null);
                }
            }
            if (t.IsValue("В", null) && t.Next != null)
            {
                if (t.Next.IsValue("ПРЕДЕЛ", null) || t.IsValue("ДИАПАЗОН", null))
                {
                    t = t.Next.Next;
                    if (t == null)
                    {
                        return(null);
                    }
                    isDiapKeyw = true;
                }
            }
            if (t0.IsChar('('))
            {
                NumbersWithUnitToken mt0 = _tryParse(t.Next, addUnits, false, false, false);
                if (mt0 != null && mt0.EndToken.Next != null && mt0.EndToken.Next.IsChar(')'))
                {
                    if (second)
                    {
                        if (mt0.FromVal != null && mt0.ToVal != null && mt0.FromVal.Value == (-mt0.ToVal.Value))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    mt0.BeginToken = t0;
                    mt0.EndToken   = mt0.EndToken.Next;
                    List <UnitToken> uu = UnitToken.TryParseList(mt0.EndToken.Next, addUnits, false);
                    if (uu != null && mt0.Units.Count == 0)
                    {
                        mt0.Units    = uu;
                        mt0.EndToken = uu[uu.Count - 1].EndToken;
                    }
                    return(mt0);
                }
            }
            bool    plusminus  = false;
            bool    unitBefore = false;
            bool    isAge      = false;
            DiapTyp dty        = DiapTyp.Undefined;

            Pullenti.Ner.MetaToken whd = null;
            List <UnitToken>       uni = null;

            Pullenti.Ner.Core.TerminToken tok = (m_Termins == null ? null : m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No));
            if (tok != null)
            {
                if (tok.EndToken.IsValue("СТАРШЕ", null) || tok.EndToken.IsValue("МЛАДШЕ", null))
                {
                    isAge = true;
                }
                t       = tok.EndToken.Next;
                dty     = (DiapTyp)tok.Termin.Tag;
                hasKeyw = true;
                if (!tok.IsWhitespaceAfter)
                {
                    if (t == null)
                    {
                        return(null);
                    }
                    if (t is Pullenti.Ner.NumberToken)
                    {
                        if (tok.BeginToken == tok.EndToken && !tok.Chars.IsAllLower)
                        {
                            return(null);
                        }
                    }
                    else if (t.IsComma && t.Next != null && t.Next.IsValue("ЧЕМ", null))
                    {
                        t = t.Next.Next;
                        if (t != null && t.Morph.Class.IsPreposition)
                        {
                            t = t.Next;
                        }
                    }
                    else if (t.IsCharOf(":,(") || t.IsTableControlChar)
                    {
                    }
                    else
                    {
                        return(null);
                    }
                }
                if (t != null && t.IsChar('('))
                {
                    uni = UnitToken.TryParseList(t.Next, addUnits, false);
                    if (uni != null)
                    {
                        t = uni[uni.Count - 1].EndToken.Next;
                        while (t != null)
                        {
                            if (t.IsCharOf("):"))
                            {
                                t = t.Next;
                            }
                            else
                            {
                                break;
                            }
                        }
                        NumbersWithUnitToken mt0 = _tryParse(t, addUnits, false, canOmitNumber, false);
                        if (mt0 != null && mt0.Units.Count == 0)
                        {
                            mt0.BeginToken = t0;
                            mt0.Units      = uni;
                            return(mt0);
                        }
                    }
                    whd = _tryParseWHL(t);
                    if (whd != null)
                    {
                        t = whd.EndToken.Next;
                    }
                }
                else if (t != null && t.IsValue("IP", null))
                {
                    uni = UnitToken.TryParseList(t, addUnits, false);
                    if (uni != null)
                    {
                        t = uni[uni.Count - 1].EndToken.Next;
                    }
                }
                if ((t != null && t.IsHiphen && t.IsWhitespaceBefore) && t.IsWhitespaceAfter)
                {
                    t = t.Next;
                }
            }
            else if (t.IsChar('<'))
            {
                dty     = DiapTyp.Ls;
                t       = t.Next;
                hasKeyw = true;
                if (t != null && t.IsChar('='))
                {
                    t   = t.Next;
                    dty = DiapTyp.Le;
                }
            }
            else if (t.IsChar('>'))
            {
                dty     = DiapTyp.Gt;
                t       = t.Next;
                hasKeyw = true;
                if (t != null && t.IsChar('='))
                {
                    t   = t.Next;
                    dty = DiapTyp.Ge;
                }
            }
            else if (t.IsChar('≤'))
            {
                dty     = DiapTyp.Le;
                hasKeyw = true;
                t       = t.Next;
            }
            else if (t.IsChar('≥'))
            {
                dty     = DiapTyp.Ge;
                hasKeyw = true;
                t       = t.Next;
            }
            else if (t.IsValue("IP", null))
            {
                uni = UnitToken.TryParseList(t, addUnits, false);
                if (uni != null)
                {
                    t = uni[uni.Count - 1].EndToken.Next;
                }
            }
            else if (t.IsValue("ЗА", null) && (t.Next is Pullenti.Ner.NumberToken))
            {
                dty = DiapTyp.Ge;
                t   = t.Next;
            }
            while (t != null && ((t.IsCharOf(":,") || t.IsValue("ЧЕМ", null) || t.IsTableControlChar)))
            {
                t = t.Next;
            }
            if (t != null)
            {
                if (t.IsChar('+') || t.IsValue("ПЛЮС", null))
                {
                    t = t.Next;
                    if (t != null && !t.IsWhitespaceBefore)
                    {
                        if (t.IsHiphen)
                        {
                            t         = t.Next;
                            plusminus = true;
                        }
                        else if ((t.IsCharOf("\\/") && t.Next != null && !t.IsNewlineAfter) && t.Next.IsHiphen)
                        {
                            t         = t.Next.Next;
                            plusminus = true;
                        }
                    }
                }
                else if (second && (t.IsCharOf("\\/÷…~")))
                {
                    t = t.Next;
                }
                else if ((t.IsHiphen && t == t0 && !second) && m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No) != null)
                {
                    tok = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    t   = tok.EndToken.Next;
                    dty = (DiapTyp)tok.Termin.Tag;
                }
                else if (t.IsHiphen && t == t0 && ((t.IsWhitespaceAfter || second)))
                {
                    t = t.Next;
                }
                else if (t.IsChar('±'))
                {
                    t         = t.Next;
                    plusminus = true;
                    hasKeyw   = true;
                }
                else if ((second && t.IsChar('.') && t.Next != null) && t.Next.IsChar('.'))
                {
                    t = t.Next.Next;
                    if (t != null && t.IsChar('.'))
                    {
                        t = t.Next;
                    }
                }
            }
            Pullenti.Ner.NumberToken num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
            if (num == null)
            {
                uni = UnitToken.TryParseList(t, addUnits, false);
                if (uni != null)
                {
                    unitBefore = true;
                    t          = uni[uni.Count - 1].EndToken.Next;
                    bool delim = false;
                    while (t != null)
                    {
                        if (t.IsCharOf(":,"))
                        {
                            delim = true;
                            t     = t.Next;
                        }
                        else if (t.IsHiphen && t.IsWhitespaceAfter)
                        {
                            delim = true;
                            t     = t.Next;
                        }
                        else
                        {
                            break;
                        }
                    }
                    if (!delim)
                    {
                        if (t == null)
                        {
                            if (hasKeyw && canBeNan)
                            {
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        else if (!t.IsWhitespaceBefore)
                        {
                            return(null);
                        }
                        if (t.Next != null && t.IsHiphen && t.IsWhitespaceAfter)
                        {
                            delim = true;
                            t     = t.Next;
                        }
                    }
                    num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
                }
            }
            NumbersWithUnitToken res = null;
            double rval = (double)0;

            if (num == null)
            {
                Pullenti.Ner.Core.TerminToken tt = m_Spec.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tt != null)
                {
                    rval = (double)tt.Termin.Tag;
                    string unam = (string)tt.Termin.Tag2;
                    foreach (Unit u in UnitsHelper.Units)
                    {
                        if (u.FullnameCyr == unam)
                        {
                            uni = new List <UnitToken>();
                            uni.Add(new UnitToken(t, t)
                            {
                                Unit = u
                            });
                            break;
                        }
                    }
                    if (uni == null)
                    {
                        return(null);
                    }
                    res = new NumbersWithUnitToken(t0, tt.EndToken)
                    {
                        About = about
                    };
                    t = tt.EndToken.Next;
                }
                else
                {
                    if (!canOmitNumber && !hasKeyw && !canBeNan)
                    {
                        return(null);
                    }
                    if ((uni != null && uni.Count == 1 && uni[0].BeginToken == uni[0].EndToken) && uni[0].LengthChar > 3)
                    {
                        rval = 1;
                        res  = new NumbersWithUnitToken(t0, uni[uni.Count - 1].EndToken)
                        {
                            About = about
                        };
                        t = res.EndToken.Next;
                    }
                    else if (hasKeyw && canBeNan)
                    {
                        rval = double.NaN;
                        res  = new NumbersWithUnitToken(t0, t0)
                        {
                            About = about
                        };
                        if (t != null)
                        {
                            res.EndToken = t.Previous;
                        }
                        else
                        {
                            for (t = t0; t != null; t = t.Next)
                            {
                                res.EndToken = t;
                            }
                        }
                    }
                    else
                    {
                        return(null);
                    }
                }
            }
            else
            {
                if ((t == t0 && t0.IsHiphen && !t.IsWhitespaceBefore) && !t.IsWhitespaceAfter && (num.RealValue < 0))
                {
                    num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t.Next, true, false);
                    if (num == null)
                    {
                        return(null);
                    }
                }
                if (t == t0 && (t is Pullenti.Ner.NumberToken) && t.Morph.Class.IsAdjective)
                {
                    Pullenti.Ner.TextToken nn = (t as Pullenti.Ner.NumberToken).EndToken as Pullenti.Ner.TextToken;
                    if (nn == null)
                    {
                        return(null);
                    }
                    string norm = nn.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    if ((norm.EndsWith("Ь") || norm == "ЧЕТЫРЕ" || norm == "ТРИ") || norm == "ДВА")
                    {
                    }
                    else
                    {
                        Pullenti.Morph.MorphWordForm mi = Pullenti.Morph.MorphologyService.GetWordBaseInfo("КОКО" + nn.Term, null, false, false);
                        if (mi.Class.IsAdjective)
                        {
                            return(null);
                        }
                    }
                }
                t   = num.EndToken.Next;
                res = new NumbersWithUnitToken(t0, num.EndToken)
                {
                    About = about
                };
                rval = num.RealValue;
            }
            if (uni == null)
            {
                uni = UnitToken.TryParseList(t, addUnits, false);
                if (uni != null)
                {
                    if ((plusminus && second && uni.Count >= 1) && uni[0].Unit == UnitsHelper.uPercent)
                    {
                        res.EndToken         = uni[0].EndToken;
                        res.PlusMinusPercent = true;
                        Pullenti.Ner.Token tt1 = uni[0].EndToken.Next;
                        uni = UnitToken.TryParseList(tt1, addUnits, false);
                        if (uni != null)
                        {
                            res.Units    = uni;
                            res.EndToken = uni[uni.Count - 1].EndToken;
                        }
                    }
                    else
                    {
                        res.Units    = uni;
                        res.EndToken = uni[uni.Count - 1].EndToken;
                    }
                    t = res.EndToken.Next;
                }
            }
            else
            {
                res.Units = uni;
                if (uni.Count > 1)
                {
                    List <UnitToken> uni1 = UnitToken.TryParseList(t, addUnits, false);
                    if (((uni1 != null && uni1[0].Unit == uni[0].Unit && (uni1.Count < uni.Count)) && uni[uni1.Count].Pow == -1 && uni1[uni1.Count - 1].EndToken.Next != null) && uni1[uni1.Count - 1].EndToken.Next.IsCharOf("/\\"))
                    {
                        NumbersWithUnitToken num2 = _tryParse(uni1[uni1.Count - 1].EndToken.Next.Next, addUnits, false, false, false);
                        if (num2 != null && num2.Units != null && num2.Units[0].Unit == uni[uni1.Count].Unit)
                        {
                            res.Units    = uni1;
                            res.DivNum   = num2;
                            res.EndToken = num2.EndToken;
                        }
                    }
                }
            }
            res.WHL = whd;
            if (dty != DiapTyp.Undefined)
            {
                if (dty == DiapTyp.Ge || dty == DiapTyp.From)
                {
                    res.FromInclude = true;
                    res.FromVal     = rval;
                }
                else if (dty == DiapTyp.Gt)
                {
                    res.FromInclude = false;
                    res.FromVal     = rval;
                }
                else if (dty == DiapTyp.Le || dty == DiapTyp.To)
                {
                    res.ToInclude = true;
                    res.ToVal     = rval;
                }
                else if (dty == DiapTyp.Ls)
                {
                    res.ToInclude = false;
                    res.ToVal     = rval;
                }
            }
            bool isSecondMax = false;

            if (!second)
            {
                int iii = 0;
                ttt = _isMinOrMax(t, ref iii);
                if (ttt != null && iii > 0)
                {
                    isSecondMax = true;
                    t           = ttt.Next;
                }
            }
            NumbersWithUnitToken next = (second || plusminus || ((t != null && ((t.IsTableControlChar || t.IsNewlineBefore)))) ? null : _tryParse(t, addUnits, true, false, canBeNan));

            if (next != null && (t.Previous is Pullenti.Ner.NumberToken))
            {
                if (MeasureHelper.IsMultChar((t.Previous as Pullenti.Ner.NumberToken).EndToken))
                {
                    next = null;
                }
            }
            if (next != null && ((next.ToVal != null || next.SingleVal != null)) && next.FromVal == null)
            {
                if ((((next.BeginToken.IsChar('+') && next.SingleVal != null && !double.IsNaN(next.SingleVal.Value)) && next.EndToken.Next != null && next.EndToken.Next.IsCharOf("\\/")) && next.EndToken.Next.Next != null && next.EndToken.Next.Next.IsHiphen) && !hasKeyw && !double.IsNaN(rval))
                {
                    NumbersWithUnitToken next2 = _tryParse(next.EndToken.Next.Next.Next, addUnits, true, false, false);
                    if (next2 != null && next2.SingleVal != null && !double.IsNaN(next2.SingleVal.Value))
                    {
                        res.FromVal     = rval - next2.SingleVal.Value;
                        res.FromInclude = true;
                        res.ToVal       = rval + next.SingleVal.Value;
                        res.ToInclude   = true;
                        if (next2.Units != null && res.Units.Count == 0)
                        {
                            res.Units = next2.Units;
                        }
                        res.EndToken = next2.EndToken;
                        return(res);
                    }
                }
                if (next.Units.Count > 0)
                {
                    if (res.Units.Count == 0)
                    {
                        res.Units = next.Units;
                    }
                    else if (!UnitToken.CanBeEquals(res.Units, next.Units))
                    {
                        next = null;
                    }
                }
                else if (res.Units.Count > 0 && !unitBefore && !next.PlusMinusPercent)
                {
                    next = null;
                }
                if (next != null)
                {
                    res.EndToken = next.EndToken;
                }
                if (next != null && next.ToVal != null)
                {
                    res.ToVal     = next.ToVal;
                    res.ToInclude = next.ToInclude;
                }
                else if (next != null && next.SingleVal != null)
                {
                    if (next.BeginToken.IsCharOf("/\\"))
                    {
                        res.DivNum    = next;
                        res.SingleVal = rval;
                        return(res);
                    }
                    else if (next.PlusMinusPercent)
                    {
                        res.SingleVal        = rval;
                        res.PlusMinus        = next.SingleVal;
                        res.PlusMinusPercent = true;
                        res.ToInclude        = true;
                    }
                    else
                    {
                        res.ToVal     = next.SingleVal;
                        res.ToInclude = true;
                    }
                }
                if (next != null)
                {
                    if (res.FromVal == null)
                    {
                        res.FromVal     = rval;
                        res.FromInclude = true;
                    }
                    return(res);
                }
            }
            else if ((next != null && next.FromVal != null && next.ToVal != null) && next.ToVal.Value == (-next.FromVal.Value))
            {
                if (next.Units.Count == 1 && next.Units[0].Unit == UnitsHelper.uPercent && res.Units.Count > 0)
                {
                    res.SingleVal        = rval;
                    res.PlusMinus        = next.ToVal.Value;
                    res.PlusMinusPercent = true;
                    res.EndToken         = next.EndToken;
                    return(res);
                }
                if (next.Units.Count == 0)
                {
                    res.SingleVal = rval;
                    res.PlusMinus = next.ToVal.Value;
                    res.EndToken  = next.EndToken;
                    return(res);
                }
                res.FromVal     = next.FromVal + rval;
                res.FromInclude = true;
                res.ToVal       = next.ToVal + rval;
                res.ToInclude   = true;
                res.EndToken    = next.EndToken;
                if (next.Units.Count > 0)
                {
                    res.Units = next.Units;
                }
                return(res);
            }
            if (dty == DiapTyp.Undefined)
            {
                if (plusminus && ((!res.PlusMinusPercent || !second)))
                {
                    res.FromInclude = true;
                    res.FromVal     = -rval;
                    res.ToInclude   = true;
                    res.ToVal       = rval;
                }
                else
                {
                    res.SingleVal        = rval;
                    res.PlusMinusPercent = plusminus;
                }
            }
            if (isAge)
            {
                res.IsAge = true;
            }
            return(res);
        }