Esempio n. 1
0
        public static WeaponItemToken TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
        {
            WeaponItemToken res = _TryParse(t, prev, afterConj, attachHigh);

            if (res == null)
            {
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt != null && npt.Noun.BeginChar > npt.BeginChar)
                {
                    res = _TryParse(npt.Noun.BeginToken, prev, afterConj, attachHigh);
                    if (res != null)
                    {
                        if (res.Typ == Typs.Noun)
                        {
                            string str = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                            if (str == "РУЧНОЙ ГРАНАТ")
                            {
                                str = "РУЧНАЯ ГРАНАТА";
                            }
                            if (((str ?? "")).EndsWith(res.Value))
                            {
                                if (res.AltValue == null)
                                {
                                    res.AltValue = str;
                                }
                                else
                                {
                                    str          = str.Substring(0, str.Length - res.Value.Length).Trim();
                                    res.AltValue = string.Format("{0} {1}", str, res.AltValue);
                                }
                                res.BeginToken = t;
                                return(res);
                            }
                        }
                    }
                }
                return(null);
            }
            if (res.Typ == Typs.Name)
            {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(res.EndToken.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                if (br != null && br.IsChar('('))
                {
                    string alt = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(br, Pullenti.Ner.Core.GetTextAttr.No);
                    if (Pullenti.Ner.Core.MiscHelper.CanBeEqualCyrAndLatSS(res.Value, alt))
                    {
                        res.AltValue = alt;
                        res.EndToken = br.EndToken;
                    }
                }
            }
            return(res);
        }
Esempio n. 2
0
        public static List <WeaponItemToken> TryParseList(Pullenti.Ner.Token t, int maxCount = 10)
        {
            WeaponItemToken tr = TryParse(t, null, false, false);

            if (tr == null)
            {
                return(null);
            }
            if (tr.Typ == Typs.Class || tr.Typ == Typs.Date)
            {
                return(null);
            }
            WeaponItemToken        tr0 = tr;
            List <WeaponItemToken> res = new List <WeaponItemToken>();

            if (tr.InnerTokens.Count > 0)
            {
                res.AddRange(tr.InnerTokens);
                if (res[0].BeginChar > tr.BeginChar)
                {
                    res[0].BeginToken = tr.BeginToken;
                }
            }
            res.Add(tr);
            t = tr.EndToken.Next;
            if (tr.Typ == Typs.Noun)
            {
                for (; t != null; t = t.Next)
                {
                    if (t.IsChar(':') || t.IsHiphen)
                    {
                    }
                    else
                    {
                        break;
                    }
                }
            }
            bool andConj = false;

            for (; t != null; t = t.Next)
            {
                if (maxCount > 0 && res.Count >= maxCount)
                {
                    break;
                }
                if (t.IsChar(':'))
                {
                    continue;
                }
                if (tr0.Typ == Typs.Noun)
                {
                    if (t.IsHiphen && t.Next != null)
                    {
                        t = t.Next;
                    }
                }
                tr = TryParse(t, tr0, false, false);
                if (tr == null)
                {
                    if (Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t, true, null, false) && t.Next != null)
                    {
                        if (tr0.Typ == Typs.Model || tr0.Typ == Typs.Brand)
                        {
                            Pullenti.Ner.Token tt1 = t.Next;
                            if (tt1 != null && tt1.IsComma)
                            {
                                tt1 = tt1.Next;
                            }
                            tr = TryParse(tt1, tr0, false, false);
                        }
                    }
                }
                if (tr == null && (t is Pullenti.Ner.ReferentToken))
                {
                    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
                    if (rt.BeginToken == rt.EndToken && (rt.BeginToken is Pullenti.Ner.TextToken))
                    {
                        tr = TryParse(rt.BeginToken, tr0, false, false);
                        if (tr != null && tr.BeginToken == tr.EndToken)
                        {
                            tr.BeginToken = (tr.EndToken = t);
                        }
                    }
                }
                if (tr == null && t.IsChar('('))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        Pullenti.Ner.Token tt = br.EndToken.Next;
                        if (tt != null && tt.IsComma)
                        {
                            tt = tt.Next;
                        }
                        tr = TryParse(tt, tr0, false, false);
                        if (tr != null && tr.Typ == Typs.Number)
                        {
                        }
                        else
                        {
                            tr = null;
                        }
                    }
                }
                if (tr == null && t.IsHiphen)
                {
                    if (tr0.Typ == Typs.Brand || tr0.Typ == Typs.Model)
                    {
                        tr = TryParse(t.Next, tr0, false, false);
                    }
                }
                if (tr == null && t.IsComma)
                {
                    if ((tr0.Typ == Typs.Name || tr0.Typ == Typs.Brand || tr0.Typ == Typs.Model) || tr0.Typ == Typs.Class || tr0.Typ == Typs.Date)
                    {
                        tr = TryParse(t.Next, tr0, true, false);
                        if (tr != null)
                        {
                            if (tr.Typ == Typs.Number)
                            {
                            }
                            else
                            {
                                tr = null;
                            }
                        }
                    }
                }
                if (tr == null)
                {
                    break;
                }
                if (t.IsNewlineBefore)
                {
                    if (tr.Typ != Typs.Number)
                    {
                        break;
                    }
                }
                if (tr.InnerTokens.Count > 0)
                {
                    res.AddRange(tr.InnerTokens);
                }
                res.Add(tr);
                tr0 = tr;
                t   = tr.EndToken;
                if (andConj)
                {
                    break;
                }
            }
            for (int i = 0; i < (res.Count - 1); i++)
            {
                if (res[i].Typ == Typs.Model && res[i + 1].Typ == Typs.Model)
                {
                    res[i].EndToken = res[i + 1].EndToken;
                    res[i].Value    = string.Format("{0}{1}{2}", res[i].Value, (res[i].EndToken.Next != null && res[i].EndToken.Next.IsHiphen ? '-' : ' '), res[i + 1].Value);
                    res.RemoveAt(i + 1);
                    i--;
                }
            }
            return(res);
        }
Esempio n. 3
0
        static WeaponItemToken _TryParse(Pullenti.Ner.Token t, WeaponItemToken prev, bool afterConj, bool attachHigh = false)
        {
            if (t == null)
            {
                return(null);
            }
            if (Pullenti.Ner.Core.BracketHelper.IsBracket(t, true))
            {
                WeaponItemToken wit = _TryParse(t.Next, prev, afterConj, attachHigh);
                if (wit != null)
                {
                    if (wit.EndToken.Next == null)
                    {
                        wit.BeginToken = t;
                        return(wit);
                    }
                    if (Pullenti.Ner.Core.BracketHelper.IsBracket(wit.EndToken.Next, true))
                    {
                        wit.BeginToken = t;
                        wit.EndToken   = wit.EndToken.Next;
                        return(wit);
                    }
                }
            }
            Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            if (tok != null)
            {
                WeaponItemToken res = new WeaponItemToken(t, tok.EndToken);
                res.Typ = (Typs)tok.Termin.Tag;
                if (res.Typ == Typs.Noun)
                {
                    res.Value = tok.Termin.CanonicText;
                    if (tok.Termin.Tag2 != null)
                    {
                        res.IsDoubt = true;
                    }
                    for (Pullenti.Ner.Token tt = res.EndToken.Next; tt != null; tt = tt.Next)
                    {
                        if (tt.WhitespacesBeforeCount > 2)
                        {
                            break;
                        }
                        WeaponItemToken wit = _TryParse(tt, null, false, false);
                        if (wit != null)
                        {
                            if (wit.Typ == Typs.Brand)
                            {
                                res.InnerTokens.Add(wit);
                                res.EndToken = (tt = wit.EndToken);
                                continue;
                            }
                            break;
                        }
                        if (!(tt is Pullenti.Ner.TextToken))
                        {
                            break;
                        }
                        Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                        if (mc == Pullenti.Morph.MorphClass.Adjective)
                        {
                            if (res.AltValue == null)
                            {
                                res.AltValue = res.Value;
                            }
                            if (res.AltValue.EndsWith(res.Value))
                            {
                                res.AltValue = res.AltValue.Substring(0, res.AltValue.Length - res.Value.Length);
                            }
                            res.AltValue = string.Format("{0}{1} {2}", res.AltValue, (tt as Pullenti.Ner.TextToken).Term, res.Value);
                            res.EndToken = tt;
                            continue;
                        }
                        break;
                    }
                    return(res);
                }
                if (res.Typ == Typs.Brand || res.Typ == Typs.Name)
                {
                    res.Value = tok.Termin.CanonicText;
                    return(res);
                }
                if (res.Typ == Typs.Model)
                {
                    res.Value = tok.Termin.CanonicText;
                    if (tok.Termin.Tag2 is List <Pullenti.Ner.Core.Termin> )
                    {
                        List <Pullenti.Ner.Core.Termin> li = tok.Termin.Tag2 as List <Pullenti.Ner.Core.Termin>;
                        foreach (Pullenti.Ner.Core.Termin to in li)
                        {
                            WeaponItemToken wit = new WeaponItemToken(t, tok.EndToken)
                            {
                                Typ = (Typs)to.Tag, Value = to.CanonicText, IsInternal = tok.BeginToken == tok.EndToken
                            };
                            res.InnerTokens.Add(wit);
                            if (to.AdditionalVars != null && to.AdditionalVars.Count > 0)
                            {
                                wit.AltValue = to.AdditionalVars[0].CanonicText;
                            }
                        }
                    }
                    res._correctModel();
                    return(res);
                }
            }
            Pullenti.Ner.Token nnn = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t);
            if (nnn != null)
            {
                Pullenti.Ner.Transport.Internal.TransItemToken tit = Pullenti.Ner.Transport.Internal.TransItemToken._attachNumber(nnn, true);
                if (tit != null)
                {
                    WeaponItemToken res = new WeaponItemToken(t, tit.EndToken)
                    {
                        Typ = Typs.Number
                    };
                    res.Value    = tit.Value;
                    res.AltValue = tit.AltValue;
                    return(res);
                }
            }
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && t.Chars.IsAllUpper) && (t.LengthChar < 4))
            {
                if ((t.Next != null && ((t.Next.IsHiphen || t.Next.IsChar('.'))) && (t.Next.WhitespacesAfterCount < 2)) && (t.Next.Next is Pullenti.Ner.NumberToken))
                {
                    WeaponItemToken res = new WeaponItemToken(t, t.Next)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return(res);
                }
                if ((t.Next is Pullenti.Ner.NumberToken) && !t.IsWhitespaceAfter)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Model, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    res._correctModel();
                    return(res);
                }
                if ((t as Pullenti.Ner.TextToken).Term == "СП" && (t.WhitespacesAfterCount < 3) && (t.Next is Pullenti.Ner.TextToken))
                {
                    WeaponItemToken pp = _TryParse(t.Next, null, false, false);
                    if (pp != null && ((pp.Typ == Typs.Model || pp.Typ == Typs.Brand)))
                    {
                        WeaponItemToken res = new WeaponItemToken(t, t)
                        {
                            Typ = Typs.Noun
                        };
                        res.Value    = "ПИСТОЛЕТ";
                        res.AltValue = "СЛУЖЕБНЫЙ ПИСТОЛЕТ";
                        return(res);
                    }
                }
            }
            if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
            {
                bool ok = false;
                if (prev != null && ((prev.Typ == Typs.Noun || prev.Typ == Typs.Model || prev.Typ == Typs.Brand)))
                {
                    ok = true;
                }
                else if (prev == null && t.Previous != null && t.Previous.IsCommaAnd)
                {
                    ok = true;
                }
                if (ok)
                {
                    WeaponItemToken res = new WeaponItemToken(t, t)
                    {
                        Typ = Typs.Name, IsDoubt = true
                    };
                    res.Value = (t as Pullenti.Ner.TextToken).Term;
                    if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.TextToken)) && t.Next.Next.Chars == t.Chars)
                    {
                        res.Value    = string.Format("{0}-{1}", res.Value, (t.Next.Next as Pullenti.Ner.TextToken).Term);
                        res.EndToken = t.Next.Next;
                    }
                    if (prev != null && prev.Typ == Typs.Noun)
                    {
                        res.Typ = Typs.Brand;
                    }
                    if (res.EndToken.Next != null && res.EndToken.Next.IsHiphen && (res.EndToken.Next.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    else if (!res.EndToken.IsWhitespaceAfter && (res.EndToken.Next is Pullenti.Ner.NumberToken))
                    {
                        res.Typ = Typs.Model;
                        res._correctModel();
                    }
                    return(res);
                }
            }
            if (t.IsValue("МАРКА", null))
            {
                WeaponItemToken res = _TryParse(t.Next, prev, afterConj, false);
                if (res != null && res.Typ == Typs.Brand)
                {
                    res.BeginToken = t;
                    return(res);
                }
                if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(t.Next, true, false))
                {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t.Next, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null)
                    {
                        return new WeaponItemToken(t, br.EndToken)
                               {
                                   Typ = Typs.Brand, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No)
                               }
                    }
                    ;
                }
                if (((t is Pullenti.Ner.TextToken) && (t.Next is Pullenti.Ner.TextToken) && t.Next.LengthChar > 1) && !t.Next.Chars.IsAllLower)
                {
                    return new WeaponItemToken(t, t.Next)
                           {
                               Typ = Typs.Brand, Value = (t as Pullenti.Ner.TextToken).Term
                           }
                }
                ;
            }
            if (t.IsValue("КАЛИБР", "КАЛІБР"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(tt1, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    return new WeaponItemToken(t, num.EndToken)
                           {
                               Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                           }
                }
                ;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                Pullenti.Ner.Measure.Internal.NumbersWithUnitToken num = Pullenti.Ner.Measure.Internal.NumbersWithUnitToken.TryParse(t, null, false, false, false, false);
                if (num != null && num.SingleVal != null)
                {
                    if (num.Units.Count == 1 && num.Units[0].Unit != null && num.Units[0].Unit.NameCyr == "мм")
                    {
                        return new WeaponItemToken(t, num.EndToken)
                               {
                                   Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                               }
                    }
                    ;

                    if (num.EndToken.Next != null && num.EndToken.Next.IsValue("КАЛИБР", "КАЛІБР"))
                    {
                        return new WeaponItemToken(t, num.EndToken.Next)
                               {
                                   Typ = Typs.Caliber, Value = Pullenti.Ner.Core.NumberHelper.DoubleToString(num.SingleVal.Value)
                               }
                    }
                    ;
                }
            }
            if (t.IsValue("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО"))
            {
                Pullenti.Ner.Token tt1 = t.Next;
                if (tt1 != null && ((tt1.IsHiphen || tt1.IsChar(':'))))
                {
                    tt1 = tt1.Next;
                }
                if (tt1 is Pullenti.Ner.ReferentToken)
                {
                    if ((tt1.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) || (tt1.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                    {
                        return new WeaponItemToken(t, tt1)
                               {
                                   Typ = Typs.Developer, Ref = tt1.GetReferent()
                               }
                    }
                    ;
                }
            }
            return(null);
        }

        void _correctModel()
        {
            Pullenti.Ner.Token tt = EndToken.Next;
            if (tt == null || tt.WhitespacesBeforeCount > 2)
            {
                return;
            }
            if (tt.IsValue(":\\/.", null) || tt.IsHiphen)
            {
                tt = tt.Next;
            }
            if (tt is Pullenti.Ner.NumberToken)
            {
                StringBuilder tmp = new StringBuilder();
                tmp.Append((tt as Pullenti.Ner.NumberToken).Value);
                bool isLat = Pullenti.Morph.LanguageHelper.IsLatinChar(Value[0]);
                EndToken = tt;
                for (tt = tt.Next; tt != null; tt = tt.Next)
                {
                    if ((tt is Pullenti.Ner.TextToken) && tt.LengthChar == 1 && tt.Chars.IsLetter)
                    {
                        if (!tt.IsWhitespaceBefore || ((tt.Previous != null && tt.Previous.IsHiphen)))
                        {
                            char ch = (tt as Pullenti.Ner.TextToken).Term[0];
                            EndToken = tt;
                            char ch2 = (char)0;
                            if (Pullenti.Morph.LanguageHelper.IsLatinChar(ch) && !isLat)
                            {
                                ch2 = Pullenti.Morph.LanguageHelper.GetCyrForLat(ch);
                                if (ch2 != ((char)0))
                                {
                                    ch = ch2;
                                }
                            }
                            else if (Pullenti.Morph.LanguageHelper.IsCyrillicChar(ch) && isLat)
                            {
                                ch2 = Pullenti.Morph.LanguageHelper.GetLatForCyr(ch);
                                if (ch2 != ((char)0))
                                {
                                    ch = ch2;
                                }
                            }
                            tmp.Append(ch);
                            continue;
                        }
                    }
                    break;
                }
                Value    = string.Format("{0}-{1}", Value, tmp.ToString());
                AltValue = Pullenti.Ner.Core.MiscHelper.CreateCyrLatAlternative(Value);
            }
            if (!EndToken.IsWhitespaceAfter && EndToken.Next != null && ((EndToken.Next.IsHiphen || EndToken.Next.IsCharOf("\\/"))))
            {
                if (!EndToken.Next.IsWhitespaceAfter && (EndToken.Next.Next is Pullenti.Ner.NumberToken))
                {
                    EndToken = EndToken.Next.Next;
                    Value    = string.Format("{0}-{1}", Value, (EndToken as Pullenti.Ner.NumberToken).Value);
                    if (AltValue != null)
                    {
                        AltValue = string.Format("{0}-{1}", AltValue, (EndToken as Pullenti.Ner.NumberToken).Value);
                    }
                }
            }
        }