예제 #1
0
 /// <summary>
 /// Walks the token chain and embeds a referent token wherever
 /// <c>TryAttach</c> recognises one, either right after an ontology keyword
 /// (optionally followed by a colon) or directly at a referent token / line start.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain and this analyzer's data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
     Pullenti.Ner.Token cur = kit.FirstToken;
     while (cur != null)
     {
         Pullenti.Ner.ReferentToken found = null;

         // Only letter tokens are looked up in the ontology.
         Pullenti.Ner.Core.TerminToken keyword = cur.Chars.IsLetter
             ? m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No)
             : null;
         if (keyword != null)
         {
             // Step over an optional ':' that follows the keyword.
             Pullenti.Ner.Token after = keyword.EndToken.Next;
             if (after != null && after.IsChar(':'))
             {
                 after = after.Next;
             }
             found = this.TryAttach(after, true);
             if (found != null)
             {
                 // The keyword itself becomes part of the resulting token.
                 found.BeginToken = cur;
             }
         }

         // Fallback: try without a keyword at referent tokens and at line starts.
         if (found == null)
         {
             bool candidate = (cur is Pullenti.Ner.ReferentToken) || cur.IsNewlineBefore;
             if (candidate)
             {
                 found = this.TryAttach(cur, false);
             }
         }

         if (found != null)
         {
             found.Referent = data.RegisterReferent(found.Referent);
             kit.EmbedToken(found);
             // Continue scanning after the embedded token.
             cur = found;
         }
         cur = cur.Next;
     }
 }
예제 #2
0
        /// <summary>
        /// Scans the text line by line, parses a list of goods attributes at each
        /// line start and wraps every parsed list into one <c>GoodReferent</c>
        /// that is embedded into the token chain.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the token chain, the source text and this analyzer's data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

            // Progress is reported once per chunk of this many characters.
            const int chunkSize = 100000;
            int chunkCount = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
            if (chunkCount == 0)
            {
                chunkCount = 1;
            }
            int chunksDone = 0;
            int threshold = 0;
            List<GoodReferent> collected = new List<GoodReferent>();

            for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
            {
                // Goods are only looked for at the beginning of a line.
                if (!tok.IsNewlineBefore)
                {
                    continue;
                }

                if (tok.BeginChar > threshold)
                {
                    threshold += chunkSize;
                    chunksDone++;
                    // OnProgress returning false stops the scan.
                    bool keepGoing = this.OnProgress(chunksDone, chunkCount, kit);
                    if (!keepGoing)
                    {
                        break;
                    }
                }

                // Step over a single leading non-letter token when one follows.
                if (tok.Next != null && !tok.Chars.IsLetter)
                {
                    tok = tok.Next;
                }

                List<Pullenti.Ner.ReferentToken> attrTokens = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(tok);
                bool nothingParsed = attrTokens == null || attrTokens.Count == 0;
                if (nothingParsed)
                {
                    continue;
                }

                GoodReferent item = new GoodReferent();
                foreach (Pullenti.Ner.ReferentToken attrTok in attrTokens)
                {
                    attrTok.Referent = data.RegisterReferent(attrTok.Referent);
                    // Add each registered attribute to the good only once.
                    bool alreadyListed = item.FindSlot(GoodReferent.ATTR_ATTR, attrTok.Referent, true) != null;
                    if (!alreadyListed)
                    {
                        item.AddSlot(GoodReferent.ATTR_ATTR, attrTok.Referent, false, 0);
                    }
                    kit.EmbedToken(attrTok);
                }
                collected.Add(item);

                // One token spanning from the first to the last attribute token.
                Pullenti.Ner.ReferentToken first = attrTokens[0];
                Pullenti.Ner.ReferentToken last = attrTokens[attrTokens.Count - 1];
                Pullenti.Ner.ReferentToken whole = new Pullenti.Ner.ReferentToken(item, first, last);
                kit.EmbedToken(whole);
                tok = whole;
            }

            // The goods are appended to the analyzer data only after the scan has finished.
            for (int k = 0; k < collected.Count; k++)
            {
                data.Referents.Add(collected[k]);
            }
        }
예제 #3
0
        /// <summary>
        /// Runs <c>_process</c> from the first token of the document and registers
        /// the resulting title-page referent when one is produced.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the token chain and this analyzer's data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

            // The end token reported by _process is not used here.
            Pullenti.Ner.Token endToken;
            TitlePageReferent titlePage = _process(kit.FirstToken, 0, kit, out endToken);
            if (titlePage == null)
            {
                return;
            }
            data.RegisterReferent(titlePage);
        }
예제 #4
0
 /// <summary>
 /// Tries <c>TryParse</c> at every token; each successful result is registered,
 /// embedded into the token chain, and the scan resumes after it.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain and this analyzer's data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
     Pullenti.Ner.Token cur = kit.FirstToken;
     while (cur != null)
     {
         Pullenti.Ner.ReferentToken parsed = TryParse(cur);
         if (parsed != null)
         {
             parsed.Referent = data.RegisterReferent(parsed.Referent);
             kit.EmbedToken(parsed);
             // Move onto the embedded token so the step below continues after it.
             cur = parsed;
         }
         cur = cur.Next;
     }
 }
예제 #5
0
        /// <summary>
        /// Builds the unit referent for this token together with the referents of
        /// all its base units, links each unit to its base unit, and optionally
        /// registers every referent in the analyzer data.
        /// </summary>
        /// <param name="ad">Analyzer data used for registration; when null nothing is registered.</param>
        /// <returns>The referent at the head of the chain (the registered instance when <paramref name="ad"/> is not null).</returns>
        public Pullenti.Ner.Measure.UnitReferent CreateReferentWithRegister(Pullenti.Ner.Core.AnalyzerData ad)
        {
            // Start from ExtOnto; a known Unit or an unknown name replaces it.
            Pullenti.Ner.Measure.UnitReferent head = ExtOnto;
            if (Unit != null)
            {
                head = _createReferent(Unit);
            }
            else if (UnknownName != null)
            {
                head = new Pullenti.Ner.Measure.UnitReferent();
                head.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_NAME, UnknownName, false, 0);
                head.IsUnknown = true;
            }

            // A power of 1 is not stored.
            if (Pow != 1)
            {
                head.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_POW, Pow.ToString(), false, 0);
            }

            // chain[0] is this unit; each following entry is the base unit of the previous one.
            List<Pullenti.Ner.Measure.UnitReferent> chain = new List<Pullenti.Ner.Measure.UnitReferent> { head };
            if (Unit != null)
            {
                Unit ancestor = Unit.BaseUnit;
                while (ancestor != null)
                {
                    chain.Add(_createReferent(ancestor));
                    ancestor = ancestor.BaseUnit;
                }
            }

            // Walk from the deepest base unit up, so a base unit is registered
            // before the unit that refers to it gets its BASEUNIT slot.
            int idx = chain.Count;
            while (--idx >= 0)
            {
                if (ad != null)
                {
                    chain[idx] = ad.RegisterReferent(chain[idx]) as Pullenti.Ner.Measure.UnitReferent;
                }
                if (idx == 0)
                {
                    break;
                }

                Pullenti.Ner.Measure.UnitReferent derived = chain[idx - 1];
                derived.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_BASEUNIT, chain[idx], false, 0);

                // The multiplier towards the base unit is read from the Unit stored in Tag.
                Unit derivedUnit = derived.Tag as Unit;
                if (derivedUnit.BaseMultiplier != 0)
                {
                    string factor = Pullenti.Ner.Core.NumberHelper.DoubleToString(derivedUnit.BaseMultiplier);
                    derived.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_BASEFACTOR, factor, false, 0);
                }
            }
            return chain[0];
        }
예제 #6
0
 /// <summary>
 /// Tries <c>TryParseThesis</c> at every token that can start a sentence;
 /// each result is registered in <paramref name="ad"/> and embedded into the token chain.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain.</param>
 /// <param name="ad">Analyzer data in which the parsed referents are registered.</param>
 public static void Process(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Core.AnalyzerData ad)
 {
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         // Parsing is attempted only at possible sentence starts.
         Pullenti.Ner.ReferentToken thesis = Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur)
             ? TryParseThesis(cur)
             : null;
         if (thesis != null)
         {
             thesis.Referent = ad.RegisterReferent(thesis.Referent);
             kit.EmbedToken(thesis);
             // Resume after the embedded token.
             cur = thesis;
         }
     }
 }
예제 #7
0
        /// <summary>
        /// Tries to parse a goods attribute at every token; each attribute that
        /// yields a referent is registered and embedded into the token chain.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the token chain, the source text and this analyzer's data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

            // Progress is reported once per chunk of this many characters.
            const int chunkSize = 100000;
            int chunkCount = (kit.Sofa.Text.Length + chunkSize - 1) / chunkSize;
            if (chunkCount == 0)
            {
                chunkCount = 1;
            }
            int chunksDone = 0;
            int threshold = 0;

            for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
            {
                if (tok.BeginChar > threshold)
                {
                    threshold += chunkSize;
                    chunksDone++;
                    // OnProgress returning false stops the scan.
                    bool keepGoing = this.OnProgress(chunksDone, chunkCount, kit);
                    if (!keepGoing)
                    {
                        break;
                    }
                }

                Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(tok, null, true, true);
                if (parsed != null)
                {
                    GoodAttributeReferent attr = parsed._createAttr();
                    if (attr == null)
                    {
                        // Nothing to embed; just skip the parsed span.
                        tok = parsed.EndToken;
                    }
                    else
                    {
                        Pullenti.Ner.ReferentToken wrapped = new Pullenti.Ner.ReferentToken(attr, parsed.BeginToken, parsed.EndToken);
                        wrapped.Referent = data.RegisterReferent(attr);
                        kit.EmbedToken(wrapped);
                        tok = wrapped;
                    }
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Splits the document into mail lines, groups the lines into blocks (a new
        /// block starts at a qualifying "From" line) and, for every block, registers
        /// up to four <c>MailReferent</c> objects: Head, Hello, Body and Tail.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the token chain and this analyzer's data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            // Step 1: parse the token chain into a flat list of mail lines.
            List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>();

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                if (ml == null)
                {
                    continue;
                }
                // NOTE(review): empty branch with no effect; looks like a leftover debugging hook.
                if (lines.Count == 91)
                {
                }
                lines.Add(ml);
                // Continue after the last token of the parsed line.
                t = ml.EndToken;
            }
            if (lines.Count == 0)
            {
                return;
            }
            // Step 2: group the lines into blocks. `blk` is the block currently being filled.
            int i;
            List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >();
            List <Pullenti.Ner.Mail.Internal.MailLine>         blk    = null;

            for (i = 0; i < lines.Count; i++)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = lines[i];
                if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    // Decide whether this From line opens a new block.
                    bool isNew = ml.MustBeFirstLine || i == 0;
                    // One of the next two lines is From or Hello (only checked when both exist).
                    if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)))
                    {
                        isNew = true;
                    }
                    if (!isNew)
                    {
                        // The nearest preceding line with a defined type is BestRegards.
                        for (int j = i - 1; j >= 0; j--)
                        {
                            if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                            {
                                if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                                {
                                    isNew = true;
                                }
                                break;
                            }
                        }
                    }
                    if (!isNew)
                    {
                        // The line itself contains a DATE or URI referent.
                        for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next)
                        {
                            if (tt.GetReferent() != null)
                            {
                                if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI")
                                {
                                    isNew = true;
                                }
                            }
                        }
                    }
                    if (isNew)
                    {
                        blk = new List <Pullenti.Ner.Mail.Internal.MailLine>();
                        blocks.Add(blk);
                        // Consume the header run: From lines, plus non-From lines that are
                        // immediately followed by a From line (subject to the checks below).
                        for (; i < lines.Count; i++)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // A From line that must be first ends the run once the block is non-empty.
                                if (blk.Count > 0 && lines[i].MustBeFirstLine)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // Look in the block for a From line flagged IsRealFrom / MustBeFirstLine or carrying a MailAddr.
                                int j;
                                for (j = 0; j < blk.Count; j++)
                                {
                                    if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null)
                                        {
                                            break;
                                        }
                                    }
                                }
                                // None found: keep this line in the block and go on.
                                if (j >= blk.Count)
                                {
                                    blk.Add(lines[i]);
                                    continue;
                                }
                                // Otherwise check whether the following From run contains such a line;
                                // if so, the current header run ends here.
                                bool ok = false;
                                for (j = i + 1; j < lines.Count; j++)
                                {
                                    if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        break;
                                    }
                                    if (lines[j].IsRealFrom || lines[j].MustBeFirstLine)
                                    {
                                        ok = true;
                                        break;
                                    }
                                    if (lines[j].MailAddr != null)
                                    {
                                        ok = true;
                                        break;
                                    }
                                }
                                if (ok)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else
                            {
                                break;
                            }
                        }
                        // Compensate for the outer loop's i++ so the first unconsumed line is examined next.
                        i--;
                        continue;
                    }
                }
                // Any other line is appended to the current block (created on first use).
                if (blk == null)
                {
                    blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>()));
                }
                blk.Add(lines[i]);
            }
            if (blocks.Count == 0)
            {
                return;
            }
            // Step 3: produce mail referents per block. From here on `lines` is rebound to the current block.
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            for (int j = 0; j < blocks.Count; j++)
            {
                lines = blocks[j];
                if (lines.Count == 0)
                {
                    continue;
                }
                i = 0;
                // Head: the leading run of From lines (a non-From line is tolerated when a From line follows it).
                if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    // t1 tracks the end token of the last From line seen in the run.
                    Pullenti.Ner.Token t1 = lines[0].EndToken;
                    for (; i < lines.Count; i++)
                    {
                        if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                            t1 = lines[i].EndToken;
                        }
                        else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                    MailReferent mail = new MailReferent()
                    {
                        Kind = MailKind.Head
                    };
                    Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1);
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
                // i0 is the index of the first line after the head.
                int i0 = i;
                // t2 will be the first token of the tail (signature part), or null when no tail is found.
                Pullenti.Ner.Token t2 = null;
                // err counts lines with more than 2 words seen while scanning upwards.
                int err = 0;
                // Scan from the last line upwards looking for the start of the tail.
                for (i = lines.Count - 1; i >= i0; i--)
                {
                    Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                    if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                    {
                        // The tail starts at a BestRegards line; extend it upwards over
                        // adjacent short BestRegards lines.
                        t2 = lines[i].BeginToken;
                        for (--i; i >= i0; i--)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2))
                            {
                                t2 = lines[i].BeginToken;
                            }
                            else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2))
                            {
                                // A short line sandwiched before another short BestRegards line: take both.
                                i--;
                                t2 = lines[i].BeginToken;
                            }
                            else
                            {
                                break;
                            }
                        }
                        break;
                    }
                    // A line with referents and fewer than 3 words (not the first body line) is a tail candidate.
                    if (li.Refs.Count > 0 && (li.Words < 3) && i > i0)
                    {
                        err = 0;
                        t2  = li.BeginToken;
                        continue;
                    }
                    // A line with more than 10 words discards the candidate found so far.
                    if (li.Words > 10)
                    {
                        t2 = null;
                        continue;
                    }
                    // More than two lines with over 2 words also discard the candidate.
                    if (li.Words > 2)
                    {
                        if ((++err) > 2)
                        {
                            t2 = null;
                        }
                    }
                }
                if (t2 == null)
                {
                    // Fallback: the lowest Undefined line (not the first body line) that has
                    // zero words and whose first referent is a person.
                    for (i = lines.Count - 1; i >= i0; i--)
                    {
                        Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                        if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                        {
                            if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent))
                            {
                                if (li.Words == 0 && i > i0)
                                {
                                    t2 = li.BeginToken;
                                    break;
                                }
                            }
                        }
                    }
                }
                // Hello: a Hello line reached from i0 by skipping only Undefined lines
                // that have no words and no referents.
                for (int ii = i0; ii < lines.Count; ii++)
                {
                    if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Hello
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                            // The body starts after the greeting.
                            i0 = ii + 1;
                        }
                        break;
                    }
                    else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0)
                    {
                        break;
                    }
                }
                if (i0 < lines.Count)
                {
                    // Body: from the first remaining line up to the token before the tail
                    // (or to the end of the block when there is no tail). No body is
                    // produced when the tail token has no previous token.
                    if (t2 != null && t2.Previous == null)
                    {
                    }
                    else
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Body
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? t2.Previous : lines[lines.Count - 1].EndToken));
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                    }
                    // Tail: from t2 to the end of the block.
                    if (t2 != null)
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Tail
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                        // Attach to the tail the referents of every line starting at or after t2.
                        // This runs even when the tail was not registered above (zero length).
                        for (i = i0; i < lines.Count; i++)
                        {
                            if (lines[i].BeginChar >= t2.BeginChar)
                            {
                                foreach (Pullenti.Ner.Referent r in lines[i].Refs)
                                {
                                    mail.AddRef(r, 0);
                                }
                            }
                        }
                    }
                }
            }
        }
예제 #9
0
        /// <summary>
        /// Converts this measure token (and, recursively, its internal tokens) into a
        /// list of referent tokens. Two paths exist: a composite path that combines the
        /// internal measures into one <c>MeasureReferent</c>, and a simple path that
        /// delegates to <c>Nums</c> and links the internal measures to its result.
        /// </summary>
        /// <param name="ad">Analyzer data used for registration; the composite path registers only when it is not null.</param>
        /// <param name="register">Forwarded to <c>Nums.CreateRefenetsTokensWithRegister</c> on the simple path; not used on the composite path.</param>
        /// <returns>The created referent tokens, or null when a doubtful single-unit measure is rejected.</returns>
        public List <Pullenti.Ner.ReferentToken> CreateRefenetsTokensWithRegister(Pullenti.Ner.Core.AnalyzerData ad, bool register = true)
        {
            // Rejection filter for an unreliable token without internals whose only unit is doubtful.
            if (Internals.Count == 0 && !Reliable)
            {
                if (Nums.Units.Count == 1 && Nums.Units[0].IsDoubt)
                {
                    // The first three branches accept the token as is (empty bodies);
                    // only the last branch rejects it.
                    if (Nums.Units[0].UnknownName != null)
                    {
                    }
                    else if (Nums.IsNewlineBefore)
                    {
                    }
                    else if (Nums.Units[0].BeginToken.LengthChar > 1 && Nums.Units[0].BeginToken.GetMorphClassInDictionary().IsUndefined)
                    {
                    }
                    else if (Nums.FromVal == null || Nums.ToVal == null)
                    {
                        // Rejected when either bound (FromVal / ToVal) is missing.
                        return(null);
                    }
                }
            }
            List <Pullenti.Ner.ReferentToken> res = new List <Pullenti.Ner.ReferentToken>();

            // Composite path: there are internal measures and Nums is absent or is a plus/minus percent.
            if (((Nums == null || Nums.PlusMinusPercent)) && Internals.Count > 0)
            {
                // Tokens produced by InternalEx (if any) go first into the result.
                List <Pullenti.Ner.ReferentToken> liEx = null;
                if (InternalEx != null)
                {
                    liEx = InternalEx.CreateRefenetsTokensWithRegister(ad, true);
                    if (liEx != null)
                    {
                        res.AddRange(liEx);
                    }
                }
                Pullenti.Ner.Measure.MeasureReferent mr = new Pullenti.Ner.Measure.MeasureReferent();
                // templ0 holds the template taken from the first internal measure ("1" when none was taken);
                // templ accumulates the composite template.
                string templ0 = "1";
                string templ  = null;
                if (Name != null)
                {
                    mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME, Name, false, 0);
                }
                List <Pullenti.Ner.Measure.MeasureReferent> ints = new List <Pullenti.Ner.Measure.MeasureReferent>();
                for (int k = 0; k < Internals.Count; k++)
                {
                    MeasureToken ii = Internals[k];
                    // Side effect: the internal token is marked reliable before it is converted.
                    ii.Reliable = true;
                    List <Pullenti.Ner.ReferentToken> li = ii.CreateRefenetsTokensWithRegister(ad, false);
                    if (li == null)
                    {
                        continue;
                    }
                    res.AddRange(li);
                    // The measure of this internal token is taken from the last token appended to res.
                    Pullenti.Ner.Measure.MeasureReferent mr0 = res[res.Count - 1].Referent as Pullenti.Ner.Measure.MeasureReferent;
                    if (liEx != null)
                    {
                        // NOTE(review): the slot value here is the ReferentToken itself, whereas the
                        // simple path below stores `.Referent` — confirm this is intentional.
                        mr0.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, liEx[liEx.Count - 1], false, 0);
                    }
                    if (k == 0 && !IsEmpty)
                    {
                        // Move the first internal's template to the composite level.
                        templ0       = mr0.Template;
                        mr0.Template = "1";
                    }
                    if (ad != null)
                    {
                        mr0 = ad.RegisterReferent(mr0) as Pullenti.Ner.Measure.MeasureReferent;
                    }
                    mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_VALUE, mr0, false, 0);
                    ints.Add(mr0);
                    if (templ == null)
                    {
                        templ = "1";
                    }
                    else
                    {
                        // Append the current count of VALUE slots, separated by ", " for a set or " × " otherwise.
                        int nu = mr.GetStringValues(Pullenti.Ner.Measure.MeasureReferent.ATTR_VALUE).Count;
                        templ = string.Format("{0}{1}{2}", templ, (IsSet ? ", " : " × "), nu);
                    }
                }
                if (IsSet)
                {
                    templ = "{" + templ + "}";
                }
                // Substitute the composite template into the template taken from the first internal.
                if (templ0 != "1")
                {
                    templ = templ0.Replace("1", templ);
                }
                if (Nums != null && Nums.PlusMinusPercent && Nums.SingleVal != null)
                {
                    // Wrap as a plus/minus percent template; the percent placeholder index is Internals.Count + 1.
                    templ = string.Format("[{0} ±{1}%]", templ, Internals.Count + 1);
                    mr.AddValue(Nums.SingleVal.Value);
                }
                mr.Template = templ;
                // Find a length unit among the internal measures; the scan stops at the
                // first non-length measure that has units.
                int  i;
                bool hasLength = false;
                Pullenti.Ner.Measure.UnitReferent uref = null;
                for (i = 0; i < ints.Count; i++)
                {
                    if (ints[i].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        hasLength = true;
                        uref      = ints[i].GetSlotValue(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT) as Pullenti.Ner.Measure.UnitReferent;
                    }
                    else if (ints[i].Units.Count > 0)
                    {
                        break;
                    }
                }
                // Give that length unit to every internal measure that has no unit slot.
                if (ints.Count > 1 && hasLength && uref != null)
                {
                    foreach (Pullenti.Ner.Measure.MeasureReferent ii in ints)
                    {
                        if (ii.FindSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, null, true) == null)
                        {
                            ii.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, uref, false, 0);
                            ii.Kind = Pullenti.Ner.Measure.MeasureKind.Length;
                        }
                    }
                }
                // Three lengths make a volume; three unit-less values make a volume only when
                // the name contains "РАЗМЕР" or "ГАБАРИТ" (Russian for "size" / "overall dimension").
                if (ints.Count == 3)
                {
                    if (ints[0].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[1].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[2].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        mr.Kind = Pullenti.Ner.Measure.MeasureKind.Volume;
                    }
                    else if (ints[0].Units.Count == 0 && ints[1].Units.Count == 0 && ints[2].Units.Count == 0)
                    {
                        string nam = mr.GetStringValue(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME);
                        if (nam != null)
                        {
                            if (nam.Contains("РАЗМЕР") || nam.Contains("ГАБАРИТ"))
                            {
                                mr.Kind = Pullenti.Ner.Measure.MeasureKind.Volume;
                            }
                        }
                    }
                }
                // Two lengths make an area.
                if (ints.Count == 2)
                {
                    if (ints[0].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[1].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        mr.Kind = Pullenti.Ner.Measure.MeasureKind.Area;
                    }
                }
                // The composite referent is appended (spanning this whole token) only when IsEmpty is false.
                if (!IsEmpty)
                {
                    if (ad != null)
                    {
                        mr = ad.RegisterReferent(mr) as Pullenti.Ner.Measure.MeasureReferent;
                    }
                    res.Add(new Pullenti.Ner.ReferentToken(mr, BeginToken, EndToken));
                }
                return(res);
            }
            // Simple path: Nums produces the main tokens; the last of them is the measure itself.
            List <Pullenti.Ner.ReferentToken> re2 = Nums.CreateRefenetsTokensWithRegister(ad, Name, register);

            foreach (MeasureToken ii in Internals)
            {
                List <Pullenti.Ner.ReferentToken> li = ii.CreateRefenetsTokensWithRegister(ad, true);
                if (li == null)
                {
                    continue;
                }
                res.AddRange(li);
                // Link the main measure to the last referent produced by this internal token.
                re2[re2.Count - 1].Referent.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, res[res.Count - 1].Referent, false, 0);
            }
            // Stretch the main token over this whole measure token and append Nums' tokens last.
            re2[re2.Count - 1].BeginToken = BeginToken;
            re2[re2.Count - 1].EndToken   = EndToken;
            res.AddRange(re2);
            return(res);
        }
예제 #10
0
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData     ad     = kit.GetAnalyzerData(this);
            Pullenti.Ner.Core.TerminCollection models = new Pullenti.Ner.Core.TerminCollection();
            Dictionary <string, List <Pullenti.Ner.Referent> > objsByModel = new Dictionary <string, List <Pullenti.Ner.Referent> >();

            Pullenti.Ner.Core.TerminCollection objByNames = new Pullenti.Ner.Core.TerminCollection();
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                List <Pullenti.Ner.Weapon.Internal.WeaponItemToken> its = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(t, 10);
                if (its == null)
                {
                    continue;
                }
                List <Pullenti.Ner.ReferentToken> rts = this.TryAttach(its, false);
                if (rts != null)
                {
                    foreach (Pullenti.Ner.ReferentToken rt in rts)
                    {
                        rt.Referent = ad.RegisterReferent(rt.Referent);
                        kit.EmbedToken(rt);
                        t = rt;
                        foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
                        {
                            if (s.TypeName == WeaponReferent.ATTR_MODEL)
                            {
                                string mod = s.Value.ToString();
                                for (int k = 0; k < 2; k++)
                                {
                                    if (!char.IsDigit(mod[0]))
                                    {
                                        List <Pullenti.Ner.Referent> li;
                                        if (!objsByModel.TryGetValue(mod, out li))
                                        {
                                            objsByModel.Add(mod, (li = new List <Pullenti.Ner.Referent>()));
                                        }
                                        if (!li.Contains(rt.Referent))
                                        {
                                            li.Add(rt.Referent);
                                        }
                                        models.AddString(mod, li, null, false);
                                    }
                                    if (k > 0)
                                    {
                                        break;
                                    }
                                    string brand = rt.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                                    if (brand == null)
                                    {
                                        break;
                                    }
                                    mod = string.Format("{0} {1}", brand, mod);
                                }
                            }
                            else if (s.TypeName == WeaponReferent.ATTR_NAME)
                            {
                                objByNames.Add(new Pullenti.Ner.Core.Termin(s.Value.ToString())
                                {
                                    Tag = rt.Referent
                                });
                            }
                        }
                    }
                }
            }
            if (objsByModel.Count == 0 && objByNames.Termins.Count == 0)
            {
                return;
            }
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
                if (br != null)
                {
                    Pullenti.Ner.Core.TerminToken toks = objByNames.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks != null && toks.EndToken.Next == br.EndToken)
                    {
                        Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(toks.Termin.Tag as Pullenti.Ner.Referent, br.BeginToken, br.EndToken);
                        kit.EmbedToken(rt0);
                        t = rt0;
                        continue;
                    }
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (!t.Chars.IsLetter)
                {
                    continue;
                }
                Pullenti.Ner.Core.TerminToken tok = models.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok == null)
                {
                    if (!t.Chars.IsAllLower)
                    {
                        tok = objByNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                    }
                    if (tok == null)
                    {
                        continue;
                    }
                }
                if (!tok.IsWhitespaceAfter)
                {
                    if (tok.EndToken.Next == null || !tok.EndToken.Next.IsCharOf(",.)"))
                    {
                        if (!Pullenti.Ner.Core.BracketHelper.IsBracket(tok.EndToken.Next, false))
                        {
                            continue;
                        }
                    }
                }
                Pullenti.Ner.Referent        tr = null;
                List <Pullenti.Ner.Referent> li = tok.Termin.Tag as List <Pullenti.Ner.Referent>;
                if (li != null && li.Count == 1)
                {
                    tr = li[0];
                }
                else
                {
                    tr = tok.Termin.Tag as Pullenti.Ner.Referent;
                }
                if (tr != null)
                {
                    Pullenti.Ner.Weapon.Internal.WeaponItemToken tit = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(tok.BeginToken.Previous, null, false, true);
                    if (tit != null && tit.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
                    {
                        tr.AddSlot(WeaponReferent.ATTR_BRAND, tit.Value, false, 0);
                        tok.BeginToken = tit.BeginToken;
                    }
                    Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(tr, tok.BeginToken, tok.EndToken);
                    kit.EmbedToken(rt0);
                    t = rt0;
                    continue;
                }
            }
        }
예제 #11
0
        /// <summary>
        /// Wraps a token that carries a referent into a token carrying a <c>KeywordReferent</c>.
        /// Denomination referents and money referents get dedicated keyword forms; phone, URI,
        /// bank-data referents and referents typed "DATE", "DATERANGE" or "BOOKLINKREF" are left
        /// untouched. Any other referent is first processed recursively for its nested referent
        /// tokens and then wrapped with an upper-cased normal form.
        /// </summary>
        /// <param name="ad">Analyzer data used to register every keyword referent created here.</param>
        /// <param name="t">Token to inspect; returned as-is when it is not a referent token or has no referent.</param>
        /// <param name="cur">Position value forwarded to <c>_setRank</c> and to recursive calls.</param>
        /// <param name="max">Total value forwarded to <c>_setRank</c> and to recursive calls.</param>
        /// <returns>The newly embedded keyword token, or <paramref name="t"/> itself when nothing was wrapped.</returns>
        Pullenti.Ner.Token _addReferents(Pullenti.Ner.Core.AnalyzerData ad, Pullenti.Ner.Token t, int cur, int max)
        {
            // Only referent tokens that actually expose a referent are of interest.
            Pullenti.Ner.Referent referent = (t is Pullenti.Ner.ReferentToken) ? t.GetReferent() : null;
            if (referent == null)
            {
                return t;
            }

            // Denomination: copy every VALUE slot as a NORMAL slot and keep a link to the source referent.
            Pullenti.Ner.Denomination.DenominationReferent denomination = referent as Pullenti.Ner.Denomination.DenominationReferent;
            if (denomination != null)
            {
                KeywordReferent denomKeyword = new KeywordReferent();
                denomKeyword.Typ = KeywordType.Referent;
                foreach (Pullenti.Ner.Slot slot in denomination.Slots)
                {
                    if (slot.TypeName != Pullenti.Ner.Denomination.DenominationReferent.ATTR_VALUE)
                    {
                        continue;
                    }
                    denomKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, slot.Value, false, 0);
                }
                denomKeyword.AddSlot(KeywordReferent.ATTR_REF, denomination, false, 0);
                Pullenti.Ner.ReferentToken denomToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(denomKeyword), t, t);
                t.Kit.EmbedToken(denomToken);
                return denomToken;
            }

            // These referent classes are never turned into keywords.
            bool isContactLike = (referent is Pullenti.Ner.Phone.PhoneReferent)
                                 || (referent is Pullenti.Ner.Uri.UriReferent)
                                 || (referent is Pullenti.Ner.Bank.BankDataReferent);
            if (isContactLike)
            {
                return t;
            }

            // Money: the keyword is an Object whose normal form is the currency.
            Pullenti.Ner.Money.MoneyReferent money = referent as Pullenti.Ner.Money.MoneyReferent;
            if (money != null)
            {
                KeywordReferent moneyKeyword = new KeywordReferent();
                moneyKeyword.Typ = KeywordType.Object;
                moneyKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, money.Currency, false, 0);
                Pullenti.Ner.ReferentToken moneyToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(moneyKeyword), t, t);
                t.Kit.EmbedToken(moneyToken);
                return moneyToken;
            }

            // Referent types identified by name that are skipped as well.
            switch (referent.TypeName)
            {
                case "DATE":
                case "DATERANGE":
                case "BOOKLINKREF":
                    return t;
            }

            // Process referent tokens nested inside this one; the returned tokens are not needed here.
            Pullenti.Ner.Token inner = (t as Pullenti.Ner.MetaToken).BeginToken;
            while (inner != null && inner.EndChar <= t.EndChar)
            {
                if (inner is Pullenti.Ner.ReferentToken)
                {
                    this._addReferents(ad, inner, cur, max);
                }
                inner = inner.Next;
            }

            KeywordReferent keyword = new KeywordReferent();
            keyword.Typ = KeywordType.Referent;

            // For GEO referents the "ALPHA2" value is preferred; otherwise fall back to the string form.
            string normal = null;
            if (referent.TypeName == "GEO")
            {
                normal = referent.GetStringValue("ALPHA2");
            }
            normal = normal ?? referent.ToString(true, null, 0);
            if (normal != null)
            {
                keyword.AddSlot(KeywordReferent.ATTR_NORMAL, normal.ToUpper(), false, 0);
            }
            keyword.AddSlot(KeywordReferent.ATTR_REF, t.GetReferent(), false, 0);
            _setRank(keyword, cur, max);

            Pullenti.Ner.ReferentToken keywordToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(keyword), t, t);
            t.Kit.EmbedToken(keywordToken);
            return keywordToken;
        }
예제 #12
0
        /// <summary>
        /// Main entry point of this analyzer: walks the token chain of <paramref name="kit"/> and
        /// embeds tokens carrying <c>KeywordReferent</c> objects (referent wrappers, verb predicates,
        /// noun-phrase objects and two-keyword unions).
        /// NOTE(review): the original Russian comment described this as the main function for
        /// extracting phones, which does not match the code below - presumably a copy-paste leftover.
        /// </summary>
        /// <param name="kit">Analysis kit providing the token chain, the processor and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            bool hasDenoms = false;

            // Check whether the processor already contains an active denomination analyzer.
            foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
            {
                if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
                {
                    hasDenoms = true;
                }
            }
            // If not, run a temporary denomination analyzer over the kit ourselves.
            if (!hasDenoms)
            {
                Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
                a.Process(kit);
            }
            // Scratch collections reused for every noun phrase in the main loop.
            List <KeywordReferent> li   = new List <KeywordReferent>();
            StringBuilder          tmp  = new StringBuilder();
            List <string>          tmp2 = new List <string>();
            int max = 0;

            // Count the top-level tokens; the count is passed to _setRank together with the position.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                max++;
            }
            int cur = 0;

            // First pass: create keywords. cur is incremented once per loop iteration.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                // Tokens that already carry a referent are handed to _addReferents.
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    t = this._addReferents(ad, t, cur, max);
                    continue;
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                // Only alphabetic tokens of at least 3 characters are considered.
                if (!t.Chars.IsLetter || (t.LengthChar < 3))
                {
                    continue;
                }
                string term = (t as Pullenti.Ner.TextToken).Term;
                // "ЕСТЬ" is kept only when the previous token is a text token with a verb morph class.
                if (term == "ЕСТЬ")
                {
                    if ((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb)
                    {
                    }
                    else
                    {
                        continue;
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = null;
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                if (npt == null)
                {
                    // No noun phrase here: the token may still become a Predicate keyword if it is a verb.
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsVerb && !mc.IsPreposition)
                    {
                        if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                        {
                            continue;
                        }
                        if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                        {
                            continue;
                        }
                        KeywordReferent kref = new KeywordReferent()
                        {
                            Typ = KeywordType.Predicate
                        };
                        // Prefer the normalized verb form; fall back to the token lemma.
                        string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                        if (norm == null)
                        {
                            norm = (t as Pullenti.Ner.TextToken).Lemma;
                        }
                        // For forms ending in "ЬСЯ" drop the last two characters ("СЯ").
                        if (norm.EndsWith("ЬСЯ"))
                        {
                            norm = norm.Substring(0, norm.Length - 2);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                        _addNormals(kref, drv, norm);
                        kref = ad.RegisterReferent(kref) as KeywordReferent;
                        _setRank(kref, cur, max);
                        // NOTE(review): kref was already registered two lines above and is passed to
                        // RegisterReferent again here - looks redundant; confirm it is harmless.
                        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t)
                        {
                            Morph = t.Morph
                        };
                        kit.EmbedToken(rt1);
                        t = rt1;
                        continue;
                    }
                    continue;
                }
                if (npt.InternalNoun != null)
                {
                    continue;
                }
                // Skip prepositional phrases ending in "ЦЕЛОМ" / "ЧАСТНОСТИ" entirely.
                if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
                {
                    if (npt.Preposition != null)
                    {
                        t = npt.EndToken;
                        continue;
                    }
                }
                // Likewise skip phrases ending in "СТОРОНЫ" that are introduced by the preposition "С".
                if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
                {
                    t = npt.EndToken;
                    continue;
                }
                // Single-token phrase: ignore prepositions and the adverb "ПОТОМ".
                if (npt.BeginToken == npt.EndToken)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsPreposition)
                    {
                        continue;
                    }
                    else if (mc.IsAdverb)
                    {
                        if (t.IsValue("ПОТОМ", null))
                        {
                            continue;
                        }
                    }
                }
                else
                {
                }
                // Create one Object keyword per suitable word of the noun phrase.
                // li collects them; t0 remembers the first embedded token, t the last one.
                li.Clear();
                Pullenti.Ner.Token t0 = t;
                for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        continue;
                    }
                    // Empty branch with no effect (looks like a leftover debugging hook).
                    if (tt.IsValue("NATURAL", null))
                    {
                    }
                    if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    // Function words are skipped, except the word "ОТНОШЕНИЕ".
                    if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
                    {
                        if (tt.IsValue("ОТНОШЕНИЕ", null))
                        {
                        }
                        else
                        {
                            continue;
                        }
                    }
                    if (mc.IsMisc)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                        {
                            continue;
                        }
                    }
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    string norm = (tt as Pullenti.Ner.TextToken).Lemma;
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                    // Derivates are not looked up for the lemma "ЕСТЬ".
                    if (norm != "ЕСТЬ")
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                        _addNormals(kref, drv, norm);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt)
                    {
                        Morph = tt.Morph
                    };
                    kit.EmbedToken(rt1);
                    // If the very first phrase token was replaced, the composite span must start at the new token.
                    if (tt == t && li.Count == 0)
                    {
                        t0 = rt1;
                    }
                    t = rt1;
                    li.Add(kref);
                }
                // Several word keywords: build a composite Object keyword spanning t0..t.
                if (li.Count > 1)
                {
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    tmp.Length = 0;
                    tmp2.Clear();
                    // NOTE(review): hasNorm is assigned below but never read in this method.
                    bool hasNorm = false;
                    foreach (KeywordReferent kw in li)
                    {
                        string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                        // Use the part's NORMAL value when present, otherwise its VALUE.
                        string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                        if (n != null)
                        {
                            hasNorm = true;
                            tmp2.Add(n);
                        }
                        else
                        {
                            tmp2.Add(s);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
                    }
                    string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);
                    // tmp is reset here, so the text accumulated in the loop above is discarded;
                    // the NORMAL value is the sorted parts joined by single spaces.
                    tmp.Length = 0;
                    tmp2.Sort();
                    foreach (string s in tmp2)
                    {
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                    }
                    string norm = tmp.ToString();
                    // NORMAL is stored only when it differs from VALUE.
                    if (norm != val)
                    {
                        kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t)
                    {
                        Morph = npt.Morph
                    };
                    kit.EmbedToken(rt1);
                    t = rt1;
                }
            }
            // Second pass: join an Object keyword with a following Object keyword
            // ("X of [article] Y", or Y in the genitive case directly after X).
            cur = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                KeywordReferent kw = t.GetReferent() as KeywordReferent;
                if (kw == null || kw.Typ != KeywordType.Object)
                {
                    continue;
                }
                if (t.Next == null || kw.ChildWords > 2)
                {
                    continue;
                }
                Pullenti.Ner.Token t1 = t.Next;
                if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
                {
                    // Step over "OF" and an optional English article.
                    t1 = t1.Next;
                    if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
                    {
                        t1 = t1.Next;
                    }
                }
                else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
                {
                    continue;
                }
                KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
                if (kw2 == null)
                {
                    continue;
                }
                if (kw == kw2)
                {
                    continue;
                }
                // The union is limited to Object keywords whose combined ChildWords do not exceed 3.
                if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
                {
                    continue;
                }
                KeywordReferent kwUn = new KeywordReferent();
                kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
                kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
                _setRank(kwUn, cur, max);
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1)
                {
                    Morph = t.Morph
                };
                kit.EmbedToken(rt1);
                t = rt1;
            }
            // Optionally reorder the registered referents using the CompByRank comparer.
            if (SortKeywordsByRank)
            {
                List <Pullenti.Ner.Referent> all = new List <Pullenti.Ner.Referent>(ad.Referents);
                all.Sort(new CompByRank());
                ad.Referents = all;
            }
            // Optionally build an annotation referent limited to AnnotationMaxSentences and register it.
            if (AnnotationMaxSentences > 0)
            {
                KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
                if (ano != null)
                {
                    ad.RegisterReferent(ano);
                }
            }
        }
예제 #13
0
        /// <summary>
        /// Builds the referent tokens describing this value-with-units token. The returned list
        /// contains one token per entry of <c>Units</c>, then the tokens produced by <c>DivNum</c>
        /// (when present), and finally the token of the measure referent itself.
        /// </summary>
        /// <remarks>
        /// When <c>Not</c> is set and one bound of the interval is missing, the From/To bounds and
        /// their inclusion flags are swapped in place on this object before the template is built.
        /// </remarks>
        /// <param name="ad">Analyzer data passed to unit and divisor creation; also used to register the measure when not null.</param>
        /// <param name="name">Optional value of the NAME slot; the literal "T =" is replaced by "ТЕМПЕРАТУРА".</param>
        /// <param name="regist">When true and <paramref name="ad"/> is not null, the measure referent is registered.</param>
        /// <returns>List of referent tokens; the last element always holds the measure referent.</returns>
        public List <Pullenti.Ner.ReferentToken> CreateRefenetsTokensWithRegister(Pullenti.Ner.Core.AnalyzerData ad, string name, bool regist = true)
        {
            if (name == "T =")
            {
                name = "ТЕМПЕРАТУРА";
            }

            // One referent token per unit comes first.
            List <Pullenti.Ner.ReferentToken> tokens = new List <Pullenti.Ner.ReferentToken>();
            foreach (UnitToken unit in Units)
            {
                tokens.Add(new Pullenti.Ner.ReferentToken(unit.CreateReferentWithRegister(ad), unit.BeginToken, unit.EndToken));
            }

            Pullenti.Ner.Measure.MeasureReferent measure = new Pullenti.Ner.Measure.MeasureReferent();
            string template;

            if (SingleVal != null)
            {
                // Single value, optionally with a +/- deviation or an "about" marker.
                measure.AddValue(SingleVal.Value);
                if (PlusMinus != null)
                {
                    template = "[1 ±2" + (PlusMinusPercent ? "%" : "") + "]";
                    measure.AddValue(PlusMinus.Value);
                }
                else
                {
                    template = About ? "~1" : "1";
                }
            }
            else
            {
                // Negated open-ended interval: exchange the bounds and their inclusion flags.
                if (Not && (FromVal == null || ToVal == null))
                {
                    bool savedInclude = FromInclude;
                    FromInclude = ToInclude;
                    ToInclude   = savedInclude;
                    double? savedVal = FromVal;
                    FromVal = ToVal;
                    ToVal   = savedVal;
                }

                // Left part of the interval template and the index the upper bound will get.
                string lower;
                int upperIndex;
                if (FromVal != null)
                {
                    measure.AddValue(FromVal.Value);
                    lower      = FromInclude ? "[1" : "]1";
                    upperIndex = 2;
                }
                else
                {
                    lower      = "]";
                    upperIndex = 1;
                }

                if (ToVal != null)
                {
                    measure.AddValue(ToVal.Value);
                    template = lower + " .. " + upperIndex + (ToInclude ? ']' : '[');
                }
                else
                {
                    template = lower + " .. [";
                }
            }
            measure.Template = template;

            // At this point the list holds only unit tokens; link each unit referent to the measure.
            foreach (Pullenti.Ner.ReferentToken unitToken in tokens)
            {
                measure.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, unitToken.Referent, false, 0);
            }
            if (name != null)
            {
                measure.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME, name, false, 0);
            }

            // The divisor contributes its own tokens; its last referent is referenced from the measure.
            if (DivNum != null)
            {
                List <Pullenti.Ner.ReferentToken> divisorTokens = DivNum.CreateRefenetsTokensWithRegister(ad, null, true);
                tokens.AddRange(divisorTokens);
                measure.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, divisorTokens[divisorTokens.Count - 1].Referent, false, 0);
            }

            Pullenti.Ner.Measure.MeasureKind kind = UnitToken.CalcKind(Units);
            if (kind != Pullenti.Ner.Measure.MeasureKind.Undefined)
            {
                measure.Kind = kind;
            }

            if (regist && ad != null)
            {
                measure = ad.RegisterReferent(measure) as Pullenti.Ner.Measure.MeasureReferent;
            }

            tokens.Add(new Pullenti.Ner.ReferentToken(measure, BeginToken, EndToken));
            return tokens;
        }