Exemplo n.º 1
0
 /// <summary>
 /// Walks the token chain of <paramref name="kit"/> and embeds every referent token
 /// this analyzer manages to attach.
 /// </summary>
 /// <remarks>
 /// Two routes are tried per token. First, for a letter token, a term from <c>m_Ontology</c>
 /// (optionally followed by ':') is looked up and <c>TryAttach(..., true)</c> is called on the
 /// token after it; on success the result is stretched back to start at the term.
 /// Second, if that produced nothing and the token is a referent token or has
 /// <c>IsNewlineBefore</c> set, <c>TryAttach(..., false)</c> is called on the token itself.
 /// </remarks>
 /// <param name="kit">Analysis kit supplying the token chain and this analyzer's data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.ReferentToken found = null;

         // Route 1: ontology term, optional colon, then the value.
         Pullenti.Ner.Core.TerminToken keyword = null;
         if (cur.Chars.IsLetter)
         {
             keyword = m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
         }
         if (keyword != null)
         {
             Pullenti.Ner.Token after = keyword.EndToken.Next;
             if (after != null && after.IsChar(':'))
             {
                 after = after.Next;
             }
             found = this.TryAttach(after, true);
             if (found != null)
             {
                 // The resulting token also covers the leading term.
                 found.BeginToken = cur;
             }
         }

         // Route 2: direct attach, only for referent tokens or tokens flagged IsNewlineBefore.
         if (found == null)
         {
             if (!(cur is Pullenti.Ner.ReferentToken) && !cur.IsNewlineBefore)
             {
                 continue;
             }
             found = this.TryAttach(cur, false);
             if (found == null)
             {
                 continue;
             }
         }

         found.Referent = ad.RegisterReferent(found.Referent);
         kit.EmbedToken(found);
         // Continue after the embedded token.
         cur = found;
     }
 }
Exemplo n.º 2
0
        /// <summary>
        /// Scans the text line by line, parses a list of attribute tokens at tokens flagged
        /// <c>IsNewlineBefore</c> and wraps each parsed list into one <c>GoodReferent</c>.
        /// </summary>
        /// <remarks>
        /// Attribute referents are registered through the analyzer data as they are found.
        /// The <c>GoodReferent</c> objects themselves are only appended to <c>ad.Referents</c>
        /// after the whole chain has been walked. Progress is reported via <c>OnProgress</c>;
        /// the scan stops as soon as it returns false.
        /// </remarks>
        /// <param name="kit">Analysis kit supplying the text, the token chain and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

            // Progress granularity in characters, and the resulting number of progress steps (at least 1).
            int chunkSize = 100000;
            int chunkCount = (((kit.Sofa.Text.Length + chunkSize) - 1)) / chunkSize;
            chunkCount = (chunkCount == 0 ? 1 : chunkCount);

            int reported = 0;
            int threshold = 0;
            List<GoodReferent> collected = new List<GoodReferent>();

            for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
            {
                if (!tok.IsNewlineBefore)
                {
                    continue;
                }
                if (tok.BeginChar > threshold)
                {
                    threshold += chunkSize;
                    reported++;
                    bool keepGoing = this.OnProgress(reported, chunkCount, kit);
                    if (!keepGoing)
                    {
                        break;
                    }
                }

                // Skip a single leading non-letter token when there is something after it.
                if (!tok.Chars.IsLetter && tok.Next != null)
                {
                    tok = tok.Next;
                }

                List<Pullenti.Ner.ReferentToken> attrTokens = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(tok);
                if (attrTokens == null || attrTokens.Count == 0)
                {
                    continue;
                }

                GoodReferent item = new GoodReferent();
                foreach (Pullenti.Ner.ReferentToken attrToken in attrTokens)
                {
                    attrToken.Referent = ad.RegisterReferent(attrToken.Referent);
                    bool alreadyListed = item.FindSlot(GoodReferent.ATTR_ATTR, attrToken.Referent, true) != null;
                    if (!alreadyListed)
                    {
                        item.AddSlot(GoodReferent.ATTR_ATTR, attrToken.Referent, false, 0);
                    }
                    kit.EmbedToken(attrToken);
                }
                collected.Add(item);

                // One enclosing token spanning from the first to the last attribute token.
                Pullenti.Ner.ReferentToken first = attrTokens[0];
                Pullenti.Ner.ReferentToken last = attrTokens[attrTokens.Count - 1];
                Pullenti.Ner.ReferentToken whole = new Pullenti.Ner.ReferentToken(item, first, last);
                kit.EmbedToken(whole);
                tok = whole;
            }

            for (int idx = 0; idx < collected.Count; idx++)
            {
                ad.Referents.Add(collected[idx]);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs <c>_process</c> from the first token of <paramref name="kit"/> and registers the
        /// resulting <c>TitlePageReferent</c>, if one was produced.
        /// </summary>
        /// <param name="kit">Analysis kit supplying the token chain and this analyzer's data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

            // The end token reported by _process is not needed here.
            Pullenti.Ner.Token endToken;
            TitlePageReferent titlePage = _process(kit.FirstToken, 0, kit, out endToken);
            if (titlePage == null)
            {
                return;
            }
            ad.RegisterReferent(titlePage);
        }
Exemplo n.º 4
0
 /// <summary>
 /// Walks the token chain and, wherever <c>TryParse</c> yields a referent token,
 /// registers its referent and embeds the token into the chain.
 /// </summary>
 /// <param name="kit">Analysis kit supplying the token chain and this analyzer's data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.ReferentToken parsed = TryParse(cur);
         if (parsed == null)
         {
             continue;
         }
         parsed.Referent = ad.RegisterReferent(parsed.Referent);
         kit.EmbedToken(parsed);
         // Continue after the embedded token.
         cur = parsed;
     }
 }
Exemplo n.º 5
0
        /// <summary>
        /// Copies the referents collected by every analyzer into <c>ar.Entities</c>, optionally
        /// linking them to items of an external ontology on the way.
        /// </summary>
        /// <remarks>
        /// Analyzers are visited in two passes: those with <c>IsSpecific</c> set first, then the rest.
        /// </remarks>
        /// <param name="kit">Analysis kit holding the per-analyzer data.</param>
        /// <param name="ar">Result object receiving the entities and the log line.</param>
        /// <param name="extOntology">External ontology to attach referents to; may be null.</param>
        /// <param name="noLog">When true, the summary message is neither raised nor logged.</param>
        internal void _createRes(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, ExtOntology extOntology, bool noLog)
        {
            Stopwatch timer = Stopwatch.StartNew();
            int linkedCount = 0;

            for (int pass = 0; pass < 2; pass++)
            {
                bool wantSpecific = (pass == 0);
                foreach (Analyzer analyzer in Analyzers)
                {
                    if (analyzer.IsSpecific != wantSpecific)
                    {
                        continue;
                    }
                    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
                    if (data == null || data.Referents.Count <= 0)
                    {
                        continue;
                    }
                    if (extOntology != null)
                    {
                        foreach (Referent referent in data.Referents)
                        {
                            // Referents that already carry ontology items are left untouched.
                            if (referent.OntologyItems != null)
                            {
                                continue;
                            }
                            var attached = extOntology.AttachReferent(referent);
                            referent.OntologyItems = attached;
                            if (attached != null)
                            {
                                linkedCount++;
                            }
                        }
                    }
                    ar.Entities.AddRange(data.Referents);
                }
            }
            timer.Stop();

            if (noLog || extOntology == null)
            {
                return;
            }
            string summary = string.Format("Привязано {0} объектов к внешней отнологии ({1} элементов) за {2}", linkedCount, extOntology.Items.Count, OutSecs(timer.ElapsedMilliseconds));
            this.OnMessage(summary);
            ar.Log.Add(summary);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Builds a document fragment tree starting at the first token of <paramref name="kit"/> and
 /// asks it to create its referent against this analyzer's data.
 /// </summary>
 /// <remarks>
 /// Does nothing when the kit has no tokens or when <c>FragToken.CreateDocument</c> returns null.
 /// </remarks>
 /// <param name="kit">Analysis kit supplying the token chain and this analyzer's data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Token t = kit.FirstToken;
     if (t == null)
     {
         return;
     }
     Pullenti.Ner.Instrument.Internal.FragToken dfr = Pullenti.Ner.Instrument.Internal.FragToken.CreateDocument(t, 0, InstrumentKind.Undefined);
     if (dfr == null)
     {
         return;
     }
     Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
     // The returned block referent is not used here; the call is kept because it receives
     // the analyzer data (presumably registering referents in it — confirm against FragToken).
     dfr.CreateReferent(ad);
 }
Exemplo n.º 7
0
        /// <summary>
        /// Produces the <c>UnitReferent</c> for this token together with the chain of referents for
        /// its base units, linking each referent to its base via BASEUNIT / BASEFACTOR slots.
        /// </summary>
        /// <remarks>
        /// The head referent comes from <c>Unit</c> when set, otherwise from <c>UnknownName</c>
        /// (marked <c>IsUnknown</c>), otherwise it is <c>ExtOnto</c>.
        /// NOTE(review): if none of the three is set the head is null and is dereferenced when
        /// <c>Pow != 1</c> — confirm callers never reach this state.
        /// </remarks>
        /// <param name="ad">Analyzer data used to register the referents; when null nothing is registered.</param>
        /// <returns>The (possibly registered) referent for this unit, i.e. the head of the chain.</returns>
        public Pullenti.Ner.Measure.UnitReferent CreateReferentWithRegister(Pullenti.Ner.Core.AnalyzerData ad)
        {
            Pullenti.Ner.Measure.UnitReferent head = ExtOnto;
            if (Unit != null)
            {
                head = _createReferent(Unit);
            }
            else if (UnknownName != null)
            {
                head = new Pullenti.Ner.Measure.UnitReferent();
                head.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_NAME, UnknownName, false, 0);
                head.IsUnknown = true;
            }

            if (Pow != 1)
            {
                head.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_POW, Pow.ToString(), false, 0);
            }

            // chain[0] is this unit; every following element is the base unit of the previous one.
            List<Pullenti.Ner.Measure.UnitReferent> chain = new List<Pullenti.Ner.Measure.UnitReferent>();
            chain.Add(head);
            if (Unit != null)
            {
                Unit baseUnit = Unit.BaseUnit;
                while (baseUnit != null)
                {
                    chain.Add(_createReferent(baseUnit));
                    baseUnit = baseUnit.BaseUnit;
                }
            }

            // Walk from the innermost base unit outwards, so that each base is registered
            // before the referent that points at it receives its BASEUNIT slot.
            for (int idx = chain.Count - 1; idx >= 0; idx--)
            {
                Pullenti.Ner.Measure.UnitReferent current = chain[idx];
                if (ad != null)
                {
                    current = ad.RegisterReferent(current) as Pullenti.Ner.Measure.UnitReferent;
                    chain[idx] = current;
                }
                if (idx == 0)
                {
                    continue;
                }
                Pullenti.Ner.Measure.UnitReferent derived = chain[idx - 1];
                derived.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_BASEUNIT, current, false, 0);
                Unit derivedUnit = derived.Tag as Unit;
                if (derivedUnit.BaseMultiplier != 0)
                {
                    derived.AddSlot(Pullenti.Ner.Measure.UnitReferent.ATTR_BASEFACTOR, Pullenti.Ner.Core.NumberHelper.DoubleToString(derivedUnit.BaseMultiplier), false, 0);
                }
            }
            return chain[0];
        }
Exemplo n.º 8
0
 /// <summary>
 /// Walks the token chain and, at every token that can start a sentence, tries to parse a
 /// thesis; each hit is registered in <paramref name="ad"/> and embedded into the chain.
 /// </summary>
 /// <param name="kit">Analysis kit supplying the token chain.</param>
 /// <param name="ad">Analyzer data in which the parsed referents are registered.</param>
 public static void Process(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Core.AnalyzerData ad)
 {
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.ReferentToken thesis = null;
         if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
         {
             thesis = TryParseThesis(cur);
         }
         if (thesis != null)
         {
             thesis.Referent = ad.RegisterReferent(thesis.Referent);
             kit.EmbedToken(thesis);
             // Continue after the embedded token.
             cur = thesis;
         }
     }
 }
Exemplo n.º 9
0
        /// <summary>
        /// Walks the token chain, parses good-attribute tokens and embeds a referent token for
        /// every attribute that could be created from them.
        /// </summary>
        /// <remarks>
        /// Progress is reported via <c>OnProgress</c>; the scan stops as soon as it returns false.
        /// A parsed token that yields no attribute is skipped as a whole.
        /// </remarks>
        /// <param name="kit">Analysis kit supplying the text, the token chain and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

            // Progress granularity in characters, and the resulting number of progress steps (at least 1).
            int chunkSize = 100000;
            int chunkCount = (((kit.Sofa.Text.Length + chunkSize) - 1)) / chunkSize;
            chunkCount = (chunkCount == 0 ? 1 : chunkCount);

            int reported = 0;
            int threshold = 0;

            for (Pullenti.Ner.Token tok = kit.FirstToken; tok != null; tok = tok.Next)
            {
                if (tok.BeginChar > threshold)
                {
                    threshold += chunkSize;
                    reported++;
                    bool keepGoing = this.OnProgress(reported, chunkCount, kit);
                    if (!keepGoing)
                    {
                        break;
                    }
                }

                Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(tok, null, true, true);
                if (parsed == null)
                {
                    continue;
                }

                GoodAttributeReferent attribute = parsed._createAttr();
                if (attribute != null)
                {
                    Pullenti.Ner.ReferentToken wrapper = new Pullenti.Ner.ReferentToken(attribute, parsed.BeginToken, parsed.EndToken);
                    wrapper.Referent = ad.RegisterReferent(attribute);
                    kit.EmbedToken(wrapper);
                    tok = wrapper;
                }
                else
                {
                    // Nothing to embed: jump to the end of the parsed span.
                    tok = parsed.EndToken;
                }
            }
        }
Exemplo n.º 10
0
 /// <summary>
 /// Tries to build a referent token that starts at <c>li[0]</c>: either a territory attached
 /// from the remaining items of <paramref name="li"/>, or a territory created from an
 /// organization item found right after <c>li[0]</c>.
 /// </summary>
 /// <param name="li">Territory item tokens; the first element is read unconditionally, so the list must not be empty.</param>
 /// <param name="ad">Analyzer data passed through to <c>TryAttachTerritory</c>.</param>
 /// <returns>
 /// The referent token with its begin moved to <c>li[0].BeginToken</c>, or null when nothing
 /// could be attached, when no token follows <c>li[0]</c>, or when the attached territory is a state.
 /// </returns>
 static Pullenti.Ner.ReferentToken _tryAttachPureTerr(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
 {
     Pullenti.Ner.Token t = li[0].EndToken.Next;
     if (t == null)
     {
         return(null);
     }

     // tt is the token after li[0], stepping over one opening bracket/quote if present.
     Pullenti.Ner.Token tt = t;
     if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, true, false))
     {
         tt = tt.Next;
     }

     if (li.Count > 1)
     {
         // First option: attach a territory from the items that follow li[0].
         List <TerrItemToken> tmp = new List <TerrItemToken>(li);
         tmp.RemoveAt(0);
         Pullenti.Ner.ReferentToken rt0 = TryAttachTerritory(tmp, ad, false, null, null);
         if (rt0 == null && tmp.Count == 2)
         {
             // Retry with the third argument set to true when exactly one of the two items carries a TerminItem.
             if (((tmp[0].TerminItem == null && tmp[1].TerminItem != null)) || ((tmp[0].TerminItem != null && tmp[1].TerminItem == null)))
             {
                 rt0 = TryAttachTerritory(tmp, ad, true, null, null);
             }
         }
         if (rt0 != null)
         {
             if ((rt0.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
             {
                 return(null);
             }
             rt0.BeginToken = li[0].BeginToken;
             rt0.Morph      = li[0].Morph;
             return(rt0);
         }
     }

     // Second option: an organization item directly after li[0].
     Pullenti.Ner.Address.Internal.AddressItemToken aid = Pullenti.Ner.Address.Internal.AddressItemToken.TryAttachOrg(tt);
     if (aid == null)
     {
         return(null);
     }
     Pullenti.Ner.ReferentToken rt = aid.CreateGeoOrgTerr();
     if (rt == null)
     {
         return(null);
     }
     rt.BeginToken = li[0].BeginToken;
     Pullenti.Ner.Token t1 = rt.EndToken;
     // An opening bracket was skipped above (tt != t): also take in the matching closing one.
     if (tt != t && Pullenti.Ner.Core.BracketHelper.CanBeEndOfSequence(t1.Next, false, null, false))
     {
         rt.EndToken = t1.Next;
     }
     return(rt);
 }
Exemplo n.º 11
0
 /// <summary>
 /// Finds measure expressions in the token chain, embeds the referent tokens created for
 /// them and, when the kit has an ontology, links found units to ontology items by name.
 /// </summary>
 /// <remarks>
 /// Runs in three stages: (1) collect extra unit terms from ontology items whose unit referent
 /// has no <c>m_Unit</c>; (2) parse and embed measures; (3) attach an ontology item to every
 /// registered <c>UnitReferent</c> that shares a NAME / FULLNAME value with it.
 /// </remarks>
 /// <param name="kit">Analysis kit supplying the token chain, the optional ontology and the analyzer data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

     // Stage 1: additional unit terms taken from the external ontology.
     Pullenti.Ner.Core.TerminCollection extraUnits = null;
     if (kit.Ontology != null)
     {
         extraUnits = new Pullenti.Ner.Core.TerminCollection();
         foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
         {
             UnitReferent ontoUnit = item.Referent as UnitReferent;
             if (ontoUnit == null || ontoUnit.m_Unit != null)
             {
                 continue;
             }
             foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
             {
                 if (slot.TypeName != UnitReferent.ATTR_NAME && slot.TypeName != UnitReferent.ATTR_FULLNAME)
                 {
                     continue;
                 }
                 Pullenti.Ner.Core.Termin term = new Pullenti.Ner.Core.Termin(slot.Value as string);
                 term.Tag = ontoUnit;
                 extraUnits.Add(term);
             }
         }
     }

     // Stage 2: parse measures and embed the tokens created for them.
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.Measure.Internal.MeasureToken measure = Pullenti.Ner.Measure.Internal.MeasureToken.TryParseMinimal(cur, extraUnits, false);
         if (measure == null)
         {
             measure = Pullenti.Ner.Measure.Internal.MeasureToken.TryParse(cur, extraUnits, true, false, false, false);
             if (measure == null)
             {
                 continue;
             }
         }
         List<Pullenti.Ner.ReferentToken> produced = measure.CreateRefenetsTokensWithRegister(ad, true);
         if (produced == null)
         {
             continue;
         }
         for (int i = 0; i < produced.Count; i++)
         {
             Pullenti.Ner.ReferentToken embedded = produced[i];
             cur.Kit.EmbedToken(embedded);
             cur = embedded;

             // Later tokens that began or ended on the same token as the one just embedded
             // are re-pointed at the embedded token itself.
             for (int j = i + 1; j < produced.Count; j++)
             {
                 Pullenti.Ner.ReferentToken later = produced[j];
                 if (later.BeginToken == embedded.BeginToken)
                 {
                     later.BeginToken = cur;
                 }
                 if (later.EndToken == embedded.EndToken)
                 {
                     later.EndToken = cur;
                 }
             }
         }
     }

     // Stage 3: link registered units to the first ontology item sharing a name with them.
     if (kit.Ontology == null)
     {
         return;
     }
     foreach (Pullenti.Ner.Referent registered in ad.Referents)
     {
         UnitReferent unit = registered as UnitReferent;
         if (unit == null)
         {
             continue;
         }
         foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
         {
             UnitReferent ontoUnit = item.Referent as UnitReferent;
             if (ontoUnit == null)
             {
                 continue;
             }
             bool sharesName = false;
             foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
             {
                 bool isNameSlot = slot.TypeName == UnitReferent.ATTR_NAME || slot.TypeName == UnitReferent.ATTR_FULLNAME;
                 if (isNameSlot && unit.FindSlot(null, slot.Value, true) != null)
                 {
                     sharesName = true;
                     break;
                 }
             }
             if (!sharesName)
             {
                 continue;
             }
             unit.OntologyItems = new List<Pullenti.Ner.ExtOntologyItem>();
             unit.OntologyItems.Add(item);
             break;
         }
     }
 }
Exemplo n.º 12
0
        /// <summary>
        /// Splits the token chain into mail lines, groups the lines into blocks and registers
        /// <c>MailReferent</c> entities of kind Head, Hello, Body and Tail for each block.
        /// </summary>
        /// <remarks>
        /// The created referent tokens are not embedded into the kit; they are only recorded as
        /// occurrences of the registered mail referents.
        /// </remarks>
        /// <param name="kit">Analysis kit supplying the token chain and this analyzer's data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>();

            // Phase 1: parse the token chain into MailLine objects. Where Parse returns null
            // the scan simply moves on to the next token.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                if (ml == null)
                {
                    continue;
                }
                // NOTE(review): empty branch with no effect — looks like a leftover debugging
                // anchor; confirm before removing.
                if (lines.Count == 91)
                {
                }
                lines.Add(ml);
                t = ml.EndToken;
            }
            if (lines.Count == 0)
            {
                return;
            }
            int i;
            List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >();
            List <Pullenti.Ner.Mail.Internal.MailLine>         blk    = null;

            // Phase 2: group lines into blocks. A new block is opened at a From line that is
            // judged (isNew) to start a new message header.
            for (i = 0; i < lines.Count; i++)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = lines[i];
                if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    // Starts a new block if flagged as a mandatory first line or if it is the very first line...
                    bool isNew = ml.MustBeFirstLine || i == 0;
                    // ...or if at least two lines follow and one of the next two is From or Hello...
                    if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)))
                    {
                        isNew = true;
                    }
                    // ...or if the nearest preceding line with a defined type is BestRegards...
                    if (!isNew)
                    {
                        for (int j = i - 1; j >= 0; j--)
                        {
                            if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                            {
                                if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                                {
                                    isNew = true;
                                }
                                break;
                            }
                        }
                    }
                    // ...or if the line itself contains a referent of type DATE or URI.
                    if (!isNew)
                    {
                        for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next)
                        {
                            if (tt.GetReferent() != null)
                            {
                                if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI")
                                {
                                    isNew = true;
                                }
                            }
                        }
                    }
                    if (isNew)
                    {
                        blk = new List <Pullenti.Ner.Mail.Internal.MailLine>();
                        blocks.Add(blk);
                        // Collect the header lines of the new block: From lines, plus a non-From
                        // line that is immediately followed by a From line.
                        for (; i < lines.Count; i++)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // A From line flagged MustBeFirstLine ends a header that already has content.
                                if (blk.Count > 0 && lines[i].MustBeFirstLine)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // Does the block already hold a From line that is IsRealFrom,
                                // MustBeFirstLine or has a MailAddr?
                                int j;
                                for (j = 0; j < blk.Count; j++)
                                {
                                    if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null)
                                        {
                                            break;
                                        }
                                    }
                                }
                                // No such line yet: keep absorbing this line into the header.
                                if (j >= blk.Count)
                                {
                                    blk.Add(lines[i]);
                                    continue;
                                }
                                // Otherwise look ahead through the run of following From lines; if one of
                                // them is IsRealFrom / MustBeFirstLine / has a MailAddr, the header stops here
                                // (presumably because another header begins — confirm).
                                bool ok = false;
                                for (j = i + 1; j < lines.Count; j++)
                                {
                                    if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        break;
                                    }
                                    if (lines[j].IsRealFrom || lines[j].MustBeFirstLine)
                                    {
                                        ok = true;
                                        break;
                                    }
                                    if (lines[j].MailAddr != null)
                                    {
                                        ok = true;
                                        break;
                                    }
                                }
                                if (ok)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else
                            {
                                break;
                            }
                        }
                        // Step back so that the outer loop's i++ lands on the first line not consumed above.
                        i--;
                        continue;
                    }
                }
                // Lines seen before any header go into an initial block.
                if (blk == null)
                {
                    blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>()));
                }
                blk.Add(lines[i]);
            }
            if (blocks.Count == 0)
            {
                return;
            }
            // Phase 3: for each block register Head / Hello / Body / Tail referents.
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            for (int j = 0; j < blocks.Count; j++)
            {
                // From here on 'lines' refers to the lines of the current block only.
                lines = blocks[j];
                if (lines.Count == 0)
                {
                    continue;
                }
                i = 0;
                if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    // Head: runs over From lines and over single non-From lines directly followed
                    // by a From line; t1 tracks the end of the last From line seen.
                    Pullenti.Ner.Token t1 = lines[0].EndToken;
                    for (; i < lines.Count; i++)
                    {
                        if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                            t1 = lines[i].EndToken;
                        }
                        else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                    MailReferent mail = new MailReferent()
                    {
                        Kind = MailKind.Head
                    };
                    Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1);
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
                // i0 is the index of the first line after the head.
                int i0 = i;
                // t2 will hold the first token of the tail (signature), or stay null if none is found.
                Pullenti.Ner.Token t2 = null;
                int err = 0;
                // Scan from the end of the block back towards i0 looking for the start of the tail.
                for (i = lines.Count - 1; i >= i0; i--)
                {
                    Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                    if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                    {
                        // Tail starts at this BestRegards line; extend it upwards over BestRegards lines
                        // with fewer than 2 words and over a line with fewer than 3 words that is
                        // directly preceded by such a BestRegards line.
                        t2 = lines[i].BeginToken;
                        for (--i; i >= i0; i--)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2))
                            {
                                t2 = lines[i].BeginToken;
                            }
                            else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2))
                            {
                                i--;
                                t2 = lines[i].BeginToken;
                            }
                            else
                            {
                                break;
                            }
                        }
                        break;
                    }
                    // A line with referents and fewer than 3 words (not the first post-head line):
                    // tentatively mark it as the tail start and reset the error counter.
                    if (li.Refs.Count > 0 && (li.Words < 3) && i > i0)
                    {
                        err = 0;
                        t2  = li.BeginToken;
                        continue;
                    }
                    // A line with more than 10 words discards the tentative tail start.
                    if (li.Words > 10)
                    {
                        t2 = null;
                        continue;
                    }
                    // More than two lines with over 2 words since the last reset also discard it.
                    if (li.Words > 2)
                    {
                        if ((++err) > 2)
                        {
                            t2 = null;
                        }
                    }
                }
                // Fallback: the last untyped line with no words whose first referent is a person
                // (excluding the first post-head line) becomes the tail start.
                if (t2 == null)
                {
                    for (i = lines.Count - 1; i >= i0; i--)
                    {
                        Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                        if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                        {
                            if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent))
                            {
                                if (li.Words == 0 && i > i0)
                                {
                                    t2 = li.BeginToken;
                                    break;
                                }
                            }
                        }
                    }
                }
                // Hello: skipping untyped lines without words or referents, check whether the first
                // remaining line is a Hello line; if so register it and move i0 past it.
                for (int ii = i0; ii < lines.Count; ii++)
                {
                    if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Hello
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                            i0 = ii + 1;
                        }
                        break;
                    }
                    else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0)
                    {
                        break;
                    }
                }
                if (i0 < lines.Count)
                {
                    // The tail starts at a token with no predecessor: there is no room for a body.
                    if (t2 != null && t2.Previous == null)
                    {
                    }
                    else
                    {
                        // Body: from the first remaining line up to the token before the tail,
                        // or to the end of the block when there is no tail.
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Body
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? t2.Previous : lines[lines.Count - 1].EndToken));
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                    }
                    if (t2 != null)
                    {
                        // Tail: from t2 to the end of the block.
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Tail
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                        // Attach to the tail referent all referents of lines that start at or after t2.
                        // Note this runs even when the tail was not registered above (LengthChar == 0).
                        for (i = i0; i < lines.Count; i++)
                        {
                            if (lines[i].BeginChar >= t2.BeginChar)
                            {
                                foreach (Pullenti.Ner.Referent r in lines[i].Refs)
                                {
                                    mail.AddRef(r, 0);
                                }
                            }
                        }
                    }
                }
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Runs every enabled analyzer over <paramref name="kit"/> in order and then refreshes the
        /// generalization relations, timing and logging each step.
        /// </summary>
        /// <remarks>
        /// Exceptions thrown by an analyzer or by the refresh step are caught; they are wrapped,
        /// reported and stored in <paramref name="ar"/> only when <paramref name="noLog"/> is false.
        /// Once the timeout has been detected only the analyzer named "INSTRUMENT" is still run.
        /// NOTE(review): detecting the timeout also sets <c>m_Breaked</c>, so the next non-ignored
        /// analyzer hits the "interrupted by user" branch and ends the loop — confirm this is intended.
        /// </remarks>
        /// <param name="kit">Analysis kit handed to every analyzer.</param>
        /// <param name="ar">Result object receiving log lines and exceptions.</param>
        /// <param name="noLog">When true, no messages, progress events or exceptions are reported.</param>
        void _process2(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, bool noLog)
        {
            string logLine;
            Stopwatch timer = Stopwatch.StartNew();
            bool timedOut = false;

            // Iterate over a snapshot of the analyzer list.
            List<Analyzer> queue = new List<Analyzer>(m_Analyzers);
            foreach (Analyzer analyzer in queue)
            {
                if (analyzer.IgnoreThisAnalyzer)
                {
                    continue;
                }

                if (m_Breaked)
                {
                    if (!noLog)
                    {
                        logLine = "Процесс прерван пользователем";
                        this.OnMessage(logLine);
                        ar.Log.Add(logLine);
                    }
                    break;
                }

                // The timeout is detected (and reported) at most once.
                if (TimeoutSeconds > 0 && !timedOut && ((DateTime.Now - kit.StartDate)).TotalSeconds > TimeoutSeconds)
                {
                    m_Breaked = true;
                    if (!noLog)
                    {
                        logLine = "Процесс прерван по таймауту";
                        this.OnMessage(logLine);
                        ar.Log.Add(logLine);
                    }
                    timedOut = true;
                }
                if (timedOut && analyzer.Name != "INSTRUMENT")
                {
                    continue;
                }

                if (!noLog)
                {
                    this.OnProgressHandler(analyzer, new ProgressChangedEventArgs(0, string.Format("Работа \"{0}\"", analyzer.Caption)));
                }

                try
                {
                    timer.Reset();
                    timer.Start();
                    analyzer.Process(kit);
                    timer.Stop();

                    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
                    if (!noLog)
                    {
                        logLine = string.Format("Анализатор \"{0}\" выделил {1} объект(ов) за {2}", analyzer.Caption, (data == null ? 0 : data.Referents.Count), OutSecs(timer.ElapsedMilliseconds));
                        this.OnMessage(logLine);
                        ar.Log.Add(logLine);
                    }
                }
                catch (Exception ex)
                {
                    if (!noLog)
                    {
                        Exception wrapped = new Exception(string.Format("Ошибка в анализаторе \"{0}\" ({1})", analyzer.Caption, ex.Message), ex);
                        this.OnMessage(wrapped);
                        ar.AddException(wrapped);
                    }
                }
            }

            if (!noLog)
            {
                this.OnProgressHandler(null, new ProgressChangedEventArgs(0, "Пересчёт отношений обобщения"));
            }

            try
            {
                timer.Reset();
                timer.Start();
                Pullenti.Ner.Core.Internal.GeneralRelationHelper.RefreshGenerals(this, kit);
                timer.Stop();

                if (!noLog)
                {
                    logLine = string.Format("Отношение обобщение пересчитано за {0}", OutSecs(timer.ElapsedMilliseconds));
                    this.OnMessage(logLine);
                    ar.Log.Add(logLine);
                }
            }
            catch (Exception ex)
            {
                if (!noLog)
                {
                    Exception wrapped = new Exception("Ошибка пересчёта отношения обобщения", ex);
                    this.OnMessage(wrapped);
                    ar.AddException(wrapped);
                }
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Converts this measure token into a list of referent tokens: the tokens of nested
        /// measures / units first, and the token of the resulting measure last.
        /// </summary>
        /// <remarks>
        /// Two shapes are handled:
        /// <list type="bullet">
        /// <item>composite - <c>Internals</c> is not empty and <c>Nums</c> is either absent or a
        /// "plus-minus percent" value: every internal token becomes an ATTR_VALUE of a new
        /// measure whose template enumerates them ("1 × 2 × 3", or "{1, 2}" for a set);</item>
        /// <item>simple - the measure is produced by <c>Nums</c>, and every internal token is
        /// attached to it through ATTR_REF.</item>
        /// </list>
        /// Side effect visible in this method: every internal token gets <c>Reliable = true</c>
        /// in the composite branch.
        /// </remarks>
        /// <param name="ad">Analyzer data used to register the created referents. When null, the
        /// referents created directly in the composite branch are left unregistered.</param>
        /// <param name="register">Forwarded to <c>Nums.CreateRefenetsTokensWithRegister</c> in the
        /// simple branch; it is not consulted in the composite branch.</param>
        /// <returns>
        /// The created tokens, or null when the token is rejected as doubtful or when there is
        /// neither <c>Nums</c> nor any internal token to build from. In the composite branch the
        /// list may lack this token's own measure (when <c>IsEmpty</c>) and may even be empty.
        /// </returns>
        public List<Pullenti.Ner.ReferentToken> CreateRefenetsTokensWithRegister(Pullenti.Ner.Core.AnalyzerData ad, bool register = true)
        {
            // Without numbers and without nested measures there is nothing to build; every
            // remaining path below would dereference the null Nums.
            if (Nums == null && Internals.Count == 0)
            {
                return null;
            }
            if (Internals.Count == 0 && !Reliable && Nums.Units.Count == 1 && Nums.Units[0].IsDoubt)
            {
                var unit = Nums.Units[0];
                // A doubtful single unit is still accepted when it has an unknown name, when the
                // numbers start a new line, or when the unit word is longer than one character
                // and not found in the morphological dictionary.
                bool keepAnyway = unit.UnknownName != null
                                  || Nums.IsNewlineBefore
                                  || (unit.BeginToken.LengthChar > 1 && unit.BeginToken.GetMorphClassInDictionary().IsUndefined);
                // Otherwise only a complete range (both bounds present) is accepted.
                if (!keepAnyway && (Nums.FromVal == null || Nums.ToVal == null))
                {
                    return null;
                }
            }
            List<Pullenti.Ner.ReferentToken> res = new List<Pullenti.Ner.ReferentToken>();

            if ((Nums == null || Nums.PlusMinusPercent) && Internals.Count > 0)
            {
                // Referent of the extra internal measure; each value measure refers to it.
                Pullenti.Ner.Referent exRef = null;
                if (InternalEx != null)
                {
                    List<Pullenti.Ner.ReferentToken> liEx = InternalEx.CreateRefenetsTokensWithRegister(ad, true);
                    if (liEx != null && liEx.Count > 0)
                    {
                        res.AddRange(liEx);
                        // The slot must hold the referent, not the token wrapping it (the simple
                        // branch below stores ".Referent" for the same attribute).
                        exRef = liEx[liEx.Count - 1].Referent;
                    }
                }
                Pullenti.Ner.Measure.MeasureReferent mr = new Pullenti.Ner.Measure.MeasureReferent();
                string templ0 = "1";
                string templ = null;
                if (Name != null)
                {
                    mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME, Name, false, 0);
                }
                List<Pullenti.Ner.Measure.MeasureReferent> ints = new List<Pullenti.Ner.Measure.MeasureReferent>();
                for (int k = 0; k < Internals.Count; k++)
                {
                    MeasureToken inner = Internals[k];
                    inner.Reliable = true;
                    List<Pullenti.Ner.ReferentToken> li = inner.CreateRefenetsTokensWithRegister(ad, false);
                    // An empty list would make "last token" point at a previous internal's
                    // measure (or fail on an empty result), so it is skipped like null.
                    if (li == null || li.Count == 0)
                    {
                        continue;
                    }
                    res.AddRange(li);
                    Pullenti.Ner.Measure.MeasureReferent mr0 = li[li.Count - 1].Referent as Pullenti.Ner.Measure.MeasureReferent;
                    if (mr0 == null)
                    {
                        continue;
                    }
                    if (exRef != null)
                    {
                        mr0.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, exRef, false, 0);
                    }
                    if (k == 0 && !IsEmpty)
                    {
                        // The first value's own template (e.g. a range) is lifted to the
                        // composite measure; the value itself becomes a plain "1".
                        templ0 = mr0.Template;
                        mr0.Template = "1";
                    }
                    if (ad != null)
                    {
                        mr0 = ad.RegisterReferent(mr0) as Pullenti.Ner.Measure.MeasureReferent;
                    }
                    mr.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_VALUE, mr0, false, 0);
                    ints.Add(mr0);
                    if (templ == null)
                    {
                        templ = "1";
                    }
                    else
                    {
                        // The placeholder number is the count of values added so far.
                        int nu = mr.GetStringValues(Pullenti.Ner.Measure.MeasureReferent.ATTR_VALUE).Count;
                        templ = string.Format("{0}{1}{2}", templ, (IsSet ? ", " : " × "), nu);
                    }
                }
                if (IsSet)
                {
                    templ = "{" + templ + "}";
                }
                if (templ0 != "1")
                {
                    templ = templ0.Replace("1", templ);
                }
                if (Nums != null && Nums.PlusMinusPercent && Nums.SingleVal != null)
                {
                    templ = string.Format("[{0} ±{1}%]", templ, Internals.Count + 1);
                    mr.AddValue(Nums.SingleVal.Value);
                }
                mr.Template = templ;

                // Find a length unit among the leading values; scanning stops at the first
                // value that has units but is not a length.
                bool hasLength = false;
                Pullenti.Ner.Measure.UnitReferent uref = null;
                for (int i = 0; i < ints.Count; i++)
                {
                    if (ints[i].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        hasLength = true;
                        uref = ints[i].GetSlotValue(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT) as Pullenti.Ner.Measure.UnitReferent;
                    }
                    else if (ints[i].Units.Count > 0)
                    {
                        break;
                    }
                }
                if (ints.Count > 1 && hasLength && uref != null)
                {
                    // Values written without a unit inherit the length unit found above.
                    foreach (Pullenti.Ner.Measure.MeasureReferent value in ints)
                    {
                        if (value.FindSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, null, true) == null)
                        {
                            value.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, uref, false, 0);
                            value.Kind = Pullenti.Ner.Measure.MeasureKind.Length;
                        }
                    }
                }
                if (ints.Count == 3)
                {
                    if (ints[0].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[1].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[2].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        mr.Kind = Pullenti.Ner.Measure.MeasureKind.Volume;
                    }
                    else if (ints[0].Units.Count == 0 && ints[1].Units.Count == 0 && ints[2].Units.Count == 0)
                    {
                        // Three unit-less numbers count as a volume only when the measure is
                        // named as a size / overall dimensions.
                        string nam = mr.GetStringValue(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME);
                        if (nam != null && (nam.Contains("РАЗМЕР") || nam.Contains("ГАБАРИТ")))
                        {
                            mr.Kind = Pullenti.Ner.Measure.MeasureKind.Volume;
                        }
                    }
                }
                if (ints.Count == 2)
                {
                    if (ints[0].Kind == Pullenti.Ner.Measure.MeasureKind.Length && ints[1].Kind == Pullenti.Ner.Measure.MeasureKind.Length)
                    {
                        mr.Kind = Pullenti.Ner.Measure.MeasureKind.Area;
                    }
                }
                if (!IsEmpty)
                {
                    if (ad != null)
                    {
                        mr = ad.RegisterReferent(mr) as Pullenti.Ner.Measure.MeasureReferent;
                    }
                    res.Add(new Pullenti.Ner.ReferentToken(mr, BeginToken, EndToken));
                }
                return res;
            }

            // Simple branch: Nums yields the unit tokens followed by the measure token.
            List<Pullenti.Ner.ReferentToken> re2 = Nums.CreateRefenetsTokensWithRegister(ad, Name, register);
            Pullenti.Ner.ReferentToken own = re2[re2.Count - 1];

            foreach (MeasureToken inner in Internals)
            {
                List<Pullenti.Ner.ReferentToken> li = inner.CreateRefenetsTokensWithRegister(ad, true);
                if (li == null || li.Count == 0)
                {
                    continue;
                }
                res.AddRange(li);
                own.Referent.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, li[li.Count - 1].Referent, false, 0);
            }
            // The measure token spans the whole measure token, not just the numeric part.
            own.BeginToken = BeginToken;
            own.EndToken = EndToken;
            res.AddRange(re2);
            return res;
        }
Exemplo n.º 15
0
        /// <summary>
        /// Tries to build an "administrative okrug" geo referent from the first territory item
        /// when its termin is flagged as a Moscow region.
        /// </summary>
        /// <param name="li">Parsed territory items; only the first one is examined.</param>
        /// <param name="ad">Analyzer data; not used by this method.</param>
        /// <returns>
        /// A referent token covering the first item, or null when the list is empty, the item is
        /// not a Moscow region, or the item is doubtful and is followed by neither a city nor a
        /// street.
        /// </returns>
        static Pullenti.Ner.ReferentToken _tryAttachMoscowAO(List<TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad)
        {
            if (li == null || li.Count == 0)
            {
                return null;
            }
            TerrItemToken first = li[0];
            if (first.TerminItem == null || !first.TerminItem.IsMoscowRegion)
            {
                return null;
            }
            if (first.IsDoubt)
            {
                // A doubtful match needs confirmation from the following context: either a
                // city, or an address list that starts with a street.
                bool ok = CityAttachHelper.CheckCityAfter(first.EndToken.Next);
                if (!ok)
                {
                    List<Pullenti.Ner.Address.Internal.AddressItemToken> ali = Pullenti.Ner.Address.Internal.AddressItemToken.TryParseList(first.EndToken.Next, null, 2);
                    ok = ali != null && ali.Count > 0 && ali[0].Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street;
                }
                if (!ok)
                {
                    return null;
                }
            }
            Pullenti.Ner.Geo.GeoReferent reg = new Pullenti.Ner.Geo.GeoReferent();
            string typ = "АДМИНИСТРАТИВНЫЙ ОКРУГ";
            reg.AddTyp(typ);

            string name = first.TerminItem.CanonicText;
            // Strip the trailing type words plus the separator character before them. The length
            // check keeps a name that consists of the type words only: the computed length would
            // be -1 there and Substring would throw.
            if (name != null && name.Length > typ.Length && Pullenti.Morph.LanguageHelper.EndsWith(name, typ))
            {
                name = name.Substring(0, name.Length - typ.Length - 1).Trim();
            }
            reg.AddName(name);
            return new Pullenti.Ner.ReferentToken(reg, first.BeginToken, first.EndToken);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Extracts weapon referents from the text in two passes.
        /// </summary>
        /// <remarks>
        /// Pass 1 parses weapon item lists, registers the attached referents, embeds their tokens
        /// and indexes every referent by its model strings (also as "brand model") and by its
        /// names. Pass 2 rescans the text and embeds tokens for bare mentions of those models
        /// and names, including a name enclosed in brackets.
        /// </remarks>
        /// <param name="kit">Analysis kit that supplies the token chain and receives the embedded
        /// referent tokens.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            Pullenti.Ner.Core.TerminCollection models = new Pullenti.Ner.Core.TerminCollection();
            Dictionary<string, List<Pullenti.Ner.Referent>> objsByModel = new Dictionary<string, List<Pullenti.Ner.Referent>>();
            Pullenti.Ner.Core.TerminCollection objByNames = new Pullenti.Ner.Core.TerminCollection();

            // Pass 1: full weapon descriptions.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> its = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(t, 10);
                if (its == null)
                {
                    continue;
                }
                List<Pullenti.Ner.ReferentToken> rts = this.TryAttach(its, false);
                if (rts == null)
                {
                    continue;
                }
                foreach (Pullenti.Ner.ReferentToken rt in rts)
                {
                    rt.Referent = ad.RegisterReferent(rt.Referent);
                    kit.EmbedToken(rt);
                    t = rt;
                    foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
                    {
                        // A slot without a value has nothing to index (and ToString would fail).
                        if (s.Value == null)
                        {
                            continue;
                        }
                        if (s.TypeName == WeaponReferent.ATTR_MODEL)
                        {
                            string mod = s.Value.ToString();
                            // An empty model cannot be indexed; reading mod[0] would throw.
                            if (string.IsNullOrEmpty(mod))
                            {
                                continue;
                            }
                            // Iteration 0 indexes the bare model, iteration 1 "brand model".
                            for (int k = 0; k < 2; k++)
                            {
                                // Keys starting with a digit are not indexed.
                                if (!char.IsDigit(mod[0]))
                                {
                                    List<Pullenti.Ner.Referent> owners;
                                    if (!objsByModel.TryGetValue(mod, out owners))
                                    {
                                        owners = new List<Pullenti.Ner.Referent>();
                                        objsByModel.Add(mod, owners);
                                    }
                                    if (!owners.Contains(rt.Referent))
                                    {
                                        owners.Add(rt.Referent);
                                    }
                                    models.AddString(mod, owners, null, false);
                                }
                                if (k > 0)
                                {
                                    break;
                                }
                                string brand = rt.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                                if (brand == null)
                                {
                                    break;
                                }
                                mod = string.Format("{0} {1}", brand, mod);
                            }
                        }
                        else if (s.TypeName == WeaponReferent.ATTR_NAME)
                        {
                            objByNames.Add(new Pullenti.Ner.Core.Termin(s.Value.ToString())
                            {
                                Tag = rt.Referent
                            });
                        }
                    }
                }
            }
            if (objsByModel.Count == 0 && objByNames.Termins.Count == 0)
            {
                return;
            }

            // Pass 2: bare mentions of the models and names collected above.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                // A known name that fills a bracket sequence exactly: embed the whole sequence.
                Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
                if (br != null)
                {
                    Pullenti.Ner.Core.TerminToken toks = objByNames.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (toks != null && toks.EndToken.Next == br.EndToken)
                    {
                        Pullenti.Ner.ReferentToken bracketed = new Pullenti.Ner.ReferentToken(toks.Termin.Tag as Pullenti.Ner.Referent, br.BeginToken, br.EndToken);
                        kit.EmbedToken(bracketed);
                        t = bracketed;
                        continue;
                    }
                }
                if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
                {
                    continue;
                }
                Pullenti.Ner.Core.TerminToken tok = models.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (tok == null && !t.Chars.IsAllLower)
                {
                    // Names are tried only for tokens that are not entirely lower-case.
                    tok = objByNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                }
                if (tok == null)
                {
                    continue;
                }
                if (!tok.IsWhitespaceAfter)
                {
                    // When glued to the next token, that token must be one of ",.)" or a bracket.
                    Pullenti.Ner.Token after = tok.EndToken.Next;
                    bool closingPunct = after != null && after.IsCharOf(",.)");
                    if (!closingPunct && !Pullenti.Ner.Core.BracketHelper.IsBracket(after, false))
                    {
                        continue;
                    }
                }
                // Model termins carry the list of owners (usable only when unambiguous),
                // name termins carry the referent itself.
                Pullenti.Ner.Referent tr;
                List<Pullenti.Ner.Referent> candidates = tok.Termin.Tag as List<Pullenti.Ner.Referent>;
                if (candidates != null && candidates.Count == 1)
                {
                    tr = candidates[0];
                }
                else
                {
                    tr = tok.Termin.Tag as Pullenti.Ner.Referent;
                }
                if (tr == null)
                {
                    continue;
                }
                // A brand right before the mention is absorbed into the token and the referent.
                Pullenti.Ner.Weapon.Internal.WeaponItemToken tit = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(tok.BeginToken.Previous, null, false, true);
                if (tit != null && tit.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
                {
                    tr.AddSlot(WeaponReferent.ATTR_BRAND, tit.Value, false, 0);
                    tok.BeginToken = tit.BeginToken;
                }
                Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(tr, tok.BeginToken, tok.EndToken);
                kit.EmbedToken(mention);
                t = mention;
            }
        }
Exemplo n.º 17
0
        /// <summary>
        /// Recomputes "general / specific" relations between the referents of all analyzers and
        /// merges a general referent into its specific one when the pairing is one-to-one.
        /// </summary>
        /// <remarks>
        /// Every referent's <c>Tag</c> temporarily holds a bookkeeping <c>Node</c>; for a merged-away
        /// referent it temporarily holds the referent that absorbed it. All tags are reset to
        /// null before returning.
        /// </remarks>
        /// <param name="proc">Processor whose analyzers are inspected.</param>
        /// <param name="kit">Analysis kit providing analyzer data and the token chain.</param>
        public static void RefreshGenerals(Pullenti.Ner.Processor proc, Pullenti.Ner.Core.AnalysisKit kit)
        {
            // analyzer name -> compare string -> referents sharing that string
            Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>> byAnalyzer = new Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>>();
            List<Node> nodes = new List<Node>();

            // Step 1: wrap every referent into a node and bucket it by its compare strings.
            foreach (Pullenti.Ner.Analyzer analyzer in proc.Analyzers)
            {
                Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
                if (data == null)
                {
                    continue;
                }
                foreach (Pullenti.Ner.Referent referent in data.Referents)
                {
                    Node node = new Node();
                    node.Ref = referent;
                    node.Ad = data;
                    nodes.Add(node);
                    referent.Tag = node;

                    Dictionary<string, List<Pullenti.Ner.Referent>> byKey;
                    if (!byAnalyzer.TryGetValue(analyzer.Name, out byKey))
                    {
                        byKey = new Dictionary<string, List<Pullenti.Ner.Referent>>();
                        byAnalyzer.Add(analyzer.Name, byKey);
                    }
                    List<string> keys = referent.GetCompareStrings();
                    if (keys == null)
                    {
                        continue;
                    }
                    foreach (string key in keys)
                    {
                        if (key == null)
                        {
                            continue;
                        }
                        List<Pullenti.Ner.Referent> bucket;
                        if (!byKey.TryGetValue(key, out bucket))
                        {
                            bucket = new List<Pullenti.Ner.Referent>();
                            byKey.Add(key, bucket);
                        }
                        bucket.Add(referent);
                    }
                }
            }

            // Step 2: record slot references between nodes in both directions.
            foreach (Node source in nodes)
            {
                foreach (Pullenti.Ner.Slot slot in source.Ref.Slots)
                {
                    Pullenti.Ner.Referent target = slot.Value as Pullenti.Ner.Referent;
                    if (target == null)
                    {
                        continue;
                    }
                    Node targetNode = target.Tag as Node;
                    if (targetNode == null)
                    {
                        continue;
                    }
                    if (targetNode.RefsFrom == null)
                    {
                        targetNode.RefsFrom = new List<Node>();
                    }
                    targetNode.RefsFrom.Add(source);
                    if (source.RefsTo == null)
                    {
                        source.RefsTo = new List<Node>();
                    }
                    source.RefsTo.Add(targetNode);
                }
            }

            // Step 3: inside every bucket find pairs where exactly one side can be general
            // for the other. Buckets with fewer than 2 or more than 3000 items are skipped.
            foreach (Dictionary<string, List<Pullenti.Ner.Referent>> byKey in byAnalyzer.Values)
            {
                foreach (List<Pullenti.Ner.Referent> bucket in byKey.Values)
                {
                    if (bucket.Count < 2 || bucket.Count > 3000)
                    {
                        continue;
                    }
                    for (int i = 0; i < bucket.Count; i++)
                    {
                        for (int j = i + 1; j < bucket.Count; j++)
                        {
                            Pullenti.Ner.Referent a = bucket[i];
                            Pullenti.Ner.Referent b = bucket[j];
                            Pullenti.Ner.Referent general = null;
                            Pullenti.Ner.Referent specific = null;
                            if (a.CanBeGeneralFor(b))
                            {
                                // Mutual generality gives no direction.
                                if (!b.CanBeGeneralFor(a))
                                {
                                    general = a;
                                    specific = b;
                                }
                            }
                            else if (b.CanBeGeneralFor(a))
                            {
                                general = b;
                                specific = a;
                            }
                            if (general == null)
                            {
                                continue;
                            }
                            Node generalNode = general.Tag as Node;
                            Node specificNode = specific.Tag as Node;
                            if (generalNode == null || specificNode == null)
                            {
                                continue;
                            }
                            if (generalNode.GenFrom == null)
                            {
                                generalNode.GenFrom = new List<Node>();
                            }
                            if (!generalNode.GenFrom.Contains(specificNode))
                            {
                                generalNode.GenFrom.Add(specificNode);
                            }
                            if (specificNode.GenTo == null)
                            {
                                specificNode.GenTo = new List<Node>();
                            }
                            if (!specificNode.GenTo.Contains(generalNode))
                            {
                                specificNode.GenTo.Add(generalNode);
                            }
                        }
                    }
                }
            }

            // Step 4: when a node has several general parents, drop each parent for which
            // IsInGenParentsOrHigher reports true from one of the other parents.
            foreach (Node node in nodes)
            {
                if (node.GenTo == null || node.GenTo.Count <= 1)
                {
                    continue;
                }
                for (int i = node.GenTo.Count - 1; i >= 0; i--)
                {
                    Node parent = node.GenTo[i];
                    bool redundant = false;
                    for (int j = 0; j < node.GenTo.Count; j++)
                    {
                        if (j == i)
                        {
                            continue;
                        }
                        if (node.GenTo[j].IsInGenParentsOrHigher(parent))
                        {
                            redundant = true;
                        }
                    }
                    if (!redundant)
                    {
                        continue;
                    }
                    parent.GenFrom.Remove(node);
                    node.GenTo.RemoveAt(i);
                }
            }

            // Step 5: a one-to-one general/specific pair is merged into the specific referent;
            // otherwise the specific referent just gets a link to its general one.
            foreach (Node node in nodes)
            {
                if (node.Deleted || node.GenTo == null || node.GenTo.Count != 1)
                {
                    continue;
                }
                Node parent = node.GenTo[0];
                if (parent.GenFrom.Count != 1)
                {
                    node.Ref.GeneralReferent = parent.Ref;
                    continue;
                }
                node.Ref.MergeSlots(parent.Ref, true);
                // The tag of the absorbed referent now points at its replacement.
                parent.Ref.Tag = node.Ref;
                parent.ReplaceValues(node);
                foreach (Pullenti.Ner.TextAnnotation occurrence in parent.Ref.Occurrence)
                {
                    node.Ref.AddOccurence(occurrence);
                }
                parent.Deleted = true;
            }

            // Step 6: per-token fix-up; runs while the tags from step 5 are still set.
            for (Pullenti.Ner.Token token = kit.FirstToken; token != null; token = token.Next)
            {
                _correctReferents(token);
            }

            // Step 7: unregister the absorbed referents and clear all temporary tags.
            foreach (Node node in nodes)
            {
                if (node.Deleted)
                {
                    node.Ad.RemoveReferent(node.Ref);
                }
                node.Ref.Tag = null;
            }
        }
Exemplo n.º 18
0
        /// <summary>
        /// Builds referent tokens for this "numbers with units" token: one token per unit,
        /// then the tokens of the divisor measure (if any), and the measure token last.
        /// </summary>
        /// <remarks>
        /// The measure template uses numbered placeholders for the added values, e.g. "1",
        /// "~1", "[1 ±2%]", "[1 .. 2]" or "] .. 1[".
        /// NOTE: when <c>Not</c> is set and only one bound is present, the bounds and their
        /// inclusiveness flags are swapped on this instance itself, so a repeated call swaps
        /// them back.
        /// </remarks>
        /// <param name="ad">Analyzer data passed to unit creation and used to register the measure.</param>
        /// <param name="name">Optional measure name; the literal "T =" is replaced with "ТЕМПЕРАТУРА".</param>
        /// <param name="regist">When true and <paramref name="ad"/> is not null, the measure is registered.</param>
        /// <returns>The created tokens; the last one always carries the measure referent.</returns>
        public List<Pullenti.Ner.ReferentToken> CreateRefenetsTokensWithRegister(Pullenti.Ner.Core.AnalyzerData ad, string name, bool regist = true)
        {
            if (name == "T =")
            {
                name = "ТЕМПЕРАТУРА";
            }

            List<Pullenti.Ner.ReferentToken> tokens = new List<Pullenti.Ner.ReferentToken>();
            foreach (UnitToken unit in Units)
            {
                tokens.Add(new Pullenti.Ner.ReferentToken(unit.CreateReferentWithRegister(ad), unit.BeginToken, unit.EndToken));
            }

            Pullenti.Ner.Measure.MeasureReferent measure = new Pullenti.Ner.Measure.MeasureReferent();
            string template = "1";

            if (SingleVal == null)
            {
                // Range. A negated half-open range is mirrored: the known bound moves to the
                // opposite side together with its inclusiveness flag.
                if (Not && (FromVal == null || ToVal == null))
                {
                    bool savedInclude = FromInclude;
                    FromInclude = ToInclude;
                    ToInclude = savedInclude;
                    double? savedVal = FromVal;
                    FromVal = ToVal;
                    ToVal = savedVal;
                }

                string lowerPart;
                int upperIndex;
                if (FromVal == null)
                {
                    lowerPart = "]";
                    upperIndex = 1;
                }
                else
                {
                    measure.AddValue(FromVal.Value);
                    lowerPart = (FromInclude ? "[1" : "]1");
                    upperIndex = 2;
                }

                if (ToVal == null)
                {
                    template = lowerPart + " .. [";
                }
                else
                {
                    measure.AddValue(ToVal.Value);
                    template = string.Format("{0} .. {1}{2}", lowerPart, upperIndex, (ToInclude ? ']' : '['));
                }
            }
            else
            {
                // Single value, optionally with a deviation or marked as approximate.
                measure.AddValue(SingleVal.Value);
                if (PlusMinus != null)
                {
                    template = string.Format("[1 ±2{0}]", (PlusMinusPercent ? "%" : ""));
                    measure.AddValue(PlusMinus.Value);
                }
                else if (About)
                {
                    template = "~1";
                }
            }
            measure.Template = template;

            // At this point the list holds the unit tokens only.
            foreach (Pullenti.Ner.ReferentToken unitToken in tokens)
            {
                measure.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_UNIT, unitToken.Referent, false, 0);
            }
            if (name != null)
            {
                measure.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_NAME, name, false, 0);
            }
            if (DivNum != null)
            {
                // The divisor is built by the same routine; its measure is linked via ATTR_REF.
                List<Pullenti.Ner.ReferentToken> divisorTokens = DivNum.CreateRefenetsTokensWithRegister(ad, null, true);
                tokens.AddRange(divisorTokens);
                measure.AddSlot(Pullenti.Ner.Measure.MeasureReferent.ATTR_REF, divisorTokens[divisorTokens.Count - 1].Referent, false, 0);
            }

            Pullenti.Ner.Measure.MeasureKind kind = UnitToken.CalcKind(Units);
            if (kind != Pullenti.Ner.Measure.MeasureKind.Undefined)
            {
                measure.Kind = kind;
            }
            if (regist && ad != null)
            {
                measure = ad.RegisterReferent(measure) as Pullenti.Ner.Measure.MeasureReferent;
            }

            tokens.Add(new Pullenti.Ner.ReferentToken(measure, BeginToken, EndToken));
            return tokens;
        }
Exemplo n.º 19
0
        // Основная функция выделения телефонов
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            bool hasDenoms = false;

            foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
            {
                if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
                {
                    hasDenoms = true;
                }
            }
            if (!hasDenoms)
            {
                Pullenti.Ner.Denomination.DenominationAnalyzer a = new Pullenti.Ner.Denomination.DenominationAnalyzer();
                a.Process(kit);
            }
            List <KeywordReferent> li   = new List <KeywordReferent>();
            StringBuilder          tmp  = new StringBuilder();
            List <string>          tmp2 = new List <string>();
            int max = 0;

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                max++;
            }
            int cur = 0;

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    t = this._addReferents(ad, t, cur, max);
                    continue;
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (!t.Chars.IsLetter || (t.LengthChar < 3))
                {
                    continue;
                }
                string term = (t as Pullenti.Ner.TextToken).Term;
                if (term == "ЕСТЬ")
                {
                    if ((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb)
                    {
                    }
                    else
                    {
                        continue;
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = null;
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                if (npt == null)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsVerb && !mc.IsPreposition)
                    {
                        if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                        {
                            continue;
                        }
                        if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                        {
                            continue;
                        }
                        KeywordReferent kref = new KeywordReferent()
                        {
                            Typ = KeywordType.Predicate
                        };
                        string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                        if (norm == null)
                        {
                            norm = (t as Pullenti.Ner.TextToken).Lemma;
                        }
                        if (norm.EndsWith("ЬСЯ"))
                        {
                            norm = norm.Substring(0, norm.Length - 2);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                        _addNormals(kref, drv, norm);
                        kref = ad.RegisterReferent(kref) as KeywordReferent;
                        _setRank(kref, cur, max);
                        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t)
                        {
                            Morph = t.Morph
                        };
                        kit.EmbedToken(rt1);
                        t = rt1;
                        continue;
                    }
                    continue;
                }
                if (npt.InternalNoun != null)
                {
                    continue;
                }
                if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
                {
                    if (npt.Preposition != null)
                    {
                        t = npt.EndToken;
                        continue;
                    }
                }
                if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
                {
                    t = npt.EndToken;
                    continue;
                }
                if (npt.BeginToken == npt.EndToken)
                {
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsPreposition)
                    {
                        continue;
                    }
                    else if (mc.IsAdverb)
                    {
                        if (t.IsValue("ПОТОМ", null))
                        {
                            continue;
                        }
                    }
                }
                else
                {
                }
                li.Clear();
                Pullenti.Ner.Token t0 = t;
                for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        continue;
                    }
                    if (tt.IsValue("NATURAL", null))
                    {
                    }
                    if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
                    {
                        if (tt.IsValue("ОТНОШЕНИЕ", null))
                        {
                        }
                        else
                        {
                            continue;
                        }
                    }
                    if (mc.IsMisc)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                        {
                            continue;
                        }
                    }
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    string norm = (tt as Pullenti.Ner.TextToken).Lemma;
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                    if (norm != "ЕСТЬ")
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                        _addNormals(kref, drv, norm);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt)
                    {
                        Morph = tt.Morph
                    };
                    kit.EmbedToken(rt1);
                    if (tt == t && li.Count == 0)
                    {
                        t0 = rt1;
                    }
                    t = rt1;
                    li.Add(kref);
                }
                if (li.Count > 1)
                {
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    tmp.Length = 0;
                    tmp2.Clear();
                    bool hasNorm = false;
                    foreach (KeywordReferent kw in li)
                    {
                        string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                        string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                        if (n != null)
                        {
                            hasNorm = true;
                            tmp2.Add(n);
                        }
                        else
                        {
                            tmp2.Add(s);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
                    }
                    string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);
                    tmp.Length = 0;
                    tmp2.Sort();
                    foreach (string s in tmp2)
                    {
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                    }
                    string norm = tmp.ToString();
                    if (norm != val)
                    {
                        kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t)
                    {
                        Morph = npt.Morph
                    };
                    kit.EmbedToken(rt1);
                    t = rt1;
                }
            }
            cur = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                KeywordReferent kw = t.GetReferent() as KeywordReferent;
                if (kw == null || kw.Typ != KeywordType.Object)
                {
                    continue;
                }
                if (t.Next == null || kw.ChildWords > 2)
                {
                    continue;
                }
                Pullenti.Ner.Token t1 = t.Next;
                if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
                {
                    t1 = t1.Next;
                    if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
                    {
                        t1 = t1.Next;
                    }
                }
                else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
                {
                    continue;
                }
                KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
                if (kw2 == null)
                {
                    continue;
                }
                if (kw == kw2)
                {
                    continue;
                }
                if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
                {
                    continue;
                }
                KeywordReferent kwUn = new KeywordReferent();
                kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
                kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
                _setRank(kwUn, cur, max);
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1)
                {
                    Morph = t.Morph
                };
                kit.EmbedToken(rt1);
                t = rt1;
            }
            if (SortKeywordsByRank)
            {
                List <Pullenti.Ner.Referent> all = new List <Pullenti.Ner.Referent>(ad.Referents);
                all.Sort(new CompByRank());
                ad.Referents = all;
            }
            if (AnnotationMaxSentences > 0)
            {
                KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
                if (ano != null)
                {
                    ad.RegisterReferent(ano);
                }
            }
        }
Exemplo n.º 20
0
        public static Pullenti.Ner.ReferentToken TryAttachTerritory(List <TerrItemToken> li, Pullenti.Ner.Core.AnalyzerData ad, bool attachAlways = false, List <CityItemToken> cits = null, List <Pullenti.Ner.Geo.GeoReferent> exists = null)
        {
            if (li == null || li.Count == 0)
            {
                return(null);
            }
            TerrItemToken        exObj   = null;
            TerrItemToken        newName = null;
            List <TerrItemToken> adjList = new List <TerrItemToken>();
            TerrItemToken        noun    = null;
            TerrItemToken        addNoun = null;

            Pullenti.Ner.ReferentToken rt = _tryAttachMoscowAO(li, ad);
            if (rt != null)
            {
                return(rt);
            }
            if (li[0].TerminItem != null && li[0].TerminItem.CanonicText == "ТЕРРИТОРИЯ")
            {
                Pullenti.Ner.ReferentToken res2 = _tryAttachPureTerr(li, ad);
                return(res2);
            }
            if (li.Count == 2)
            {
                if (li[0].Rzd != null && li[1].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[1].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[0].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[0].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
                if (li[1].Rzd != null && li[0].RzdDir != null)
                {
                    Pullenti.Ner.Geo.GeoReferent rzd = new Pullenti.Ner.Geo.GeoReferent();
                    rzd.AddName(li[0].RzdDir);
                    rzd.AddTypTer(li[0].Kit.BaseLanguage);
                    rzd.AddSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_REF, li[1].Rzd.Referent, false, 0);
                    rzd.AddExtReferent(li[1].Rzd);
                    return(new Pullenti.Ner.ReferentToken(rzd, li[0].BeginToken, li[1].EndToken));
                }
            }
            bool canBeCityBefore = false;
            bool adjTerrBefore   = false;

            if (cits != null)
            {
                if (cits[0].Typ == CityItemToken.ItemType.City)
                {
                    canBeCityBefore = true;
                }
                else if (cits[0].Typ == CityItemToken.ItemType.Noun && cits.Count > 1)
                {
                    canBeCityBefore = true;
                }
            }
            int k;

            for (k = 0; k < li.Count; k++)
            {
                if (li[k].OntoItem != null)
                {
                    if (exObj != null || newName != null)
                    {
                        break;
                    }
                    if (noun != null)
                    {
                        if (k == 1)
                        {
                            if (noun.TerminItem.CanonicText == "РАЙОН" || noun.TerminItem.CanonicText == "ОБЛАСТЬ" || noun.TerminItem.CanonicText == "СОЮЗ")
                            {
                                if (li[k].OntoItem.Referent is Pullenti.Ner.Geo.GeoReferent)
                                {
                                    if ((li[k].OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent).IsState)
                                    {
                                        break;
                                    }
                                }
                                bool ok = false;
                                Pullenti.Ner.Token tt = li[k].EndToken.Next;
                                if (tt == null)
                                {
                                    ok = true;
                                }
                                else if (tt.IsCharOf(",."))
                                {
                                    ok = true;
                                }
                                if (!ok)
                                {
                                    ok = MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken);
                                }
                                if (!ok)
                                {
                                    Pullenti.Ner.Address.Internal.AddressItemToken adr = Pullenti.Ner.Address.Internal.AddressItemToken.TryParse(tt, null, false, false, null);
                                    if (adr != null)
                                    {
                                        if (adr.Typ == Pullenti.Ner.Address.Internal.AddressItemToken.ItemType.Street)
                                        {
                                            ok = true;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    break;
                                }
                            }
                            if (li[k].OntoItem != null)
                            {
                                if (noun.BeginToken.IsValue("МО", null) || noun.BeginToken.IsValue("ЛО", null))
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    exObj = li[k];
                }
                else if (li[k].TerminItem != null)
                {
                    if (noun != null)
                    {
                        break;
                    }
                    if (li[k].TerminItem.IsAlwaysPrefix && k > 0)
                    {
                        break;
                    }
                    if (k > 0 && li[k].IsDoubt)
                    {
                        if (li[k].BeginToken == li[k].EndToken && li[k].BeginToken.IsValue("ЗАО", null))
                        {
                            break;
                        }
                    }
                    if (li[k].TerminItem.IsAdjective || li[k].IsGeoInDictionary)
                    {
                        adjList.Add(li[k]);
                    }
                    else
                    {
                        if (exObj != null)
                        {
                            Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                            if (geo == null)
                            {
                                break;
                            }
                            if (exObj.IsAdjective && ((li[k].TerminItem.CanonicText == "СОЮЗ" || li[k].TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                            {
                                string str = exObj.OntoItem.ToString();
                                if (!str.Contains(li[k].TerminItem.CanonicText))
                                {
                                    return(null);
                                }
                            }
                            if (li[k].TerminItem.CanonicText == "РАЙОН" || li[k].TerminItem.CanonicText == "ОКРУГ" || li[k].TerminItem.CanonicText == "КРАЙ")
                            {
                                StringBuilder tmp = new StringBuilder();
                                foreach (Pullenti.Ner.Slot s in geo.Slots)
                                {
                                    if (s.TypeName == Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE)
                                    {
                                        tmp.AppendFormat("{0};", s.Value);
                                    }
                                }
                                if (!tmp.ToString().ToUpper().Contains(li[k].TerminItem.CanonicText))
                                {
                                    if (k != 1 || newName != null)
                                    {
                                        break;
                                    }
                                    newName             = li[0];
                                    newName.IsAdjective = true;
                                    newName.OntoItem    = null;
                                    exObj = null;
                                }
                            }
                        }
                        noun = li[k];
                        if (k == 0)
                        {
                            TerrItemToken tt = TerrItemToken.TryParse(li[k].BeginToken.Previous, null, true, false, null);
                            if (tt != null && tt.Morph.Class.IsAdjective)
                            {
                                adjTerrBefore = true;
                            }
                        }
                    }
                }
                else
                {
                    if (exObj != null)
                    {
                        break;
                    }
                    if (newName != null)
                    {
                        break;
                    }
                    newName = li[k];
                }
            }
            string name     = null;
            string altName  = null;
            string fullName = null;

            Pullenti.Ner.MorphCollection morph = null;
            if (exObj != null)
            {
                if (exObj.IsAdjective && !exObj.Morph.Language.IsEn && noun == null)
                {
                    if (attachAlways && exObj.EndToken.Next != null)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (exObj.EndToken.Next.IsCommaAnd)
                        {
                        }
                        else if (npt == null)
                        {
                        }
                        else
                        {
                            Pullenti.Ner.Address.Internal.StreetItemToken str = Pullenti.Ner.Address.Internal.StreetItemToken.TryParse(exObj.EndToken.Next, null, false, null, false);
                            if (str != null)
                            {
                                if (str.Typ == Pullenti.Ner.Address.Internal.StreetItemType.Noun && str.EndToken == npt.EndToken)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    else
                    {
                        CityItemToken cit = CityItemToken.TryParse(exObj.EndToken.Next, null, false, null);
                        if (cit != null && ((cit.Typ == CityItemToken.ItemType.Noun || cit.Typ == CityItemToken.ItemType.City)))
                        {
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(exObj.BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                            if (npt != null && npt.EndToken == cit.EndToken)
                            {
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        else if (exObj.BeginToken.IsValue("ПОДНЕБЕСНЫЙ", null))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                if (noun == null && exObj.CanBeCity)
                {
                    CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                    if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                    {
                        return(null);
                    }
                }
                if (exObj.IsDoubt && noun == null)
                {
                    bool ok2 = false;
                    if (_canBeGeoAfter(exObj.EndToken.Next))
                    {
                        ok2 = true;
                    }
                    else if (!exObj.CanBeSurname && !exObj.CanBeCity)
                    {
                        if ((exObj.EndToken.Next != null && exObj.EndToken.Next.IsChar(')') && exObj.BeginToken.Previous != null) && exObj.BeginToken.Previous.IsChar('('))
                        {
                            ok2 = true;
                        }
                        else if (exObj.Chars.IsLatinLetter && exObj.BeginToken.Previous != null)
                        {
                            if (exObj.BeginToken.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                            else if (exObj.BeginToken.Previous.IsValue("THE", null) && exObj.BeginToken.Previous.Previous != null && exObj.BeginToken.Previous.Previous.IsValue("IN", null))
                            {
                                ok2 = true;
                            }
                        }
                    }
                    if (!ok2)
                    {
                        CityItemToken cit0 = CityItemToken.TryParseBack(exObj.BeginToken.Previous);
                        if (cit0 != null && cit0.Typ != CityItemToken.ItemType.ProperName)
                        {
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(exObj.BeginToken.Previous))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                name  = exObj.OntoItem.CanonicText;
                morph = exObj.Morph;
            }
            else if (newName != null)
            {
                if (noun == null)
                {
                    return(null);
                }
                for (int j = 1; j < k; j++)
                {
                    if (li[j].IsNewlineBefore && !li[0].IsNewlineBefore)
                    {
                        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(li[j].BeginToken, false, false))
                        {
                        }
                        else
                        {
                            return(null);
                        }
                    }
                }
                morph = noun.Morph;
                if (newName.IsAdjective)
                {
                    if (noun.TerminItem.Acronym == "АО")
                    {
                        if (noun.BeginToken != noun.EndToken)
                        {
                            return(null);
                        }
                        if (newName.Morph.Gender != Pullenti.Morph.MorphGender.Feminie)
                        {
                            return(null);
                        }
                    }
                    Pullenti.Ner.Geo.GeoReferent geoBefore = null;
                    Pullenti.Ner.Token           tt0       = li[0].BeginToken.Previous;
                    if (tt0 != null && tt0.IsCommaAnd)
                    {
                        tt0 = tt0.Previous;
                    }
                    if (!li[0].IsNewlineBefore && tt0 != null)
                    {
                        geoBefore = tt0.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    }
                    if (li.IndexOf(noun) < li.IndexOf(newName))
                    {
                        if (noun.TerminItem.IsState)
                        {
                            return(null);
                        }
                        if (newName.CanBeSurname && geoBefore == null)
                        {
                            if (((noun.Morph.Case & newName.Morph.Case)).IsUndefined)
                            {
                                return(null);
                            }
                        }
                        if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                        {
                            if (noun.BeginToken != newName.BeginToken)
                            {
                                if (geoBefore == null)
                                {
                                    if (li.Count == 2 && _canBeGeoAfter(li[1].EndToken.Next))
                                    {
                                    }
                                    else if (li.Count == 3 && li[2].TerminItem != null && _canBeGeoAfter(li[2].EndToken.Next))
                                    {
                                    }
                                    else if (newName.IsGeoInDictionary)
                                    {
                                    }
                                    else if (newName.EndToken.IsNewlineAfter)
                                    {
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                            }
                        }
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(newName.EndToken, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null);
                        if (npt != null && npt.EndToken != newName.EndToken)
                        {
                            if (li.Count >= 3 && li[2].TerminItem != null && npt.EndToken == li[2].EndToken)
                            {
                                addNoun = li[2];
                            }
                            else
                            {
                                return(null);
                            }
                        }
                        Pullenti.Ner.ReferentToken rtp = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                        if (rtp != null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                    }
                    else
                    {
                        bool ok = false;
                        if (((k + 1) < li.Count) && li[k].TerminItem == null && li[k + 1].TerminItem != null)
                        {
                            ok = true;
                        }
                        else if ((k < li.Count) && li[k].OntoItem != null)
                        {
                            ok = true;
                        }
                        else if (k == li.Count && !newName.IsAdjInDictionary)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectBefore(li[0].BeginToken) || canBeCityBefore)
                        {
                            ok = true;
                        }
                        else if (MiscLocationHelper.CheckGeoObjectAfter(li[k - 1].EndToken, false))
                        {
                            ok = true;
                        }
                        else if (li.Count == 3 && k == 2)
                        {
                            CityItemToken cit = CityItemToken.TryParse(li[2].BeginToken, null, false, null);
                            if (cit != null)
                            {
                                if (cit.Typ == CityItemToken.ItemType.City || cit.Typ == CityItemToken.ItemType.Noun)
                                {
                                    ok = true;
                                }
                            }
                        }
                        else if (li.Count == 2)
                        {
                            ok = _canBeGeoAfter(li[li.Count - 1].EndToken.Next);
                        }
                        if (!ok && !li[0].IsNewlineBefore && !li[0].Chars.IsAllLower)
                        {
                            Pullenti.Ner.ReferentToken rt00 = li[0].Kit.ProcessReferent("PERSONPROPERTY", li[0].BeginToken.Previous);
                            if (rt00 != null)
                            {
                                ok = true;
                            }
                        }
                        if (noun.TerminItem != null && noun.TerminItem.IsStrong && newName.IsAdjective)
                        {
                            ok = true;
                        }
                        if (noun.IsDoubt && adjList.Count == 0 && geoBefore == null)
                        {
                            return(null);
                        }
                        name = Pullenti.Ner.Core.ProperNameHelper.GetNameEx(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false);
                        if (!ok && !attachAlways)
                        {
                            if (Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Adjective | Pullenti.Morph.MorphClass.Pronoun | Pullenti.Morph.MorphClass.Verb))
                            {
                                if (exists != null)
                                {
                                    foreach (Pullenti.Ner.Geo.GeoReferent e in exists)
                                    {
                                        if (e.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_NAME, name, true) != null)
                                        {
                                            ok = true;
                                            break;
                                        }
                                    }
                                }
                                if (!ok)
                                {
                                    return(null);
                                }
                            }
                        }
                        fullName = string.Format("{0} {1}", Pullenti.Ner.Core.ProperNameHelper.GetNameEx(li[0].BeginToken, noun.BeginToken.Previous, Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphCase.Undefined, noun.TerminItem.Gender, false, false), noun.TerminItem.CanonicText);
                    }
                }
                else
                {
                    if (!attachAlways || ((noun.TerminItem != null && noun.TerminItem.CanonicText == "ФЕДЕРАЦИЯ")))
                    {
                        bool isLatin = noun.Chars.IsLatinLetter && newName.Chars.IsLatinLetter;
                        if (li.IndexOf(noun) > li.IndexOf(newName))
                        {
                            if (!isLatin)
                            {
                                return(null);
                            }
                        }
                        if (!newName.IsDistrictName && !Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(newName.BeginToken, false, false))
                        {
                            if (adjList.Count == 0 && Pullenti.Ner.Core.MiscHelper.IsExistsInDictionary(newName.BeginToken, newName.EndToken, Pullenti.Morph.MorphClass.Noun | Pullenti.Morph.MorphClass.Pronoun))
                            {
                                if (li.Count == 2 && noun.IsCityRegion && (noun.WhitespacesAfterCount < 2))
                                {
                                }
                                else
                                {
                                    return(null);
                                }
                            }
                            if (!isLatin)
                            {
                                if ((noun.TerminItem.IsRegion && !attachAlways && ((!adjTerrBefore || newName.IsDoubt))) && !noun.IsCityRegion && !noun.TerminItem.IsSpecificPrefix)
                                {
                                    if (!MiscLocationHelper.CheckGeoObjectBefore(noun.BeginToken))
                                    {
                                        if (!noun.IsDoubt && noun.BeginToken != noun.EndToken)
                                        {
                                        }
                                        else if ((noun.TerminItem.IsAlwaysPrefix && li.Count == 2 && li[0] == noun) && li[1] == newName)
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                }
                                if (noun.IsDoubt && adjList.Count == 0)
                                {
                                    if (noun.TerminItem.Acronym == "МО" || noun.TerminItem.Acronym == "ЛО")
                                    {
                                        if (k == (li.Count - 1) && li[k].TerminItem != null)
                                        {
                                            addNoun = li[k];
                                            k++;
                                        }
                                        else if (li.Count == 2 && noun == li[0] && newName.ToString().EndsWith("совет"))
                                        {
                                        }
                                        else
                                        {
                                            return(null);
                                        }
                                    }
                                    else
                                    {
                                        return(null);
                                    }
                                }
                                Pullenti.Ner.ReferentToken pers = newName.Kit.ProcessReferent("PERSON", newName.BeginToken);
                                if (pers != null)
                                {
                                    return(null);
                                }
                            }
                        }
                    }
                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, newName.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
                    if (newName.BeginToken != newName.EndToken)
                    {
                        for (Pullenti.Ner.Token ttt = newName.BeginToken.Next; ttt != null && ttt.EndChar <= newName.EndChar; ttt = ttt.Next)
                        {
                            if (ttt.Chars.IsLetter)
                            {
                                TerrItemToken ty = TerrItemToken.TryParse(ttt, null, false, false, null);
                                if ((ty != null && ty.TerminItem != null && noun != null) && ((ty.TerminItem.CanonicText.Contains(noun.TerminItem.CanonicText) || noun.TerminItem.CanonicText.Contains(ty.TerminItem.CanonicText))))
                                {
                                    name = Pullenti.Ner.Core.MiscHelper.GetTextValue(newName.BeginToken, ttt.Previous, Pullenti.Ner.Core.GetTextAttr.No);
                                    break;
                                }
                            }
                        }
                    }
                    if (adjList.Count > 0)
                    {
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(adjList[0].BeginToken, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt != null && npt.EndToken == noun.EndToken)
                        {
                            altName = string.Format("{0} {1}", npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false), name);
                        }
                    }
                }
            }
            else
            {
                if ((li.Count == 1 && noun != null && noun.EndToken.Next != null) && (noun.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
                {
                    Pullenti.Ner.Geo.GeoReferent g = noun.EndToken.Next.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                    if (noun.TerminItem != null)
                    {
                        string tyy = noun.TerminItem.CanonicText.ToLower();
                        bool   ooo = false;
                        if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, tyy, true) != null)
                        {
                            ooo = true;
                        }
                        else if (tyy.EndsWith("район") && g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, "район", true) != null)
                        {
                            ooo = true;
                        }
                        if (ooo)
                        {
                            return new Pullenti.Ner.ReferentToken(g, noun.BeginToken, noun.EndToken.Next)
                                   {
                                       Morph = noun.BeginToken.Morph
                                   }
                        }
                        ;
                    }
                }
                if ((li.Count == 1 && noun == li[0] && li[0].TerminItem != null) && TerrItemToken.TryParse(li[0].EndToken.Next, null, true, false, null) == null && TerrItemToken.TryParse(li[0].BeginToken.Previous, null, true, false, null) == null)
                {
                    if (li[0].Morph.Number == Pullenti.Morph.MorphNumber.Plural)
                    {
                        return(null);
                    }
                    int    cou = 0;
                    string str = li[0].TerminItem.CanonicText.ToLower();
                    for (Pullenti.Ner.Token tt = li[0].BeginToken.Previous; tt != null; tt = tt.Previous)
                    {
                        if (tt.IsNewlineAfter)
                        {
                            cou += 10;
                        }
                        else
                        {
                            cou++;
                        }
                        if (cou > 500)
                        {
                            break;
                        }
                        Pullenti.Ner.Geo.GeoReferent g = tt.GetReferent() as Pullenti.Ner.Geo.GeoReferent;
                        if (g == null)
                        {
                            continue;
                        }
                        bool ok = true;
                        cou = 0;
                        for (tt = li[0].EndToken.Next; tt != null; tt = tt.Next)
                        {
                            if (tt.IsNewlineBefore)
                            {
                                cou += 10;
                            }
                            else
                            {
                                cou++;
                            }
                            if (cou > 500)
                            {
                                break;
                            }
                            TerrItemToken tee = TerrItemToken.TryParse(tt, null, true, false, null);
                            if (tee == null)
                            {
                                continue;
                            }
                            ok = false;
                            break;
                        }
                        if (ok)
                        {
                            for (int ii = 0; g != null && (ii < 3); g = g.Higher, ii++)
                            {
                                if (g.FindSlot(Pullenti.Ner.Geo.GeoReferent.ATTR_TYPE, str, true) != null)
                                {
                                    return new Pullenti.Ner.ReferentToken(g, li[0].BeginToken, li[0].EndToken)
                                           {
                                               Morph = noun.BeginToken.Morph
                                           }
                                }
                                ;
                            }
                        }
                        break;
                    }
                }
                return(null);
            }
            Pullenti.Ner.Geo.GeoReferent ter = null;
            if (exObj != null && (exObj.Tag is Pullenti.Ner.Geo.GeoReferent))
            {
                ter = exObj.Tag as Pullenti.Ner.Geo.GeoReferent;
            }
            else
            {
                ter = new Pullenti.Ner.Geo.GeoReferent();
                if (exObj != null)
                {
                    Pullenti.Ner.Geo.GeoReferent geo = exObj.OntoItem.Referent as Pullenti.Ner.Geo.GeoReferent;
                    if (geo != null && !geo.IsCity)
                    {
                        ter.MergeSlots2(geo, li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                        ter.AddName(name);
                    }
                    if (noun == null && exObj.CanBeCity)
                    {
                        ter.AddTypCity(li[0].Kit.BaseLanguage);
                    }
                    else
                    {
                    }
                }
                else if (newName != null)
                {
                    ter.AddName(name);
                    if (altName != null)
                    {
                        ter.AddName(altName);
                    }
                }
                if (noun != null)
                {
                    if (noun.TerminItem.CanonicText == "АО")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "АВТОНОМНИЙ ОКРУГ" : "АВТОНОМНЫЙ ОКРУГ"));
                    }
                    else if (noun.TerminItem.CanonicText == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" || noun.TerminItem.CanonicText == "МУНІЦИПАЛЬНЕ ЗБОРИ")
                    {
                        ter.AddTyp((li[0].Kit.BaseLanguage.IsUa ? "МУНІЦИПАЛЬНЕ УТВОРЕННЯ" : "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"));
                    }
                    else if (noun.TerminItem.Acronym == "МО" && addNoun != null)
                    {
                        ter.AddTyp(addNoun.TerminItem.CanonicText);
                    }
                    else
                    {
                        if (noun.TerminItem.CanonicText == "СОЮЗ" && exObj != null && exObj.EndChar > noun.EndChar)
                        {
                            return new Pullenti.Ner.ReferentToken(ter, exObj.BeginToken, exObj.EndToken)
                                   {
                                       Morph = exObj.Morph
                                   }
                        }
                        ;
                        ter.AddTyp(noun.TerminItem.CanonicText);
                        if (noun.TerminItem.IsRegion && ter.IsState)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                        }
                    }
                }
                if (ter.IsState && ter.IsRegion)
                {
                    foreach (TerrItemToken a in adjList)
                    {
                        if (a.TerminItem.IsRegion)
                        {
                            ter.AddTypReg(li[0].Kit.BaseLanguage);
                            break;
                        }
                    }
                }
                if (ter.IsState)
                {
                    if (fullName != null)
                    {
                        ter.AddName(fullName);
                    }
                }
            }
            Pullenti.Ner.ReferentToken res = new Pullenti.Ner.ReferentToken(ter, li[0].BeginToken, li[k - 1].EndToken);
            if (noun != null && noun.Morph.Class.IsNoun)
            {
                res.Morph = noun.Morph;
            }
            else
            {
                res.Morph = new Pullenti.Ner.MorphCollection();
                for (int ii = 0; ii < k; ii++)
                {
                    foreach (Pullenti.Morph.MorphBaseInfo v in li[ii].Morph.Items)
                    {
                        Pullenti.Morph.MorphBaseInfo bi = new Pullenti.Morph.MorphBaseInfo();
                        bi.CopyFrom(v);
                        if (noun != null)
                        {
                            if (bi.Class.IsAdjective)
                            {
                                bi.Class = Pullenti.Morph.MorphClass.Noun;
                            }
                        }
                        res.Morph.AddItem(bi);
                    }
                }
            }
            if (li[0].TerminItem != null && li[0].TerminItem.IsSpecificPrefix)
            {
                res.BeginToken = li[0].EndToken.Next;
            }
            if (addNoun != null && addNoun.EndChar > res.EndChar)
            {
                res.EndToken = addNoun.EndToken;
            }
            if ((res.BeginToken.Previous is Pullenti.Ner.TextToken) && (res.WhitespacesBeforeCount < 2))
            {
                Pullenti.Ner.TextToken tt = res.BeginToken.Previous as Pullenti.Ner.TextToken;
                if (tt.Term == "АР")
                {
                    foreach (string ty in ter.Typs)
                    {
                        if (ty.Contains("республика") || ty.Contains("республіка"))
                        {
                            res.BeginToken = tt;
                            break;
                        }
                    }
                }
            }
            return(res);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Wraps the referent carried by a token into a <c>KeywordReferent</c>, registers it
        /// in the analyzer data and embeds the resulting token over the original one.
        /// </summary>
        /// <param name="ad">Analyzer data used to register every keyword referent created here.</param>
        /// <param name="t">Token to examine; only <c>ReferentToken</c> instances carrying a referent are processed.</param>
        /// <param name="cur">Forwarded unchanged to <c>_setRank</c> and to the recursive calls (presumably the current position; confirm against the caller).</param>
        /// <param name="max">Forwarded unchanged to <c>_setRank</c> and to the recursive calls (presumably the upper bound for <paramref name="cur"/>; confirm against the caller).</param>
        /// <returns>
        /// The newly embedded keyword token, or <paramref name="t"/> itself when the token is not a
        /// referent token, has no referent, or its referent is a phone, URI, bank data,
        /// "DATE", "DATERANGE" or "BOOKLINKREF" referent.
        /// </returns>
        Pullenti.Ner.Token _addReferents(Pullenti.Ner.Core.AnalyzerData ad, Pullenti.Ner.Token t, int cur, int max)
        {
            // Nothing to do unless the token is a referent token that really carries a referent.
            Pullenti.Ner.Referent r = (t is Pullenti.Ner.ReferentToken) ? t.GetReferent() : null;
            if (r == null)
            {
                return(t);
            }

            if (r is Pullenti.Ner.Denomination.DenominationReferent)
            {
                // Denomination: every VALUE slot is copied as a NORMAL slot of the keyword.
                Pullenti.Ner.Denomination.DenominationReferent denomination = (Pullenti.Ner.Denomination.DenominationReferent)r;
                KeywordReferent denominationKeyword = new KeywordReferent();
                denominationKeyword.Typ = KeywordType.Referent;
                foreach (Pullenti.Ner.Slot slot in denomination.Slots)
                {
                    if (s_IsValueSlot(slot))
                    {
                        denominationKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, slot.Value, false, 0);
                    }
                }
                denominationKeyword.AddSlot(KeywordReferent.ATTR_REF, denomination, false, 0);
                Pullenti.Ner.ReferentToken denominationToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(denominationKeyword), t, t);
                t.Kit.EmbedToken(denominationToken);
                return(denominationToken);
            }

            // Phone, URI and bank data referents are left as they are.
            bool isSkippedKind = (r is Pullenti.Ner.Phone.PhoneReferent) || (r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Bank.BankDataReferent);
            if (isSkippedKind)
            {
                return(t);
            }

            if (r is Pullenti.Ner.Money.MoneyReferent)
            {
                // Money: the keyword is an Object whose normal form is the currency.
                Pullenti.Ner.Money.MoneyReferent money = (Pullenti.Ner.Money.MoneyReferent)r;
                KeywordReferent moneyKeyword = new KeywordReferent();
                moneyKeyword.Typ = KeywordType.Object;
                moneyKeyword.AddSlot(KeywordReferent.ATTR_NORMAL, money.Currency, false, 0);
                Pullenti.Ner.ReferentToken moneyToken = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(moneyKeyword), t, t);
                t.Kit.EmbedToken(moneyToken);
                return(moneyToken);
            }

            // Dates, date ranges and book link references are not turned into keywords.
            switch (r.TypeName)
            {
                case "DATE":
                case "DATERANGE":
                case "BOOKLINKREF":
                    return(t);
            }

            // Process referent tokens nested inside this one before wrapping the token itself.
            Pullenti.Ner.MetaToken container = t as Pullenti.Ner.MetaToken;
            Pullenti.Ner.Token inner = container.BeginToken;
            while (inner != null && inner.EndChar <= t.EndChar)
            {
                if (inner is Pullenti.Ner.ReferentToken)
                {
                    this._addReferents(ad, inner, cur, max);
                }
                inner = inner.Next;
            }

            KeywordReferent keyword = new KeywordReferent();
            keyword.Typ = KeywordType.Referent;

            // For GEO referents the "ALPHA2" value is preferred; otherwise (or when it is
            // absent) the referent's own string form is used.
            string geoCode = (r.TypeName == "GEO") ? r.GetStringValue("ALPHA2") : null;
            string normalized = geoCode ?? r.ToString(true, null, 0);
            if (normalized != null)
            {
                keyword.AddSlot(KeywordReferent.ATTR_NORMAL, normalized.ToUpper(), false, 0);
            }
            keyword.AddSlot(KeywordReferent.ATTR_REF, t.GetReferent(), false, 0);
            _setRank(keyword, cur, max);

            Pullenti.Ner.ReferentToken wrapped = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(keyword), t, t);
            t.Kit.EmbedToken(wrapped);
            return(wrapped);
        }

        /// <summary>
        /// Tells whether a slot is a denomination VALUE slot.
        /// </summary>
        static bool s_IsValueSlot(Pullenti.Ner.Slot slot)
        {
            return slot.TypeName == Pullenti.Ner.Denomination.DenominationReferent.ATTR_VALUE;
        }