Exemplo n.º 1
0
        /// <summary>
        /// Reports analysis progress to the <c>Progress</c> subscribers and, after each
        /// reported change, asks the <c>Cancel</c> subscribers whether to stop.
        /// </summary>
        /// <param name="pos">Current position; nothing is reported unless 0 &lt;= pos &lt;= max.</param>
        /// <param name="max">Total amount of work; nothing is reported unless it is positive.</param>
        /// <param name="kit">Analysis kit, passed as the sender of the <c>Cancel</c> event.</param>
        /// <returns>
        /// <c>false</c> when a <c>Cancel</c> handler set <c>CancelEventArgs.Cancel</c>; otherwise <c>true</c>.
        /// </returns>
        protected bool OnProgress(int pos, int max, Pullenti.Ner.Core.AnalysisKit kit)
        {
            bool ret = true;

            if (Progress != null)
            {
                if (pos >= 0 && pos <= max && max > 0)
                {
                    // 64-bit arithmetic keeps 100 * pos from overflowing, so a single formula is
                    // valid for every max. The previous large-max branch divided by (max / 1000)
                    // and therefore produced values in the range 0..1000 instead of 0..100.
                    int percent = (int)((100L * pos) / max);
                    if (percent != lastPercent)
                    {
                        ProgressChangedEventArgs arg = new ProgressChangedEventArgs(percent, null);
                        Progress(this, arg);
                        if (Cancel != null)
                        {
                            // Cancellation is polled only when the percentage actually changed.
                            CancelEventArgs cea = new CancelEventArgs();
                            Cancel(kit, cea);
                            ret = !cea.Cancel;
                        }
                    }
                    lastPercent = percent;
                }
            }
            return ret;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Walks the token chain of <paramref name="kit"/> and embeds every referent token
 /// produced by <c>TryAttach</c>, registering its referent in this analyzer's data.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.ReferentToken found = null;

         // A letter token may open an ontology term that precedes the entity itself.
         Pullenti.Ner.Core.TerminToken prefix = null;
         if (cur.Chars.IsLetter)
         {
             prefix = m_Ontology.TryParse(cur, Pullenti.Ner.Core.TerminParseAttr.No);
         }
         if (prefix != null)
         {
             // Step over an optional ':' between the term and what follows it.
             Pullenti.Ner.Token after = prefix.EndToken.Next;
             if (after != null && after.IsChar(':'))
             {
                 after = after.Next;
             }
             found = this.TryAttach(after, true);
             if (found != null)
             {
                 // Widen the result so that it also covers the leading term.
                 found.BeginToken = cur;
             }
         }

         // Without a prefixed match, try the token itself when it is a referent token or starts a line.
         if (found == null)
         {
             if ((cur is Pullenti.Ner.ReferentToken) || cur.IsNewlineBefore)
             {
                 found = this.TryAttach(cur, false);
             }
         }
         if (found == null)
         {
             continue;
         }

         found.Referent = data.RegisterReferent(found.Referent);
         kit.EmbedToken(found);
         cur = found;
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Creates a token spanning <paramref name="begin"/>..<paramref name="end"/> that refers to <paramref name="entity"/>.
 /// </summary>
 /// <param name="entity">Referent stored in <c>Referent</c>.</param>
 /// <param name="begin">First token of the span, forwarded to the base constructor.</param>
 /// <param name="end">Last token of the span, forwarded to the base constructor.</param>
 /// <param name="kit">Optional analysis kit, forwarded to the base constructor.</param>
 public ReferentToken(Referent entity, Token begin, Token end, Pullenti.Ner.Core.AnalysisKit kit = null) : base(begin, end, kit)
 {
     Referent = entity;

     // Make sure Morph is never left null after construction.
     if (Morph != null)
     {
         return;
     }
     Morph = new MorphCollection();
 }
Exemplo n.º 4
0
 /// <summary>
 /// Restores this token from <paramref name="stream"/>: the base-class data first, then
 /// <c>Term</c>, <c>Lemma</c> and the two morphology length values, in that order.
 /// </summary>
 /// <param name="stream">Source stream, positioned at the start of this token's data.</param>
 /// <param name="kit">Analysis kit forwarded to the base implementation.</param>
 /// <param name="vers">Serialization format version forwarded to the base implementation.</param>
 internal override void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
 {
     base.Deserialize(stream, kit, vers);
     // The read order below must stay in step with the writer: two strings, then two shorts.
     Term  = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
     Lemma = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
     InvariantPrefixLengthOfMorphVars = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
     MaxLengthOfMorphVars             = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeShort(stream);
 }
Exemplo n.º 5
0
        /// <summary>
        /// Scans the token chain line by line, builds a <c>GoodReferent</c> from the attribute
        /// tokens parsed at each line start, and embeds the result into the chain.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the text, the tokens and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

            // Progress is reported once per chunk of this many characters.
            int chunkSize = 100000;
            int chunkCount = (((kit.Sofa.Text.Length + chunkSize) - 1)) / chunkSize;
            chunkCount = (chunkCount == 0 ? 1 : chunkCount);

            int chunksDone = 0;
            int threshold = 0;
            List<GoodReferent> collected = new List<GoodReferent>();

            for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
            {
                // Only tokens that start a line are candidates.
                if (!cur.IsNewlineBefore)
                {
                    continue;
                }

                if (cur.BeginChar > threshold)
                {
                    threshold += chunkSize;
                    chunksDone++;
                    bool keepGoing = this.OnProgress(chunksDone, chunkCount, kit);
                    if (!keepGoing)
                    {
                        break;
                    }
                }

                // Step over one leading non-letter token when something follows it.
                if (!cur.Chars.IsLetter && cur.Next != null)
                {
                    cur = cur.Next;
                }

                List<Pullenti.Ner.ReferentToken> attrs = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParseList(cur);
                bool nothingParsed = attrs == null || attrs.Count == 0;
                if (nothingParsed)
                {
                    continue;
                }

                GoodReferent good = new GoodReferent();
                foreach (Pullenti.Ner.ReferentToken attr in attrs)
                {
                    attr.Referent = data.RegisterReferent(attr.Referent);
                    // Each registered attribute is added to the good only once.
                    bool alreadyPresent = good.FindSlot(GoodReferent.ATTR_ATTR, attr.Referent, true) != null;
                    if (!alreadyPresent)
                    {
                        good.AddSlot(GoodReferent.ATTR_ATTR, attr.Referent, false, 0);
                    }
                    kit.EmbedToken(attr);
                }
                collected.Add(good);

                // Wrap all the attribute tokens into one token that refers to the good.
                Pullenti.Ner.ReferentToken whole = new Pullenti.Ner.ReferentToken(good, attrs[0], attrs[attrs.Count - 1]);
                kit.EmbedToken(whole);
                cur = whole;
            }

            // The goods are added to the analyzer data only after the scan has finished.
            foreach (GoodReferent registered in collected)
            {
                data.Referents.Add(registered);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Restores the base-class data and then the link to the referent, which is stored
        /// as a 1-based index into <c>kit.Entities</c>.
        /// </summary>
        /// <param name="stream">Source stream.</param>
        /// <param name="kit">Analysis kit whose <c>Entities</c> list is indexed.</param>
        /// <param name="vers">Serialization format version forwarded to the base implementation.</param>
        internal override void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
        {
            base.Deserialize(stream, kit, vers);

            int entityNumber = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
            if (entityNumber <= 0)
            {
                // A non-positive number leaves Referent untouched.
                return;
            }
            Referent = kit.Entities[entityNumber - 1];
        }
Exemplo n.º 7
0
        /// <summary>
        /// Runs <c>_process</c> from the first token and registers the resulting
        /// title-page referent, if one was produced.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

            // The end token reported by _process is not used here.
            Pullenti.Ner.Token endToken;
            TitlePageReferent titlePage = _process(kit.FirstToken, 0, kit, out endToken);
            if (titlePage == null)
            {
                return;
            }
            data.RegisterReferent(titlePage);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Loads the short-name table from the "ShortNames.txt" resource into <c>m_Shorts_Names</c>.
        /// </summary>
        /// <remarks>
        /// Each line of the resource is read as: a leading token ("F" selects the feminine gender,
        /// anything else the masculine one), then the name, then the short forms that map to that name.
        /// Only the first call does any work. Tokens that are not text tokens are skipped, and a
        /// resource that ends right after a leading token stops the load, instead of raising a
        /// NullReferenceException.
        /// </remarks>
        public static void Initialize()
        {
            if (m_Inited)
            {
                return;
            }
            m_Inited = true;
            string obj = ResourceHelper.GetString("ShortNames.txt");
            if (obj == null)
            {
                return;
            }

            Pullenti.Ner.Core.AnalysisKit kit = new Pullenti.Ner.Core.AnalysisKit(new Pullenti.Ner.SourceOfAnalysis(obj));
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                if (!t.IsNewlineBefore)
                {
                    continue;
                }
                Pullenti.Morph.MorphGender g = (t.IsValue("F", null) ? Pullenti.Morph.MorphGender.Feminie : Pullenti.Morph.MorphGender.Masculine);

                t = t.Next;
                if (t == null)
                {
                    // The resource ended right after the leading token: there is no name to read.
                    break;
                }
                Pullenti.Ner.TextToken nameToken = t as Pullenti.Ner.TextToken;
                if (nameToken == null)
                {
                    // Not a textual name: drop this line; the outer loop moves on to the next line start.
                    continue;
                }
                string nam = nameToken.Term;

                // Collect the short forms up to the end of the current line.
                List<string> shos = new List<string>();
                for (t = t.Next; t != null; t = t.Next)
                {
                    if (t.IsNewlineBefore)
                    {
                        break;
                    }
                    Pullenti.Ner.TextToken shortToken = t as Pullenti.Ner.TextToken;
                    if (shortToken != null)
                    {
                        shos.Add(shortToken.Term);
                    }
                }

                foreach (string s in shos)
                {
                    List<ShortnameVar> li = null;
                    if (!m_Shorts_Names.TryGetValue(s, out li))
                    {
                        m_Shorts_Names.Add(s, (li = new List<ShortnameVar>()));
                    }
                    li.Add(new ShortnameVar()
                    {
                        Name = nam, Gender = g
                    });
                }

                if (t == null)
                {
                    break;
                }
                // Step back so that the outer loop's t.Next lands on the first token of the next line.
                t = t.Previous;
            }
        }
Exemplo n.º 9
0
 /// <summary>
 /// Restores the common token state from <paramref name="stream"/>: character span,
 /// attribute bits, character info and the morphology collection, in that order.
 /// </summary>
 /// <param name="stream">Source stream.</param>
 /// <param name="kit">Analysis kit stored in <c>Kit</c>.</param>
 /// <param name="vers">Serialization format version; not used by this base implementation.</param>
 internal virtual void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
 {
     Kit = kit;

     m_BeginChar = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
     m_EndChar = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);

     // Attribute bits are stored as an int and narrowed to a short.
     int rawAttrs = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
     m_Attrs = (short)rawAttrs;

     // The character info value is stored the same way.
     Pullenti.Morph.CharsInfo charsInfo = new Pullenti.Morph.CharsInfo();
     int rawChars = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
     charsInfo.Value = (short)rawChars;
     Chars = charsInfo;

     m_Morph = new MorphCollection();
     m_Morph.Deserialize(stream);
 }
Exemplo n.º 10
0
 /// <summary>
 /// Tries <c>TryParse</c> at every token of the chain and embeds each result,
 /// registering its referent in this analyzer's data.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.ReferentToken parsed = TryParse(cur);
         if (parsed == null)
         {
             continue;
         }

         parsed.Referent = data.RegisterReferent(parsed.Referent);
         kit.EmbedToken(parsed);
         // Continue after the embedded token.
         cur = parsed;
     }
 }
Exemplo n.º 11
0
 /// <summary>
 /// Continues the processing of a result that was produced by another processor.
 /// </summary>
 /// <param name="ar">Result produced by the other processor; a null value is ignored.</param>
 public void ProcessNext(AnalysisResult ar)
 {
     if (ar != null)
     {
         // Build a kit bound to this processor and seeded from the existing result.
         Pullenti.Ner.Core.AnalysisKit kit = new Pullenti.Ner.Core.AnalysisKit();
         kit.Processor = this;
         kit.Ontology = ar.Ontology;
         kit.InitFrom(ar);

         this._process2(kit, ar, false);
         this._createRes(kit, ar, ar.Ontology, false);

         // Hand the token chain of the kit back to the result.
         ar.FirstToken = kit.FirstToken;
     }
 }
Exemplo n.º 12
0
        /// <summary>
        /// Copies the referents of every analyzer into <c>ar.Entities</c>, specific analyzers first,
        /// and attaches them to the external ontology when one is supplied.
        /// </summary>
        /// <param name="kit">Analysis kit holding the per-analyzer data.</param>
        /// <param name="ar">Result that receives the entities and the log message.</param>
        /// <param name="extOntology">External ontology to attach referents to; may be null.</param>
        /// <param name="noLog">When true, the summary message is neither raised nor logged.</param>
        internal void _createRes(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, ExtOntology extOntology, bool noLog)
        {
            Stopwatch timer = Stopwatch.StartNew();
            int attachedCount = 0;

            // Pass 0 takes the specific analyzers, pass 1 takes all the others.
            for (int pass = 0; pass < 2; pass++)
            {
                bool wantSpecific = (pass == 0);
                foreach (Analyzer analyzer in Analyzers)
                {
                    if (analyzer.IsSpecific != wantSpecific)
                    {
                        continue;
                    }

                    Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
                    if (data == null || data.Referents.Count <= 0)
                    {
                        continue;
                    }

                    if (extOntology != null)
                    {
                        foreach (Referent referent in data.Referents)
                        {
                            // Referents that already carry ontology items are left alone.
                            if (referent.OntologyItems != null)
                            {
                                continue;
                            }
                            var items = extOntology.AttachReferent(referent);
                            referent.OntologyItems = items;
                            if (items != null)
                            {
                                attachedCount++;
                            }
                        }
                    }
                    ar.Entities.AddRange(data.Referents);
                }
            }
            timer.Stop();

            if (extOntology == null || noLog)
            {
                return;
            }
            string msg = string.Format("Привязано {0} объектов к внешней отнологии ({1} элементов) за {2}", attachedCount, extOntology.Items.Count, OutSecs(timer.ElapsedMilliseconds));
            this.OnMessage(msg);
            ar.Log.Add(msg);
        }
Exemplo n.º 13
0
 /// <summary>
 /// Restores this token from <paramref name="stream"/>: the base-class data, then the
 /// numeric value, then the spelling type.
 /// </summary>
 /// <param name="stream">Source stream.</param>
 /// <param name="kit">Analysis kit forwarded to the base implementation.</param>
 /// <param name="vers">
 /// Serialization format version. Version 0 stores the value as 8 raw bytes of an Int64;
 /// any other version stores it as a string.
 /// </param>
 /// <exception cref="EndOfStreamException">
 /// The stream ended before the 8 bytes of a version-0 value could be read.
 /// </exception>
 internal override void Deserialize(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
 {
     base.Deserialize(stream, kit, vers);
     if (vers == 0)
     {
         // Stream.Read may return fewer bytes than requested, so keep reading until the
         // buffer is full rather than decoding a partially filled buffer.
         byte[] buf = new byte[8];
         int filled = 0;
         while (filled < buf.Length)
         {
             int got = stream.Read(buf, filled, buf.Length - filled);
             if (got <= 0)
             {
                 throw new EndOfStreamException("Unexpected end of stream while reading an Int64 value.");
             }
             filled += got;
         }
         long lo = BitConverter.ToInt64(buf, 0);
         // Invariant formatting keeps the stored text independent of the current culture
         // (the negative sign differs between cultures).
         Value = lo.ToString(System.Globalization.CultureInfo.InvariantCulture);
     }
     else
     {
         Value = Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeString(stream);
     }
     Typ = (NumberSpellingType)Pullenti.Ner.Core.Internal.SerializerHelper.DeserializeInt(stream);
 }
Exemplo n.º 14
0
 /// <summary>
 /// Builds a document fragment from the first token and lets it create its referent
 /// in this analyzer's data. Does nothing when there are no tokens or no document is produced.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Token first = kit.FirstToken;
     if (first == null)
     {
         return;
     }

     Pullenti.Ner.Instrument.Internal.FragToken document = Pullenti.Ner.Instrument.Internal.FragToken.CreateDocument(first, 0, InstrumentKind.Undefined);
     if (document != null)
     {
         // The created referent is not needed here; only the call itself matters.
         document.CreateReferent(kit.GetAnalyzerData(this));
     }
 }
Exemplo n.º 15
0
 /// <summary>
 /// Tries <c>TryParseThesis</c> at every token that can start a sentence and embeds each result.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain.</param>
 /// <param name="ad">Analyzer data in which the found referents are registered.</param>
 public static void Process(Pullenti.Ner.Core.AnalysisKit kit, Pullenti.Ner.Core.AnalyzerData ad)
 {
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         Pullenti.Ner.ReferentToken thesis = null;
         if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
         {
             thesis = TryParseThesis(cur);
         }

         if (thesis != null)
         {
             thesis.Referent = ad.RegisterReferent(thesis.Referent);
             kit.EmbedToken(thesis);
             // Continue after the embedded token.
             cur = thesis;
         }
     }
 }
Exemplo n.º 16
0
 /// <summary>
 /// Parses a list of named-item tokens at every position of the chain and embeds
 /// whatever <c>_tryAttach</c> builds from that list.
 /// </summary>
 /// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerDataWithOntology data = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
     for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
     {
         List<Pullenti.Ner.Named.Internal.NamedItemToken> items = Pullenti.Ner.Named.Internal.NamedItemToken.TryParseList(cur, data.LocalOntology);
         bool hasItems = items != null && items.Count != 0;

         Pullenti.Ner.ReferentToken named = null;
         if (hasItems)
         {
             named = _tryAttach(items);
         }
         if (named == null)
         {
             continue;
         }

         named.Referent = data.RegisterReferent(named.Referent);
         kit.EmbedToken(named);
         cur = named;
     }
 }
Exemplo n.º 17
0
        /// <summary>
        /// Parses a good-attribute token at every position of the chain and embeds a referent
        /// token for each attribute that could be created, reporting progress along the way.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the text, the tokens and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

            // Progress is reported once per chunk of this many characters.
            int chunkSize = 100000;
            int chunkCount = (((kit.Sofa.Text.Length + chunkSize) - 1)) / chunkSize;
            chunkCount = (chunkCount == 0 ? 1 : chunkCount);

            int chunksDone = 0;
            int threshold = 0;

            for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
            {
                if (cur.BeginChar > threshold)
                {
                    threshold += chunkSize;
                    chunksDone++;
                    bool keepGoing = this.OnProgress(chunksDone, chunkCount, kit);
                    if (!keepGoing)
                    {
                        break;
                    }
                }

                Pullenti.Ner.Goods.Internal.GoodAttrToken parsed = Pullenti.Ner.Goods.Internal.GoodAttrToken.TryParse(cur, null, true, true);
                if (parsed == null)
                {
                    continue;
                }

                GoodAttributeReferent attr = parsed._createAttr();
                if (attr != null)
                {
                    Pullenti.Ner.ReferentToken embedded = new Pullenti.Ner.ReferentToken(attr, parsed.BeginToken, parsed.EndToken);
                    embedded.Referent = data.RegisterReferent(attr);
                    kit.EmbedToken(embedded);
                    cur = embedded;
                }
                else
                {
                    // No attribute could be created: just move past the parsed span.
                    cur = parsed.EndToken;
                }
            }
        }
Exemplo n.º 18
0
        /// <summary>
        /// Reads one token from <paramref name="stream"/>. A leading type code selects the class:
        /// 0 = no token, 1 = text, 2 = number, 3 = referent, anything else = plain meta token.
        /// </summary>
        /// <param name="stream">Source stream.</param>
        /// <param name="kit">Analysis kit passed to the created token.</param>
        /// <param name="vers">Serialization format version.</param>
        /// <returns>The restored token, or null when the type code is 0.</returns>
        static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
        {
            short kind = DeserializeShort(stream);

            Pullenti.Ner.Token token;
            switch (kind)
            {
                case 0:
                    return null;
                case 1:
                    token = new Pullenti.Ner.TextToken(null, kit);
                    break;
                case 2:
                    token = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
                    break;
                case 3:
                    token = new Pullenti.Ner.ReferentToken(null, null, null, kit);
                    break;
                default:
                    token = new Pullenti.Ner.MetaToken(null, null, kit);
                    break;
            }
            token.Deserialize(stream, kit, vers);

            Pullenti.Ner.MetaToken meta = token as Pullenti.Ner.MetaToken;
            if (meta == null)
            {
                return token;
            }

            // A meta token is followed by its inner token chain; link its first and last tokens.
            Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
            if (inner != null)
            {
                meta.m_BeginToken = inner;
                while (inner != null)
                {
                    meta.m_EndToken = inner;
                    inner = inner.Next;
                }
            }
            return token;
        }
Exemplo n.º 19
0
        /// <summary>
        /// Reads a counted sequence of tokens from <paramref name="stream"/> and links them into a chain.
        /// </summary>
        /// <param name="stream">Source stream, positioned at the token count.</param>
        /// <param name="kit">Analysis kit passed to every created token.</param>
        /// <param name="vers">Serialization format version.</param>
        /// <returns>The first token of the chain, or null when no token was read.</returns>
        public static Pullenti.Ner.Token DeserializeTokens(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers)
        {
            int count = DeserializeInt(stream);
            if (count == 0)
            {
                return null;
            }

            Pullenti.Ner.Token head = null;
            Pullenti.Ner.Token tail = null;
            for (int remaining = count; remaining > 0; remaining--)
            {
                Pullenti.Ner.Token item = DeserializeToken(stream, kit, vers);
                if (item == null)
                {
                    continue;
                }

                if (head == null)
                {
                    // First real token: it becomes the head of the chain.
                    head = item;
                }
                else
                {
                    item.Previous = tail;
                }
                tail = item;
            }

            // Once the chain is linked, fix up the neighbours of every meta token.
            for (Pullenti.Ner.Token node = head; node != null; node = node.Next)
            {
                Pullenti.Ner.MetaToken meta = node as Pullenti.Ner.MetaToken;
                if (meta != null)
                {
                    _corrPrevNext(meta, node.Previous, node.Next);
                }
            }
            return head;
        }
Exemplo n.º 20
0
        /// <summary>
        /// Splits the text into mail lines, groups the lines into blocks, and for every block
        /// registers <c>MailReferent</c> objects of kind Head, Hello, Body and Tail.
        /// </summary>
        /// <param name="kit">Analysis kit that supplies the token chain and the analyzer data.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            List <Pullenti.Ner.Mail.Internal.MailLine> lines = new List <Pullenti.Ner.Mail.Internal.MailLine>();

            // Phase 1: parse the whole token chain into a flat list of mail lines.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = Pullenti.Ner.Mail.Internal.MailLine.Parse(t, 0, 0);
                if (ml == null)
                {
                    continue;
                }
                // Empty branch with no effect.
                // NOTE(review): looks like a leftover debugging hook - confirm before removing.
                if (lines.Count == 91)
                {
                }
                lines.Add(ml);
                t = ml.EndToken;
            }
            if (lines.Count == 0)
            {
                return;
            }
            int i;
            List <List <Pullenti.Ner.Mail.Internal.MailLine> > blocks = new List <List <Pullenti.Ner.Mail.Internal.MailLine> >();
            List <Pullenti.Ner.Mail.Internal.MailLine>         blk    = null;

            // Phase 2: group the lines into blocks. A new block is opened at a From line
            // that is judged (isNew) to start a header.
            for (i = 0; i < lines.Count; i++)
            {
                Pullenti.Ner.Mail.Internal.MailLine ml = lines[i];
                if (ml.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    bool isNew = ml.MustBeFirstLine || i == 0;
                    // A From or Hello line among the next two lines also marks a header start.
                    if (((i + 2) < lines.Count) && (((lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From || lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello) || lines[i + 2].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)))
                    {
                        isNew = true;
                    }
                    if (!isNew)
                    {
                        // Look back to the nearest line with a defined type: a BestRegards line there
                        // also makes this From line a header start.
                        for (int j = i - 1; j >= 0; j--)
                        {
                            if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                            {
                                if (lines[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                                {
                                    isNew = true;
                                }
                                break;
                            }
                        }
                    }
                    if (!isNew)
                    {
                        // A DATE or URI referent inside the line also qualifies it as a header start.
                        for (Pullenti.Ner.Token tt = ml.BeginToken; tt != null && tt.EndChar <= ml.EndChar; tt = tt.Next)
                        {
                            if (tt.GetReferent() != null)
                            {
                                if (tt.GetReferent().TypeName == "DATE" || tt.GetReferent().TypeName == "URI")
                                {
                                    isNew = true;
                                }
                            }
                        }
                    }
                    if (isNew)
                    {
                        blk = new List <Pullenti.Ner.Mail.Internal.MailLine>();
                        blocks.Add(blk);
                        // Collect the header lines of the new block.
                        for (; i < lines.Count; i++)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // A From line that must be first cannot continue a non-empty header.
                                if (blk.Count > 0 && lines[i].MustBeFirstLine)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                            {
                                // A non-From line directly followed by a From line.
                                // First check whether the block already holds a From line that is a
                                // real From, must be first, or carries a mail address.
                                int j;
                                for (j = 0; j < blk.Count; j++)
                                {
                                    if (blk[j].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        if (blk[j].IsRealFrom || blk[j].MustBeFirstLine || blk[j].MailAddr != null)
                                        {
                                            break;
                                        }
                                    }
                                }
                                if (j >= blk.Count)
                                {
                                    // No such line yet: keep the current line in the header.
                                    blk.Add(lines[i]);
                                    continue;
                                }
                                // Otherwise check whether the run of From lines that follows contains
                                // such a line; if it does, the header ends here.
                                bool ok = false;
                                for (j = i + 1; j < lines.Count; j++)
                                {
                                    if (lines[j].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                                    {
                                        break;
                                    }
                                    if (lines[j].IsRealFrom || lines[j].MustBeFirstLine)
                                    {
                                        ok = true;
                                        break;
                                    }
                                    if (lines[j].MailAddr != null)
                                    {
                                        ok = true;
                                        break;
                                    }
                                }
                                if (ok)
                                {
                                    break;
                                }
                                blk.Add(lines[i]);
                            }
                            else
                            {
                                break;
                            }
                        }
                        // Step back so that the outer loop's i++ revisits the line that ended the header.
                        i--;
                        continue;
                    }
                }
                // Any other line goes into the current block (created on demand).
                if (blk == null)
                {
                    blocks.Add((blk = new List <Pullenti.Ner.Mail.Internal.MailLine>()));
                }
                blk.Add(lines[i]);
            }
            if (blocks.Count == 0)
            {
                return;
            }
            // Phase 3: split every block into Head / Hello / Body / Tail referents.
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
            for (int j = 0; j < blocks.Count; j++)
            {
                // From here on, 'lines' is reused for the lines of the current block.
                lines = blocks[j];
                if (lines.Count == 0)
                {
                    continue;
                }
                i = 0;
                if (lines[0].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                {
                    // Head: the leading From lines, including a single non-From line that is
                    // directly followed by a From line. t1 tracks the end of the last From line.
                    Pullenti.Ner.Token t1 = lines[0].EndToken;
                    for (; i < lines.Count; i++)
                    {
                        if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                            t1 = lines[i].EndToken;
                        }
                        else if (((i + 1) < lines.Count) && lines[i + 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.From)
                        {
                        }
                        else
                        {
                            break;
                        }
                    }
                    MailReferent mail = new MailReferent()
                    {
                        Kind = MailKind.Head
                    };
                    Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[0].BeginToken, t1);
                    mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    ad.RegisterReferent(mail);
                    mail.AddOccurenceOfRefTok(mt);
                }
                // i0 is the index of the first line after the head.
                int i0 = i;
                // t2 will hold the first token of the tail, or null when no tail is found.
                Pullenti.Ner.Token t2 = null;
                int err = 0;
                // Scan backwards from the end of the block looking for the start of the tail.
                for (i = lines.Count - 1; i >= i0; i--)
                {
                    Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                    if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards)
                    {
                        t2 = lines[i].BeginToken;
                        // Extend the tail upwards over adjacent short BestRegards lines, allowing one
                        // short line (fewer than 3 words) between two of them.
                        for (--i; i >= i0; i--)
                        {
                            if (lines[i].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards && (lines[i].Words < 2))
                            {
                                t2 = lines[i].BeginToken;
                            }
                            else if ((i > i0 && (lines[i].Words < 3) && lines[i - 1].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.BestRegards) && (lines[i - 1].Words < 2))
                            {
                                i--;
                                t2 = lines[i].BeginToken;
                            }
                            else
                            {
                                break;
                            }
                        }
                        break;
                    }
                    // A short line with referents (and not the first line after the head) is a tail candidate.
                    if (li.Refs.Count > 0 && (li.Words < 3) && i > i0)
                    {
                        err = 0;
                        t2  = li.BeginToken;
                        continue;
                    }
                    // A line with more than 10 words discards the candidate found so far.
                    if (li.Words > 10)
                    {
                        t2 = null;
                        continue;
                    }
                    // More than two lines with more than 2 words since the last candidate discard it as well.
                    if (li.Words > 2)
                    {
                        if ((++err) > 2)
                        {
                            t2 = null;
                        }
                    }
                }
                if (t2 == null)
                {
                    // Fallback: the last Undefined line that has no words and whose first referent is a person.
                    for (i = lines.Count - 1; i >= i0; i--)
                    {
                        Pullenti.Ner.Mail.Internal.MailLine li = lines[i];
                        if (li.Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined)
                        {
                            if (li.Refs.Count > 0 && (li.Refs[0] is Pullenti.Ner.Person.PersonReferent))
                            {
                                if (li.Words == 0 && i > i0)
                                {
                                    t2 = li.BeginToken;
                                    break;
                                }
                            }
                        }
                    }
                }
                // Hello: a greeting line reached before any line that has a type, words or referents.
                for (int ii = i0; ii < lines.Count; ii++)
                {
                    if (lines[ii].Typ == Pullenti.Ner.Mail.Internal.MailLine.Types.Hello)
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Hello
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, lines[ii].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                            // The body starts after the greeting.
                            i0 = ii + 1;
                        }
                        break;
                    }
                    else if (lines[ii].Typ != Pullenti.Ner.Mail.Internal.MailLine.Types.Undefined || lines[ii].Words > 0 || lines[ii].Refs.Count > 0)
                    {
                        break;
                    }
                }
                if (i0 < lines.Count)
                {
                    // Body: from line i0 up to the token before the tail, or to the end of the block.
                    // When the tail starts at a token that has no predecessor, no body is created.
                    if (t2 != null && t2.Previous == null)
                    {
                    }
                    else
                    {
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Body
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, lines[i0].BeginToken, (t2 != null && t2.Previous != null ? t2.Previous : lines[lines.Count - 1].EndToken));
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                    }
                    if (t2 != null)
                    {
                        // Tail: from t2 to the end of the block.
                        MailReferent mail = new MailReferent()
                        {
                            Kind = MailKind.Tail
                        };
                        Pullenti.Ner.ReferentToken mt = new Pullenti.Ner.ReferentToken(mail, t2, lines[lines.Count - 1].EndToken);
                        if (mt.LengthChar > 0)
                        {
                            mail.Text = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(mt, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                            ad.RegisterReferent(mail);
                            mail.AddOccurenceOfRefTok(mt);
                        }
                        // Attach the referents of every line that starts at or after t2 to the tail.
                        for (i = i0; i < lines.Count; i++)
                        {
                            if (lines[i].BeginChar >= t2.BeginChar)
                            {
                                foreach (Pullenti.Ner.Referent r in lines[i].Refs)
                                {
                                    mail.AddRef(r, 0);
                                }
                            }
                        }
                    }
                }
            }
        }
Exemplo n.º 21
0
 // Main entity-extraction routine of this analyzer.
 // Walks the token chain up to two times; on every pass it embeds a referent token
 // wherever a denomination is recognised. The second pass only re-applies the local
 // ontology built during the first one, and is skipped when the first pass found nothing new.
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerDataWithOntology data = kit.GetAnalyzerData(this) as Pullenti.Ner.Core.AnalyzerDataWithOntology;
     for (int pass = 0; pass < 2; pass++)
     {
         bool     foundNew  = false;
         DateTime passStart = DateTime.Now;
         for (Pullenti.Ner.Token cur = kit.FirstToken; cur != null; cur = cur.Next)
         {
             // Only look at tokens preceded by whitespace, by a comma, or by a token that
             // BracketHelper.CanBeStartOfSequence accepts.
             bool atBoundary = cur.IsWhitespaceBefore
                 || (cur.Previous != null
                     && (cur.Previous.IsCharOf(",") || Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(cur.Previous, false, false)));
             if (!atBoundary)
             {
                 continue;
             }

             // Special forms are tried first and are registered unconditionally.
             Pullenti.Ner.ReferentToken found = this.TryAttachSpec(cur);
             if (found != null)
             {
                 found.Referent = data.RegisterReferent(found.Referent);
                 kit.EmbedToken(found);
                 cur = found;
                 continue;
             }
             if (!cur.Chars.IsLetter || !this.CanBeStartOfDenom(cur))
             {
                 continue;
             }
             // Abandon the pass once it has been running for more than one minute.
             if ((DateTime.Now - passStart).TotalMinutes > 1)
             {
                 break;
             }

             // 1) Something already present in the local ontology: embed a clone without occurrences.
             List <Pullenti.Ner.Core.IntOntologyToken> hits = data.LocalOntology.TryAttach(cur, null, false);
             if (hits != null && (hits[0].Item.Referent is DenominationReferent) && this.CheckAttach(hits[0].BeginToken, hits[0].EndToken))
             {
                 DenominationReferent copy = hits[0].Item.Referent.Clone() as DenominationReferent;
                 copy.Occurrence.Clear();
                 Pullenti.Ner.ReferentToken known = new Pullenti.Ner.ReferentToken(copy, hits[0].BeginToken, hits[0].EndToken);
                 kit.EmbedToken(known);
                 cur = known;
                 continue;
             }

             // Everything below runs on the first pass only.
             if (pass > 0)
             {
                 continue;
             }

             // 2) Something listed in the external ontology attached to the kit.
             if (cur.Kit.Ontology != null)
             {
                 hits = cur.Kit.Ontology.AttachToken(DenominationReferent.OBJ_TYPENAME, cur);
                 if (hits != null && this.CheckAttach(hits[0].BeginToken, hits[0].EndToken))
                 {
                     DenominationReferent fresh = new DenominationReferent();
                     fresh.MergeSlots(hits[0].Item.Referent, true);
                     Pullenti.Ner.ReferentToken external = new Pullenti.Ner.ReferentToken(data.RegisterReferent(fresh), hits[0].BeginToken, hits[0].EndToken);
                     kit.EmbedToken(external);
                     cur = external;
                     continue;
                 }
             }

             // 3) A brand-new denomination parsed from the text itself.
             found = this.TryAttach(cur, false);
             if (found == null)
             {
                 continue;
             }
             found.Referent = data.RegisterReferent(found.Referent);
             kit.EmbedToken(found);
             foundNew = true;
             cur      = found;
             // Stop the pass when the local ontology has grown beyond 1000 items.
             if (data.LocalOntology.Items.Count > 1000)
             {
                 break;
             }
         }
         if (!foundNew)
         {
             break;
         }
     }
 }
Exemplo n.º 22
0
 /// <summary>
 /// Run the analysis. The base implementation is empty; analyzers override it.
 /// </summary>
 /// <param name="kit">container with the data</param>
 public virtual void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
 }
Exemplo n.º 23
0
 // Finds measure expressions in the token chain and embeds them as referent tokens.
 // When the kit carries an external ontology, unit names from it are offered to the
 // parser, and afterwards every recognised unit is linked to the first ontology item
 // that shares one of its names.
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(this);

     // Step 1: collect NAME / FULLNAME values of ontology units whose m_Unit is not set.
     Pullenti.Ner.Core.TerminCollection extraUnits = null;
     if (kit.Ontology != null)
     {
         extraUnits = new Pullenti.Ner.Core.TerminCollection();
         foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
         {
             UnitReferent ontoUnit = item.Referent as UnitReferent;
             if (ontoUnit == null || ontoUnit.m_Unit != null)
             {
                 continue;
             }
             foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
             {
                 if (slot.TypeName != UnitReferent.ATTR_NAME && slot.TypeName != UnitReferent.ATTR_FULLNAME)
                 {
                     continue;
                 }
                 Pullenti.Ner.Core.Termin termin = new Pullenti.Ner.Core.Termin(slot.Value as string);
                 termin.Tag = ontoUnit;
                 extraUnits.Add(termin);
             }
         }
     }

     // Step 2: parse measures token by token (minimal form first, full form as a fallback).
     for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
     {
         Pullenti.Ner.Measure.Internal.MeasureToken measure =
             Pullenti.Ner.Measure.Internal.MeasureToken.TryParseMinimal(t, extraUnits, false)
             ?? Pullenti.Ner.Measure.Internal.MeasureToken.TryParse(t, extraUnits, true, false, false, false);
         if (measure == null)
         {
             continue;
         }
         List <Pullenti.Ner.ReferentToken> produced = measure.CreateRefenetsTokensWithRegister(data, true);
         if (produced == null)
         {
             continue;
         }
         for (int idx = 0; idx < produced.Count; idx++)
         {
             Pullenti.Ner.ReferentToken embedded = produced[idx];
             t.Kit.EmbedToken(embedded);
             t = embedded;
             // Later tokens that started or ended on the same plain tokens must now
             // start or end on the token that has just been embedded.
             for (int later = idx + 1; later < produced.Count; later++)
             {
                 Pullenti.Ner.ReferentToken pending = produced[later];
                 if (pending.BeginToken == embedded.BeginToken)
                 {
                     pending.BeginToken = t;
                 }
                 if (pending.EndToken == embedded.EndToken)
                 {
                     pending.EndToken = t;
                 }
             }
         }
     }

     // Step 3: link recognised units back to the external ontology.
     if (kit.Ontology == null)
     {
         return;
     }
     foreach (Pullenti.Ner.Referent entity in data.Referents)
     {
         UnitReferent unit = entity as UnitReferent;
         if (unit == null)
         {
             continue;
         }
         foreach (Pullenti.Ner.ExtOntologyItem item in kit.Ontology.Items)
         {
             UnitReferent ontoUnit = item.Referent as UnitReferent;
             if (ontoUnit == null)
             {
                 continue;
             }
             bool sharesName = false;
             foreach (Pullenti.Ner.Slot slot in ontoUnit.Slots)
             {
                 bool isNameSlot = slot.TypeName == UnitReferent.ATTR_NAME || slot.TypeName == UnitReferent.ATTR_FULLNAME;
                 if (isNameSlot && unit.FindSlot(null, slot.Value, true) != null)
                 {
                     sharesName = true;
                     break;
                 }
             }
             if (!sharesName)
             {
                 continue;
             }
             // Only the first matching ontology item is kept.
             unit.OntologyItems = new List <Pullenti.Ner.ExtOntologyItem>();
             unit.OntologyItems.Add(item);
             break;
         }
     }
 }
Exemplo n.º 24
0
        // Runs the complete pipeline for one text: builds the AnalysisKit (morphological
        // analysis), runs the analyzers through _process2, builds the result through
        // _createRes (skipped in ontology mode) and fills an AnalysisResult.
        // Log messages are produced only when noLog is false; the "100 %" progress
        // notification after kit construction is always sent.
        internal AnalysisResult _process(SourceOfAnalysis text, bool ontoRegine, bool noLog, ExtOntology extOntology = null, Pullenti.Morph.MorphLang lang = null)
        {
            m_Breaked = false;
            this.PrepareProgress();
            Stopwatch timer = Stopwatch.StartNew();

            this.ManageReferentLinks();
            bool logging = !noLog;
            if (logging)
            {
                this.OnProgressHandler(this, new ProgressChangedEventArgs(0, "Морфологический анализ"));
            }

            Pullenti.Ner.Core.AnalysisKit kit = new Pullenti.Ner.Core.AnalysisKit(text, false, lang, OnProgressHandler);
            kit.Ontology   = extOntology;
            kit.Processor  = this;
            kit.OntoRegime = ontoRegine;
            AnalysisResult result = new AnalysisResult();

            // The stopwatch is paused here so that the first log line reports kit construction only.
            timer.Stop();
            this.OnProgressHandler(this, new ProgressChangedEventArgs(100, "Морфологический анализ завершён"));

            int tokenCount = 0;
            Token walker = kit.FirstToken;
            while (walker != null)
            {
                tokenCount++;
                walker = walker.Next;
            }

            string line;
            if (logging)
            {
                line = string.Format("Из {0} символов текста выделено {1} термов за {2} ms", text.Text.Length, tokenCount, timer.ElapsedMilliseconds);
                if (!kit.BaseLanguage.IsUndefined)
                {
                    line += string.Format(", базовый язык {0}", kit.BaseLanguage.ToString());
                }
                this.OnMessage(line);
                result.Log.Add(line);
                if (text.CrlfCorrectedCount > 0)
                {
                    result.Log.Add(string.Format("{0} переходов на новую строку заменены на пробел", text.CrlfCorrectedCount));
                }
                if (kit.FirstToken == null)
                {
                    result.Log.Add("Пустой текст");
                }
            }

            // Resume timing: from here on the stopwatch accumulates the analysis phase too.
            timer.Start();
            if (kit.FirstToken != null)
            {
                this._process2(kit, result, noLog);
            }
            if (!ontoRegine)
            {
                this._createRes(kit, result, extOntology, noLog);
            }
            timer.Stop();

            if (logging)
            {
                if (timer.ElapsedMilliseconds > 5000)
                {
                    // Characters per millisecond, reported in the message as Kb/sec.
                    float rate = (float)text.Text.Length / timer.ElapsedMilliseconds;
                    line = string.Format("Обработка {0} знаков выполнена за {1} ({2} Kb/sec)", text.Text.Length, OutSecs(timer.ElapsedMilliseconds), rate);
                }
                else
                {
                    line = string.Format("Обработка {0} знаков выполнена за {1}", text.Text.Length, OutSecs(timer.ElapsedMilliseconds));
                }
                this.OnMessage(line);
                result.Log.Add(line);
            }

            if (TimeoutSeconds > 0 && (DateTime.Now - kit.StartDate).TotalSeconds > TimeoutSeconds)
            {
                result.IsTimeoutBreaked = true;
            }

            result.Sofa = text;
            if (!ontoRegine)
            {
                result.Entities.AddRange(kit.Entities);
            }
            result.FirstToken   = kit.FirstToken;
            result.Ontology     = extOntology;
            result.BaseLanguage = kit.BaseLanguage;
            return(result);
        }
Exemplo n.º 25
0
        // Recomputes the "general referent" relation between the referents produced by
        // all analyzers of proc for the given kit.
        // Outline (as visible in this method):
        //   1. wrap every referent in a Node (stored in Referent.Tag) and index referents
        //      by analyzer name and by their compare strings;
        //   2. record slot references between nodes (RefsFrom / RefsTo);
        //   3. inside every index bucket, record strict one-way CanBeGeneralFor pairs
        //      (GenFrom / GenTo);
        //   4. drop redundant parents from GenTo;
        //   5. for nodes with exactly one remaining parent either merge the parent into
        //      the node or set GeneralReferent;
        //   6. let _correctReferents fix up every token, remove merged referents from
        //      their analyzer data and clear the temporary Tag values.
        public static void RefreshGenerals(Pullenti.Ner.Processor proc, Pullenti.Ner.Core.AnalysisKit kit)
        {
            // analyzer name -> compare string -> referents sharing that string
            Dictionary <string, Dictionary <string, List <Pullenti.Ner.Referent> > > all = new Dictionary <string, Dictionary <string, List <Pullenti.Ner.Referent> > >();
            List <Node> allRefs = new List <Node>();

            // Step 1: create nodes and fill the index.
            foreach (Pullenti.Ner.Analyzer a in proc.Analyzers)
            {
                Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(a);
                if (ad == null)
                {
                    continue;
                }
                foreach (Pullenti.Ner.Referent r in ad.Referents)
                {
                    Node nod = new Node()
                    {
                        Ref = r, Ad = ad
                    };
                    allRefs.Add(nod);
                    // Tag temporarily points at the node; it is reset at the end of the method.
                    r.Tag = nod;
                    Dictionary <string, List <Pullenti.Ner.Referent> > si;
                    if (!all.TryGetValue(a.Name, out si))
                    {
                        all.Add(a.Name, (si = new Dictionary <string, List <Pullenti.Ner.Referent> >()));
                    }
                    List <string> strs = r.GetCompareStrings();
                    if (strs == null || strs.Count == 0)
                    {
                        continue;
                    }
                    foreach (string s in strs)
                    {
                        if (s == null)
                        {
                            continue;
                        }
                        List <Pullenti.Ner.Referent> li;
                        if (!si.TryGetValue(s, out li))
                        {
                            si.Add(s, (li = new List <Pullenti.Ner.Referent>()));
                        }
                        li.Add(r);
                    }
                }
            }
            // Step 2: for every slot whose value is another tagged referent, link the two nodes.
            foreach (Node r in allRefs)
            {
                foreach (Pullenti.Ner.Slot s in r.Ref.Slots)
                {
                    if (s.Value is Pullenti.Ner.Referent)
                    {
                        Pullenti.Ner.Referent to = s.Value as Pullenti.Ner.Referent;
                        Node tn = to.Tag as Node;
                        // Referents whose Tag is not a Node are ignored.
                        if (tn == null)
                        {
                            continue;
                        }
                        if (tn.RefsFrom == null)
                        {
                            tn.RefsFrom = new List <Node>();
                        }
                        tn.RefsFrom.Add(r);
                        if (r.RefsTo == null)
                        {
                            r.RefsTo = new List <Node>();
                        }
                        r.RefsTo.Add(tn);
                    }
                }
            }
            // Step 3: pairwise comparison inside every bucket.
            foreach (Dictionary <string, List <Pullenti.Ner.Referent> > ty in all.Values)
            {
                foreach (List <Pullenti.Ner.Referent> li in ty.Values)
                {
                    if (li.Count < 2)
                    {
                        continue;
                    }
                    // Buckets with more than 3000 entries are skipped (the comparison below is quadratic).
                    if (li.Count > 3000)
                    {
                        continue;
                    }
                    for (int i = 0; i < li.Count; i++)
                    {
                        for (int j = i + 1; j < li.Count; j++)
                        {
                            // n1 is the node of the referent for which CanBeGeneralFor(other) holds
                            // one way only; n2 is the node of the other referent.
                            Node n1 = null;
                            Node n2 = null;
                            if (li[i].CanBeGeneralFor(li[j]) && !li[j].CanBeGeneralFor(li[i]))
                            {
                                n1 = li[i].Tag as Node;
                                n2 = li[j].Tag as Node;
                            }
                            else if (li[j].CanBeGeneralFor(li[i]) && !li[i].CanBeGeneralFor(li[j]))
                            {
                                n1 = li[j].Tag as Node;
                                n2 = li[i].Tag as Node;
                            }
                            if (n1 != null && n2 != null)
                            {
                                // Record the relation in both directions, without duplicates.
                                if (n1.GenFrom == null)
                                {
                                    n1.GenFrom = new List <Node>();
                                }
                                if (!n1.GenFrom.Contains(n2))
                                {
                                    n1.GenFrom.Add(n2);
                                }
                                if (n2.GenTo == null)
                                {
                                    n2.GenTo = new List <Node>();
                                }
                                if (!n2.GenTo.Contains(n1))
                                {
                                    n2.GenTo.Add(n1);
                                }
                            }
                        }
                    }
                }
            }
            // Step 4: when a node has several parents, remove parent p if any other parent
            // reports IsInGenParentsOrHigher(p).
            // NOTE(review): presumably this keeps only the most specific parents; confirm against Node.
            foreach (Node n in allRefs)
            {
                if (n.GenTo != null && n.GenTo.Count > 1)
                {
                    // Iterate backwards because entries are removed by index.
                    for (int i = n.GenTo.Count - 1; i >= 0; i--)
                    {
                        Node p   = n.GenTo[i];
                        bool del = false;
                        for (int j = 0; j < n.GenTo.Count; j++)
                        {
                            if (j != i && n.GenTo[j].IsInGenParentsOrHigher(p))
                            {
                                del = true;
                            }
                        }
                        if (del)
                        {
                            p.GenFrom.Remove(n);
                            n.GenTo.RemoveAt(i);
                        }
                    }
                }
            }
            // Step 5: resolve nodes that have exactly one parent left.
            foreach (Node n in allRefs)
            {
                if (!n.Deleted && n.GenTo != null && n.GenTo.Count == 1)
                {
                    Node p = n.GenTo[0];
                    if (p.GenFrom.Count == 1)
                    {
                        // n is the only child of p: merge p into n and mark p as deleted.
                        n.Ref.MergeSlots(p.Ref, true);
                        // The Tag of the merged referent now points at its replacement.
                        p.Ref.Tag = n.Ref;
                        p.ReplaceValues(n);
                        foreach (Pullenti.Ner.TextAnnotation o in p.Ref.Occurrence)
                        {
                            n.Ref.AddOccurence(o);
                        }
                        p.Deleted = true;
                    }
                    else
                    {
                        // p has several children: keep both and record p as the general referent of n.
                        n.Ref.GeneralReferent = p.Ref;
                    }
                }
            }
            // Step 6: fix up tokens, then drop merged referents and clear the temporary tags.
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                _correctReferents(t);
            }
            foreach (Node n in allRefs)
            {
                if (n.Deleted)
                {
                    n.Ad.RemoveReferent(n.Ref);
                }
                n.Ref.Tag = null;
            }
        }
Exemplo n.º 26
0
        // Runs every enabled analyzer over the kit and then recomputes the generalization
        // relation between the extracted referents.
        //
        // kit   - analysis container shared by all analyzers
        // ar    - result object that receives log lines and wrapped exceptions
        // noLog - when true, no progress/log messages are produced; exceptions thrown by
        //         an analyzer or by the generalization step are then dropped silently
        //         (original best-effort behaviour, kept as is)
        //
        // Interruption rules:
        //   * a user break (m_Breaked set before any timeout was detected) stops the loop;
        //   * a timeout (TimeoutSeconds exceeded since kit.StartDate) sets m_Breaked and makes
        //     the loop skip every remaining analyzer except the one named "INSTRUMENT".
        void _process2(Pullenti.Ner.Core.AnalysisKit kit, AnalysisResult ar, bool noLog)
        {
            string          msg;
            Stopwatch       sw            = Stopwatch.StartNew();
            bool            stopByTimeout = false;
            // Work on a copy so that the loop is not affected by changes to m_Analyzers.
            List <Analyzer> anals         = new List <Analyzer>(m_Analyzers);

            for (int ii = 0; ii < anals.Count; ii++)
            {
                Analyzer c = anals[ii];
                if (c.IgnoreThisAnalyzer)
                {
                    continue;
                }
                // The timeout branch below sets m_Breaked as well. Without the extra
                // !stopByTimeout test the iteration following a timeout would report a user
                // interruption and leave the loop, so the skip logic further down (which
                // still lets "INSTRUMENT" run) could never be reached for later analyzers.
                if (m_Breaked && !stopByTimeout)
                {
                    if (!noLog)
                    {
                        msg = "Процесс прерван пользователем";
                        this.OnMessage(msg);
                        ar.Log.Add(msg);
                    }
                    break;
                }
                // The timeout is detected (and logged) once.
                if (TimeoutSeconds > 0 && !stopByTimeout)
                {
                    if (((DateTime.Now - kit.StartDate)).TotalSeconds > TimeoutSeconds)
                    {
                        m_Breaked = true;
                        if (!noLog)
                        {
                            msg = "Процесс прерван по таймауту";
                            this.OnMessage(msg);
                            ar.Log.Add(msg);
                        }
                        stopByTimeout = true;
                    }
                }
                // After a timeout only the "INSTRUMENT" analyzer is still executed.
                if (stopByTimeout && c.Name != "INSTRUMENT")
                {
                    continue;
                }
                if (!noLog)
                {
                    this.OnProgressHandler(c, new ProgressChangedEventArgs(0, string.Format("Работа \"{0}\"", c.Caption)));
                }
                try
                {
                    sw.Reset();
                    sw.Start();
                    c.Process(kit);
                    sw.Stop();
                    Pullenti.Ner.Core.AnalyzerData dat = kit.GetAnalyzerData(c);
                    if (!noLog)
                    {
                        msg = string.Format("Анализатор \"{0}\" выделил {1} объект(ов) за {2}", c.Caption, (dat == null ? 0 : dat.Referents.Count), OutSecs(sw.ElapsedMilliseconds));
                        this.OnMessage(msg);
                        ar.Log.Add(msg);
                    }
                }
                catch (Exception ex)
                {
                    // One failing analyzer must not abort the others: wrap, report, go on.
                    if (!noLog)
                    {
                        ex = new Exception(string.Format("Ошибка в анализаторе \"{0}\" ({1})", c.Caption, ex.Message), ex);
                        this.OnMessage(ex);
                        ar.AddException(ex);
                    }
                }
            }
            if (!noLog)
            {
                this.OnProgressHandler(null, new ProgressChangedEventArgs(0, "Пересчёт отношений обобщения"));
            }
            try
            {
                sw.Reset();
                sw.Start();
                Pullenti.Ner.Core.Internal.GeneralRelationHelper.RefreshGenerals(this, kit);
                sw.Stop();
                if (!noLog)
                {
                    msg = string.Format("Отношение обобщение пересчитано за {0}", OutSecs(sw.ElapsedMilliseconds));
                    this.OnMessage(msg);
                    ar.Log.Add(msg);
                }
            }
            catch (Exception ex)
            {
                if (!noLog)
                {
                    ex = new Exception("Ошибка пересчёта отношения обобщения", ex);
                    this.OnMessage(ex);
                    ar.AddException(ex);
                }
            }
        }
Exemplo n.º 27
0
 // Scans the token chain for phone numbers and embeds a referent token for each one.
 // For every position it: parses a list of phone items, looks backwards for a phone
 // found shortly before, consumes leading prefix items, tries to build phone referents,
 // assigns their kind and registers/embeds them.
 public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
 {
     PhoneAnalizerData ad = kit.GetAnalyzerData(this) as PhoneAnalizerData;
     for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next) 
     {
         List<Pullenti.Ner.Phone.Internal.PhoneItemToken> pli = Pullenti.Ner.Phone.Internal.PhoneItemToken.TryAttachAll(t, 15);
         if (pli == null || pli.Count == 0) 
             continue;
         // Look backwards for a phone referent close to the current position.
         // kkk counts the "ordinary" tokens passed on the way (see the last branch).
         PhoneReferent prevPhone = null;
         int kkk = 0;
         for (Pullenti.Ner.Token tt = t.Previous; tt != null; tt = tt.Previous) 
         {
             if (tt.GetReferent() is PhoneReferent) 
             {
                 prevPhone = tt.GetReferent() as PhoneReferent;
                 break;
             }
             else if (tt is Pullenti.Ner.ReferentToken) 
             {
                 // Other referent tokens are stepped over without being counted.
             }
             else if (tt.IsChar(')')) 
             {
                 // Jump over a parenthesised group: search back (at most ~100 tokens)
                 // for the matching '('; give up if none is found.
                 Pullenti.Ner.Token ttt = tt.Previous;
                 int cou = 0;
                 for (; ttt != null; ttt = ttt.Previous) 
                 {
                     if (ttt.IsChar('(')) 
                         break;
                     else if ((++cou) > 100) 
                         break;
                 }
                 if (ttt == null || !ttt.IsChar('(')) 
                     break;
                 tt = ttt;
             }
             else if (!tt.IsCharOf(",;/\\") && !tt.IsAnd) 
             {
                 // Any token other than , ; / \ or "and": stop after more than five of
                 // them, or as soon as a line boundary is met.
                 if ((++kkk) > 5) 
                     break;
                 if (tt.IsNewlineBefore || tt.IsNewlineAfter) 
                     break;
             }
         }
         // Consume leading prefix items; j ends up as the index of the first item after them.
         int j = 0;
         bool isPhoneBefore = false;
         bool isPref = false;
         PhoneKind ki = PhoneKind.Undefined;
         while (j < pli.Count) 
         {
             if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix) 
             {
                 // The first prefix met determines the kind.
                 if (ki == PhoneKind.Undefined) 
                     ki = pli[j].Kind;
                 isPref = true;
                 isPhoneBefore = true;
                 j++;
                 // A delimiter right after the prefix is skipped as well.
                 if ((j < pli.Count) && pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Delim) 
                     j++;
             }
             else if (((j + 1) < pli.Count) && pli[j + 1].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix && j == 0) 
             {
                 // The very first item is followed by a prefix: take its kind and drop it,
                 // so that the prefix becomes item 0 and is handled by the branch above.
                 if (ki == PhoneKind.Undefined) 
                     ki = pli[0].Kind;
                 isPref = true;
                 pli.RemoveAt(0);
             }
             else 
                 break;
         }
         if (prevPhone != null) 
             isPhoneBefore = true;
         // A lone number whose preceding text token (optionally after one non-letter
         // text token) matches a URI scheme is not treated as a phone.
         if (pli.Count == 1 && pli[0].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Number) 
         {
             Pullenti.Ner.Token tt = t.Previous;
             if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter) 
                 tt = tt.Previous;
             if (tt is Pullenti.Ner.TextToken) 
             {
                 if (Pullenti.Ner.Uri.UriAnalyzer.m_Schemes.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No) != null) 
                     continue;
             }
         }
         List<Pullenti.Ner.ReferentToken> rts = this.TryAttach(pli, j, isPhoneBefore, prevPhone);
         if (rts == null) 
         {
             // Fallback: restart from the first prefix item found further in the list.
             for (j = 1; j < pli.Count; j++) 
             {
                 if (pli[j].ItemType == Pullenti.Ner.Phone.Internal.PhoneItemToken.PhoneItemType.Prefix) 
                 {
                     pli.RemoveRange(0, j);
                     rts = this.TryAttach(pli, 1, true, prevPhone);
                     break;
                 }
             }
         }
         if (rts == null) 
             // Nothing recognised: continue after the last parsed item.
             t = pli[pli.Count - 1].EndToken;
         else 
         {
             // Without an own prefix, inherit the kind of the previous phone, but only when
             // that phone is not mobile and no ordinary tokens lie in between (kkk == 0).
             if ((ki == PhoneKind.Undefined && prevPhone != null && !isPref) && prevPhone.Kind != PhoneKind.Mobile && kkk == 0) 
                 ki = prevPhone.Kind;
             foreach (Pullenti.Ner.ReferentToken rt in rts) 
             {
                 PhoneReferent ph = rt.Referent as PhoneReferent;
                 if (ki != PhoneKind.Undefined) 
                     ph.Kind = ki;
                 else 
                 {
                     // For the first phone, accept a single-letter marker placed at the start
                     // of the line or right after a table control character.
                     if (rt == rts[0] && (rt.WhitespacesBeforeCount < 3)) 
                     {
                         Pullenti.Ner.Token tt1 = rt.BeginToken.Previous;
                         if (tt1 != null && tt1.IsTableControlChar) 
                             tt1 = tt1.Previous;
                         if ((tt1 is Pullenti.Ner.TextToken) && ((tt1.IsNewlineBefore || ((tt1.Previous != null && tt1.Previous.IsTableControlChar))))) 
                         {
                             // Each marker is checked in two spellings (Latin and Cyrillic letters).
                             // The first pair only extends the token, the second sets Fax, the third Mobile.
                             string term = (tt1 as Pullenti.Ner.TextToken).Term;
                             if (term == "T" || term == "Т") 
                                 rt.BeginToken = tt1;
                             else if (term == "Ф" || term == "F") 
                             {
                                 ph.Kind = (ki = PhoneKind.Fax);
                                 rt.BeginToken = tt1;
                             }
                             else if (term == "M" || term == "М") 
                             {
                                 ph.Kind = (ki = PhoneKind.Mobile);
                                 rt.BeginToken = tt1;
                             }
                         }
                     }
                     // Correct() is called only on this branch, i.e. when no kind was known on entry.
                     ph.Correct();
                 }
                 rt.Referent = ad.RegisterReferent(rt.Referent);
                 kit.EmbedToken(rt);
                 t = rt;
             }
         }
     }
 }
Exemplo n.º 28
0
        // Extracts weapon referents in two passes over the token chain.
        // Pass 1 parses explicit descriptions, embeds them and remembers every model
        // (with and without brand) and every name that was seen.
        // Pass 2 looks for bare mentions of those models/names elsewhere in the text and
        // embeds them as further occurrences of the already known referents.
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData     data       = kit.GetAnalyzerData(this);
            Pullenti.Ner.Core.TerminCollection modelIndex = new Pullenti.Ner.Core.TerminCollection();
            Pullenti.Ner.Core.TerminCollection nameIndex  = new Pullenti.Ner.Core.TerminCollection();
            Dictionary <string, List <Pullenti.Ner.Referent> > byModel = new Dictionary <string, List <Pullenti.Ner.Referent> >();

            // ---- Pass 1: explicit descriptions ----
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                List <Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(t, 10);
                if (items == null)
                {
                    continue;
                }
                List <Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, false);
                if (attached == null)
                {
                    continue;
                }
                foreach (Pullenti.Ner.ReferentToken rt in attached)
                {
                    rt.Referent = data.RegisterReferent(rt.Referent);
                    kit.EmbedToken(rt);
                    t = rt;
                    foreach (Pullenti.Ner.Slot slot in rt.Referent.Slots)
                    {
                        if (slot.TypeName == WeaponReferent.ATTR_NAME)
                        {
                            Pullenti.Ner.Core.Termin byName = new Pullenti.Ner.Core.Termin(slot.Value.ToString());
                            byName.Tag = rt.Referent;
                            nameIndex.Add(byName);
                            continue;
                        }
                        if (slot.TypeName != WeaponReferent.ATTR_MODEL)
                        {
                            continue;
                        }
                        // Index the model first as written, then once more prefixed with the brand.
                        string key       = slot.Value.ToString();
                        bool   withBrand = false;
                        while (true)
                        {
                            // Keys starting with a digit are not indexed.
                            if (!char.IsDigit(key[0]))
                            {
                                List <Pullenti.Ner.Referent> owners;
                                if (!byModel.TryGetValue(key, out owners))
                                {
                                    owners = new List <Pullenti.Ner.Referent>();
                                    byModel.Add(key, owners);
                                }
                                if (!owners.Contains(rt.Referent))
                                {
                                    owners.Add(rt.Referent);
                                }
                                modelIndex.AddString(key, owners, null, false);
                            }
                            if (withBrand)
                            {
                                break;
                            }
                            string brand = rt.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                            if (brand == null)
                            {
                                break;
                            }
                            key       = string.Format("{0} {1}", brand, key);
                            withBrand = true;
                        }
                    }
                }
            }
            if (byModel.Count == 0 && nameIndex.Termins.Count == 0)
            {
                return;
            }

            // ---- Pass 2: bare mentions of known names and models ----
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                // A known name that fills a whole bracket sequence.
                Pullenti.Ner.Core.BracketSequenceToken bracket = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
                if (bracket != null)
                {
                    Pullenti.Ner.Core.TerminToken inside = nameIndex.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                    if (inside != null && inside.EndToken.Next == bracket.EndToken)
                    {
                        Pullenti.Ner.ReferentToken quoted = new Pullenti.Ner.ReferentToken(inside.Termin.Tag as Pullenti.Ner.Referent, bracket.BeginToken, bracket.EndToken);
                        kit.EmbedToken(quoted);
                        t = quoted;
                        continue;
                    }
                }
                if (!(t is Pullenti.Ner.TextToken) || !t.Chars.IsLetter)
                {
                    continue;
                }

                // Models are tried first; names only for tokens that are not all lower-case.
                Pullenti.Ner.Core.TerminToken match = modelIndex.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                if (match == null && !t.Chars.IsAllLower)
                {
                    match = nameIndex.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
                }
                if (match == null)
                {
                    continue;
                }

                // Without whitespace after the match, the next token must be one of ",.)" or a bracket.
                if (!match.IsWhitespaceAfter)
                {
                    Pullenti.Ner.Token follower  = match.EndToken.Next;
                    bool               punctNext = follower != null && follower.IsCharOf(",.)");
                    if (!punctNext && !Pullenti.Ner.Core.BracketHelper.IsBracket(follower, false))
                    {
                        continue;
                    }
                }

                // A model termin carries a list of referents (used only when unambiguous);
                // a name termin carries the referent itself.
                List <Pullenti.Ner.Referent> candidates = match.Termin.Tag as List <Pullenti.Ner.Referent>;
                Pullenti.Ner.Referent        target     = (candidates != null && candidates.Count == 1)
                    ? candidates[0]
                    : (match.Termin.Tag as Pullenti.Ner.Referent);
                if (target == null)
                {
                    continue;
                }

                // A brand standing right before the mention is added to the referent and to the span.
                Pullenti.Ner.Weapon.Internal.WeaponItemToken before = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(match.BeginToken.Previous, null, false, true);
                if (before != null && before.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
                {
                    target.AddSlot(WeaponReferent.ATTR_BRAND, before.Value, false, 0);
                    match.BeginToken = before.BeginToken;
                }
                Pullenti.Ner.ReferentToken mention = new Pullenti.Ner.ReferentToken(target, match.BeginToken, match.EndToken);
                kit.EmbedToken(mention);
                t = mention;
            }
        }
Exemplo n.º 29
0
        /// <summary>
        /// Main keyword-extraction pass over the token chain of <paramref name="kit"/>.
        /// </summary>
        /// <remarks>
        /// The work is done in the following order:
        /// 1. If the processor has no active <c>DenominationAnalyzer</c>, one is created and run on the kit.
        /// 2. First scan: tokens that already carry a referent are delegated to <c>_addReferents</c>;
        ///    verbs without a noun phrase become <c>KeywordType.Predicate</c> keywords; every suitable
        ///    word of a noun phrase becomes a <c>KeywordType.Object</c> keyword, and a phrase with more
        ///    than one such word additionally gets a combined keyword wrapping the whole span.
        /// 3. Second scan: an object keyword is united with the following object keyword when they are
        ///    linked by "OF" (optionally followed by an English article) or by a genitive case.
        /// 4. Optionally the referents are sorted by rank and an annotation referent is registered.
        /// </remarks>
        /// <param name="kit">Analysis kit whose tokens are scanned and into which the keyword tokens are embedded.</param>
        public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
        {
            Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);

            // Make sure denominations were processed: run the analyzer ourselves when no active one is configured.
            bool hasDenoms = false;
            foreach (Pullenti.Ner.Analyzer a in kit.Processor.Analyzers)
            {
                if ((a is Pullenti.Ner.Denomination.DenominationAnalyzer) && !a.IgnoreThisAnalyzer)
                {
                    hasDenoms = true;
                    break;
                }
            }
            if (!hasDenoms)
            {
                Pullenti.Ner.Denomination.DenominationAnalyzer denominations = new Pullenti.Ner.Denomination.DenominationAnalyzer();
                denominations.Process(kit);
            }

            List <KeywordReferent> li   = new List <KeywordReferent>();
            StringBuilder          tmp  = new StringBuilder();
            List <string>          tmp2 = new List <string>();

            // Total number of top-level tokens; passed to the rank helpers together with the current index.
            int max = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                max++;
            }

            // First scan: create predicate / object keywords.
            int cur = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                Pullenti.Ner.Referent r = t.GetReferent();
                if (r != null)
                {
                    t = this._addReferents(ad, t, cur, max);
                    continue;
                }
                if (!(t is Pullenti.Ner.TextToken))
                {
                    continue;
                }
                if (!t.Chars.IsLetter || (t.LengthChar < 3))
                {
                    continue;
                }
                string term = (t as Pullenti.Ner.TextToken).Term;
                if (term == "ЕСТЬ")
                {
                    // "ЕСТЬ" is only considered when the previous token is a text token with a verb morph class.
                    if (!((t.Previous is Pullenti.Ner.TextToken) && t.Previous.Morph.Class.IsVerb))
                    {
                        continue;
                    }
                }
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null);
                if (npt == null)
                {
                    // No noun phrase here: the token can still yield a predicate keyword if it is a verb.
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsVerb && !mc.IsPreposition)
                    {
                        if ((t as Pullenti.Ner.TextToken).IsVerbBe)
                        {
                            continue;
                        }
                        if (t.IsValue("МОЧЬ", null) || t.IsValue("WOULD", null))
                        {
                            continue;
                        }
                        KeywordReferent kref = new KeywordReferent()
                        {
                            Typ = KeywordType.Predicate
                        };
                        string norm = t.GetNormalCaseText(Pullenti.Morph.MorphClass.Verb, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                        if (norm == null)
                        {
                            norm = (t as Pullenti.Ner.TextToken).Lemma;
                        }
                        // Drop the trailing "СЯ" of a "...ЬСЯ" form; ordinal match, the suffix is a fixed literal.
                        if (norm.EndsWith("ЬСЯ", System.StringComparison.Ordinal))
                        {
                            norm = norm.Substring(0, norm.Length - 2);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, t.Morph.Language);
                        _addNormals(kref, drv, norm);
                        kref = ad.RegisterReferent(kref) as KeywordReferent;
                        _setRank(kref, cur, max);
                        // NOTE(review): kref is registered a second time here; this looks redundant, but confirm
                        // that RegisterReferent is idempotent before simplifying.
                        Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(ad.RegisterReferent(kref), t, t)
                        {
                            Morph = t.Morph
                        };
                        kit.EmbedToken(rt1);
                        t = rt1;
                        continue;
                    }
                    continue;
                }
                if (npt.InternalNoun != null)
                {
                    continue;
                }
                // Phrases ending with "ЦЕЛОМ" / "ЧАСТНОСТИ" that carry a preposition are skipped as a whole.
                if (npt.EndToken.IsValue("ЦЕЛОМ", null) || npt.EndToken.IsValue("ЧАСТНОСТИ", null))
                {
                    if (npt.Preposition != null)
                    {
                        t = npt.EndToken;
                        continue;
                    }
                }
                // Same for a phrase ending with "СТОРОНЫ" whose preposition normalizes to "С".
                if (npt.EndToken.IsValue("СТОРОНЫ", null) && npt.Preposition != null && npt.Preposition.Normal == "С")
                {
                    t = npt.EndToken;
                    continue;
                }
                if (npt.BeginToken == npt.EndToken)
                {
                    // Single-token phrase: ignore prepositions and the adverb "ПОТОМ".
                    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
                    if (mc.IsPreposition)
                    {
                        continue;
                    }
                    else if (mc.IsAdverb)
                    {
                        if (t.IsValue("ПОТОМ", null))
                        {
                            continue;
                        }
                    }
                }

                // Create one object keyword per suitable word of the noun phrase.
                li.Clear();
                Pullenti.Ner.Token t0 = t;
                for (Pullenti.Ner.Token tt = t; tt != null && tt.EndChar <= npt.EndChar; tt = tt.Next)
                {
                    if (!(tt is Pullenti.Ner.TextToken))
                    {
                        continue;
                    }
                    if ((tt.LengthChar < 3) || !tt.Chars.IsLetter)
                    {
                        continue;
                    }
                    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
                    if ((mc.IsPreposition || mc.IsPronoun || mc.IsPersonalPronoun) || mc.IsConjunction)
                    {
                        // Service words are skipped, except the word "ОТНОШЕНИЕ".
                        if (!tt.IsValue("ОТНОШЕНИЕ", null))
                        {
                            continue;
                        }
                    }
                    if (mc.IsMisc)
                    {
                        if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt))
                        {
                            continue;
                        }
                    }
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    string norm = (tt as Pullenti.Ner.TextToken).Lemma;
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, norm, false, 0);
                    if (norm != "ЕСТЬ")
                    {
                        List <Pullenti.Semantic.Utils.DerivateGroup> drv = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, tt.Morph.Language);
                        _addNormals(kref, drv, norm);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, tt, tt)
                    {
                        Morph = tt.Morph
                    };
                    kit.EmbedToken(rt1);
                    // Remember the wrapper of the very first word so the combined token can start from it.
                    if (tt == t && li.Count == 0)
                    {
                        t0 = rt1;
                    }
                    t = rt1;
                    li.Add(kref);
                }

                // Several words were found: add a combined keyword spanning from t0 to the last embedded token.
                if (li.Count > 1)
                {
                    KeywordReferent kref = new KeywordReferent()
                    {
                        Typ = KeywordType.Object
                    };
                    tmp2.Clear();
                    foreach (KeywordReferent kw in li)
                    {
                        // Prefer the NORMAL attribute of each part, falling back to its VALUE.
                        string s = kw.GetStringValue(KeywordReferent.ATTR_VALUE);
                        string n = kw.GetStringValue(KeywordReferent.ATTR_NORMAL);
                        if (n != null)
                        {
                            tmp2.Add(n);
                        }
                        else
                        {
                            tmp2.Add(s);
                        }
                        kref.AddSlot(KeywordReferent.ATTR_REF, kw, false, 0);
                    }
                    string val = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                    kref.AddSlot(KeywordReferent.ATTR_VALUE, val, false, 0);
                    // The normalized form is the sorted parts joined by single spaces.
                    tmp.Length = 0;
                    tmp2.Sort();
                    foreach (string s in tmp2)
                    {
                        if (tmp.Length > 0)
                        {
                            tmp.Append(' ');
                        }
                        tmp.Append(s);
                    }
                    string norm = tmp.ToString();
                    if (norm != val)
                    {
                        kref.AddSlot(KeywordReferent.ATTR_NORMAL, norm, false, 0);
                    }
                    kref = ad.RegisterReferent(kref) as KeywordReferent;
                    _setRank(kref, cur, max);
                    Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kref, t0, t)
                    {
                        Morph = npt.Morph
                    };
                    kit.EmbedToken(rt1);
                    t = rt1;
                }
            }

            // Second scan: unite neighbouring object keywords ("X of Y", or X followed by a genitive Y).
            cur = 0;
            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next, cur++)
            {
                KeywordReferent kw = t.GetReferent() as KeywordReferent;
                if (kw == null || kw.Typ != KeywordType.Object)
                {
                    continue;
                }
                if (t.Next == null || kw.ChildWords > 2)
                {
                    continue;
                }
                Pullenti.Ner.Token t1 = t.Next;
                if (t1.IsValue("OF", null) && (t1.WhitespacesAfterCount < 3) && t1.Next != null)
                {
                    // Step over "OF" and an optional English article.
                    t1 = t1.Next;
                    if ((t1 is Pullenti.Ner.TextToken) && Pullenti.Ner.Core.MiscHelper.IsEngArticle(t1) && t1.Next != null)
                    {
                        t1 = t1.Next;
                    }
                }
                else if (!t1.Morph.Case.IsGenitive || t.WhitespacesAfterCount > 1)
                {
                    continue;
                }
                KeywordReferent kw2 = t1.GetReferent() as KeywordReferent;
                if (kw2 == null)
                {
                    continue;
                }
                if (kw == kw2)
                {
                    continue;
                }
                // Only object keywords are united, and the combined ChildWords must not exceed 3.
                if (kw2.Typ != KeywordType.Object || (kw.ChildWords + kw2.ChildWords) > 3)
                {
                    continue;
                }
                KeywordReferent kwUn = new KeywordReferent();
                kwUn.Union(kw, kw2, Pullenti.Ner.Core.MiscHelper.GetTextValue(t1, t1, Pullenti.Ner.Core.GetTextAttr.No));
                kwUn = ad.RegisterReferent(kwUn) as KeywordReferent;
                _setRank(kwUn, cur, max);
                Pullenti.Ner.ReferentToken rt1 = new Pullenti.Ner.ReferentToken(kwUn, t, t1)
                {
                    Morph = t.Morph
                };
                kit.EmbedToken(rt1);
                t = rt1;
            }

            if (SortKeywordsByRank)
            {
                // Replace the referent list with a copy ordered by the CompByRank comparer.
                List <Pullenti.Ner.Referent> all = new List <Pullenti.Ner.Referent>(ad.Referents);
                all.Sort(new CompByRank());
                ad.Referents = all;
            }
            if (AnnotationMaxSentences > 0)
            {
                KeywordReferent ano = Pullenti.Ner.Keyword.Internal.AutoannoSentToken.CreateAnnotation(kit, AnnotationMaxSentences);
                if (ano != null)
                {
                    ad.RegisterReferent(ano);
                }
            }
        }
Exemplo n.º 30
0
 /// <summary>
 /// Creates a token bound to an analysis kit and a character range.
 /// </summary>
 /// <param name="kit">Analysis kit stored in <c>Kit</c>.</param>
 /// <param name="begin">Value stored in <c>m_BeginChar</c>.</param>
 /// <param name="end">Value stored in <c>m_EndChar</c>.</param>
 public Token(Pullenti.Ner.Core.AnalysisKit kit, int begin, int end)
 {
     // Owning kit first, then the character range, in the same order as before.
     this.Kit = kit;
     this.m_BeginChar = begin;
     this.m_EndChar = end;
 }