示例#1
0
 // Splits the token range [t0..t1] into sentences and collects them into one SemBlock,
 // which is attached to the document only if it produced at least one fragment.
 static void _processBlock(Pullenti.Semantic.SemDocument res, Pullenti.Ner.AnalysisResult ar, Pullenti.Ner.Token t0, Pullenti.Ner.Token t1)
 {
     Pullenti.Semantic.SemBlock block = new Pullenti.Semantic.SemBlock(res);
     Pullenti.Ner.Token cur = t0;
     while (cur != null && cur.EndChar <= t1.EndChar)
     {
         // Advance to the last token before the next possible sentence start.
         Pullenti.Ner.Token last = cur;
         Pullenti.Ner.Token probe = cur.Next;
         while (probe != null && probe.EndChar <= t1.EndChar && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(probe))
         {
             last = probe;
             probe = probe.Next;
         }
         _processSentence(block, ar, cur, last);
         // Resume scanning right after the sentence we just processed.
         cur = last.Next;
     }
     // Discard empty blocks.
     if (block.Fragments.Count > 0)
     {
         res.Blocks.Add(block);
     }
 }
示例#2
0
 // Copies the core state (text container, token chain, base language) from an
 // existing analysis result, then rebuilds the derived statistics.
 internal void InitFrom(Pullenti.Ner.AnalysisResult ar)
 {
     this.m_Sofa = ar.Sofa;
     this.FirstToken = ar.FirstToken;
     this.BaseLanguage = ar.BaseLanguage;
     this.CreateStatistics();
 }
示例#3
0
        // Parses the token range [t0..t1] as a sentence, scores the parse variants
        // and adds the best-scoring one to the block.
        // Fixes vs. original: the dead local `alt` (assigned but never read) is removed,
        // and the two token-counting passes are merged into a single pass.
        static void _processSentence(Pullenti.Semantic.SemBlock blk, Pullenti.Ner.AnalysisResult ar, Pullenti.Ner.Token t0, Pullenti.Ner.Token t1)
        {
            // Cap overly long sentences at 70 tokens: count tokens and remember
            // the token at index 70 in the same pass.
            int cou = 0;
            Pullenti.Ner.Token cutoff = null;

            for (Pullenti.Ner.Token t = t0; t != null && (t.EndChar < t1.EndChar); t = t.Next, cou++)
            {
                if (cou == 70)
                {
                    cutoff = t;
                }
            }
            if (cou > 70 && cutoff != null)
            {
                t1 = cutoff;
            }
            List<Sentence> sents = Sentence.ParseVariants(t0, t1, 0, 100, SentItemType.Undefined);

            if (sents == null)
            {
                return;
            }
            // Select the variant with the highest coefficient.
            double   max  = (double)-1;
            Sentence best = null;

            foreach (Sentence s in sents)
            {
                // If the range ends on a non-letter text token, extend the variant to it.
                if ((t1 is Pullenti.Ner.TextToken) && !t1.Chars.IsLetter)
                {
                    s.LastChar = t1 as Pullenti.Ner.TextToken;
                }
                s.CalcCoef(false);
                if (s.Coef > max)
                {
                    max  = s.Coef;
                    best = s;
                }
            }
            if (best != null && best.ResBlock != null)
            {
                best.AddToBlock(blk, null);
            }
        }
示例#4
0
 /// <summary>
 /// Run semantic analysis on top of morphology and NER results.
 /// </summary>
 /// <param name="ar">result produced by a Processor</param>
 /// <param name="pars">optional extra parameters; a default instance is used when null</param>
 /// <returns>the semantic analysis result for the text</returns>
 public static SemDocument Process(Pullenti.Ner.AnalysisResult ar, SemProcessParams pars = null)
 {
     SemProcessParams effective = pars ?? new SemProcessParams();
     return Pullenti.Semantic.Internal.AnalyzeHelper.Process(ar, effective);
 }
示例#5
0
        /// <summary>
        /// Console demo for the Pullenti SDK: initializes the engine, then runs
        /// several passes over a sample Russian text (noun-phrase extraction on the
        /// empty processor, full NER, and keyword extraction).
        /// </summary>
        public static void Main(string[] args)
        {
            Stopwatch sw = Stopwatch.StartNew();

            // initialization - must be performed once before processing any texts
            Console.Write("Initializing SDK Pullenti ver {0} ({1}) ... ", Pullenti.Sdk.Version, Pullenti.Sdk.VersionDate);
            // initializes the engine and all available analyzers
            Pullenti.Sdk.InitializeAll();
            sw.Stop();
            Console.WriteLine("OK (by {0} ms), version {1}", (int)sw.ElapsedMilliseconds, Pullenti.Ner.ProcessorService.Version);
            // list which analyzers are available
            foreach (Pullenti.Ner.Analyzer a in Pullenti.Ner.ProcessorService.Analyzers)
            {
                Console.WriteLine("   {0} {1} \"{2}\"", (a.IsSpecific ? "Specific analyzer" : "Common analyzer"), a.Name, a.Caption);
            }
            // the sample text to analyze (Russian; runtime data, kept as-is)
            string txt = "Система разрабатывается с 2011 года российским программистом Михаилом Жуковым, проживающим в Москве на Красной площади в доме номер один на втором этаже. Конкурентов у него много: Abbyy, Yandex, ООО \"Russian Context Optimizer\" (RCO) и другие компании. Он планирует продать SDK за 1.120.000.001,99 (миллиард сто двадцать миллионов один рубль 99 копеек) рублей, без НДС.";

            Console.WriteLine("Text: {0}", txt);
            // run processing on the empty processor (no NER analyzers)
            Pullenti.Ner.AnalysisResult are = Pullenti.Ner.ProcessorService.EmptyProcessor.Process(new Pullenti.Ner.SourceOfAnalysis(txt), null, null);
            Console.Write("Noun groups: ");
            // iterate over the tokens
            for (Pullenti.Ner.Token t = are.FirstToken; t != null; t = t.Next)
            {
                // try to extract a noun phrase starting at the current token
                Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                // no noun phrase here
                if (npt == null)
                {
                    continue;
                }
                // success: print it in normalized form
                Console.Write("[{0}=>{1}] ", npt.GetSourceText(), npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false));
                // move the pointer to the last token of the noun phrase
                t = npt.EndToken;
            }
            using (Pullenti.Ner.Processor proc = Pullenti.Ner.ProcessorService.CreateProcessor())
            {
                // analyze the text
                Pullenti.Ner.AnalysisResult ar = proc.Process(new Pullenti.Ner.SourceOfAnalysis(txt), null, null);
                // resulting entities
                Console.WriteLine("\r\n==========================================\r\nEntities: ");
                foreach (Pullenti.Ner.Referent e in ar.Entities)
                {
                    Console.WriteLine("{0}: {1}", e.TypeName, e.ToString());
                    foreach (Pullenti.Ner.Slot s in e.Slots)
                    {
                        Console.WriteLine("   {0}: {1}", s.TypeName, s.Value);
                    }
                }
                // example of extracting noun phrases
                Console.WriteLine("\r\n==========================================\r\nNoun groups: ");
                for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
                {
                    // skip tokens covered by entities
                    if (t.GetReferent() != null)
                    {
                        continue;
                    }
                    // try to build a noun phrase
                    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast, 0, null);
                    // failed
                    if (npt == null)
                    {
                        continue;
                    }
                    Console.WriteLine(npt);
                    // move the pointer to the last token of the phrase
                    t = npt.EndToken;
                }
            }
            using (Pullenti.Ner.Processor proc = Pullenti.Ner.ProcessorService.CreateSpecificProcessor(Pullenti.Ner.Keyword.KeywordAnalyzer.ANALYZER_NAME))
            {
                Pullenti.Ner.AnalysisResult ar = proc.Process(new Pullenti.Ner.SourceOfAnalysis(txt), null, null);
                Console.WriteLine("\r\n==========================================\r\nKeywords1: ");
                foreach (Pullenti.Ner.Referent e in ar.Entities)
                {
                    if (e is Pullenti.Ner.Keyword.KeywordReferent)
                    {
                        Console.WriteLine(e);
                    }
                }
                Console.WriteLine("\r\n==========================================\r\nKeywords2: ");
                for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
                {
                    if (t is Pullenti.Ner.ReferentToken)
                    {
                        Pullenti.Ner.Keyword.KeywordReferent kw = t.GetReferent() as Pullenti.Ner.Keyword.KeywordReferent;
                        if (kw == null)
                        {
                            continue;
                        }
                        string kwstr = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.ReferentToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle | Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                        Console.WriteLine("{0} = {1}", kwstr, kw);
                    }
                }
            }
            Console.WriteLine("Over!");
        }
示例#6
0
 // Builds the global organization ontologies from the embedded Orgs_*.dat
 // resources (RU, EN, UA). Idempotent: returns immediately once GlobalOrgs
 // has been created. Throws if an expected resource is missing.
 public static void Initialize()
 {
     if (GlobalOrgs != null)
     {
         return;
     }
     GlobalOrgs = new Pullenti.Ner.Core.IntOntologyCollection();
     Pullenti.Ner.Org.OrganizationReferent org;
     Pullenti.Ner.Core.IntOntologyItem     oi;
     // A geo-only processor resolves the <geo> elements of each organization entry.
     using (Pullenti.Ner.Processor geoProc = Pullenti.Ner.ProcessorService.CreateEmptyProcessor())
     {
         geoProc.AddAnalyzer(new Pullenti.Ner.Geo.GeoAnalyzer());
         // Cache of already-resolved geo names to avoid reprocessing duplicates.
         Dictionary <string, Pullenti.Ner.Geo.GeoReferent> geos = new Dictionary <string, Pullenti.Ner.Geo.GeoReferent>();
         for (int k = 0; k < 3; k++)
         {
             // k == 0 -> Russian, k == 1 -> English, k == 2 -> Ukrainian.
             Pullenti.Morph.MorphLang lang = (k == 0 ? Pullenti.Morph.MorphLang.RU : (k == 1 ? Pullenti.Morph.MorphLang.EN : Pullenti.Morph.MorphLang.UA));
             string name = (k == 0 ? "Orgs_ru.dat" : (k == 1 ? "Orgs_en.dat" : "Orgs_ua.dat"));
             byte[] dat  = ResourceHelper.GetBytes(name);
             if (dat == null)
             {
                 // Message fixed: original read "Can't file resource file".
                 throw new Exception(string.Format("Can't find resource file {0} in Organization analyzer", name));
             }
             // Resources are deflate-compressed XML documents.
             using (MemoryStream tmp = new MemoryStream(OrgItemTypeToken.Deflate(dat)))
             {
                 tmp.Position = 0;
                 XmlDocument xml = new XmlDocument();
                 xml.Load(tmp);
                 foreach (XmlNode x in xml.DocumentElement.ChildNodes)
                 {
                     org = new Pullenti.Ner.Org.OrganizationReferent();
                     string abbr = null;
                     foreach (XmlNode xx in x.ChildNodes)
                     {
                         if (xx.LocalName == "typ")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_TYPE, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "nam")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_NAME, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "epo")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_EPONYM, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "prof")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_PROFILE, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "abbr")
                         {
                             abbr = xx.InnerText;
                         }
                         else if (xx.LocalName == "geo")
                         {
                             Pullenti.Ner.Geo.GeoReferent geo;
                             if (!geos.TryGetValue(xx.InnerText, out geo))
                             {
                                 // Resolve the geo name; cache it only when exactly one
                                 // GeoReferent is produced. Otherwise geo stays null and
                                 // the slot is simply not added (best-effort).
                                 Pullenti.Ner.AnalysisResult ar = geoProc.Process(new Pullenti.Ner.SourceOfAnalysis(xx.InnerText), null, lang);
                                 if (ar != null && ar.Entities.Count == 1 && (ar.Entities[0] is Pullenti.Ner.Geo.GeoReferent))
                                 {
                                     geo = ar.Entities[0] as Pullenti.Ner.Geo.GeoReferent;
                                     geos.Add(xx.InnerText, geo);
                                 }
                             }
                             if (geo != null)
                             {
                                 org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, geo, false, 0);
                             }
                         }
                     }
                     oi = org.CreateOntologyItemEx(2, true, true);
                     if (oi == null)
                     {
                         continue;
                     }
                     if (abbr != null)
                     {
                         oi.Termins.Add(new Pullenti.Ner.Core.Termin(abbr, null, true));
                     }
                     // NOTE(review): GlobalOrgsUa is assumed to be initialized elsewhere
                     // (e.g. at its declaration) — only GlobalOrgs is created here; confirm.
                     if (k == 2)
                     {
                         GlobalOrgsUa.AddItem(oi);
                     }
                     else
                     {
                         GlobalOrgs.AddItem(oi);
                     }
                 }
             }
         }
     }
 }
示例#7
0
        // Builds a semantic document from an NER analysis result: splits the token
        // stream into newline-delimited blocks, processes each block (best-effort),
        // then runs the optimizer over the assembled document.
        // Fixes vs. original: unused `ex` in the empty catch removed (the swallow is
        // deliberate and now documented); converter artifact "/* error */" comments dropped.
        public static Pullenti.Semantic.SemDocument Process(Pullenti.Ner.AnalysisResult ar, Pullenti.Semantic.SemProcessParams pars)
        {
            Pullenti.Semantic.SemDocument txt = new Pullenti.Semantic.SemDocument();
            // Clear per-token tags left over from any previous run.
            for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
            {
                t.Tag = null;
            }
            if (pars.Progress != null)
            {
                pars.Progress(null, new ProgressChangedEventArgs(0, null));
            }
            int pers0 = 0;

            for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
            {
                if (pars.Progress != null)
                {
                    // Report progress as a percentage of characters consumed; the two
                    // branches avoid int overflow of p * 100 on long texts.
                    int p = t.BeginChar;
                    if (ar.Sofa.Text.Length < 100000)
                    {
                        p = (p * 100) / ar.Sofa.Text.Length;
                    }
                    else
                    {
                        p /= (ar.Sofa.Text.Length / 100);
                    }
                    if (p != pers0)
                    {
                        pers0 = p;
                        pars.Progress(null, new ProgressChangedEventArgs(p, null));
                    }
                }
                // Extend t1 to the last token before a line break that can start a sentence.
                Pullenti.Ner.Token t1 = t;
                for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next)
                {
                    if (tt.IsNewlineBefore && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt))
                    {
                        break;
                    }
                    t1 = tt;
                }
                try
                {
                    _processBlock(txt, ar, t, t1);
                }
                catch (Exception)
                {
                    // Best-effort: a failure in one block must not abort the whole document.
                }
                t = t1;
                if (pars.MaxChar > 0 && t.EndChar > pars.MaxChar)
                {
                    break;
                }
            }
            OptimizerHelper.Optimize(txt, pars);
            if (pars.Progress != null)
            {
                pars.Progress(null, new ProgressChangedEventArgs(100, null));
            }
            return txt;
        }