Beispiel #1
0
        /// <summary>
        /// Parses a run of consecutive named items starting at <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Token at which the first item must start.</param>
        /// <param name="locOnto">Local ontology, passed through to <c>TryParse</c>.</param>
        /// <returns>
        /// The list of recognized items (at least one element), or null when no item
        /// starts at <paramref name="t"/>.
        /// </returns>
        public static List <NamedItemToken> TryParseList(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
        {
            NamedItemToken first = TryParse(t, locOnto);
            if (first == null)
            {
                return null;
            }

            List <NamedItemToken> items = new List <NamedItemToken> { first };

            // Keep consuming items while the next token is separated from the previous
            // item by at most two whitespace characters.
            Pullenti.Ner.Token cursor = first.EndToken.Next;
            while (cursor != null && cursor.WhitespacesBeforeCount <= 2)
            {
                NamedItemToken item = TryParse(cursor, locOnto);

                // Stop on the first token that is not an item, or on the word "НЕТ".
                if (item == null || cursor.IsValue("НЕТ", null))
                {
                    break;
                }

                items.Add(item);
                cursor = item.EndToken.Next;
            }

            return items;
        }
Beispiel #2
0
 /// <summary>
 /// Rebuilds <c>m_Hash</c>: one ontology collection per referent type name, filled
 /// with the referents of all entries in <c>Items</c>. Also rebuilds each referent's
 /// <c>OntologyItems</c> back-reference list.
 /// </summary>
 void _initHash()
 {
     m_Hash = new Dictionary <string, Pullenti.Ner.Core.IntOntologyCollection>();

     // Pass 1: drop the old back-references of every referent before any list is rebuilt.
     foreach (ExtOntologyItem entry in Items)
     {
         if (entry.Referent == null)
         {
             continue;
         }
         entry.Referent.OntologyItems = null;
     }

     // Pass 2: group referents by type name and re-attach the items to them.
     foreach (ExtOntologyItem entry in Items)
     {
         var referent = entry.Referent;
         if (referent == null)
         {
             continue;
         }

         string typeName = referent.TypeName;
         Pullenti.Ner.Core.IntOntologyCollection collection;
         if (!m_Hash.TryGetValue(typeName, out collection))
         {
             // First referent of this type: create its collection.
             collection = new Pullenti.Ner.Core.IntOntologyCollection();
             collection.IsExtOntology = true;
             m_Hash.Add(typeName, collection);
         }

         if (referent.OntologyItems == null)
         {
             referent.OntologyItems = new List <ExtOntologyItem>();
         }
         referent.OntologyItems.Add(entry);

         // Reset the cached internal ontology item before registering the referent.
         referent.IntOntologyItem = null;
         collection.AddReferent(referent);
     }
 }
Beispiel #3
0
 /// <summary>
 /// Loads the built-in organization ontologies from the resources "Orgs_ru.dat",
 /// "Orgs_en.dat" and "Orgs_ua.dat". Entries of the first two files are added to
 /// <c>GlobalOrgs</c>, entries of the third one to <c>GlobalOrgsUa</c>.
 /// Does nothing when <c>GlobalOrgs</c> is already assigned.
 /// </summary>
 /// <exception cref="Exception">A resource file could not be obtained.</exception>
 public static void Initialize()
 {
     if (GlobalOrgs != null)
     {
         return;
     }
     GlobalOrgs = new Pullenti.Ner.Core.IntOntologyCollection();
     // NOTE(review): GlobalOrgsUa is used below but never assigned in this method -
     // confirm that it is initialized elsewhere before Initialize() runs.
     using (Pullenti.Ner.Processor geoProc = Pullenti.Ner.ProcessorService.CreateEmptyProcessor())
     {
         // A processor with only the geo analyzer is used to turn "geo" texts into referents.
         geoProc.AddAnalyzer(new Pullenti.Ner.Geo.GeoAnalyzer());
         // Cache of already resolved geo texts, shared by all three resource files.
         Dictionary <string, Pullenti.Ner.Geo.GeoReferent> geos = new Dictionary <string, Pullenti.Ner.Geo.GeoReferent>();
         for (int k = 0; k < 3; k++)
         {
             Pullenti.Morph.MorphLang lang = (k == 0 ? Pullenti.Morph.MorphLang.RU : (k == 1 ? Pullenti.Morph.MorphLang.EN : Pullenti.Morph.MorphLang.UA));
             string name = (k == 0 ? "Orgs_ru.dat" : (k == 1 ? "Orgs_en.dat" : "Orgs_ua.dat"));
             byte[] dat  = ResourceHelper.GetBytes(name);
             if (dat == null)
             {
                 throw new Exception(string.Format("Can't find resource file {0} in Organization analyzer", name));
             }
             // The resource is deflated XML; each child of the root element describes one organization.
             using (MemoryStream tmp = new MemoryStream(OrgItemTypeToken.Deflate(dat)))
             {
                 tmp.Position = 0;
                 XmlDocument xml = new XmlDocument();
                 xml.Load(tmp);
                 foreach (XmlNode x in xml.DocumentElement.ChildNodes)
                 {
                     Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
                     string abbr = null;
                     foreach (XmlNode xx in x.ChildNodes)
                     {
                         if (xx.LocalName == "typ")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_TYPE, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "nam")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_NAME, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "epo")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_EPONYM, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "prof")
                         {
                             org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_PROFILE, xx.InnerText, false, 0);
                         }
                         else if (xx.LocalName == "abbr")
                         {
                             abbr = xx.InnerText;
                         }
                         else if (xx.LocalName == "geo")
                         {
                             Pullenti.Ner.Geo.GeoReferent geo;
                             if (!geos.TryGetValue(xx.InnerText, out geo))
                             {
                                 // Not cached yet: accept the text only when it yields exactly one geo entity.
                                 Pullenti.Ner.AnalysisResult ar = geoProc.Process(new Pullenti.Ner.SourceOfAnalysis(xx.InnerText), null, lang);
                                 if (ar != null && ar.Entities.Count == 1)
                                 {
                                     geo = ar.Entities[0] as Pullenti.Ner.Geo.GeoReferent;
                                     if (geo != null)
                                     {
                                         geos.Add(xx.InnerText, geo);
                                     }
                                 }
                             }
                             if (geo != null)
                             {
                                 org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, geo, false, 0);
                             }
                         }
                     }
                     Pullenti.Ner.Core.IntOntologyItem oi = org.CreateOntologyItemEx(2, true, true);
                     if (oi == null)
                     {
                         continue;
                     }
                     if (abbr != null)
                     {
                         // The abbreviation becomes an additional termin of the ontology item.
                         oi.Termins.Add(new Pullenti.Ner.Core.Termin(abbr, null, true));
                     }
                     if (k == 2)
                     {
                         GlobalOrgsUa.AddItem(oi);
                     }
                     else
                     {
                         GlobalOrgs.AddItem(oi);
                     }
                 }
             }
         }
     }
 }
Beispiel #4
0
 /// <summary>
 /// Tries to recognize a single named item starting at token <paramref name="t"/>.
 /// The checks are made in this order: referent token, type word / known name from the
 /// termin collections, compass-direction prefix, capitalized noun phrase, bracketed
 /// sequence, and finally any capitalized word longer than two characters.
 /// </summary>
 /// <param name="t">First token to analyze; null yields null.</param>
 /// <param name="locOnto">Local ontology; here it is only forwarded to the recursive calls.</param>
 /// <returns>The recognized item, or null when nothing is recognized at <paramref name="t"/>.</returns>
 public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
 {
     if (t == null)
     {
         return null;
     }
     if (t is Pullenti.Ner.ReferentToken)
     {
         // An already recognized entity is accepted only for the listed referent kinds.
         Pullenti.Ner.Referent r = t.GetReferent();
         if (r != null && ((r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent)) || r.TypeName == "ORGANIZATION"))
         {
             return new NamedItemToken(t, t)
             {
                 Ref = r, Morph = t.Morph
             };
         }
         return null;
     }
     Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
     if (typ != null)
     {
         if (!(t is Pullenti.Ner.TextToken))
         {
             return null;
         }
         NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken)
         {
             Morph = typ.Morph, Chars = typ.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
         res.TypeValue = typ.Termin.CanonicText;
         // The same span is also a known name of the same kind and is not written in lower case.
         if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind)
         {
             res.NameValue   = nam.Termin.CanonicText;
             res.IsWellknown = true;
         }
         return res;
     }
     if (nam != null)
     {
         if (nam.BeginToken.Chars.IsAllLower)
         {
             return null;
         }
         NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken)
         {
             Morph = nam.Morph, Chars = nam.Chars
         };
         res.Kind      = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
         res.NameValue = nam.Termin.CanonicText;
         // The name is marked as well-known only when it is not glued to neighbouring tokens
         // (a following ",.;!?" that is itself followed by whitespace is tolerated).
         // NOTE(review): the checks below look at t, not at nam.EndToken - confirm that this
         // is intended for names spanning several tokens.
         bool ok = true;
         if (!t.IsWhitespaceBefore && t.Previous != null)
         {
             ok = false;
         }
         else if (!t.IsWhitespaceAfter && t.Next != null)
         {
             if (!(t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter))
             {
                 ok = false;
             }
         }
         if (ok)
         {
             res.IsWellknown = true;
             res.TypeValue   = nam.Termin.Tag2 as string;
         }
         return res;
     }
     Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
     if (adj != null)
     {
         if (adj.Morph.Class.IsNoun)
         {
             // Noun form: only a multi-token sequence ending with "ВОСТОК" is accepted.
             if (adj.EndToken.IsValue("ВОСТОК", null))
             {
                 if (adj.BeginToken == adj.EndToken)
                 {
                     return null;
                 }
                 NamedItemToken re = new NamedItemToken(t, adj.EndToken)
                 {
                     Morph = adj.Morph
                 };
                 re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
                 re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                 re.IsWellknown = true;
                 return re;
             }
             return null;
         }
         if (adj.WhitespacesAfterCount > 2)
         {
             return null;
         }
         // Direction word immediately followed by a geo referent.
         if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
         {
             NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next)
             {
                 Morph = adj.EndToken.Next.Morph
             };
             re.Kind        = Pullenti.Ner.Named.NamedEntityKind.Location;
             re.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
             re.IsWellknown = true;
             re.Ref         = adj.EndToken.Next.GetReferent();
             return re;
         }
         // Otherwise parse what follows and, if it is a location, prepend the direction word.
         NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
         if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
         {
             string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
             if (s != null)
             {
                 if (res.NameValue == null)
                 {
                     res.NameValue = s.ToUpper();
                 }
                 else
                 {
                     res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                     res.TypeValue = null;
                 }
                 res.BeginToken  = t;
                 res.Chars       = t.Chars;
                 res.IsWellknown = true;
                 return res;
             }
         }
     }
     if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
     {
         // Capitalized noun phrase with adjectives: the noun must parse as an item with a type
         // value that ends exactly where the phrase ends; the adjectives become the name.
         Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
         if (npt != null && npt.Adjectives.Count > 0)
         {
             NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
             if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null)
             {
                 test.BeginToken = t;
                 StringBuilder tmp = new StringBuilder();
                 foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
                 {
                     string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                     if (tmp.Length > 0)
                     {
                         tmp.Append(' ');
                     }
                     tmp.Append(s);
                 }
                 test.NameValue = tmp.ToString();
                 test.Chars     = t.Chars;
                 if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
                 {
                     test.IsWellknown = true;
                 }
                 return test;
             }
         }
     }
     if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower)
     {
         // Bracketed sequence whose first inner token is a letter token not in lower case.
         Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
         if (br != null)
         {
             NamedItemToken res = new NamedItemToken(t, br.EndToken);
             res.IsInBracket = true;
             res.NameValue   = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
             // If the whole bracket content is a known name, take its kind and canonical text.
             nam = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
             if (nam != null && nam.EndToken == br.EndToken.Previous)
             {
                 res.Kind        = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                 res.IsWellknown = true;
                 res.NameValue   = nam.Termin.CanonicText;
             }
             return res;
         }
     }
     if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
     {
         // Fallback: any letter token longer than two characters that is not all lower case.
         NamedItemToken res = new NamedItemToken(t, t)
         {
             Morph = t.Morph
         };
         string str = (t as Pullenti.Ner.TextToken).Term;
         // Terms ending with these letters are kept as written instead of being normalized.
         // Ordinal comparison: the check is about exact characters, not culture rules.
         if (str.EndsWith("О", System.StringComparison.Ordinal) || str.EndsWith("И", System.StringComparison.Ordinal) || str.EndsWith("Ы", System.StringComparison.Ordinal))
         {
             res.NameValue = str;
         }
         else
         {
             res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
         }
         res.Chars = t.Chars;
         // Hyphenated compound without spaces, both parts in the same alphabet: join them.
         if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter)
         {
             t             = (res.EndToken = t.Next.Next);
             res.NameValue = string.Format("{0}-{1}", res.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
         }
         return res;
     }
     return null;
 }