/// <summary>
/// Loads the built-in organization ontology from the resources
/// <c>Orgs_ru.dat</c>, <c>Orgs_en.dat</c> and <c>Orgs_ua.dat</c>.
/// </summary>
/// <remarks>
/// The method returns immediately when <c>GlobalOrgs</c> is already assigned.
/// Items read from the first two resources are added to <c>GlobalOrgs</c>; items read from
/// the third one are added to <c>GlobalOrgsUa</c>.
/// NOTE(review): <c>GlobalOrgsUa</c> is not created here - presumably it is initialized at its
/// declaration; confirm.
/// NOTE(review): <c>GlobalOrgs</c> is assigned before loading finishes, so if loading throws,
/// a later call returns at the guard with whatever was loaded so far - confirm this is acceptable.
/// </remarks>
/// <exception cref="Exception">A resource file could not be obtained.</exception>
public static void Initialize()
{
    if (GlobalOrgs != null)
    {
        return;
    }
    GlobalOrgs = new Pullenti.Ner.Core.IntOntologyCollection();

    // Index in this array is the language selector used below: 0 - RU, 1 - EN, 2 - UA.
    string[] resourceNames = new string[] { "Orgs_ru.dat", "Orgs_en.dat", "Orgs_ua.dat" };

    using (Pullenti.Ner.Processor geoProc = Pullenti.Ner.ProcessorService.CreateEmptyProcessor())
    {
        geoProc.AddAnalyzer(new Pullenti.Ner.Geo.GeoAnalyzer());

        // Cache of successfully resolved geo strings, shared by all three resources.
        // Failed lookups are deliberately NOT cached: the same text may be processed
        // again later with a different language.
        Dictionary<string, Pullenti.Ner.Geo.GeoReferent> geos = new Dictionary<string, Pullenti.Ner.Geo.GeoReferent>();

        for (int k = 0; k < resourceNames.Length; k++)
        {
            Pullenti.Morph.MorphLang lang = (k == 0 ? Pullenti.Morph.MorphLang.RU : (k == 1 ? Pullenti.Morph.MorphLang.EN : Pullenti.Morph.MorphLang.UA));
            string name = resourceNames[k];

            byte[] dat = ResourceHelper.GetBytes(name);
            if (dat == null)
            {
                throw new Exception(string.Format("Can't find resource file {0} in Organization analyzer", name));
            }

            using (MemoryStream tmp = new MemoryStream(OrgItemTypeToken.Deflate(dat)))
            {
                XmlDocument xml = new XmlDocument();
                xml.Load(tmp);

                // Each child of the document element describes one organization.
                foreach (XmlNode x in xml.DocumentElement.ChildNodes)
                {
                    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
                    string abbr = null;

                    foreach (XmlNode xx in x.ChildNodes)
                    {
                        string text = xx.InnerText;
                        switch (xx.LocalName)
                        {
                            case "typ":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_TYPE, text, false, 0);
                                break;
                            case "nam":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_NAME, text, false, 0);
                                break;
                            case "epo":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_EPONYM, text, false, 0);
                                break;
                            case "prof":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_PROFILE, text, false, 0);
                                break;
                            case "abbr":
                                // Remembered and attached to the ontology item as an extra termin below.
                                abbr = text;
                                break;
                            case "geo":
                                Pullenti.Ner.Geo.GeoReferent geo;
                                if (!geos.TryGetValue(text, out geo))
                                {
                                    // Accept the analysis only when it yields exactly one entity
                                    // and that entity is a GeoReferent.
                                    Pullenti.Ner.AnalysisResult ar = geoProc.Process(new Pullenti.Ner.SourceOfAnalysis(text), null, lang);
                                    if (ar != null && ar.Entities.Count == 1)
                                    {
                                        geo = ar.Entities[0] as Pullenti.Ner.Geo.GeoReferent;
                                        if (geo != null)
                                        {
                                            geos.Add(text, geo);
                                        }
                                    }
                                }
                                if (geo != null)
                                {
                                    org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, geo, false, 0);
                                }
                                break;
                        }
                    }

                    Pullenti.Ner.Core.IntOntologyItem oi = org.CreateOntologyItemEx(2, true, true);
                    if (oi == null)
                    {
                        continue;
                    }
                    if (abbr != null)
                    {
                        oi.Termins.Add(new Pullenti.Ner.Core.Termin(abbr, null, true));
                    }

                    // The third resource (UA) feeds its own collection.
                    if (k == 2)
                    {
                        GlobalOrgsUa.AddItem(oi);
                    }
                    else
                    {
                        GlobalOrgs.AddItem(oi);
                    }
                }
            }
        }
    }
}
/// <summary>
/// (Re)creates the static term collections <c>m_StdTails</c>, <c>m_StdNames</c>,
/// <c>m_VervotWords</c> and <c>m_StdNouns</c>.
/// </summary>
/// <remarks>
/// The first three collections are filled from hard-coded word lists; <c>m_StdNouns</c> is
/// filled from the resources <c>NameNouns_ru.dat</c> and <c>NameNouns_ua.dat</c>, one term per
/// non-empty line. There is no "already initialized" guard: every call builds new collections.
/// </remarks>
/// <exception cref="Exception">A resource file could not be obtained.</exception>
public static void Initialize()
{
    m_StdTails = new Pullenti.Ner.Core.TerminCollection();
    m_StdNames = new Pullenti.Ner.Core.TerminCollection();
    m_VervotWords = new Pullenti.Ner.Core.TerminCollection();

    // --- Legal-form tails, some with an abridged form ---
    Pullenti.Ner.Core.Termin tail = new Pullenti.Ner.Core.Termin("INCORPORATED");
    tail.AddAbridge("INC.");
    m_StdTails.Add(tail);

    tail = new Pullenti.Ner.Core.Termin("CORPORATION");
    tail.AddAbridge("CORP.");
    m_StdTails.Add(tail);

    tail = new Pullenti.Ner.Core.Termin("LIMITED");
    tail.AddAbridge("LTD.");
    m_StdTails.Add(tail);

    m_StdTails.Add(new Pullenti.Ner.Core.Termin("AG"));
    m_StdTails.Add(new Pullenti.Ner.Core.Termin("GMBH"));

    // --- Role words; each termin carries the word itself as its Tag ---
    // NOTE(review): both lists contain repeated entries (e.g. "ПРОДАВЕЦ", "ПОСТАВЩИК",
    // "ВИРОБНИК"). They are kept as-is so the resulting collection is unchanged;
    // confirm whether the duplicates are intentional.
    string[] rolesRu = new string[]
    {
        "ЗАКАЗЧИК", "ИСПОЛНИТЕЛЬ", "РАЗРАБОТЧИК", "БЕНЕФИЦИАР", "ПОЛУЧАТЕЛЬ", "ОТПРАВИТЕЛЬ",
        "ИЗГОТОВИТЕЛЬ", "ПРОИЗВОДИТЕЛЬ", "ПОСТАВЩИК", "АБОНЕНТ", "КЛИЕНТ", "ВКЛАДЧИК",
        "СУБЪЕКТ", "ПРОДАВЕЦ", "ПОКУПАТЕЛЬ", "АРЕНДОДАТЕЛЬ", "АРЕНДАТОР", "СУБАРЕНДАТОР",
        "НАЙМОДАТЕЛЬ", "НАНИМАТЕЛЬ", "АГЕНТ", "ПРИНЦИПАЛ", "ПРОДАВЕЦ", "ПОСТАВЩИК",
        "ПОДРЯДЧИК", "СУБПОДРЯДЧИК"
    };
    foreach (string role in rolesRu)
    {
        Pullenti.Ner.Core.Termin roleTermin = new Pullenti.Ner.Core.Termin(role);
        roleTermin.Tag = role;
        m_StdTails.Add(roleTermin);
    }

    string[] rolesUa = new string[]
    {
        "ЗАМОВНИК", "ВИКОНАВЕЦЬ", "РОЗРОБНИК", "БЕНЕФІЦІАР", "ОДЕРЖУВАЧ", "ВІДПРАВНИК",
        "ВИРОБНИК", "ВИРОБНИК", "ПОСТАЧАЛЬНИК", "АБОНЕНТ", "КЛІЄНТ", "ВКЛАДНИК",
        "СУБ'ЄКТ", "ПРОДАВЕЦЬ", "ПОКУПЕЦЬ", "ОРЕНДОДАВЕЦЬ", "ОРЕНДАР", "СУБОРЕНДАР",
        "НАЙМОДАВЕЦЬ", "НАЙМАЧ", "АГЕНТ", "ПРИНЦИПАЛ", "ПРОДАВЕЦЬ", "ПОСТАЧАЛЬНИК",
        "ПІДРЯДНИК", "СУБПІДРЯДНИК"
    };
    foreach (string role in rolesUa)
    {
        Pullenti.Ner.Core.Termin roleTermin = new Pullenti.Ner.Core.Termin(role, Pullenti.Morph.MorphLang.UA);
        roleTermin.Tag = role;
        m_StdTails.Add(roleTermin);
    }

    // --- Standard names ---
    Pullenti.Ner.Core.Termin stdName = new Pullenti.Ner.Core.Termin("РАЗРАБОТКА ПРОГРАММНОГО ОБЕСПЕЧЕНИЯ");
    stdName.AddAbridge("РАЗРАБОТКИ ПО");
    m_StdNames.Add(stdName);

    // --- m_VervotWords: Russian entries first, then Ukrainian ---
    foreach (string word in new string[] { "СПЕЦИАЛЬНОСТЬ", "ДИАГНОЗ" })
    {
        m_VervotWords.Add(new Pullenti.Ner.Core.Termin(word));
    }
    foreach (string word in new string[] { "СПЕЦІАЛЬНІСТЬ", "ДІАГНОЗ" })
    {
        m_VervotWords.Add(new Pullenti.Ner.Core.Termin(word, Pullenti.Morph.MorphLang.UA));
    }

    // --- Standard nouns, loaded from resources ---
    m_StdNouns = new Pullenti.Ner.Core.TerminCollection();
    string[] nounResources = new string[] { "NameNouns_ru.dat", "NameNouns_ua.dat" };
    for (int k = 0; k < nounResources.Length; k++)
    {
        string name = nounResources[k];
        byte[] dat = ResourceHelper.GetBytes(name);
        if (dat == null)
        {
            throw new Exception(string.Format("Can't find resource file {0} in Organization analyzer", name));
        }

        string content = Encoding.UTF8.GetString(OrgItemTypeToken.Deflate(dat));
        foreach (string rawLine in content.Split('\n'))
        {
            // Trim also removes a trailing '\r' left over from CRLF line endings.
            string line = rawLine.Trim();
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            Pullenti.Ner.Core.Termin noun = new Pullenti.Ner.Core.Termin(line);
            if (k != 0)
            {
                // For the UA resource the language is assigned after construction
                // (not passed to the constructor as for the UA lists above); kept as in the original.
                noun.Lang = Pullenti.Morph.MorphLang.UA;
            }
            m_StdNouns.Add(noun);
        }
    }
}