/// <summary>
/// Parses a run of consecutive named items starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">Token at which the first item must start.</param>
/// <param name="locOnto">Local ontology, handed unchanged to every TryParse call.</param>
/// <returns>
/// The parsed items in text order, or null when no item starts at <paramref name="t"/>.
/// </returns>
public static List<NamedItemToken> TryParseList(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
{
    NamedItemToken first = TryParse(t, locOnto);
    if (first == null)
    {
        return null;
    }

    List<NamedItemToken> items = new List<NamedItemToken> { first };

    // Keep consuming items that directly follow the previous one.
    Pullenti.Ner.Token cursor = first.EndToken.Next;
    while (cursor != null)
    {
        // A gap of more than two whitespaces ends the sequence.
        if (cursor.WhitespacesBeforeCount > 2)
        {
            break;
        }

        NamedItemToken next = TryParse(cursor, locOnto);

        // Stop when nothing parses here, or when the token is the word "НЕТ".
        if (next == null || cursor.IsValue("НЕТ", null))
        {
            break;
        }

        items.Add(next);
        cursor = next.EndToken.Next;
    }

    return items;
}
/// <summary>
/// Rebuilds m_Hash: one IntOntologyCollection per referent type name, filled with
/// the referents of all entries in Items, and re-links every referent back to the
/// ontology items that point at it.
/// </summary>
void _initHash()
{
    m_Hash = new Dictionary<string, Pullenti.Ner.Core.IntOntologyCollection>();

    // Pass 1: clear the back-reference lists on every referent. This must finish
    // before pass 2 starts, because several items may share the same referent.
    foreach (ExtOntologyItem entry in Items)
    {
        if (entry.Referent == null)
        {
            continue;
        }
        entry.Referent.OntologyItems = null;
    }

    // Pass 2: group referents by type name and rebuild the back-references.
    foreach (ExtOntologyItem entry in Items)
    {
        var referent = entry.Referent;
        if (referent == null)
        {
            continue;
        }

        string typeName = referent.TypeName;
        Pullenti.Ner.Core.IntOntologyCollection bucket;
        if (!m_Hash.TryGetValue(typeName, out bucket))
        {
            bucket = new Pullenti.Ner.Core.IntOntologyCollection() { IsExtOntology = true };
            m_Hash.Add(typeName, bucket);
        }

        if (referent.OntologyItems == null)
        {
            referent.OntologyItems = new List<ExtOntologyItem>();
        }
        referent.OntologyItems.Add(entry);

        // Reset the internal ontology item before the referent is added to the collection.
        referent.IntOntologyItem = null;
        bucket.AddReferent(referent);
    }
}
/// <summary>
/// Loads the built-in organization dictionaries (resources "Orgs_ru.dat",
/// "Orgs_en.dat" and "Orgs_ua.dat") into the global ontology collections.
/// Does nothing when GlobalOrgs has already been assigned.
/// </summary>
/// <remarks>
/// Items from the first two resources go to GlobalOrgs, items from the third one
/// go to GlobalOrgsUa.
/// </remarks>
/// <exception cref="Exception">A resource file cannot be found.</exception>
public static void Initialize()
{
    if (GlobalOrgs != null)
    {
        return;
    }
    GlobalOrgs = new Pullenti.Ner.Core.IntOntologyCollection();
    // NOTE(review): GlobalOrgsUa is filled below but is not created in this method;
    // presumably it is initialized elsewhere - confirm.

    using (Pullenti.Ner.Processor geoProc = Pullenti.Ner.ProcessorService.CreateEmptyProcessor())
    {
        // A processor with only the geo analyzer is used to resolve <geo> texts into referents.
        geoProc.AddAnalyzer(new Pullenti.Ner.Geo.GeoAnalyzer());

        // Cache of successfully resolved geo texts, shared by all three resources.
        // Failed lookups are deliberately not cached: the same text is retried
        // with the language of the resource being loaded.
        Dictionary<string, Pullenti.Ner.Geo.GeoReferent> geos = new Dictionary<string, Pullenti.Ner.Geo.GeoReferent>();

        for (int k = 0; k < 3; k++)
        {
            Pullenti.Morph.MorphLang lang = (k == 0 ? Pullenti.Morph.MorphLang.RU : (k == 1 ? Pullenti.Morph.MorphLang.EN : Pullenti.Morph.MorphLang.UA));
            string name = (k == 0 ? "Orgs_ru.dat" : (k == 1 ? "Orgs_en.dat" : "Orgs_ua.dat"));

            byte[] dat = ResourceHelper.GetBytes(name);
            if (dat == null)
            {
                throw new Exception(string.Format("Can't find resource file {0} in Organization analyzer", name));
            }

            using (MemoryStream tmp = new MemoryStream(OrgItemTypeToken.Deflate(dat)))
            {
                tmp.Position = 0;
                XmlDocument xml = new XmlDocument();
                xml.Load(tmp);

                // Each child of the root element describes one organization.
                foreach (XmlNode x in xml.DocumentElement.ChildNodes)
                {
                    Pullenti.Ner.Org.OrganizationReferent org = new Pullenti.Ner.Org.OrganizationReferent();
                    string abbr = null;

                    foreach (XmlNode xx in x.ChildNodes)
                    {
                        string text = xx.InnerText;
                        switch (xx.LocalName)
                        {
                            case "typ":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_TYPE, text, false, 0);
                                break;
                            case "nam":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_NAME, text, false, 0);
                                break;
                            case "epo":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_EPONYM, text, false, 0);
                                break;
                            case "prof":
                                org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_PROFILE, text, false, 0);
                                break;
                            case "abbr":
                                abbr = text;
                                break;
                            case "geo":
                                Pullenti.Ner.Geo.GeoReferent geo;
                                if (!geos.TryGetValue(text, out geo))
                                {
                                    // Accept the analysis only when it yields exactly one entity
                                    // and that entity is a geo referent.
                                    Pullenti.Ner.AnalysisResult ar = geoProc.Process(new Pullenti.Ner.SourceOfAnalysis(text), null, lang);
                                    if (ar != null && ar.Entities.Count == 1)
                                    {
                                        geo = ar.Entities[0] as Pullenti.Ner.Geo.GeoReferent;
                                        if (geo != null)
                                        {
                                            geos.Add(text, geo);
                                        }
                                    }
                                }
                                if (geo != null)
                                {
                                    org.AddSlot(Pullenti.Ner.Org.OrganizationReferent.ATTR_GEO, geo, false, 0);
                                }
                                break;
                        }
                    }

                    Pullenti.Ner.Core.IntOntologyItem oi = org.CreateOntologyItemEx(2, true, true);
                    if (oi == null)
                    {
                        continue;
                    }
                    if (abbr != null)
                    {
                        // The abbreviation becomes an additional term of the ontology item.
                        oi.Termins.Add(new Pullenti.Ner.Core.Termin(abbr, null, true));
                    }

                    if (k == 2)
                    {
                        GlobalOrgsUa.AddItem(oi);
                    }
                    else
                    {
                        GlobalOrgs.AddItem(oi);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Tries to recognize a single named item starting at token <paramref name="t"/>.
/// </summary>
/// <remarks>
/// The alternatives are tried in this order, and the first that applies decides the result:
/// a referent token of a supported type; a term from m_Types; a term from m_Names;
/// a compass-direction prefix found by TryAttachNordWest; a capitalized noun phrase
/// with adjectives whose noun parses as a typed item; a bracketed sequence;
/// and finally any non-lowercase word longer than two characters.
/// </remarks>
/// <param name="t">Token at which the item must start; null yields null.</param>
/// <param name="locOnto">Local ontology; only passed on to the recursive calls of this method.</param>
/// <returns>The recognized item, or null when nothing was recognized.</returns>
public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
{
    if (t == null)
    {
        return null;
    }

    // 1. An already recognized entity: only some referent types are wrapped as named items.
    if (t is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if (r == null)
        {
            return null;
        }
        if ((r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent)) || r.TypeName == "ORGANIZATION")
        {
            return new NamedItemToken(t, t) { Ref = r, Morph = t.Morph };
        }
        return null;
    }

    Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

    // 2. A type term; a name term covering the same span with the same kind also supplies the name.
    if (typ != null)
    {
        if (!(t is Pullenti.Ner.TextToken))
        {
            return null;
        }
        NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken) { Morph = typ.Morph, Chars = typ.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
        res.TypeValue = typ.Termin.CanonicText;
        if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind)
        {
            res.NameValue = nam.Termin.CanonicText;
            res.IsWellknown = true;
        }
        return res;
    }

    // 3. A name term; names written entirely in lower case are rejected.
    if (nam != null)
    {
        if (nam.BeginToken.Chars.IsAllLower)
        {
            return null;
        }
        NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken) { Morph = nam.Morph, Chars = nam.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
        res.NameValue = nam.Termin.CanonicText;

        // The name counts as well-known only when it stands apart from its neighbours:
        // whitespace (or text start) before, and whitespace, text end, or a punctuation
        // mark followed by whitespace after.
        // NOTE(review): both checks look at t (the first token), not at nam.EndToken -
        // confirm this is intended for multi-token names.
        bool ok = true;
        if (!t.IsWhitespaceBefore && t.Previous != null)
        {
            ok = false;
        }
        else if (!t.IsWhitespaceAfter && t.Next != null)
        {
            if (!(t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter))
            {
                ok = false;
            }
        }
        if (ok)
        {
            res.IsWellknown = true;
            res.TypeValue = nam.Termin.Tag2 as string;
        }
        return res;
    }

    // 4. A compass-direction prefix as detected by TryAttachNordWest.
    Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
    if (adj != null)
    {
        if (adj.Morph.Class.IsNoun)
        {
            // Of the noun forms only a multi-token phrase ending in "ВОСТОК" is accepted.
            if (adj.EndToken.IsValue("ВОСТОК", null))
            {
                if (adj.BeginToken == adj.EndToken)
                {
                    return null;
                }
                NamedItemToken re = new NamedItemToken(t, adj.EndToken) { Morph = adj.Morph };
                re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
                re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                re.IsWellknown = true;
                return re;
            }
            return null;
        }
        if (adj.WhitespacesAfterCount > 2)
        {
            return null;
        }

        // Direction followed by an already recognized geo entity.
        if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next) { Morph = adj.EndToken.Next.Morph };
            re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
            re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
            re.IsWellknown = true;
            re.Ref = adj.EndToken.Next.GetReferent();
            return re;
        }

        // Direction followed by something that itself parses as a location:
        // the direction is prepended to (or becomes) the name.
        NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
        if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
        {
            string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
            if (s != null)
            {
                if (res.NameValue == null)
                {
                    res.NameValue = s.ToUpper();
                }
                else
                {
                    res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                    res.TypeValue = null;
                }
                res.BeginToken = t;
                res.Chars = t.Chars;
                res.IsWellknown = true;
                return res;
            }
        }
        // Otherwise fall through to the remaining alternatives.
    }

    // 5. Capitalized adjective(s) + noun, where the noun alone parses as a typed item
    //    ending exactly where the noun phrase ends; the adjectives become the name.
    if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null && npt.Adjectives.Count > 0)
        {
            NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
            if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null)
            {
                test.BeginToken = t;
                StringBuilder tmp = new StringBuilder();
                foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
                {
                    string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                    if (tmp.Length > 0)
                    {
                        tmp.Append(' ');
                    }
                    tmp.Append(s);
                }
                test.NameValue = tmp.ToString();
                test.Chars = t.Chars;
                if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
                {
                    test.IsWellknown = true;
                }
                return test;
            }
        }
    }

    // 6. A bracketed sequence whose first inner token is a letter token that is not all lower case.
    if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower)
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            NamedItemToken res = new NamedItemToken(t, br.EndToken);
            res.IsInBracket = true;
            res.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);

            // If the whole bracket content is a known name, take its kind and canonic text.
            nam = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (nam != null && nam.EndToken == br.EndToken.Previous)
            {
                res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                res.IsWellknown = true;
                res.NameValue = nam.Termin.CanonicText;
            }
            return res;
        }
    }

    // 7. Fallback: any letter token longer than two characters that is not all lower case.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
    {
        NamedItemToken res = new NamedItemToken(t, t) { Morph = t.Morph };
        string str = (t as Pullenti.Ner.TextToken).Term;

        // Terms ending in "О", "И" or "Ы" are taken as written instead of being normalized.
        if (str.EndsWith("О") || str.EndsWith("И") || str.EndsWith("Ы"))
        {
            res.NameValue = str;
        }
        else
        {
            res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        }
        res.Chars = t.Chars;

        // Glue a hyphenated second part ("A-B") when it is attached without whitespace
        // and both parts agree on being Cyrillic or not.
        if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter)
        {
            Pullenti.Ner.Token tail = t.Next.Next;
            res.EndToken = tail;
            res.NameValue = string.Format("{0}-{1}", res.NameValue, tail.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
        }
        return res;
    }

    return null;
}