/// <summary>
/// Creates a token covering the span from <paramref name="b"/> to <paramref name="e"/>.
/// The third base-constructor argument is always passed as null.
/// </summary>
/// <param name="b">First token of the span.</param>
/// <param name="e">Last token of the span.</param>
public DelimToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
    : base(b, e, null)
{
}
/// <summary>
/// Tries to recognise a named-entity item (type word, well-known name, compass-direction
/// location, bracketed name or a plain capitalised word) starting at <paramref name="t"/>.
/// </summary>
/// <param name="t">Token to start from; null yields null.</param>
/// <param name="locOnto">Local ontology; in this method it is only forwarded to recursive calls.</param>
/// <returns>The recognised item, or null when nothing suitable starts at <paramref name="t"/>.</returns>
public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto)
{
    if (t == null)
    {
        return(null);
    }

    // An already extracted referent is accepted only for a fixed set of referent types.
    if (t is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent r = t.GetReferent();
        if ((r.TypeName == "PERSON" || r.TypeName == "PERSONPROPERTY" || (r is Pullenti.Ner.Geo.GeoReferent)) || r.TypeName == "ORGANIZATION")
        {
            // Fix: the return statement was missing its terminating semicolon before the closing brace.
            return new NamedItemToken(t, t) { Ref = r, Morph = t.Morph };
        }
        return(null);
    }

    Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

    // A type term wins over a name term; the name is attached only when it covers
    // the same span, is not all lower case and has the same kind.
    if (typ != null)
    {
        if (!(t is Pullenti.Ner.TextToken))
        {
            return(null);
        }
        NamedItemToken res = new NamedItemToken(typ.BeginToken, typ.EndToken) { Morph = typ.Morph, Chars = typ.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
        res.TypeValue = typ.Termin.CanonicText;
        if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == res.Kind)
        {
            res.NameValue = nam.Termin.CanonicText;
            res.IsWellknown = true;
        }
        return(res);
    }

    if (nam != null)
    {
        if (nam.BeginToken.Chars.IsAllLower)
        {
            return(null);
        }
        NamedItemToken res = new NamedItemToken(nam.BeginToken, nam.EndToken) { Morph = nam.Morph, Chars = nam.Chars };
        res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
        res.NameValue = nam.Termin.CanonicText;

        // The name counts as well-known only when it stands apart from neighbouring text:
        // whitespace (or text start) before, and whitespace, text end, or a punctuation
        // mark followed by whitespace after.
        bool ok = true;
        if (!t.IsWhitespaceBefore && t.Previous != null)
        {
            ok = false;
        }
        else if (!t.IsWhitespaceAfter && t.Next != null)
        {
            if (!(t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter))
            {
                ok = false;
            }
        }
        if (ok)
        {
            res.IsWellknown = true;
            res.TypeValue = nam.Termin.Tag2 as string;
        }
        return(res);
    }

    // Compass-direction prefix as recognised by MiscLocationHelper.TryAttachNordWest.
    Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
    if (adj != null)
    {
        if (adj.Morph.Class.IsNoun)
        {
            // A noun direction is accepted only for a multi-token phrase ending in "ВОСТОК".
            if (adj.EndToken.IsValue("ВОСТОК", null))
            {
                if (adj.BeginToken == adj.EndToken)
                {
                    return(null);
                }
                NamedItemToken re = new NamedItemToken(t, adj.EndToken) { Morph = adj.Morph };
                re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
                re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
                re.IsWellknown = true;
                return(re);
            }
            return(null);
        }
        if (adj.WhitespacesAfterCount > 2)
        {
            return(null);
        }

        // Direction adjective followed by a geo referent.
        if ((adj.EndToken.Next is Pullenti.Ner.ReferentToken) && (adj.EndToken.Next.GetReferent() is Pullenti.Ner.Geo.GeoReferent))
        {
            NamedItemToken re = new NamedItemToken(t, adj.EndToken.Next) { Morph = adj.EndToken.Next.Morph };
            re.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
            re.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken.Next, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
            re.IsWellknown = true;
            re.Ref = adj.EndToken.Next.GetReferent();
            return(re);
        }

        // Direction adjective followed by a location item: prepend the adjective to the name.
        NamedItemToken res = TryParse(adj.EndToken.Next, locOnto);
        if (res != null && res.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
        {
            string s = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, res.Morph.Gender, false);
            if (s != null)
            {
                if (res.NameValue == null)
                {
                    res.NameValue = s.ToUpper();
                }
                else
                {
                    res.NameValue = string.Format("{0} {1}", s.ToUpper(), res.NameValue);
                    res.TypeValue = null;
                }
                res.BeginToken = t;
                res.Chars = t.Chars;
                res.IsWellknown = true;
                return(res);
            }
        }
    }

    // Capitalised adjective(s) + typed noun in the middle of a sentence: the adjectives become the name.
    if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null && npt.Adjectives.Count > 0)
        {
            NamedItemToken test = TryParse(npt.Noun.BeginToken, locOnto);
            if (test != null && test.EndToken == npt.EndToken && test.TypeValue != null)
            {
                test.BeginToken = t;
                StringBuilder tmp = new StringBuilder();
                foreach (Pullenti.Ner.MetaToken a in npt.Adjectives)
                {
                    string s = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, test.Morph.Gender, false);
                    if (tmp.Length > 0)
                    {
                        tmp.Append(' ');
                    }
                    tmp.Append(s);
                }
                test.NameValue = tmp.ToString();
                test.Chars = t.Chars;
                if (test.Kind == Pullenti.Ner.Named.NamedEntityKind.Location)
                {
                    test.IsWellknown = true;
                }
                return(test);
            }
        }
    }

    // Bracketed (quoted) sequence that starts with a letter token that is not all lower case.
    if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower)
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null)
        {
            NamedItemToken res = new NamedItemToken(t, br.EndToken);
            res.IsInBracket = true;
            res.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            nam = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            // A known name that fills the brackets exactly replaces the raw text value.
            if (nam != null && nam.EndToken == br.EndToken.Previous)
            {
                res.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                res.IsWellknown = true;
                res.NameValue = nam.Termin.CanonicText;
            }
            return(res);
        }
    }

    // Fallback: any letter word longer than two characters that is not all lower case.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2)
    {
        NamedItemToken res = new NamedItemToken(t, t) { Morph = t.Morph };
        string str = (t as Pullenti.Ner.TextToken).Term;
        // Words ending in О / И / Ы keep their literal term instead of the normalised form.
        if (str.EndsWith("О") || str.EndsWith("И") || str.EndsWith("Ы"))
        {
            res.NameValue = str;
        }
        else
        {
            res.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        }
        res.Chars = t.Chars;
        // Hyphenated continuation written without spaces and in the same alphabet.
        if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter)
        {
            t = (res.EndToken = t.Next.Next);
            res.NameValue = string.Format("{0}-{1}", res.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
        }
        return(res);
    }

    return(null);
}
/// <summary>
/// Recomputes <c>Rank</c> for this token span and, on success, stores the boundaries of the
/// name part in <c>BeginNameToken</c> / <c>EndNameToken</c>. May also set <c>TypeValue</c> and
/// <c>Speciality</c> from title items found inside or right after the span.
/// </summary>
/// <remarks>
/// Rank starts at 0 (minus 30 when the first token is all lower case). The tokens from
/// BeginToken through EndToken are then walked once and the rank is adjusted by:
/// title items (TitleItemToken), book-link items (BookLinkToken), line breaks inside the span,
/// quote-type bracket sequences, attached referents, number tokens, person attributes
/// (PersonAttrToken), organization type words (OrgItemTypeToken) and plain text tokens.
/// A pure verb subtracts 30 and stops the walk. After the walk the counted words are added
/// and the counted non-words subtracted.
/// The method returns false when: a theme marker is met after a counted line break; a title
/// item of a type not handled here is met; a Dust/Speciality item is the first token; more
/// than 4 line breaks are counted; a token carries a Uri or Phone referent; a theme item is
/// the last token of the text; no words were counted and Rank is below 50; or the computed
/// start/end tokens are null or out of order.
/// </remarks>
/// <param name="minNewlinesCount">
/// Tolerated number of newlines before a token; for every non-first token the excess of
/// NewlinesBeforeCount over this value is subtracted from Rank.
/// </param>
/// <returns>true when the span is accepted and the name boundaries were stored; otherwise false.</returns>
// NOTE(review): "if (t.IsNewlineBefore) { }" at the top of the loop has an empty body.
// NOTE(review): BookLinkTyp.N is tested in both the -10 branch and the -20 else-branch, so the
//               second test for N can never be reached - confirm which penalty was intended.
// NOTE(review): the second "(r is GeoReferent)" test is unreachable for geo referents because
//               the preceding Geo/Person branch always ends with continue.
// NOTE(review): in the PersonAttrToken branch the inner for-loop can leave t == null if
//               pat.EndToken is never reached; the following continue would then evaluate
//               t.Next on null - presumably EndToken is always reachable; verify.
bool CalcRankAndValue(int minNewlinesCount) { Rank = 0; if (BeginToken.Chars.IsAllLower) { Rank -= 30; } int words = 0; int upWords = 0; int notwords = 0; int lineNumber = 0; Pullenti.Ner.Token tstart = BeginToken; Pullenti.Ner.Token tend = EndToken; for (Pullenti.Ner.Token t = BeginToken; t != EndToken.Next && t != null && t.EndChar <= EndToken.EndChar; t = t.Next) { if (t.IsNewlineBefore) { } TitleItemToken tit = TitleItemToken.TryAttach(t); if (tit != null) { if (tit.Typ == TitleItemToken.Types.Theme || tit.Typ == TitleItemToken.Types.TypAndTheme) { if (t != BeginToken) { if (lineNumber > 0) { return(false); } words = (upWords = (notwords = 0)); tstart = tit.EndToken.Next; } t = tit.EndToken; if (t.Next == null) { return(false); } if (t.Next.Chars.IsLetter && t.Next.Chars.IsAllLower) { Rank += 20; } else { Rank += 100; } tstart = t.Next; if (tit.Typ == TitleItemToken.Types.TypAndTheme) { TypeValue = tit.Value; } continue; } if (tit.Typ == TitleItemToken.Types.Typ) { if (t == BeginToken) { if (tit.EndToken.IsNewlineAfter) { TypeValue = tit.Value; Rank += 5; tstart = tit.EndToken.Next; } } t = tit.EndToken; words++; if (tit.BeginToken != tit.EndToken) { words++; } if (tit.Chars.IsAllUpper) { upWords++; } continue; } if (tit.Typ == TitleItemToken.Types.Dust || tit.Typ == TitleItemToken.Types.Speciality) { if (t == BeginToken) { return(false); } Rank -= 20; if (tit.Typ == TitleItemToken.Types.Speciality) { Speciality = tit.Value; } t = tit.EndToken; continue; } if (tit.Typ == TitleItemToken.Types.Consultant || tit.Typ == TitleItemToken.Types.Boss || tit.Typ == TitleItemToken.Types.Editor) { t = tit.EndToken; if (t.Next != null && ((t.Next.IsCharOf(":") || t.Next.IsHiphen || t.WhitespacesAfterCount > 4))) { Rank -= 10; } else { Rank -= 2; } continue; } return(false); } Pullenti.Ner.Booklink.Internal.BookLinkToken blt = Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParse(t, 0); if (blt != null) { if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Misc || 
blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.Pages) { Rank -= 10; } else if (blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.N || blt.Typ == Pullenti.Ner.Booklink.Internal.BookLinkTyp.PageRange) { Rank -= 20; } } if (t == BeginToken && Pullenti.Ner.Booklink.Internal.BookLinkToken.TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined) != null) { Rank -= 20; } if (t.IsNewlineBefore && t != BeginToken) { lineNumber++; if (lineNumber > 4) { return(false); } if (t.Chars.IsAllLower) { Rank += 10; } else if (t.Previous.IsChar('.')) { Rank -= 10; } else if (t.Previous.IsCharOf(",-")) { Rank += 10; } else { Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Previous, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null && npt.EndChar >= t.EndChar) { Rank += 10; } } } if (t != BeginToken && t.NewlinesBeforeCount > minNewlinesCount) { Rank -= (t.NewlinesBeforeCount - minNewlinesCount); } Pullenti.Ner.Core.BracketSequenceToken bst = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (bst != null && bst.IsQuoteType && bst.EndToken.EndChar <= EndToken.EndChar) { if (words == 0) { tstart = bst.BeginToken; Rank += 10; if (bst.EndToken == EndToken) { tend = EndToken; Rank += 10; } } } List <Pullenti.Ner.Referent> rli = t.GetReferents(); if (rli != null) { foreach (Pullenti.Ner.Referent r in rli) { if (r is Pullenti.Ner.Org.OrganizationReferent) { if (t.IsNewlineBefore) { Rank -= 10; } else { Rank -= 4; } continue; } if ((r is Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Person.PersonReferent)) { if (t.IsNewlineBefore) { Rank -= 5; if (t.IsNewlineAfter || t.Next == null) { Rank -= 20; } else if (t.Next.IsHiphen || (t.Next is Pullenti.Ner.NumberToken) || (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent)) { Rank -= 20; } else if (t != BeginToken) { Rank -= 20; } } continue; } if ((r is 
Pullenti.Ner.Geo.GeoReferent) || (r is Pullenti.Ner.Denomination.DenominationReferent)) { continue; } if ((r is Pullenti.Ner.Uri.UriReferent) || (r is Pullenti.Ner.Phone.PhoneReferent)) { return(false); } if (t.IsNewlineBefore) { Rank -= 4; } else { Rank -= 2; } if (t == BeginToken && (EndToken.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { Rank -= 10; } } words++; if (t.Chars.IsAllUpper) { upWords++; } if (t == BeginToken) { if (t.IsNewlineAfter) { Rank -= 10; } else if (t.Next != null && t.Next.IsChar('.') && t.Next.IsNewlineAfter) { Rank -= 10; } } continue; } if (t is Pullenti.Ner.NumberToken) { if ((t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) { words++; if (t.Chars.IsAllUpper) { upWords++; } } else { notwords++; } continue; } Pullenti.Ner.Person.Internal.PersonAttrToken pat = Pullenti.Ner.Person.Internal.PersonAttrToken.TryAttach(t, null, Pullenti.Ner.Person.Internal.PersonAttrToken.PersonAttrAttachAttrs.No); if (pat != null) { if (t.IsNewlineBefore) { if (!pat.Morph.Case.IsUndefined && !pat.Morph.Case.IsNominative) { } else if (pat.Chars.IsAllUpper) { } else { Rank -= 20; } } else if (t.Chars.IsAllLower) { Rank--; } for (; t != null; t = t.Next) { words++; if (t.Chars.IsAllUpper) { upWords++; } if (t == pat.EndToken) { break; } } continue; } Pullenti.Ner.Org.Internal.OrgItemTypeToken oitt = Pullenti.Ner.Org.Internal.OrgItemTypeToken.TryAttach(t, true, null); if (oitt != null) { if (oitt.Morph.Number != Pullenti.Morph.MorphNumber.Plural && !oitt.IsDoubtRootWord) { if (!oitt.Morph.Case.IsUndefined && !oitt.Morph.Case.IsNominative) { words++; if (t.Chars.IsAllUpper) { upWords++; } } else { Rank -= 4; if (t == BeginToken) { Rank -= 5; } } } else { words += 1; if (t.Chars.IsAllUpper) { upWords++; } } t = oitt.EndToken; continue; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt != null) { if (tt.IsChar('©')) { Rank -= 10; } if (tt.IsChar('_')) { Rank--; } if (tt.Chars.IsLetter) { if (tt.LengthChar > 2) { 
words++; if (t.Chars.IsAllUpper) { upWords++; } } } else if (!tt.IsChar(',')) { notwords++; } if (tt.IsPureVerb) { { Rank -= 30; words--; } break; } if (tt == EndToken) { if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) { Rank -= 10; } else if (tt.IsChar('.')) { Rank += 5; } } else if (tt.IsCharOf("._")) { Rank -= 5; } } } Rank += words; Rank -= notwords; if ((words < 1) && (Rank < 50)) { return(false); } if (tstart == null || tend == null) { return(false); } if (tstart.EndChar > tend.EndChar) { return(false); } TitleItemToken tit1 = TitleItemToken.TryAttach(EndToken.Next); if (tit1 != null && ((tit1.Typ == TitleItemToken.Types.Typ || tit1.Typ == TitleItemToken.Types.Speciality))) { if (tit1.EndToken.IsNewlineAfter) { Rank += 15; } else { Rank += 10; } if (tit1.Typ == TitleItemToken.Types.Speciality) { Speciality = tit1.Value; } } if (upWords > 4 && upWords > ((int)((0.8 * words)))) { if (tstart.Previous != null && (tstart.Previous.GetReferent() is Pullenti.Ner.Person.PersonReferent)) { Rank += (5 + upWords); } } BeginNameToken = tstart; EndNameToken = tend; return(true); }
/// <summary>
/// Walks the token chain and glues two neighbouring letter tokens (optionally separated by a
/// hyphen) into one <c>TextToken</c> when at least one half is unknown to the dictionary and
/// the concatenation is a single dictionary word.
/// </summary>
/// <param name="lang">Language passed to the morphology service when analysing the merged word.</param>
void CorrectWordsByMerging(Pullenti.Morph.MorphLang lang)
{
    for (Pullenti.Ner.Token left = FirstToken; left != null && left.Next != null; left = left.Next)
    {
        // The left half must be a letter token of at least two characters.
        if (!left.Chars.IsLetter || (left.LengthChar < 2))
        {
            continue;
        }
        Pullenti.Morph.MorphClass leftClass = left.GetMorphClassInDictionary();
        if (left.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }

        // The right half is the next token, or the one after a hyphen that does not end the line.
        Pullenti.Ner.Token right = left.Next;
        if (right.IsHiphen && right.Next != null && !right.IsNewlineAfter)
        {
            right = right.Next;
        }
        if (right.LengthChar == 1)
        {
            continue;
        }
        if (!right.Chars.IsLetter || !left.Chars.IsLetter || right.Chars.IsLatinLetter != left.Chars.IsLatinLetter)
        {
            continue;
        }

        // Case filter: the right half must be all lower case and the left half must not be all upper case.
        if (right.Chars.IsAllUpper && !left.Chars.IsAllUpper)
        {
            continue;
        }
        if (!right.Chars.IsAllLower)
        {
            continue;
        }
        if (left.Chars.IsAllUpper)
        {
            continue;
        }

        if (right.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }
        Pullenti.Morph.MorphClass rightClass = right.GetMorphClassInDictionary();
        // Both halves are already dictionary words: nothing to repair.
        if (!rightClass.IsUndefined && !leftClass.IsUndefined)
        {
            continue;
        }

        string leftTerm = (left as Pullenti.Ner.TextToken).Term;
        string rightTerm = (right as Pullenti.Ner.TextToken).Term;
        if ((leftTerm.Length + rightTerm.Length) < 6)
        {
            continue;
        }
        string merged = leftTerm + rightTerm;
        List<Pullenti.Morph.MorphToken> analysed = Pullenti.Morph.MorphologyService.Process(merged, lang, null);
        if (analysed == null || analysed.Count != 1)
        {
            continue;
        }
        // These two concatenations are never merged.
        if (merged == "ПОСТ" || merged == "ВРЕД")
        {
            continue;
        }

        Pullenti.Ner.TextToken glued = new Pullenti.Ner.TextToken(analysed[0], this, left.BeginChar, right.EndChar);
        if (glued.GetMorphClassInDictionary().IsUndefined)
        {
            continue;
        }
        glued.Chars = left.Chars;

        // Splice the merged token into the chain in place of left..right.
        if (left == FirstToken)
        {
            FirstToken = glued;
        }
        else
        {
            left.Previous.Next = glued;
        }
        if (right.Next != null)
        {
            glued.Next = right.Next;
        }
        left = glued;
    }
}
/// <summary>
/// Collapses letter-spaced words ("П Р И К А З") into a single <c>TextToken</c>.
/// A run starts at a one-letter token preceded by more than two whitespaces (or exactly two
/// when the previous run was merged) and continues over following one-character tokens that
/// are each preceded by exactly one whitespace. The run is replaced only when it is long
/// enough and the joined text has a word form marked as present in the dictionary.
/// </summary>
void MergeLetters()
{
    bool beforeWord = false;
    StringBuilder tmp = new StringBuilder();
    for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
    {
        Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
        // Fix: a non-text token used to cause a NullReferenceException here; it now simply
        // breaks the sequence like any other non-letter token.
        if (tt == null || !tt.Chars.IsLetter || tt.LengthChar != 1)
        {
            beforeWord = false;
            continue;
        }

        int i = t.WhitespacesBeforeCount;
        if (!(i > 2 || (i == 2 && beforeWord)))
        {
            beforeWord = false;
            continue;
        }

        // Collect the following single-character tokens separated by exactly one whitespace;
        // i counts the tokens appended after the first one.
        i = 0;
        Pullenti.Ner.Token t1;
        tmp.Length = 0;
        tmp.Append(tt.GetSourceText());
        for (t1 = t; t1.Next != null; t1 = t1.Next)
        {
            tt = t1.Next as Pullenti.Ner.TextToken;
            // Fix: a non-text token ends the run instead of being dereferenced as null.
            if (tt == null || tt.LengthChar != 1 || tt.WhitespacesBeforeCount != 1)
            {
                break;
            }
            i++;
            tmp.Append(tt.GetSourceText());
        }

        // Require more than three appended letters, or more than one right after a merged word.
        if (!(i > 3 || (i > 1 && beforeWord)))
        {
            beforeWord = false;
            continue;
        }
        beforeWord = false;

        List<Pullenti.Morph.MorphToken> mt = Pullenti.Morph.MorphologyService.Process(tmp.ToString(), null, null);
        if (mt == null || mt.Count != 1)
        {
            t = t1;
            continue;
        }
        foreach (Pullenti.Morph.MorphWordForm wf in mt[0].WordForms)
        {
            if (wf.IsInDictionary)
            {
                beforeWord = true;
                break;
            }
        }
        if (!beforeWord)
        {
            t = t1;
            continue;
        }

        // Replace the run t..t1 with the merged token.
        tt = new Pullenti.Ner.TextToken(mt[0], this, t.BeginChar, t1.EndChar);
        if (t == FirstToken)
        {
            FirstToken = tt;
        }
        else
        {
            tt.Previous = t.Previous;
        }
        tt.Next = t1.Next;
        t = tt;
    }
}
/// <summary>
/// Creates a line covering the tokens from <paramref name="begin"/> to <paramref name="end"/>.
/// The third base-constructor argument is always passed as null.
/// </summary>
/// <param name="begin">First token of the line.</param>
/// <param name="end">Last token of the line.</param>
private Line(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
    : base(begin, end, null)
{
}
/// <summary>
/// Unlinks long runs of "abnormal" tokens from the token chain.
/// A run starts at a token whose <c>CalcAbnormalCoef</c> is positive and extends while positive
/// coefficients keep appearing; it ends at the second negative coefficient seen since the last
/// positive one (zero coefficients are ignored). The run is removed when the accumulated
/// coefficient exceeds 20 and its length, after discounting every character equal to the one
/// that follows it, is still greater than 500 characters.
/// </summary>
void ClearDust()
{
    for (Pullenti.Ner.Token start = FirstToken; start != null; start = start.Next)
    {
        int total = CalcAbnormalCoef(start);
        int normalSeen = 0;
        if (total < 1)
        {
            continue;
        }

        // Extend the run; note the scan begins at the start token itself.
        Pullenti.Ner.Token last = start;
        for (Pullenti.Ner.Token cur = start; cur != null; cur = cur.Next)
        {
            int coef = CalcAbnormalCoef(cur);
            if (coef == 0)
            {
                continue;
            }
            if (coef > 0)
            {
                normalSeen = 0;
                total += coef;
                last = cur;
                continue;
            }
            normalSeen++;
            if (normalSeen > 1)
            {
                break;
            }
        }

        int length = last.EndChar - start.BeginChar;
        if (total > 20 && length > 500)
        {
            // Repeated characters do not count towards the length.
            for (int pos = start.BeginChar; pos < last.EndChar; pos++)
            {
                if (Sofa.Text[pos] == Sofa.Text[pos + 1])
                {
                    length--;
                }
            }
            if (length > 500)
            {
                if (start.Previous == null)
                {
                    FirstToken = last.Next;
                }
                else
                {
                    start.Previous.Next = last.Next;
                }
            }
        }

        // In every case scanning resumes after the end of the examined run.
        start = last;
    }
}
/// <summary>
/// Rebuilds "general referent" links between the referents collected by all analyzers of
/// <paramref name="proc"/>. Referents sharing a compare string within one analyzer are tested
/// pairwise with <c>CanBeGeneralFor</c>; a one-directional match makes one the general of the
/// other. A general with exactly one specific is merged into that specific and removed;
/// otherwise the specific's <c>GeneralReferent</c> is set. Finally referent links in the token
/// chain are corrected and the temporary <c>Tag</c> values are cleared.
/// </summary>
/// <param name="proc">Processor whose analyzers are enumerated.</param>
/// <param name="kit">Analysis kit providing analyzer data and the token chain.</param>
public static void RefreshGenerals(Pullenti.Ner.Processor proc, Pullenti.Ner.Core.AnalysisKit kit)
{
    // analyzer name -> compare string -> referents having that compare string
    Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>> index = new Dictionary<string, Dictionary<string, List<Pullenti.Ner.Referent>>>();
    List<Node> nodes = new List<Node>();

    // Step 1: wrap every referent in a Node (kept in Referent.Tag) and index it by compare strings.
    foreach (Pullenti.Ner.Analyzer analyzer in proc.Analyzers)
    {
        Pullenti.Ner.Core.AnalyzerData data = kit.GetAnalyzerData(analyzer);
        if (data == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.Referent referent in data.Referents)
        {
            Node node = new Node() { Ref = referent, Ad = data };
            nodes.Add(node);
            referent.Tag = node;

            Dictionary<string, List<Pullenti.Ner.Referent>> keyMap;
            if (!index.TryGetValue(analyzer.Name, out keyMap))
            {
                keyMap = new Dictionary<string, List<Pullenti.Ner.Referent>>();
                index.Add(analyzer.Name, keyMap);
            }

            List<string> keys = referent.GetCompareStrings();
            if (keys == null || keys.Count == 0)
            {
                continue;
            }
            foreach (string key in keys)
            {
                if (key == null)
                {
                    continue;
                }
                List<Pullenti.Ner.Referent> bucket;
                if (!keyMap.TryGetValue(key, out bucket))
                {
                    bucket = new List<Pullenti.Ner.Referent>();
                    keyMap.Add(key, bucket);
                }
                bucket.Add(referent);
            }
        }
    }

    // Step 2: record slot references between nodes in both directions.
    foreach (Node source in nodes)
    {
        foreach (Pullenti.Ner.Slot slot in source.Ref.Slots)
        {
            if (!(slot.Value is Pullenti.Ner.Referent))
            {
                continue;
            }
            Pullenti.Ner.Referent target = slot.Value as Pullenti.Ner.Referent;
            Node targetNode = target.Tag as Node;
            if (targetNode == null)
            {
                continue;
            }
            if (targetNode.RefsFrom == null)
            {
                targetNode.RefsFrom = new List<Node>();
            }
            targetNode.RefsFrom.Add(source);
            if (source.RefsTo == null)
            {
                source.RefsTo = new List<Node>();
            }
            source.RefsTo.Add(targetNode);
        }
    }

    // Step 3: pairwise comparison inside every bucket (buckets of 1 or of more than 3000 are skipped).
    foreach (Dictionary<string, List<Pullenti.Ner.Referent>> group in index.Values)
    {
        foreach (List<Pullenti.Ner.Referent> items in group.Values)
        {
            if (items.Count < 2 || items.Count > 3000)
            {
                continue;
            }
            for (int i = 0; i < items.Count; i++)
            {
                for (int j = i + 1; j < items.Count; j++)
                {
                    Node general = null;
                    Node specific = null;
                    if (items[i].CanBeGeneralFor(items[j]) && !items[j].CanBeGeneralFor(items[i]))
                    {
                        general = items[i].Tag as Node;
                        specific = items[j].Tag as Node;
                    }
                    else if (items[j].CanBeGeneralFor(items[i]) && !items[i].CanBeGeneralFor(items[j]))
                    {
                        general = items[j].Tag as Node;
                        specific = items[i].Tag as Node;
                    }
                    if (general == null || specific == null)
                    {
                        continue;
                    }

                    if (general.GenFrom == null)
                    {
                        general.GenFrom = new List<Node>();
                    }
                    if (!general.GenFrom.Contains(specific))
                    {
                        general.GenFrom.Add(specific);
                    }
                    if (specific.GenTo == null)
                    {
                        specific.GenTo = new List<Node>();
                    }
                    if (!specific.GenTo.Contains(general))
                    {
                        specific.GenTo.Add(general);
                    }
                }
            }
        }
    }

    // Step 4: when a node has several generals, drop those for which another general of the
    // same node reports IsInGenParentsOrHigher.
    foreach (Node n in nodes)
    {
        if (n.GenTo == null || n.GenTo.Count <= 1)
        {
            continue;
        }
        for (int i = n.GenTo.Count - 1; i >= 0; i--)
        {
            Node candidate = n.GenTo[i];
            bool redundant = false;
            for (int j = 0; j < n.GenTo.Count; j++)
            {
                if (j != i && n.GenTo[j].IsInGenParentsOrHigher(candidate))
                {
                    redundant = true;
                }
            }
            if (redundant)
            {
                candidate.GenFrom.Remove(n);
                n.GenTo.RemoveAt(i);
            }
        }
    }

    // Step 5: a general with a single specific is merged into it; otherwise just link them.
    foreach (Node n in nodes)
    {
        if (n.Deleted || n.GenTo == null || n.GenTo.Count != 1)
        {
            continue;
        }
        Node parent = n.GenTo[0];
        if (parent.GenFrom.Count != 1)
        {
            n.Ref.GeneralReferent = parent.Ref;
            continue;
        }
        n.Ref.MergeSlots(parent.Ref, true);
        parent.Ref.Tag = n.Ref;
        parent.ReplaceValues(n);
        foreach (Pullenti.Ner.TextAnnotation occurrence in parent.Ref.Occurrence)
        {
            n.Ref.AddOccurence(occurrence);
        }
        parent.Deleted = true;
    }

    // Step 6: fix referent links in the token chain, then drop merged referents and clear tags.
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        _correctReferents(t);
    }
    foreach (Node n in nodes)
    {
        if (n.Deleted)
        {
            n.Ad.RemoveReferent(n.Ref);
        }
        n.Ref.Tag = null;
    }
}
/// <summary>
/// Tries to assemble a <c>BankDataReferent</c> (bank requisites) from the tokens starting at
/// <paramref name="t"/>.
/// </summary>
/// <remarks>
/// The loop scans forward, skipping commas, prepositions and slashes, and stops at a table
/// control character (other than the first token). It collects:
/// URI referents whose scheme is accepted by <c>_isBankReq</c> (each scheme at most once);
/// an ORGANIZATION referent as the bank (a later one replaces it only if it is a bank and the
/// current one is not); and an ORGANIZATION referent as the correspondent bank when the last
/// collected URI has scheme "К/С" and the previous token is "В".
/// An organization counts as a bank when it, or one of up to three parents, has KIND equal to
/// "Bank" (case-insensitive), or when the keyword "БАНК" was met just before it.
/// Unrecognised letter tokens and GEO/ADDRESS referents increase an "empty" counter; more than
/// two of them in a row ends the scan.
/// The result is built only when at least one URI was collected, a "Р/С" or "Л/С" scheme is
/// among them, and either two or more URIs or an organization were found. Bank-requisite URIs
/// stored in the slots of an ORGANIZATION referent on the token preceding the start are added
/// as extra items when their scheme is not already present.
/// </remarks>
/// <param name="t">Token to start from; null yields null.</param>
/// <param name="keyWord">
/// When false, the scan returns null as soon as an organization or any other non-URI token is
/// met before the first URI (unless a bank organization was already found).
/// </param>
/// <returns>A referent token spanning from the start token to the last accepted token, or null.</returns>
// NOTE(review): the local "ok" declared after the loop is never read.
Pullenti.Ner.ReferentToken TryAttach(Pullenti.Ner.Token t, bool keyWord) { if (t == null) { return(null); } Pullenti.Ner.Token t0 = t; Pullenti.Ner.Token t1 = t; List <string> urisKeys = null; List <Pullenti.Ner.Uri.UriReferent> uris = null; Pullenti.Ner.Referent org = null; Pullenti.Ner.Referent corOrg = null; bool orgIsBank = false; int empty = 0; Pullenti.Ner.Uri.UriReferent lastUri = null; for (; t != null; t = t.Next) { if (t.IsTableControlChar && t != t0) { break; } if (t.IsComma || t.Morph.Class.IsPreposition || t.IsCharOf("/\\")) { continue; } bool bankKeyword = false; if (t.IsValue("ПОЛНЫЙ", null) && t.Next != null && ((t.Next.IsValue("НАИМЕНОВАНИЕ", null) || t.Next.IsValue("НАЗВАНИЕ", null)))) { t = t.Next.Next; if (t == null) { break; } } if (t.IsValue("БАНК", null)) { if ((t is Pullenti.Ner.ReferentToken) && t.GetReferent().TypeName == "ORGANIZATION") { bankKeyword = true; } Pullenti.Ner.Token tt = t.Next; Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt != null) { tt = npt.EndToken.Next; } if (tt != null && tt.IsChar(':')) { tt = tt.Next; } if (tt != null) { if (!bankKeyword) { t = tt; bankKeyword = true; } else if (tt.GetReferent() != null && tt.GetReferent().TypeName == "ORGANIZATION") { t = tt; } } } Pullenti.Ner.Referent r = t.GetReferent(); if (r != null && r.TypeName == "ORGANIZATION") { bool isBank = false; int kk = 0; for (Pullenti.Ner.Referent rr = r; rr != null && (kk < 4); rr = rr.ParentReferent, kk++) { isBank = string.Compare(rr.GetStringValue("KIND") ?? 
"", "Bank", true) == 0; if (isBank) { break; } } if (!isBank && bankKeyword) { isBank = true; } if (!isBank && uris != null && urisKeys.Contains("ИНН")) { return(null); } if ((lastUri != null && lastUri.Scheme == "К/С" && t.Previous != null) && t.Previous.IsValue("В", null)) { corOrg = r; t1 = t; } else if (org == null || ((!orgIsBank && isBank))) { org = r; t1 = t; orgIsBank = isBank; if (isBank) { continue; } } if (uris == null && !keyWord) { return(null); } continue; } if (r is Pullenti.Ner.Uri.UriReferent) { Pullenti.Ner.Uri.UriReferent u = r as Pullenti.Ner.Uri.UriReferent; if (uris == null) { if (!_isBankReq(u.Scheme)) { return(null); } if (u.Scheme == "ИНН" && t.IsNewlineAfter) { return(null); } uris = new List <Pullenti.Ner.Uri.UriReferent>(); urisKeys = new List <string>(); } else { if (!_isBankReq(u.Scheme)) { break; } if (urisKeys.Contains(u.Scheme)) { break; } if (u.Scheme == "ИНН") { if (empty > 0) { break; } } } urisKeys.Add(u.Scheme); uris.Add(u); lastUri = u; t1 = t; empty = 0; continue; } else if (uris == null && !keyWord && !orgIsBank) { return(null); } if (r != null && ((r.TypeName == "GEO" || r.TypeName == "ADDRESS"))) { empty++; continue; } if (t is Pullenti.Ner.TextToken) { if (t.IsValue("ПОЛНЫЙ", null) || t.IsValue("НАИМЕНОВАНИЕ", null) || t.IsValue("НАЗВАНИЕ", null)) { } else if (t.Chars.IsLetter) { Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok != null) { t = tok.EndToken; empty = 0; } else { empty++; if (t.IsNewlineBefore) { Pullenti.Ner.Core.NounPhraseToken nnn = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (nnn != null && nnn.EndToken.Next != null && nnn.EndToken.Next.IsChar(':')) { break; } } } if (uris == null) { break; } } } if (empty > 2) { break; } if (empty > 0 && t.IsChar(':') && t.IsNewlineAfter) { break; } if (((t is Pullenti.Ner.NumberToken) && t.IsNewlineBefore && t.Next != null) && !t.Next.Chars.IsLetter) { 
break; } } if (uris == null) { return(null); } if (!urisKeys.Contains("Р/С") && !urisKeys.Contains("Л/С")) { return(null); } bool ok = false; if ((uris.Count < 2) && org == null) { return(null); } BankDataReferent bdr = new BankDataReferent(); foreach (Pullenti.Ner.Uri.UriReferent u in uris) { bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0); } if (org != null) { bdr.AddSlot(BankDataReferent.ATTR_BANK, org, false, 0); } if (corOrg != null) { bdr.AddSlot(BankDataReferent.ATTR_CORBANK, corOrg, false, 0); } Pullenti.Ner.Referent org0 = (t0.Previous == null ? null : t0.Previous.GetReferent()); if (org0 != null && org0.TypeName == "ORGANIZATION") { foreach (Pullenti.Ner.Slot s in org0.Slots) { if (s.Value is Pullenti.Ner.Uri.UriReferent) { Pullenti.Ner.Uri.UriReferent u = s.Value as Pullenti.Ner.Uri.UriReferent; if (_isBankReq(u.Scheme)) { if (!urisKeys.Contains(u.Scheme)) { bdr.AddSlot(BankDataReferent.ATTR_ITEM, u, false, 0); } } } } } return(new Pullenti.Ner.ReferentToken(bdr, t0, t1)); }
/// <summary>
/// Creates a token covering the span from <paramref name="b"/> to <paramref name="e"/>.
/// The third base-constructor argument is always passed as null.
/// </summary>
/// <param name="b">First token of the span.</param>
/// <param name="e">Last token of the span.</param>
public ParenthesisToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
    : base(b, e, null)
{
}
/// <summary>
/// Tries to recognise a parenthetical (introductory) expression starting at <paramref name="t"/>:
/// a term from <c>m_Termins</c>, an adverb or comparative adjective followed by a comma or a
/// present-tense verb, constructions such as "В СООТВЕТСТВИИ С ...", "СОГЛАСНО ...", "В СИЛУ ...",
/// "В ... ВИДЕ/СЛУЧАЕ/СФЕРЕ", a preposition + noun phrase followed by a comma, or the word "ВЕДЬ".
/// </summary>
/// <param name="t">Token to start from; null yields null.</param>
/// <returns>The recognised token (with <c>Ref</c> set when a referent was attached), or null.</returns>
public static ParenthesisToken TryAttach(Pullenti.Ner.Token t)
{
    if (t == null)
    {
        return(null);
    }

    Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok != null)
    {
        return(new ParenthesisToken(t, tok.EndToken));
    }
    if (!(t is Pullenti.Ner.TextToken))
    {
        return(null);
    }

    // Adverb, or adjective marked with both "сравн." and "кач.прил." attributes.
    Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
    bool ok = false;
    Pullenti.Ner.Token t1;
    if (mc.IsAdverb)
    {
        ok = true;
    }
    else if (mc.IsAdjective)
    {
        if (t.Morph.ContainsAttr("сравн.", null) && t.Morph.ContainsAttr("кач.прил.", null))
        {
            ok = true;
        }
    }
    if (ok && t.Next != null)
    {
        if (t.Next.IsChar(','))
        {
            return(new ParenthesisToken(t, t));
        }
        t1 = t.Next;
        if (t1.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb)
        {
            if (t1.Morph.ContainsAttr("н.вр.", null) && t1.Morph.ContainsAttr("нес.в.", null) && t1.Morph.ContainsAttr("дейст.з.", null))
            {
                return(new ParenthesisToken(t, t1));
            }
        }
    }

    // Fixed prepositional constructions; t1 becomes the token where the governed phrase starts.
    t1 = null;
    if ((t.IsValue("В", null) && t.Next != null && t.Next.IsValue("СООТВЕТСТВИЕ", null)) && t.Next.Next != null && t.Next.Next.Morph.Class.IsPreposition)
    {
        t1 = t.Next.Next.Next;
    }
    else if (t.IsValue("СОГЛАСНО", null))
    {
        t1 = t.Next;
    }
    else if (t.IsValue("В", null) && t.Next != null)
    {
        if (t.Next.IsValue("СИЛА", null))
        {
            t1 = t.Next.Next;
        }
        else if (t.Next.Morph.Class.IsAdjective || t.Next.Morph.Class.IsPronoun)
        {
            Pullenti.Ner.Core.NounPhraseToken phrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (phrase != null)
            {
                if (phrase.Noun.IsValue("ВИД", null) || phrase.Noun.IsValue("СЛУЧАЙ", null) || phrase.Noun.IsValue("СФЕРА", null))
                {
                    return(new ParenthesisToken(t, phrase.EndToken));
                }
            }
        }
    }

    if (t1 != null)
    {
        // Skip an intermediate "НОРМА / ПОЛОЖЕНИЕ / УКАЗАНИЕ" noun phrase.
        if (t1.Next != null)
        {
            Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (npt1 != null)
            {
                if (npt1.Noun.IsValue("НОРМА", null) || npt1.Noun.IsValue("ПОЛОЖЕНИЕ", null) || npt1.Noun.IsValue("УКАЗАНИЕ", null))
                {
                    t1 = npt1.EndToken.Next;
                }
            }
        }
    }

    // Fix: t1 can become null above when the skipped noun phrase is the last thing in the
    // text; it used to be dereferenced unconditionally here.
    if (t1 != null)
    {
        Pullenti.Ner.Referent r = t1.GetReferent();
        if (r != null)
        {
            ParenthesisToken res = new ParenthesisToken(t, t1) { Ref = r };
            // After a comma, extend up to the next comma when "СИЛА" or "ДЕЙСТВИЕ" occurs in between.
            if (t1.Next != null && t1.Next.IsComma)
            {
                bool sila = false;
                for (Pullenti.Ner.Token ttt = t1.Next.Next; ttt != null; ttt = ttt.Next)
                {
                    if (ttt.IsValue("СИЛА", null) || ttt.IsValue("ДЕЙСТВИЕ", null))
                    {
                        sila = true;
                        continue;
                    }
                    if (ttt.IsComma)
                    {
                        if (sila)
                        {
                            res.EndToken = ttt.Previous;
                        }
                        break;
                    }
                    if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(ttt, false, false))
                    {
                        break;
                    }
                }
            }
            return(res);
        }
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t1, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null)
        {
            return(new ParenthesisToken(t, npt.EndToken));
        }
    }

    // Optional "НЕ", then preposition + noun phrase.
    Pullenti.Ner.Token tt = t;
    if (tt.IsValue("НЕ", null))
    {
        tt = tt.Next;
    }
    // Fix: the null test now precedes the dereference (tt is null when "НЕ" is the last token).
    if (tt != null && tt.Morph.Class.IsPreposition)
    {
        tt = tt.Next;
        Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt1 != null)
        {
            tt = npt1.EndToken;
            if (tt.Next != null && tt.Next.IsComma)
            {
                return(new ParenthesisToken(t, tt.Next));
            }
            if (npt1.Noun.IsValue("ОЧЕРЕДЬ", null))
            {
                return(new ParenthesisToken(t, tt));
            }
        }
    }

    if (t.IsValue("ВЕДЬ", null))
    {
        return(new ParenthesisToken(t, t));
    }
    return(null);
}
/// <summary>
/// Completes a money value: picks the currency postfix starting at <paramref name="t1"/>,
/// stores its canonic text in <c>res.ExTypParam</c>, and attaches a following "small money"
/// amount (number + term from <c>m_SmallMoney</c>) as the fractional part.
/// </summary>
/// <param name="res">Number token being completed; modified in place.</param>
/// <param name="t1">Token where the currency postfix is expected to start.</param>
/// <returns><paramref name="res"/> on success; null when no postfix candidate remains.</returns>
static Pullenti.Ner.Core.NumberExToken _correctMoney(Pullenti.Ner.Core.NumberExToken res, Pullenti.Ner.Token t1)
{
    if (t1 == null)
    {
        return(null);
    }
    List<Pullenti.Ner.Core.TerminToken> candidates = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
    if (candidates == null || candidates.Count == 0)
    {
        return(null);
    }

    // A GEO referent right after the postfix narrows the candidates by its ALPHA2 code.
    Pullenti.Ner.Token next = candidates[0].EndToken.Next;
    Pullenti.Ner.Referent geo = (next == null ? null : next.GetReferent());
    string code = null;
    if (geo != null && geo.TypeName == "GEO")
    {
        code = geo.GetStringValue("ALPHA2");
    }
    if (code != null && candidates.Count > 0)
    {
        for (int i = candidates.Count - 1; i >= 0; i--)
        {
            if (!candidates[i].Termin.CanonicText.StartsWith(code))
            {
                candidates.RemoveAt(i);
            }
        }
        // Nothing matched the country: start over with the full candidate list.
        if (candidates.Count == 0)
        {
            candidates = m_Postfixes.TryParseAll(t1, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }

    // Still ambiguous: fall back to a default country for the common currency words.
    if (candidates.Count > 1)
    {
        code = null;
        string word = candidates[0].Termin.Terms[0].CanonicalText;
        switch (word)
        {
            case "РУБЛЬ":
            case "RUBLE":
                code = "RU";
                break;
            case "ДОЛЛАР":
            case "ДОЛАР":
            case "DOLLAR":
                code = "US";
                break;
            case "ФУНТ":
            case "POUND":
                code = "UK";
                break;
        }
        if (code != null)
        {
            for (int i = candidates.Count - 1; i >= 0; i--)
            {
                string canonic = candidates[i].Termin.CanonicText;
                if (!canonic.StartsWith(code) && canonic != "GBP")
                {
                    candidates.RemoveAt(i);
                }
            }
        }
        code = null;
    }
    if (candidates.Count < 1)
    {
        return(null);
    }

    res.ExTypParam = candidates[0].Termin.CanonicText;
    // code is still set only when the GEO referent was used, so the span is extended over it.
    if (code != null && next != null)
    {
        res.EndToken = next;
    }

    next = res.EndToken.Next;
    if (next != null && next.IsCommaAnd)
    {
        next = next.Next;
    }

    if ((next is Pullenti.Ner.NumberToken) && next.Next != null && (next.WhitespacesAfterCount < 4))
    {
        Pullenti.Ner.Token afterNumber = next.Next;
        // Skip a parenthesised repetition of the same number: "50 (50) копеек".
        if ((afterNumber != null && afterNumber.IsChar('(') && (afterNumber.Next is Pullenti.Ner.NumberToken)) && afterNumber.Next.Next != null && afterNumber.Next.Next.IsChar(')'))
        {
            if ((next as Pullenti.Ner.NumberToken).Value == (afterNumber.Next as Pullenti.Ner.NumberToken).Value)
            {
                afterNumber = afterNumber.Next.Next.Next;
            }
        }
        Pullenti.Ner.Core.TerminToken small = m_SmallMoney.TryParse(afterNumber, Pullenti.Ner.Core.TerminParseAttr.No);
        if (small == null && afterNumber != null && afterNumber.IsChar(')'))
        {
            small = m_SmallMoney.TryParse(afterNumber.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        }
        if (small != null && (next as Pullenti.Ner.NumberToken).IntValue != null)
        {
            int limit = (int)small.Termin.Tag;
            int amount = (next as Pullenti.Ner.NumberToken).IntValue.Value;
            if (amount < limit)
            {
                double fraction = (double)amount;
                fraction /= limit;

                // Main value: add the fraction when there is none yet; when an existing
                // fraction (in hundredths) differs from the amount, record the amount instead.
                double rest = res.RealValue - ((long)res.RealValue);
                int hundredths = (int)(((rest * 100) + 0.0001));
                if (hundredths > 0 && amount != hundredths)
                {
                    res.AltRestMoney = amount;
                }
                else if (rest == 0)
                {
                    res.RealValue += fraction;
                }

                // Same treatment for the alternative value.
                rest = res.AltRealValue - ((long)res.AltRealValue);
                hundredths = (int)(((rest * 100) + 0.0001));
                if (hundredths > 0 && amount != hundredths)
                {
                    res.AltRestMoney = amount;
                }
                else if (rest == 0)
                {
                    res.AltRealValue += fraction;
                }

                res.EndToken = small.EndToken;
            }
        }
    }
    else if ((next is Pullenti.Ner.TextToken) && next.IsValue("НОЛЬ", null))
    {
        // "ноль копеек": only the span is extended, the value is unchanged.
        Pullenti.Ner.Core.TerminToken small = m_SmallMoney.TryParse(next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
        if (small != null)
        {
            res.EndToken = small.EndToken;
        }
    }
    return(res);
}
/// <summary>
/// Tries to parse a number starting at <paramref name="t"/> together with its unit or currency
/// postfix and returns it as a NumberExToken.
/// </summary>
/// <remarks>
/// Processing order, as implemented below:
/// 1. A leading one-character money sign (NumberHelper.IsMoneyChar) is remembered in isDollar and skipped.
/// 2. If the current token is not a NumberToken, two fallbacks are tried: a bracketed number
///    followed by a money postfix ("(N) currency", only when the previous token is not a number),
///    and a single adjective word whose Term splits into a number word (NumberHelper.m_Nums) plus a
///    postfix (m_Postfixes). The split loop stops at the first prefix length that yields a number
///    termin, whether or not the remainder is a postfix.
/// 3. The real value is taken from the number; TryParseRealNumber is consulted when the number is
///    followed by ',' or '.' or by another close number token. NaN values are rejected.
/// 4. If the two tokens after t1 are "С"/"З" and "ПОЛОВИНА", 0.5 is added to the value.
/// 5. A hyphen, and a two-character zero number after a hyphen, are skipped.
/// 6. A bracketed repetition of the value after the number is recognised in several shapes
///    (same integer part, "NN/100" rest, "ЦЕЛЫХ n ДЕСЯТЫЙ/СОТЫЙ/...", a money postfix inside the
///    brackets, a nested number with postfix, or a words-spelled number after a digit number);
///    det is set when such a repetition was accepted and altf may receive the alternative value.
/// 7. A bracket sequence starting with "СУММА" is skipped.
/// 8. With a money sign, a Money token is returned immediately; "-МИЛЛИОННЫЙ"/"-МИЛЛИАРДНЫЙ"
///    and glued "M"/"BN" multiply the value by 1000000 / 1000000000.
/// 9. Otherwise a postfix termin is looked up at t1. A lone 'р' character is accepted as "RUB" only
///    when one of the 10 preceding tokens is "СУММА", "НАЛИЧНЫЙ", "БАЛАНС" or a "MONEY" referent.
/// 10. For a found postfix: a following '.' may be absorbed, the "LTL" termin is rejected, and a
///    postfix that is a preposition/conjunction surrounded by whitespace is rejected. Money goes
///    through _correctMoney, everything else through _correctExtTypes.
/// 11. Last resorts: _attachSpecPostfix; a preposition/conjunction (except "НА") followed by another
///    number with postfix, whose type is borrowed while the token covers only this number; and the
///    glued terms "СМХ"/"CMX", "MX"/"МХ", "MMX"/"ММХ" directly before a number (MultAfter = true).
/// NOTE(review): the local "double d" is declared but never used.
/// </remarks>
/// <param name="t">First token of the candidate (a number or a money sign).</param>
/// <returns>The parsed token, or null when no number with a recognised postfix starts at t.</returns>
public static Pullenti.Ner.Core.NumberExToken TryParseNumberWithPostfix(Pullenti.Ner.Token t) { if (t == null) { return(null); } Pullenti.Ner.Token t0 = t; string isDollar = null; if (t.LengthChar == 1 && t.Next != null) { if ((((isDollar = Pullenti.Ner.Core.NumberHelper.IsMoneyChar(t)))) != null) { t = t.Next; } } Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken; if (nt == null) { if ((!(t.Previous is Pullenti.Ner.NumberToken) && t.IsChar('(') && (t.Next is Pullenti.Ner.NumberToken)) && t.Next.Next != null && t.Next.Next.IsChar(')')) { Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(t.Next.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No); if (toks1 != null && ((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money) { Pullenti.Ner.NumberToken nt0 = t.Next as Pullenti.Ner.NumberToken; Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken, nt0.Value, nt0.Typ, Pullenti.Ner.Core.NumberExType.Money) { AltRealValue = nt0.RealValue, Morph = toks1.BeginToken.Morph }; return(_correctMoney(res, toks1.BeginToken)); } } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; if (tt == null || !tt.Morph.Class.IsAdjective) { return(null); } string val = tt.Term; for (int i = 4; i < (val.Length - 5); i++) { string v = val.Substring(0, i); List <Pullenti.Ner.Core.Termin> li = Pullenti.Ner.Core.NumberHelper.m_Nums.FindTerminsByString(v, tt.Morph.Language); if (li == null) { continue; } string vv = val.Substring(i); List <Pullenti.Ner.Core.Termin> lii = m_Postfixes.FindTerminsByString(vv, tt.Morph.Language); if (lii != null && lii.Count > 0) { Pullenti.Ner.Core.NumberExToken re = new Pullenti.Ner.Core.NumberExToken(t, t, ((int)li[0].Tag).ToString(), Pullenti.Ner.NumberSpellingType.Words, (Pullenti.Ner.Core.NumberExType)lii[0].Tag) { Morph = t.Morph }; _correctExtTypes(re); return(re); } break; } return(null); } if (t.Next == null && isDollar == null) { return(null); } double f = 
nt.RealValue; if (double.IsNaN(f)) { return(null); } Pullenti.Ner.Token t1 = nt.Next; if (((t1 != null && t1.IsCharOf(",."))) || (((t1 is Pullenti.Ner.NumberToken) && (t1.WhitespacesBeforeCount < 3)))) { double d; Pullenti.Ner.NumberToken tt11 = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(nt, false, false); if (tt11 != null) { t1 = tt11.EndToken.Next; f = tt11.RealValue; } } if (t1 == null) { if (isDollar == null) { return(null); } } else if ((t1.Next != null && t1.Next.IsValue("С", "З") && t1.Next.Next != null) && t1.Next.Next.IsValue("ПОЛОВИНА", null)) { f += 0.5; t1 = t1.Next.Next; } if (t1 != null && t1.IsHiphen && t1.Next != null) { t1 = t1.Next; } bool det = false; double altf = f; if (((t1 is Pullenti.Ner.NumberToken) && t1.Previous != null && t1.Previous.IsHiphen) && (t1 as Pullenti.Ner.NumberToken).IntValue == 0 && t1.LengthChar == 2) { t1 = t1.Next; } if ((t1 != null && t1.Next != null && t1.IsChar('(')) && (((t1.Next is Pullenti.Ner.NumberToken) || t1.Next.IsValue("НОЛЬ", null))) && t1.Next.Next != null) { Pullenti.Ner.NumberToken nt1 = t1.Next as Pullenti.Ner.NumberToken; double val = (double)0; if (nt1 != null) { val = nt1.RealValue; } if (Math.Floor(f) == Math.Floor(val)) { Pullenti.Ner.Token ttt = t1.Next.Next; if (ttt.IsChar(')')) { t1 = ttt.Next; det = true; if ((t1 is Pullenti.Ner.NumberToken) && (t1 as Pullenti.Ner.NumberToken).IntValue != null && (t1 as Pullenti.Ner.NumberToken).IntValue.Value == 0) { t1 = t1.Next; } } else if (((((ttt is Pullenti.Ner.NumberToken) && ((ttt as Pullenti.Ner.NumberToken).RealValue < 100) && ttt.Next != null) && ttt.Next.IsChar('/') && ttt.Next.Next != null) && ttt.Next.Next.GetSourceText() == "100" && ttt.Next.Next.Next != null) && ttt.Next.Next.Next.IsChar(')')) { int rest = GetDecimalRest100(f); if ((ttt as Pullenti.Ner.NumberToken).IntValue != null && rest == (ttt as Pullenti.Ner.NumberToken).IntValue.Value) { t1 = ttt.Next.Next.Next.Next; det = true; } } else if ((ttt.IsValue("ЦЕЛЫХ", null) && (ttt.Next 
is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.Next != null && ttt.Next.Next.Next.IsChar(')')) { Pullenti.Ner.NumberToken num2 = ttt.Next as Pullenti.Ner.NumberToken; altf = num2.RealValue; if (ttt.Next.Next.IsValue("ДЕСЯТЫЙ", null)) { altf /= 10; } else if (ttt.Next.Next.IsValue("СОТЫЙ", null)) { altf /= 100; } else if (ttt.Next.Next.IsValue("ТЫСЯЧНЫЙ", null)) { altf /= 1000; } else if (ttt.Next.Next.IsValue("ДЕСЯТИТЫСЯЧНЫЙ", null)) { altf /= 10000; } else if (ttt.Next.Next.IsValue("СТОТЫСЯЧНЫЙ", null)) { altf /= 100000; } else if (ttt.Next.Next.IsValue("МИЛЛИОННЫЙ", null)) { altf /= 1000000; } if (altf < 1) { altf += val; t1 = ttt.Next.Next.Next.Next; det = true; } } else { Pullenti.Ner.Core.TerminToken toks1 = m_Postfixes.TryParse(ttt, Pullenti.Ner.Core.TerminParseAttr.No); if (toks1 != null) { if (((Pullenti.Ner.Core.NumberExType)toks1.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money) { if (toks1.EndToken.Next != null && toks1.EndToken.Next.IsChar(')')) { Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, toks1.EndToken.Next, nt.Value, nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, Morph = toks1.BeginToken.Morph }; return(_correctMoney(res, toks1.BeginToken)); } } } Pullenti.Ner.Core.NumberExToken res2 = TryParseNumberWithPostfix(t1.Next); if (res2 != null && res2.EndToken.Next != null && res2.EndToken.Next.IsChar(')')) { res2.BeginToken = t; res2.EndToken = res2.EndToken.Next; res2.AltRealValue = res2.RealValue; res2.RealValue = f; _correctExtTypes(res2); if (res2.WhitespacesAfterCount < 2) { Pullenti.Ner.Core.TerminToken toks2 = m_Postfixes.TryParse(res2.EndToken.Next, Pullenti.Ner.Core.TerminParseAttr.No); if (toks2 != null) { if (((Pullenti.Ner.Core.NumberExType)toks2.Termin.Tag) == Pullenti.Ner.Core.NumberExType.Money) { res2.EndToken = toks2.EndToken; } } } return(res2); } } } else if (nt1 != null && nt1.Typ == Pullenti.Ner.NumberSpellingType.Words && nt.Typ == 
Pullenti.Ner.NumberSpellingType.Digit) { altf = nt1.RealValue; Pullenti.Ner.Token ttt = t1.Next.Next; if (ttt.IsChar(')')) { t1 = ttt.Next; det = true; } if (!det) { altf = f; } } } if ((t1 != null && t1.IsChar('(') && t1.Next != null) && t1.Next.IsValue("СУММА", null)) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t1, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { t1 = br.EndToken.Next; } } if (isDollar != null) { Pullenti.Ner.Token te = null; if (t1 != null) { te = t1.Previous; } else { for (t1 = t0; t1 != null; t1 = t1.Next) { if (t1.Next == null) { te = t1; } } } if (te == null) { return(null); } if (te.IsHiphen && te.Next != null) { if (te.Next.IsValue("МИЛЛИОННЫЙ", null)) { f *= 1000000; altf *= 1000000; te = te.Next; } else if (te.Next.IsValue("МИЛЛИАРДНЫЙ", null)) { f *= 1000000000; altf *= 1000000000; te = te.Next; } } if (!te.IsWhitespaceAfter && (te.Next is Pullenti.Ner.TextToken)) { if (te.Next.IsValue("M", null)) { f *= 1000000; altf *= 1000000; te = te.Next; } else if (te.Next.IsValue("BN", null)) { f *= 1000000000; altf *= 1000000000; te = te.Next; } } return(new Pullenti.Ner.Core.NumberExToken(t0, te, "", nt.Typ, Pullenti.Ner.Core.NumberExType.Money) { RealValue = f, AltRealValue = altf, ExTypParam = isDollar }); } if (t1 == null || ((t1.IsNewlineBefore && !det))) { return(null); } Pullenti.Ner.Core.TerminToken toks = m_Postfixes.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No); if ((toks == null && det && (t1 is Pullenti.Ner.NumberToken)) && (t1 as Pullenti.Ner.NumberToken).Value == "0") { toks = m_Postfixes.TryParse(t1.Next, Pullenti.Ner.Core.TerminParseAttr.No); } if (toks == null && t1.IsChar('р')) { int cou = 10; for (Pullenti.Ner.Token ttt = t0.Previous; ttt != null && cou > 0; ttt = ttt.Previous, cou--) { if (ttt.IsValue("СУММА", null) || ttt.IsValue("НАЛИЧНЫЙ", null) || ttt.IsValue("БАЛАНС", null)) { } else if (ttt.GetReferent() != null && ttt.GetReferent().TypeName == "MONEY") { } 
else { continue; } toks = new Pullenti.Ner.Core.TerminToken(t1, t1) { Termin = m_Postfixes.FindTerminsByCanonicText("RUB")[0] }; if (t1.Next != null && t1.Next.IsChar('.')) { toks.EndToken = t1.Next; } Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag; return(new Pullenti.Ner.Core.NumberExToken(t, toks.EndToken, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph, ExTypParam = "RUB" }); } } if (toks != null) { t1 = toks.EndToken; if (!t1.IsChar('.') && t1.Next != null && t1.Next.IsChar('.')) { if ((t1 is Pullenti.Ner.TextToken) && t1.IsValue(toks.Termin.Terms[0].CanonicalText, null)) { } else if (!t1.Chars.IsLetter) { } else { t1 = t1.Next; } } if (toks.Termin.CanonicText == "LTL") { return(null); } if (toks.BeginToken == t1) { if (t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction) { if (t1.IsWhitespaceBefore && t1.IsWhitespaceAfter) { return(null); } } } Pullenti.Ner.Core.NumberExType ty = (Pullenti.Ner.Core.NumberExType)toks.Termin.Tag; Pullenti.Ner.Core.NumberExToken res = new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, Morph = toks.BeginToken.Morph }; if (ty != Pullenti.Ner.Core.NumberExType.Money) { _correctExtTypes(res); return(res); } return(_correctMoney(res, toks.BeginToken)); } Pullenti.Ner.Core.NumberExToken pfx = _attachSpecPostfix(t1); if (pfx != null) { pfx.BeginToken = t; pfx.Value = nt.Value; pfx.Typ = nt.Typ; pfx.RealValue = f; pfx.AltRealValue = altf; return(pfx); } if (t1.Next != null && ((t1.Morph.Class.IsPreposition || t1.Morph.Class.IsConjunction))) { if (t1.IsValue("НА", null)) { } else { Pullenti.Ner.Core.NumberExToken nn = TryParseNumberWithPostfix(t1.Next); if (nn != null) { return new Pullenti.Ner.Core.NumberExToken(t, t, nt.Value, nt.Typ, nn.ExTyp) { RealValue = f, AltRealValue = altf, ExTyp2 = nn.ExTyp2, ExTypParam = nn.ExTypParam } } ; } } if (!t1.IsWhitespaceAfter && (t1.Next is 
Pullenti.Ner.NumberToken) && (t1 is Pullenti.Ner.TextToken)) { string term = (t1 as Pullenti.Ner.TextToken).Term; Pullenti.Ner.Core.NumberExType ty = Pullenti.Ner.Core.NumberExType.Undefined; if (term == "СМХ" || term == "CMX") { ty = Pullenti.Ner.Core.NumberExType.Santimeter; } else if (term == "MX" || term == "МХ") { ty = Pullenti.Ner.Core.NumberExType.Meter; } else if (term == "MMX" || term == "ММХ") { ty = Pullenti.Ner.Core.NumberExType.Millimeter; } if (ty != Pullenti.Ner.Core.NumberExType.Undefined) { return new Pullenti.Ner.Core.NumberExToken(t, t1, nt.Value, nt.Typ, ty) { RealValue = f, AltRealValue = altf, MultAfter = true } } ; } return(null); }
/// <summary>
/// Tries to parse a Russian verb phrase (verbs, adverbs and participle-like adjectives, with
/// optional "НЕ" negation) starting at <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Each text token is classified into ty: 0 - not part of the phrase (stops the scan),
/// 1 - verb, 2 - adverb, 3 - adjective/participle form ("ДОПУСТИМО" is also given 3).
/// "НЕ" is remembered and attached to the next accepted item as its negation.
/// For pure verbs, a second non-infinitive verb ends the phrase unless the previous verb item was
/// a form of "to be" (IsVerbBe); "НЕТ" after a verb also ends the phrase.
/// For adjectives, DerivateService is used to check that the normal form belongs to a group
/// containing both a verb and a verb-adjective with the same spelling; if the first group has
/// a prefix, the check is repeated with that prefix removed. Normal forms ending in "ЙШИЙ" are skipped.
/// When the very first token is not accepted and canBePartition is set, a leading preposition is
/// parsed and skipped; it is stored in the result only if the first item has a compatible case,
/// otherwise null is returned.
/// Trailing adverb items are removed from the result (the first item is never removed).
/// </remarks>
/// <param name="t">First token of the candidate phrase.</param>
/// <param name="canBePartition">Allows verb forms that look like participles (adjective class or a defined case) and adjectives; presumably "partition" means participle here - confirm.</param>
/// <param name="canBeAdjPartition">Allows adjectives, including short forms ("к.ф."), to be accepted as items.</param>
/// <param name="forceParse">Accepts words that the dictionary lists as both noun and verb.</param>
/// <returns>The phrase token, or null when no verb or participle item (ty 1 or 3) was found.</returns>
static VerbPhraseToken TryParseRu(Pullenti.Ner.Token t, bool canBePartition, bool canBeAdjPartition, bool forceParse) { VerbPhraseToken res = null; Pullenti.Ner.Token t0 = t; Pullenti.Ner.Token not = null; bool hasVerb = false; bool verbBeBefore = false; PrepositionToken prep = null; for (; t != null; t = t.Next) { if (!(t is Pullenti.Ner.TextToken)) { break; } Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken; bool isParticiple = false; if (tt.Term == "НЕ") { not = t; continue; } int ty = 0; string norm = null; Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (tt.Term == "НЕТ") { if (hasVerb) { break; } ty = 1; } else if (tt.Term == "ДОПУСТИМО") { ty = 3; } else if (mc.IsAdverb && !mc.IsVerb) { ty = 2; } else if (tt.IsPureVerb || tt.IsVerbBe) { ty = 1; if (hasVerb) { if (!tt.Morph.ContainsAttr("инф.", null)) { if (verbBeBefore) { } else { break; } } } } else if (mc.IsVerb) { if (mc.IsPreposition || mc.IsMisc || mc.IsPronoun) { } else if (mc.IsNoun) { if (tt.Term == "СТАЛИ" || tt.Term == "СТЕКЛО" || tt.Term == "БЫЛИ") { ty = 1; } else if (!tt.Chars.IsAllLower && !MiscHelper.CanBeStartOfSentence(tt)) { ty = 1; } else if (mc.IsAdjective && canBePartition) { ty = 1; } else if (forceParse) { ty = 1; } } else if (mc.IsProper) { if (tt.Chars.IsAllLower) { ty = 1; } } else { ty = 1; } if (mc.IsAdjective) { isParticiple = true; } if (!tt.Morph.Case.IsUndefined) { isParticiple = true; } if (!canBePartition && isParticiple) { break; } if (hasVerb) { if (tt.Morph.ContainsAttr("инф.", null)) { } else if (!isParticiple) { } else { break; } } } else if ((mc.IsAdjective && tt.Morph.ContainsAttr("к.ф.", null) && tt.Term.EndsWith("О")) && NounPhraseHelper.TryParse(tt, NounPhraseParseAttr.No, 0, null) == null) { ty = 2; } else if (mc.IsAdjective && ((canBePartition || canBeAdjPartition))) { if (tt.Morph.ContainsAttr("к.ф.", null) && !canBeAdjPartition) { break; } norm = tt.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, 
Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false); if (norm.EndsWith("ЙШИЙ")) { } else { List <Pullenti.Semantic.Utils.DerivateGroup> grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm, true, null); if (grs != null && grs.Count > 0) { bool hVerb = false; bool hPart = false; foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs) { foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words) { if (w.Class.IsAdjective && w.Class.IsVerb) { if (w.Spelling == norm) { hPart = true; } } else if (w.Class.IsVerb) { hVerb = true; } } } if (hPart && hVerb) { ty = 3; } else if (canBeAdjPartition) { ty = 3; } if (ty != 3 && !string.IsNullOrEmpty(grs[0].Prefix) && norm.StartsWith(grs[0].Prefix)) { hVerb = false; hPart = false; string norm1 = norm.Substring(grs[0].Prefix.Length); grs = Pullenti.Semantic.Utils.DerivateService.FindDerivates(norm1, true, null); if (grs != null && grs.Count > 0) { foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs) { foreach (Pullenti.Semantic.Utils.DerivateWord w in gr.Words) { if (w.Class.IsAdjective && w.Class.IsVerb) { if (w.Spelling == norm1) { hPart = true; } } else if (w.Class.IsVerb) { hVerb = true; } } } } if (hPart && hVerb) { ty = 3; } } } } } if (ty == 0 && t == t0 && canBePartition) { prep = PrepositionHelper.TryParse(t); if (prep != null) { t = prep.EndToken; continue; } } if (ty == 0) { break; } if (res == null) { res = new VerbPhraseToken(t0, t); } res.EndToken = t; VerbPhraseItemToken it = new VerbPhraseItemToken(t, t) { Morph = new Pullenti.Ner.MorphCollection(t.Morph) }; if (not != null) { it.BeginToken = not; it.Not = true; not = null; } it.IsAdverb = ty == 2; if (prep != null && !t.Morph.Case.IsUndefined && res.Items.Count == 0) { if (((prep.NextCase & t.Morph.Case)).IsUndefined) { return(null); } it.Morph.RemoveItems(prep.NextCase); res.Preposition = prep; } if (norm == null) { norm = t.GetNormalCaseText((ty == 3 ? Pullenti.Morph.MorphClass.Adjective : (ty == 2 ? 
Pullenti.Morph.MorphClass.Adverb : Pullenti.Morph.MorphClass.Verb)), Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false); if (ty == 1 && !tt.Morph.Case.IsUndefined) { Pullenti.Morph.MorphWordForm mi = new Pullenti.Morph.MorphWordForm() { Case = Pullenti.Morph.MorphCase.Nominative, Number = Pullenti.Morph.MorphNumber.Singular, Gender = Pullenti.Morph.MorphGender.Masculine }; foreach (Pullenti.Morph.MorphBaseInfo mit in tt.Morph.Items) { if (mit is Pullenti.Morph.MorphWordForm) { mi.Misc = (mit as Pullenti.Morph.MorphWordForm).Misc; break; } } string nnn = Pullenti.Morph.MorphologyService.GetWordform("КК" + (t as Pullenti.Ner.TextToken).Term, mi); if (nnn != null) { norm = nnn.Substring(2); } } } it.Normal = norm; res.Items.Add(it); if (!hasVerb && ((ty == 1 || ty == 3))) { res.Morph = it.Morph; hasVerb = true; } if (ty == 1 || ty == 3) { if (ty == 1 && tt.IsVerbBe) { verbBeBefore = true; } else { verbBeBefore = false; } } } if (!hasVerb) { return(null); } for (int i = res.Items.Count - 1; i > 0; i--) { if (res.Items[i].IsAdverb) { res.Items.RemoveAt(i); res.EndToken = res.Items[i - 1].EndToken; } else { break; } } return(res); }
/// <summary>
/// Tries to parse a sentence starting at <paramref name="t"/> that contains a number
/// directly followed by a noun phrase.
/// </summary>
/// <param name="t">Token that must be able to start a sentence.</param>
/// <returns>
/// A token covering the sentence (up to the token before the next sentence start) with the number,
/// the noun and the surrounding text filled in; null when t cannot start a sentence, when the next
/// sentence starts before a suitable number is met, or when the number has no integer value.
/// </returns>
public static DefinitionWithNumericToken TryParse(Pullenti.Ner.Token t)
{
    if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
    {
        return null;
    }

    // Find the first number (not the leading token, not adjective-like, at most two
    // whitespaces after it) that is followed by a noun phrase.
    Pullenti.Ner.NumberToken number = null;
    Pullenti.Ner.Core.NounPhraseToken phrase = null;
    for (Pullenti.Ner.Token cur = t; cur != null; cur = cur.Next)
    {
        if (cur != t && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cur))
        {
            return null;
        }
        Pullenti.Ner.NumberToken candidate = cur as Pullenti.Ner.NumberToken;
        if (candidate == null || cur.WhitespacesAfterCount > 2 || cur == t || cur.Morph.Class.IsAdjective)
        {
            continue;
        }
        Pullenti.Ner.Core.NounPhraseToken parsed = Pullenti.Ner.Core.NounPhraseHelper.TryParse(cur.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (parsed != null)
        {
            number = candidate;
            phrase = parsed;
            break;
        }
    }
    if (number == null || number.IntValue == null)
    {
        return null;
    }

    Pullenti.Ner.Core.GetTextAttr keep = Pullenti.Ner.Core.GetTextAttr.KeepQuotes | Pullenti.Ner.Core.GetTextAttr.KeepRegister;
    DefinitionWithNumericToken result = new DefinitionWithNumericToken(t, phrase.EndToken);
    result.Number = number.IntValue.Value;
    result.NumberBeginChar = number.BeginChar;
    result.NumberEndChar = number.EndChar;
    result.Noun = phrase.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
    // Fall back to the normalized noun when no genitive variant is available
    result.NounsGenetive = phrase.GetMorphVariant(Pullenti.Morph.MorphCase.Genitive, true) ?? result.Noun;

    // Text before the number, then the "number + noun phrase" fragment
    result.Text = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, number.Previous, keep);
    if (number.IsWhitespaceBefore)
    {
        result.Text += " ";
    }
    result.NumberSubstring = Pullenti.Ner.Core.MiscHelper.GetTextValue(number, phrase.EndToken, keep);
    result.Text += result.NumberSubstring;

    // Stretch the token up to the last token before the next sentence start
    Pullenti.Ner.Token tail = phrase.EndToken;
    while (tail != null && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tail))
    {
        result.EndToken = tail;
        tail = tail.Next;
    }
    if (result.EndToken != phrase.EndToken)
    {
        if (phrase.IsWhitespaceAfter)
        {
            result.Text += " ";
        }
        result.Text += Pullenti.Ner.Core.MiscHelper.GetTextValue(phrase.EndToken.Next, result.EndToken, keep);
    }
    return result;
}
/// <summary>
/// Parses a list of weapon items starting at <paramref name="begin"/> (at most 10 items are
/// requested from TryParseList) and returns the first referent token that can be attached.
/// </summary>
/// <param name="begin">Token to start parsing from.</param>
/// <param name="end">Not used by this implementation.</param>
/// <returns>The first attached referent token, or null when nothing could be parsed or attached.</returns>
public override Pullenti.Ner.ReferentToken ProcessReferent(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
{
    List<Pullenti.Ner.Weapon.Internal.WeaponItemToken> items = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(begin, 10);
    if (items != null)
    {
        List<Pullenti.Ner.ReferentToken> attached = this.TryAttach(items, true);
        if (attached != null && attached.Count > 0)
        {
            return attached[0];
        }
    }
    return null;
}
/// <summary>
/// Creates a semantic object for the noun of a noun phrase, links its adjectives and its
/// internal noun (if any) as details, and adds the object to the graph.
/// </summary>
/// <param name="gr">Graph that receives the new object and its links.</param>
/// <param name="npt">Noun phrase to convert.</param>
/// <returns>
/// The created object, or null when the noun is a referent token without a referent
/// (in that case nothing is added to the graph).
/// </returns>
public static Pullenti.Semantic.SemObject CreateNounGroup(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt)
{
    Pullenti.Ner.Token head = npt.Noun.BeginToken;
    Pullenti.Semantic.SemObject obj = new Pullenti.Semantic.SemObject(gr);
    obj.Tokens.Add(npt.Noun);

    // Object type: personal pronoun, other pronoun, or plain noun
    obj.Typ = Pullenti.Semantic.SemObjectType.Noun;
    if (npt.Noun.Morph.Class.IsPersonalPronoun)
    {
        obj.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
    }
    else if (npt.Noun.Morph.Class.IsPronoun)
    {
        obj.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
    }

    if (npt.Noun.BeginToken != npt.Noun.EndToken)
    {
        // Multi-token noun: morphology is taken from the phrase as a whole
        obj.Morph.NormalCase = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        obj.Morph.NormalFull = npt.Noun.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        obj.Morph.Class = Pullenti.Morph.MorphClass.Noun;
        obj.Morph.Number = npt.Morph.Number;
        obj.Morph.Gender = npt.Morph.Gender;
        obj.Morph.Case = npt.Morph.Case;
    }
    else if (head is Pullenti.Ner.TextToken)
    {
        // Single word: use the first word form that agrees with the phrase morphology
        foreach (Pullenti.Morph.MorphBaseInfo form in head.Morph.Items)
        {
            if (!form.CheckAccord(npt.Morph, false, false))
            {
                continue;
            }
            if (form is Pullenti.Morph.MorphWordForm)
            {
                _setMorph(obj, form as Pullenti.Morph.MorphWordForm);
                break;
            }
        }
        if (obj.Morph.NormalCase == null)
        {
            obj.Morph.NormalCase = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            obj.Morph.NormalFull = head.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        }
        List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(obj.Morph.NormalFull, true, null);
        if (groups != null && groups.Count > 0)
        {
            obj.Concept = groups[0];
        }
    }
    else if (head is Pullenti.Ner.ReferentToken)
    {
        Pullenti.Ner.Referent referent = (head as Pullenti.Ner.ReferentToken).Referent;
        if (referent == null)
        {
            return null;
        }
        obj.Morph.NormalCase = referent.ToString();
        obj.Morph.NormalFull = obj.Morph.NormalCase;
        obj.Concept = referent;
    }
    else if (head is Pullenti.Ner.NumberToken)
    {
        Pullenti.Ner.NumberToken number = head as Pullenti.Ner.NumberToken;
        obj.Morph.Gender = head.Morph.Gender;
        obj.Morph.Number = head.Morph.Number;
        if (number.IntValue == null)
        {
            // No integer value: keep the upper-cased source text
            obj.Morph.NormalCase = head.GetSourceText().ToUpper();
            obj.Morph.NormalFull = obj.Morph.NormalCase;
        }
        else
        {
            obj.Morph.NormalCase = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, head.Morph.Gender, head.Morph.Number);
            obj.Morph.NormalFull = Pullenti.Ner.Core.NumberHelper.GetNumberAdjective(number.IntValue.Value, Pullenti.Morph.MorphGender.Masculine, Pullenti.Morph.MorphNumber.Singular);
        }
    }
    head.Tag = obj;

    // Adjectives become "какой" details; for multi-noun phrases only the first adjective is used
    if (npt.Adjectives.Count > 0)
    {
        foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives)
        {
            if (npt.MultiNouns && adjective != npt.Adjectives[0])
            {
                break;
            }
            Pullenti.Semantic.SemObject adjectiveObj = CreateNptAdj(gr, npt, adjective);
            if (adjectiveObj != null)
            {
                gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, obj, adjectiveObj, "какой", false, null);
            }
        }
    }

    if (npt.InternalNoun != null)
    {
        Pullenti.Semantic.SemObject innerObj = CreateNounGroup(gr, npt.InternalNoun);
        if (innerObj != null)
        {
            gr.AddLink(Pullenti.Semantic.SemLinkType.Detail, obj, innerObj, null, false, null);
        }
    }

    gr.Objects.Add(obj);
    return obj;
}
/// <summary>
/// Copies slots from a previously found weapon referent into a new one: every type slot is
/// copied, brand (and optionally model) slots only when the target has no slot of that kind yet.
/// </summary>
static void _inheritSlots(WeaponReferent target, WeaponReferent source, bool withModel)
{
    // Brand/model slots are collected first and added afterwards, so that the "target has none yet"
    // check is made against the state before inheritance and all values of the source are kept.
    List<Pullenti.Ner.Slot> deferred = new List<Pullenti.Ner.Slot>();
    foreach (Pullenti.Ner.Slot s in source.Slots)
    {
        if (s.TypeName == WeaponReferent.ATTR_TYPE)
        {
            target.AddSlot(s.TypeName, s.Value, false, 0);
        }
        else if (s.TypeName == WeaponReferent.ATTR_BRAND || (withModel && s.TypeName == WeaponReferent.ATTR_MODEL))
        {
            if (target.FindSlot(s.TypeName, null, true) == null)
            {
                deferred.Add(s);
            }
        }
    }
    foreach (Pullenti.Ner.Slot s in deferred)
    {
        target.AddSlot(s.TypeName, s.Value, false, 0);
    }
}

/// <summary>
/// Builds a weapon referent from a parsed item list and wraps it into a referent token.
/// </summary>
/// <remarks>
/// Items are consumed from the start of the list while they do not contradict what has already
/// been collected (a second different type/brand/model value, or a second name/number/caliber/date,
/// stops the scan). When the list has no noun item, type and brand information may be inherited
/// from a weapon referent found in the preceding text. A result without a reliable noun is only
/// accepted when a model or a non-doubtful brand is present, or, for a model alone, when one of
/// the words "ОРУЖИЕ", "ВООРУЖЕНИЕ", "ВЫСТРЕЛ", "ВЫСТРЕЛИТЬ" occurs within 20 tokens before the end.
/// </remarks>
/// <param name="its">Parsed weapon items; null or an empty list yields null.</param>
/// <param name="attach">Not used by this implementation.</param>
/// <returns>A list with a single referent token, or null when no referent can be built.</returns>
List <Pullenti.Ner.ReferentToken> TryAttach(List <Pullenti.Ner.Weapon.Internal.WeaponItemToken> its, bool attach)
{
    // Guard: the code below reads its[0]
    if (its == null || its.Count == 0)
    {
        return null;
    }
    WeaponReferent tr = new WeaponReferent();
    Pullenti.Ner.Token t1 = null;
    Pullenti.Ner.Weapon.Internal.WeaponItemToken noun = null;
    Pullenti.Ner.Weapon.Internal.WeaponItemToken brand = null;
    Pullenti.Ner.Weapon.Internal.WeaponItemToken model = null;
    foreach (Pullenti.Ner.Weapon.Internal.WeaponItemToken it in its)
    {
        Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs typ = it.Typ;
        if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Noun)
        {
            if (its.Count == 1)
            {
                return null;
            }
            if (tr.FindSlot(WeaponReferent.ATTR_TYPE, null, true) != null && tr.FindSlot(WeaponReferent.ATTR_TYPE, it.Value, true) == null)
            {
                break;
            }
            if (!it.IsInternal)
            {
                noun = it;
            }
            tr.AddSlot(WeaponReferent.ATTR_TYPE, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(WeaponReferent.ATTR_TYPE, it.AltValue, false, 0);
            }
        }
        else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_BRAND, null, true) != null && tr.FindSlot(WeaponReferent.ATTR_BRAND, it.Value, true) == null)
            {
                break;
            }
            // An explicit brand makes a doubtful noun trustworthy
            if (!it.IsInternal && noun != null && noun.IsDoubt)
            {
                noun.IsDoubt = false;
            }
            brand = it;
            tr.AddSlot(WeaponReferent.ATTR_BRAND, it.Value, false, 0);
        }
        else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Model)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_MODEL, null, true) != null && tr.FindSlot(WeaponReferent.ATTR_MODEL, it.Value, true) == null)
            {
                break;
            }
            model = it;
            tr.AddSlot(WeaponReferent.ATTR_MODEL, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(WeaponReferent.ATTR_MODEL, it.AltValue, false, 0);
            }
        }
        else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Name || typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Number || typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Caliber)
        {
            // Name, number and caliber may occur only once
            string attrName = (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Name ? WeaponReferent.ATTR_NAME : (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Number ? WeaponReferent.ATTR_NUMBER : WeaponReferent.ATTR_CALIBER));
            if (tr.FindSlot(attrName, null, true) != null)
            {
                break;
            }
            tr.AddSlot(attrName, it.Value, false, 0);
            if (it.AltValue != null)
            {
                tr.AddSlot(attrName, it.AltValue, false, 0);
            }
        }
        else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Developer)
        {
            tr.AddSlot(WeaponReferent.ATTR_REF, it.Ref, false, 0);
        }
        else if (typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Date)
        {
            if (tr.FindSlot(WeaponReferent.ATTR_DATE, null, true) != null)
            {
                break;
            }
            tr.AddSlot(WeaponReferent.ATTR_DATE, it.Ref, true, 0);
        }
        else
        {
            // Other item kinds contribute nothing and do not move the end token
            continue;
        }
        t1 = it.EndToken;
    }
    // Guard: no item was consumed, so there is no end token to build a referent token from
    if (t1 == null)
    {
        return null;
    }

    bool hasGoodNoun = (noun != null && !noun.IsDoubt);
    if (noun == null)
    {
        // Look for a weapon referent directly before the items (only non-letter tokens and
        // conjunctions may stand in between) and inherit its type, brand and model.
        for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null; tt = tt.Previous)
        {
            WeaponReferent prev = tt.GetReferent() as WeaponReferent;
            if (prev != null)
            {
                _inheritSlots(tr, prev, true);
                hasGoodNoun = true;
                break;
            }
            if ((tt is Pullenti.Ner.TextToken) && ((!tt.Chars.IsLetter || tt.Morph.Class.IsConjunction)))
            {
                continue;
            }
            break;
        }
    }
    if (noun == null && model != null)
    {
        // Look up to 100 tokens back for a weapon referent with the same model
        // and inherit its type and brand.
        int cou = 0;
        for (Pullenti.Ner.Token tt = its[0].BeginToken.Previous; tt != null && (cou < 100); tt = tt.Previous, cou++)
        {
            WeaponReferent prev = tt.GetReferent() as WeaponReferent;
            if (prev == null || prev.FindSlot(WeaponReferent.ATTR_MODEL, model.Value, true) == null)
            {
                continue;
            }
            _inheritSlots(tr, prev, false);
            hasGoodNoun = true;
            break;
        }
    }

    if (!hasGoodNoun)
    {
        if (noun != null)
        {
            // A doubtful noun needs a model or a reliable brand
            if (model == null && (brand == null || brand.IsDoubt))
            {
                return null;
            }
        }
        else
        {
            if (model == null)
            {
                return null;
            }
            // A bare model needs a weapon-related word nearby
            bool ok = false;
            int cou = 0;
            for (Pullenti.Ner.Token tt = t1.Previous; tt != null && (cou < 20); tt = tt.Previous, cou++)
            {
                if (tt.IsValue("ОРУЖИЕ", null) || tt.IsValue("ВООРУЖЕНИЕ", null) || tt.IsValue("ВЫСТРЕЛ", null) || tt.IsValue("ВЫСТРЕЛИТЬ", null))
                {
                    ok = true;
                    break;
                }
            }
            if (!ok)
            {
                return null;
            }
        }
    }

    List <Pullenti.Ner.ReferentToken> res = new List <Pullenti.Ner.ReferentToken>();
    res.Add(new Pullenti.Ner.ReferentToken(tr, its[0].BeginToken, t1));
    return res;
}
/// <summary>
/// Splits the token chain starting at <paramref name="t0"/> into lines.
/// </summary>
/// <remarks>
/// A line ends at a token that has a newline after it and whose next token can start a sentence,
/// or at the last token of the chain. As a special case, a line consisting of a single person
/// referent at the start of a line ends immediately when the next token is a text token made of
/// letters that is not all lower-case. Parsing stops at a keywords title item, at a block title
/// of a defined type, or when one of the limits is reached.
/// </remarks>
/// <param name="t0">First token to process.</param>
/// <param name="maxLines">Parsing stops once this many lines have been collected.</param>
/// <param name="maxChars">Parsing stops once the collected lines hold at least this many characters.</param>
/// <param name="maxEndChar">When positive, tokens that begin after this position are not processed.</param>
/// <returns>The collected lines (possibly empty, never null).</returns>
public static List <Line> Parse(Pullenti.Ner.Token t0, int maxLines, int maxChars, int maxEndChar)
{
    List <Line> res = new List <Line>();
    int totalChars = 0;
    for (Pullenti.Ner.Token t = t0; t != null; t = t.Next)
    {
        if (maxEndChar > 0 && t.BeginChar > maxEndChar)
        {
            break;
        }

        // Find the last token of the line. t1 is never null here and t1.Next is non-null
        // inside the loop, so no further null checks are needed.
        Pullenti.Ner.Token t1 = t;
        for (; t1.Next != null; t1 = t1.Next)
        {
            if (t1.IsNewlineAfter && Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t1.Next))
            {
                break;
            }
            if (t1 == t && t.IsNewlineBefore && (t.GetReferent() is Pullenti.Ner.Person.PersonReferent))
            {
                if ((t1.Next is Pullenti.Ner.TextToken) && t1.Next.Chars.IsLetter && !t1.Next.Chars.IsAllLower)
                {
                    break;
                }
            }
        }

        TitleItemToken tit = TitleItemToken.TryAttach(t);
        if (tit != null && tit.Typ == TitleItemToken.Types.Keywords)
        {
            break;
        }
        Pullenti.Ner.Core.Internal.BlockTitleToken bl = Pullenti.Ner.Core.Internal.BlockTitleToken.TryAttach(t, false, null);
        if (bl != null && bl.Typ != Pullenti.Ner.Core.Internal.BlkTyps.Undefined)
        {
            break;
        }

        Line l = new Line(t, t1);
        res.Add(l);
        totalChars += l.CharsCount;
        if (res.Count >= maxLines || totalChars >= maxChars)
        {
            break;
        }
        // Continue after the end of this line
        t = t1;
    }
    return res;
}
/// <summary>
/// Runs weapon analysis over the whole token chain of <paramref name="kit"/>.
/// </summary>
/// <remarks>
/// Pass 1: parses weapon item lists, registers the resulting referents, embeds their tokens and
/// indexes them by model (with and without the brand in front; models starting with a digit are
/// not indexed) and by name.
/// Pass 2: looks for further mentions of already known weapons - a known name inside brackets, or
/// a known model/name as plain text - and embeds referent tokens for them. A model that maps to
/// more than one referent is ignored. A brand item standing right before such a mention is added
/// to the referent and included in the token.
/// </remarks>
/// <param name="kit">Analysis kit whose tokens are processed and modified in place.</param>
public override void Process(Pullenti.Ner.Core.AnalysisKit kit)
{
    Pullenti.Ner.Core.AnalyzerData ad = kit.GetAnalyzerData(this);
    Pullenti.Ner.Core.TerminCollection models = new Pullenti.Ner.Core.TerminCollection();
    Dictionary <string, List <Pullenti.Ner.Referent> > objsByModel = new Dictionary <string, List <Pullenti.Ner.Referent> >();
    Pullenti.Ner.Core.TerminCollection objByNames = new Pullenti.Ner.Core.TerminCollection();

    // Pass 1: explicit weapon descriptions
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        List <Pullenti.Ner.Weapon.Internal.WeaponItemToken> its = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParseList(t, 10);
        if (its == null)
        {
            continue;
        }
        List <Pullenti.Ner.ReferentToken> rts = this.TryAttach(its, false);
        if (rts == null)
        {
            continue;
        }
        foreach (Pullenti.Ner.ReferentToken rt in rts)
        {
            rt.Referent = ad.RegisterReferent(rt.Referent);
            kit.EmbedToken(rt);
            t = rt;
            foreach (Pullenti.Ner.Slot s in rt.Referent.Slots)
            {
                if (s.TypeName == WeaponReferent.ATTR_MODEL)
                {
                    // Guard: a missing or empty model value cannot be indexed (mod[0] is read below)
                    string mod = (s.Value == null ? null : s.Value.ToString());
                    if (string.IsNullOrEmpty(mod))
                    {
                        continue;
                    }
                    // k == 0: the model alone; k == 1: "brand model"
                    for (int k = 0; k < 2; k++)
                    {
                        if (!char.IsDigit(mod[0]))
                        {
                            List <Pullenti.Ner.Referent> li;
                            if (!objsByModel.TryGetValue(mod, out li))
                            {
                                objsByModel.Add(mod, (li = new List <Pullenti.Ner.Referent>()));
                            }
                            if (!li.Contains(rt.Referent))
                            {
                                li.Add(rt.Referent);
                            }
                            models.AddString(mod, li, null, false);
                        }
                        if (k > 0)
                        {
                            break;
                        }
                        string brand = rt.Referent.GetStringValue(WeaponReferent.ATTR_BRAND);
                        if (brand == null)
                        {
                            break;
                        }
                        mod = string.Format("{0} {1}", brand, mod);
                    }
                }
                else if (s.TypeName == WeaponReferent.ATTR_NAME)
                {
                    // Guard: a name slot without a value cannot produce a termin
                    if (s.Value != null)
                    {
                        objByNames.Add(new Pullenti.Ner.Core.Termin(s.Value.ToString()) { Tag = rt.Referent });
                    }
                }
            }
        }
    }
    if (objsByModel.Count == 0 && objByNames.Termins.Count == 0)
    {
        return;
    }

    // Pass 2: repeated mentions by model or name
    for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
    {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 10);
        if (br != null)
        {
            // A known name that fills the whole bracket sequence
            Pullenti.Ner.Core.TerminToken toks = objByNames.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            if (toks != null && toks.EndToken.Next == br.EndToken)
            {
                Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(toks.Termin.Tag as Pullenti.Ner.Referent, br.BeginToken, br.EndToken);
                kit.EmbedToken(rt0);
                t = rt0;
                continue;
            }
        }
        if (!(t is Pullenti.Ner.TextToken))
        {
            continue;
        }
        if (!t.Chars.IsLetter)
        {
            continue;
        }
        Pullenti.Ner.Core.TerminToken tok = models.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok == null)
        {
            // Names are only matched when the word is not all lower-case
            if (!t.Chars.IsAllLower)
            {
                tok = objByNames.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
            }
            if (tok == null)
            {
                continue;
            }
        }
        // The match must be followed by whitespace, one of ",.)" or a bracket
        if (!tok.IsWhitespaceAfter)
        {
            if (tok.EndToken.Next == null || !tok.EndToken.Next.IsCharOf(",.)"))
            {
                if (!Pullenti.Ner.Core.BracketHelper.IsBracket(tok.EndToken.Next, false))
                {
                    continue;
                }
            }
        }
        Pullenti.Ner.Referent tr = null;
        List <Pullenti.Ner.Referent> li = tok.Termin.Tag as List <Pullenti.Ner.Referent>;
        if (li != null && li.Count == 1)
        {
            tr = li[0];
        }
        else
        {
            tr = tok.Termin.Tag as Pullenti.Ner.Referent;
        }
        if (tr != null)
        {
            Pullenti.Ner.Weapon.Internal.WeaponItemToken tit = Pullenti.Ner.Weapon.Internal.WeaponItemToken.TryParse(tok.BeginToken.Previous, null, false, true);
            if (tit != null && tit.Typ == Pullenti.Ner.Weapon.Internal.WeaponItemToken.Typs.Brand)
            {
                tr.AddSlot(WeaponReferent.ATTR_BRAND, tit.Value, false, 0);
                tok.BeginToken = tit.BeginToken;
            }
            Pullenti.Ner.ReferentToken rt0 = new Pullenti.Ner.ReferentToken(tr, tok.BeginToken, tok.EndToken);
            kit.EmbedToken(rt0);
            t = rt0;
            continue;
        }
    }
}
/// <summary>
/// Builds the token chain for the given source of analysis.
/// Steps, in order: morphological tokenization of <c>sofa.Text</c>; creation of the
/// <c>TextToken</c> chain (applying <c>sofa.CorrectionDict</c> replacements when the
/// corrected word yields exactly one morph token); optional dust clearing, word merging
/// and word correction as enabled on <paramref name="sofa"/>; letter merging; base
/// language detection; optional embedding of number tokens. Unless
/// <paramref name="onlyTokenizing"/> is set, a final pass narrows the morphology of
/// out-of-dictionary Cyrillic words using a neighbouring word with the same two-character
/// ending, and statistics are created.
/// </summary>
/// <param name="sofa">Source of analysis; when null the constructor does nothing.</param>
/// <param name="onlyTokenizing">When true, stops after tokenization and number embedding.</param>
/// <param name="lang">Language passed to the morphology service.</param>
/// <param name="progress">Progress callback passed to the morphology service.</param>
public AnalysisKit(Pullenti.Ner.SourceOfAnalysis sofa = null, bool onlyTokenizing = false, Pullenti.Morph.MorphLang lang = null, ProgressChangedEventHandler progress = null)
{
    if (sofa == null)
    {
        return;
    }
    m_Sofa = sofa;
    StartDate = DateTime.Now;
    List<Pullenti.Morph.MorphToken> tokens = Pullenti.Morph.MorphologyService.Process(sofa.Text, lang, progress);
    Pullenti.Ner.Token t0 = null;
    if (tokens != null)
    {
        for (int ii = 0; ii < tokens.Count; ii++)
        {
            Pullenti.Morph.MorphToken mt = tokens[ii];
            Pullenti.Ner.TextToken tt = new Pullenti.Ner.TextToken(mt, this);
            if (sofa.CorrectionDict != null)
            {
                string corw;
                if (sofa.CorrectionDict.TryGetValue(mt.Term, out corw))
                {
                    List<Pullenti.Morph.MorphToken> ccc = Pullenti.Morph.MorphologyService.Process(corw, lang, null);
                    // Only a correction that tokenizes to a single word replaces the token.
                    if (ccc != null && ccc.Count == 1)
                    {
                        Pullenti.Ner.TextToken tt1 = new Pullenti.Ner.TextToken(ccc[0], this, tt.BeginChar, tt.EndChar) { Term0 = tt.Term };
                        tt1.Chars = tt.Chars;
                        tt = tt1;
                        if (CorrectedTokens == null)
                        {
                            CorrectedTokens = new Dictionary<Pullenti.Ner.Token, string>();
                        }
                        CorrectedTokens.Add(tt, tt.GetSourceText());
                    }
                }
            }
            if (t0 == null)
            {
                FirstToken = tt;
            }
            else
            {
                t0.Next = tt;
            }
            t0 = tt;
        }
    }
    if (sofa.ClearDust)
    {
        this.ClearDust();
    }
    if (sofa.DoWordsMergingByMorph)
    {
        this.CorrectWordsByMerging(lang);
    }
    if (sofa.DoWordCorrectionByMorph)
    {
        this.CorrectWordsByMorph(lang);
    }
    this.MergeLetters();
    this.DefineBaseLanguage();
    if (sofa.CreateNumberTokens)
    {
        for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
        {
            Pullenti.Ner.NumberToken nt = NumberHelper.TryParseNumber(t);
            if (nt == null)
            {
                continue;
            }
            this.EmbedToken(nt);
            t = nt;
        }
    }
    if (onlyTokenizing)
    {
        return;
    }
    for (Pullenti.Ner.Token t = FirstToken; t != null; t = t.Next)
    {
        if (t.Morph.Class.IsPreposition)
        {
            continue;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (!mc.IsUndefined || !t.Chars.IsCyrillicLetter || t.LengthChar <= 4)
        {
            continue;
        }
        // Last two characters of the token (EndChar is used as an inclusive index here).
        string tail = sofa.Text.Substring(t.EndChar - 1, 2);
        // The preceding neighbour is tried first, then the following one.
        Pullenti.Ner.Token tte = _findSameEndingNeighbour(t, t.Previous, false, sofa.Text, tail);
        if (tte == null)
        {
            tte = _findSameEndingNeighbour(t, t.Next, true, sofa.Text, tail);
        }
        if (tte != null)
        {
            t.Morph.RemoveItemsEx(tte.Morph, tte.GetMorphClassInDictionary());
        }
    }
    this.CreateStatistics();
}

/// <summary>
/// Returns the neighbour of <paramref name="t"/> (skipping at most one comma/"and",
/// preposition or conjunction in the given direction) when it is a dictionary word longer
/// than four characters, shares a morph class bit with <paramref name="t"/> and ends with
/// <paramref name="tail"/>; otherwise null.
/// </summary>
private static Pullenti.Ner.Token _findSameEndingNeighbour(Pullenti.Ner.Token t, Pullenti.Ner.Token neighbour, bool forward, string text, string tail)
{
    Pullenti.Ner.Token tt = neighbour;
    if (tt != null && (tt.IsCommaAnd || tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction))
    {
        tt = forward ? tt.Next : tt.Previous;
    }
    if (tt == null || tt.GetMorphClassInDictionary().IsUndefined)
    {
        return null;
    }
    if ((tt.Morph.Class.Value & t.Morph.Class.Value) == 0 || tt.LengthChar <= 4)
    {
        return null;
    }
    return text.Substring(tt.EndChar - 1, 2) == tail ? tt : null;
}
/// <summary>
/// Tries to recognize one bibliographic-reference element starting at token t.
/// Returns null when t is null, when lev exceeds 3, or when nothing matches.
/// </summary>
// Branches, in the order they are tried:
//  1. '[' : parses the following token recursively (lev + 1). If the result is followed by ']'
//     or already ends with ']', the brackets are included in the result. A Sostavitel or
//     Editors result is returned unchanged. Otherwise a bracket sequence shorter than 30
//     characters whose last inner token is a number becomes a Number.
//  2. Referent tokens: person -> TryParseAuthor; geo -> Geo; date -> Year (when the date has a
//     single slot and a year, or has a year and follows a comma); organization of kind Press ->
//     Press; URI with scheme http/https/ftp or no scheme -> Url.
//  3. A termin from m_Termins: Type, NameTail and ElectronRes are accepted only when preceded by
//     one of ".:[" or a hyphen. A rejected ElectronRes still matches if a URI referent follows
//     after skipping non-letter text tokens.
//  4. '/' : Delimeter with value "/" or "//". When there is no whitespace on either side, null
//     is returned unless one of the next three tokens parses (via TryParse) as something other
//     than a Number.
//  5. A digit number: Number, or Year when the value is in [1930, 2030); may absorb a following
//     '.' or a single non-letter character followed by whitespace; becomes Pages when followed
//     by a page abbreviation.
//  6. Text tokens: page/volume abbreviations followed by a number -> PageRange (extended over
//     comma- or hyphen-separated numbers); city abbreviations followed by punctuation -> Geo;
//     translation markers -> Translate; "ТАМ" / "ТАМЖЕ" (optionally + "ЖЕ") -> Tamze;
//     "see"-like words -> See (note: this loop reassigns the parameter t); "БОЛЕЕ" followed by
//     a See; a number prefix recognized by MiscHelper.CheckNumberPrefix -> N;
//     "В" + number + volume/book word -> Volume.
//  7. '(' number ')' with 1900 < number <= 2040 and number <= the current year, or
//     '(' date referent with a year ')' -> Year.
static BookLinkToken _tryParse(Pullenti.Ner.Token t, int lev) { if (t == null || lev > 3) { return(null); } if (t.IsChar('[')) { BookLinkToken re = _tryParse(t.Next, lev + 1); if (re != null && re.EndToken.Next != null && re.EndToken.Next.IsChar(']')) { re.BeginToken = t; re.EndToken = re.EndToken.Next; return(re); } if (re != null && re.EndToken.IsChar(']')) { re.BeginToken = t; return(re); } if (re != null) { if (re.Typ == BookLinkTyp.Sostavitel || re.Typ == BookLinkTyp.Editors) { return(re); } } Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { if ((br.EndToken.Previous is Pullenti.Ner.NumberToken) && (br.LengthChar < 30)) { return new BookLinkToken(t, br.EndToken) { Typ = BookLinkTyp.Number, Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(br.BeginToken.Next, br.EndToken.Previous, Pullenti.Ner.Core.GetTextAttr.No) } } ; } } Pullenti.Ner.Token t0 = t; if (t is Pullenti.Ner.ReferentToken) { if (t.GetReferent() is Pullenti.Ner.Person.PersonReferent) { return(TryParseAuthor(t, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined)); } if (t.GetReferent() is Pullenti.Ner.Geo.GeoReferent) { return new BookLinkToken(t, t) { Typ = BookLinkTyp.Geo, Ref = t.GetReferent() } } ; if (t.GetReferent() is Pullenti.Ner.Date.DateReferent) { Pullenti.Ner.Date.DateReferent dr = t.GetReferent() as Pullenti.Ner.Date.DateReferent; if (dr.Slots.Count == 1 && dr.Year > 0) { return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() } } ; if (dr.Year > 0 && t.Previous != null && t.Previous.IsComma) { return new BookLinkToken(t, t) { Typ = BookLinkTyp.Year, Value = dr.Year.ToString() } } ; } if (t.GetReferent() is Pullenti.Ner.Org.OrganizationReferent) { Pullenti.Ner.Org.OrganizationReferent org = t.GetReferent() as Pullenti.Ner.Org.OrganizationReferent; if (org.Kind == Pullenti.Ner.Org.OrganizationKind.Press) { return new BookLinkToken(t, t) { Typ = 
BookLinkTyp.Press, Ref = org } } ; } if (t.GetReferent() is Pullenti.Ner.Uri.UriReferent) { Pullenti.Ner.Uri.UriReferent uri = t.GetReferent() as Pullenti.Ner.Uri.UriReferent; if ((uri.Scheme == "http" || uri.Scheme == "https" || uri.Scheme == "ftp") || uri.Scheme == null) { return new BookLinkToken(t, t) { Typ = BookLinkTyp.Url, Ref = uri } } ; } } Pullenti.Ner.Core.TerminToken tok = m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No); if (tok != null) { BookLinkTyp typ = (BookLinkTyp)tok.Termin.Tag; bool ok = true; if (typ == BookLinkTyp.Type || typ == BookLinkTyp.NameTail || typ == BookLinkTyp.ElectronRes) { if (t.Previous != null && ((t.Previous.IsCharOf(".:[") || t.Previous.IsHiphen))) { } else { ok = false; } } if (ok) { return new BookLinkToken(t, tok.EndToken) { Typ = typ, Value = tok.Termin.CanonicText } } ; if (typ == BookLinkTyp.ElectronRes) { for (Pullenti.Ner.Token tt = tok.EndToken.Next; tt != null; tt = tt.Next) { if ((tt is Pullenti.Ner.TextToken) && !tt.Chars.IsLetter) { continue; } if (tt.GetReferent() is Pullenti.Ner.Uri.UriReferent) { return new BookLinkToken(t, tt) { Typ = BookLinkTyp.ElectronRes, Ref = tt.GetReferent() } } ; break; } } } if (t.IsChar('/')) { BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Delimeter, Value = "/" }; if (t.Next != null && t.Next.IsChar('/')) { res.EndToken = t.Next; res.Value = "//"; } if (!t.IsWhitespaceBefore && !t.IsWhitespaceAfter) { int coo = 3; bool no = true; for (Pullenti.Ner.Token tt = t.Next; tt != null && coo > 0; tt = tt.Next, coo--) { BookLinkToken vvv = TryParse(tt, lev + 1); if (vvv != null && vvv.Typ != BookLinkTyp.Number) { no = false; break; } } if (no) { return(null); } } return(res); } if ((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) { BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Number, Value = (t as 
Pullenti.Ner.NumberToken).Value.ToString() }; int val = (t as Pullenti.Ner.NumberToken).IntValue.Value; if (val >= 1930 && (val < 2030)) { res.Typ = BookLinkTyp.Year; } if (t.Next != null && t.Next.IsChar('.')) { res.EndToken = t.Next; } else if ((t.Next != null && t.Next.LengthChar == 1 && !t.Next.Chars.IsLetter) && t.Next.IsWhitespaceAfter) { res.EndToken = t.Next; } else if (t.Next is Pullenti.Ner.TextToken) { string term = (t.Next as Pullenti.Ner.TextToken).Term; if (((term == "СТР" || term == "C" || term == "С") || term == "P" || term == "S") || term == "PAGES") { res.EndToken = t.Next; res.Typ = BookLinkTyp.Pages; res.Value = (t as Pullenti.Ner.NumberToken).Value.ToString(); } } return(res); } if (t is Pullenti.Ner.TextToken) { string term = (t as Pullenti.Ner.TextToken).Term; if (((((((term == "СТР" || term == "C" || term == "С") || term == "ТОМ" || term == "T") || term == "Т" || term == "P") || term == "PP" || term == "V") || term == "VOL" || term == "S") || term == "СТОР" || t.IsValue("PAGE", null)) || t.IsValue("СТРАНИЦА", "СТОРІНКА")) { Pullenti.Ner.Token tt = t.Next; while (tt != null) { if (tt.IsCharOf(".:~")) { tt = tt.Next; } else { break; } } if (tt is Pullenti.Ner.NumberToken) { BookLinkToken res = new BookLinkToken(t, tt) { Typ = BookLinkTyp.PageRange }; Pullenti.Ner.Token tt0 = tt; Pullenti.Ner.Token tt1 = tt; for (tt = tt.Next; tt != null; tt = tt.Next) { if (tt.IsCharOf(",") || tt.IsHiphen) { if (tt.Next is Pullenti.Ner.NumberToken) { tt = tt.Next; res.EndToken = tt; tt1 = tt; continue; } } break; } res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt0, tt1, Pullenti.Ner.Core.GetTextAttr.No); return(res); } } if ((term == "M" || term == "М" || term == "СПБ") || term == "K" || term == "К") { if (t.Next != null && t.Next.IsCharOf(":;")) { BookLinkToken re = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo }; return(re); } if (t.Next != null && t.Next.IsCharOf(".")) { BookLinkToken res = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.Geo 
}; if (t.Next.Next != null && t.Next.Next.IsCharOf(":;")) { res.EndToken = t.Next.Next; } else if (t.Next.Next != null && (t.Next.Next is Pullenti.Ner.NumberToken)) { } else if (t.Next.Next != null && t.Next.Next.IsComma && (t.Next.Next.Next is Pullenti.Ner.NumberToken)) { } else { return(null); } return(res); } } if (term == "ПЕР" || term == "ПЕРЕВ" || term == "ПЕРЕВОД") { Pullenti.Ner.Token tt = t; if (tt.Next != null && tt.Next.IsChar('.')) { tt = tt.Next; } if (tt.Next != null && ((tt.Next.IsValue("C", null) || tt.Next.IsValue("С", null)))) { tt = tt.Next; if (tt.Next == null || tt.WhitespacesAfterCount > 2) { return(null); } BookLinkToken re = new BookLinkToken(t, tt.Next) { Typ = BookLinkTyp.Translate }; return(re); } } if (term == "ТАМ" || term == "ТАМЖЕ") { BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.Tamze }; if (t.Next != null && t.Next.IsValue("ЖЕ", null)) { res.EndToken = t.Next; } return(res); } if (((term == "СМ" || term == "CM" || term == "НАПР") || term == "НАПРИМЕР" || term == "SEE") || term == "ПОДРОБНЕЕ" || term == "ПОДРОБНО") { BookLinkToken res = new BookLinkToken(t, t) { Typ = BookLinkTyp.See }; for (t = t.Next; t != null; t = t.Next) { if (t.IsCharOf(".:") || t.IsValue("ALSO", null)) { res.EndToken = t; continue; } if (t.IsValue("В", null) || t.IsValue("IN", null)) { res.EndToken = t; continue; } BookLinkToken vvv = _tryParse(t, lev + 1); if (vvv != null && vvv.Typ == BookLinkTyp.See) { res.EndToken = vvv.EndToken; break; } break; } return(res); } if (term == "БОЛЕЕ") { BookLinkToken vvv = _tryParse(t.Next, lev + 1); if (vvv != null && vvv.Typ == BookLinkTyp.See) { vvv.BeginToken = t; return(vvv); } } Pullenti.Ner.Token no = Pullenti.Ner.Core.MiscHelper.CheckNumberPrefix(t); if (no is Pullenti.Ner.NumberToken) { return new BookLinkToken(t, no) { Typ = BookLinkTyp.N } } ; if (((term == "B" || term == "В")) && (t.Next is Pullenti.Ner.NumberToken) && (t.Next.Next is Pullenti.Ner.TextToken)) { string term2 = (t.Next.Next as 
Pullenti.Ner.TextToken).Term; if (((term2 == "Т" || term2 == "T" || term2.StartsWith("ТОМ")) || term2 == "TT" || term2 == "ТТ") || term2 == "КН" || term2.StartsWith("КНИГ")) { return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Volume } } ; } } if (t.IsChar('(')) { if (((t.Next is Pullenti.Ner.NumberToken) && (t.Next as Pullenti.Ner.NumberToken).IntValue != null && t.Next.Next != null) && t.Next.Next.IsChar(')')) { int num = (t.Next as Pullenti.Ner.NumberToken).IntValue.Value; if (num > 1900 && num <= 2040) { if (num <= DateTime.Now.Year) { return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() } } ; } } if (((t.Next is Pullenti.Ner.ReferentToken) && (t.Next.GetReferent() is Pullenti.Ner.Date.DateReferent) && t.Next.Next != null) && t.Next.Next.IsChar(')')) { int num = (t.Next.GetReferent() as Pullenti.Ner.Date.DateReferent).Year; if (num > 0) { return new BookLinkToken(t, t.Next.Next) { Typ = BookLinkTyp.Year, Value = num.ToString() } } ; } } return(null); }
/// <summary>
/// Walks the token chain and replaces out-of-dictionary text tokens with a token built
/// from the spelling suggested by <c>MorphologyService.CorrectWord</c>. A replacement
/// happens only when the suggestion tokenizes to exactly one morph token and is not a
/// proper surname. Each replacement is recorded in <c>CorrectedTokens</c> with its
/// source text.
/// </summary>
/// <param name="lang">Language used when the token's own language is undefined, and for re-tokenizing the suggestion.</param>
void CorrectWordsByMorph(Pullenti.Morph.MorphLang lang)
{
    for (Pullenti.Ner.Token cur = FirstToken; cur != null; cur = cur.Next)
    {
        Pullenti.Ner.TextToken word = cur as Pullenti.Ner.TextToken;
        if (word == null)
        {
            continue;
        }
        if (cur.Morph.ContainsAttr("прдктв.", null))
        {
            continue;
        }

        // Only unknown words of at least four characters are candidates.
        Pullenti.Morph.MorphClass known = cur.GetMorphClassInDictionary();
        if (!known.IsUndefined || cur.LengthChar < 4)
        {
            continue;
        }
        // Skip probable surnames and all-caps tokens.
        if ((cur.Morph.Class.IsProperSurname && !cur.Chars.IsAllLower) || cur.Chars.IsAllUpper)
        {
            continue;
        }

        Pullenti.Morph.MorphLang wordLang = cur.Morph.Language.IsUndefined ? lang : cur.Morph.Language;
        string suggestion = Pullenti.Morph.MorphologyService.CorrectWord(word.Term, wordLang);
        if (suggestion == null)
        {
            continue;
        }
        List<Pullenti.Morph.MorphToken> parsed = Pullenti.Morph.MorphologyService.Process(suggestion, lang, null);
        if (parsed == null || parsed.Count != 1)
        {
            continue;
        }

        Pullenti.Ner.TextToken fixedWord = new Pullenti.Ner.TextToken(parsed[0], this, cur.BeginChar, cur.EndChar)
        {
            Chars = cur.Chars,
            Term0 = word.Term
        };
        if (fixedWord.GetMorphClassInDictionary().IsProperSurname)
        {
            continue;
        }

        // Splice the new token into the chain in place of the old one.
        if (cur == FirstToken)
        {
            FirstToken = fixedWord;
        }
        else
        {
            cur.Previous.Next = fixedWord;
        }
        fixedWord.Next = cur.Next;
        cur = fixedWord;

        if (CorrectedTokens == null)
        {
            CorrectedTokens = new Dictionary<Pullenti.Ner.Token, string>();
        }
        CorrectedTokens.Add(cur, cur.GetSourceText());
    }
}
/// <summary>
/// Creates a book-link token from the tokens <paramref name="b"/> and <paramref name="e"/>,
/// forwarding them to the base constructor with a null third argument.
/// </summary>
/// <param name="b">First base-constructor argument (used as the begin token by callers in this file).</param>
/// <param name="e">Second base-constructor argument (used as the end token by callers in this file).</param>
public BookLinkToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
    : base(b, e, null)
{
}
/// <summary>
/// Creates a named-item token from the tokens <paramref name="b"/> and <paramref name="e"/>,
/// forwarding them to the base constructor with a null third argument.
/// </summary>
/// <param name="b">First base-constructor argument (used as the begin token by callers in this file).</param>
/// <param name="e">Second base-constructor argument (used as the end token by callers in this file).</param>
public NamedItemToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
    : base(b, e, null)
{
}
/// <summary>
/// Tries to recognize an author element of a bibliographic reference at token
/// <paramref name="t"/>: a person, a person enclosed in square brackets, or an
/// "and others" marker ("И ДР", "И ДРУГИЕ", "ET AL", optionally followed by a dot).
/// </summary>
/// <param name="t">Token to start from; null yields null.</param>
/// <param name="prevPersTemplate">Template of the previously parsed person, passed to the person parser.</param>
/// <returns>The recognized token, or null when nothing matches.</returns>
public static BookLinkToken TryParseAuthor(Pullenti.Ner.Token t, Pullenti.Ner.Person.Internal.FioTemplateType prevPersTemplate = Pullenti.Ner.Person.Internal.FioTemplateType.Undefined)
{
    if (t == null)
    {
        return null;
    }

    Pullenti.Ner.ReferentToken person = Pullenti.Ner.Person.Internal.PersonItemToken.TryParsePerson(t, prevPersTemplate);
    if (person != null)
    {
        BookLinkToken link;
        if (person.Data != null)
        {
            link = new BookLinkToken(t, person.EndToken) { Typ = BookLinkTyp.Person, Tok = person };
        }
        else
        {
            Pullenti.Ner.Token last = (person == t ? t : person.EndToken);
            link = new BookLinkToken(t, last) { Typ = BookLinkTyp.Person, Ref = person.Referent };
        }
        link.PersonTemplate = (Pullenti.Ner.Person.Internal.FioTemplateType)person.MiscAttrs;

        // Inspect the first person-property token inside the person span:
        // a capitalized, non-leading one becomes the start of the name; any other rejects the match.
        Pullenti.Ner.Token inner = person.BeginToken;
        while (inner != null && inner.EndChar <= person.EndChar)
        {
            if (inner.GetReferent() is Pullenti.Ner.Person.PersonPropertyReferent)
            {
                Pullenti.Ner.ReferentToken property = inner as Pullenti.Ner.ReferentToken;
                if (property.BeginToken.Chars.IsCapitalUpper && inner != person.BeginToken)
                {
                    link.StartOfName = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(property, Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                    break;
                }
                return null;
            }
            inner = inner.Next;
        }
        return link;
    }

    if (t.IsChar('['))
    {
        BookLinkToken bracketed = TryParseAuthor(t.Next, Pullenti.Ner.Person.Internal.FioTemplateType.Undefined);
        if (bracketed != null && bracketed.EndToken.Next != null && bracketed.EndToken.Next.IsChar(']'))
        {
            bracketed.BeginToken = t;
            bracketed.EndToken = bracketed.EndToken.Next;
            return bracketed;
        }
    }

    bool startsWithAnd = t.IsValue("И", null) || t.IsValue("ET", null);
    if (!startsWithAnd || t.Next == null)
    {
        return null;
    }
    bool others = t.Next.IsValue("ДРУГИЕ", null) || t.Next.IsValue("ДР", null) || t.Next.IsValue("AL", null);
    if (!others)
    {
        return null;
    }

    BookLinkToken andOthers = new BookLinkToken(t, t.Next) { Typ = BookLinkTyp.AndOthers };
    if (t.Next.Next != null && t.Next.Next.IsChar('.'))
    {
        andOthers.EndToken = andOthers.EndToken.Next;
    }
    return andOthers;
}
/// <summary>
/// Sets the previous link of <paramref name="mt"/>'s begin token and the next link of
/// its end token, then does the same recursively for every meta-token found while
/// walking from the begin token up to <c>mt.EndChar</c>, using each nested token's own
/// Previous/Next as the values to propagate.
/// </summary>
static void _corrPrevNext(Pullenti.Ner.MetaToken mt, Pullenti.Ner.Token prev, Pullenti.Ner.Token next)
{
    mt.BeginToken.m_Previous = prev;
    mt.EndToken.m_Next = next;

    Pullenti.Ner.Token cur = mt.BeginToken;
    while (cur != null && cur.EndChar <= mt.EndChar)
    {
        Pullenti.Ner.MetaToken nested = cur as Pullenti.Ner.MetaToken;
        if (nested != null)
        {
            _corrPrevNext(nested, cur.Previous, cur.Next);
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Creates the token from <paramref name="b"/> and <paramref name="e"/>, forwarding them
/// to the base constructor with a null third argument.
/// </summary>
/// <param name="b">First base-constructor argument (presumably the begin token).</param>
/// <param name="e">Second base-constructor argument (presumably the end token).</param>
public DefinitionWithNumericToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
    : base(b, e, null)
{
}
/// <summary>
/// Private constructor: forwards <paramref name="begin"/> and <paramref name="end"/> to the
/// base constructor with a null third argument.
/// </summary>
/// <param name="begin">First token of the span.</param>
/// <param name="end">Last token of the span.</param>
private TitleNameToken(Pullenti.Ner.Token begin, Pullenti.Ner.Token end)
    : base(begin, end, null)
{
}
/// <summary>
/// Creates the token from <paramref name="b"/> and <paramref name="e"/>, forwarding them
/// to the base constructor with a null third argument.
/// </summary>
/// <param name="b">First base-constructor argument (presumably the begin token).</param>
/// <param name="e">Second base-constructor argument (presumably the end token).</param>
public ConjunctionToken(Pullenti.Ner.Token b, Pullenti.Ner.Token e)
    : base(b, e, null)
{
}