/// <summary>
/// Orders the tokens of a semantic object with _compareToks and then fuses
/// neighbouring entries that are adjacent in the token chain into one MetaToken.
/// </summary>
/// <param name="o">object whose Tokens list is reordered and compacted in place</param>
static void _optimizeTokens(Pullenti.Semantic.SemObject o) {
    // Stable exchange sort: at most Tokens.Count passes, ending after the first pass without a swap.
    int pass = 0;
    bool swapped = true;
    while (swapped && pass < o.Tokens.Count) {
        swapped = false;
        for (int k = 1; k < o.Tokens.Count; k++) {
            if (_compareToks(o.Tokens[k - 1], o.Tokens[k]) > 0) {
                Pullenti.Ner.MetaToken held = o.Tokens[k - 1];
                o.Tokens[k - 1] = o.Tokens[k];
                o.Tokens[k] = held;
                swapped = true;
            }
        }
        pass++;
    }
    // Merge runs of touching tokens; the index only advances when nothing was merged,
    // so a freshly merged token is compared with its new right neighbour as well.
    int at = 0;
    while (at < (o.Tokens.Count - 1)) {
        Pullenti.Ner.MetaToken left = o.Tokens[at];
        Pullenti.Ner.MetaToken right = o.Tokens[at + 1];
        if (left.EndToken.Next != right.BeginToken) {
            at++;
            continue;
        }
        o.Tokens[at] = new Pullenti.Ner.MetaToken(left.BeginToken, right.EndToken);
        o.Tokens.RemoveAt(at + 1);
    }
}
/// <summary>
/// Tries to recognise a term from the m_Nords collection starting at <paramref name="t"/>,
/// optionally followed by a second such term (after a hyphen, or directly after an adjective).
/// </summary>
/// <param name="t">candidate first token; must be a TextToken</param>
/// <returns>
/// a MetaToken starting at <paramref name="t"/> (extended to the second term when one is found),
/// or null when <paramref name="t"/> is not a TextToken or does not match m_Nords
/// </returns>
public static Pullenti.Ner.MetaToken TryAttachNordWest(Pullenti.Ner.Token t) {
    if (!(t is Pullenti.Ner.TextToken)) {
        return null;
    }
    Pullenti.Ner.Core.TerminToken tok = m_Nords.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null) {
        return null;
    }
    Pullenti.Ner.MetaToken res = new Pullenti.Ner.MetaToken(t, t) { Morph = t.Morph };

    // Locate the token where a second part may start.
    Pullenti.Ner.Token t1 = null;
    // The original condition tested !t.IsWhitespaceAfter twice; the duplicate operand is removed.
    // NOTE(review): the second test was possibly meant to be !t.Next.IsWhitespaceAfter
    // (no space after the hyphen) - confirm before tightening the condition.
    if (t.Next != null && t.Next.IsHiphen && !t.IsWhitespaceAfter) {
        t1 = t.Next.Next;
    }
    else if (t.Morph.Class.IsAdjective && (t.WhitespacesAfterCount < 2)) {
        t1 = t.Next;
    }

    if (t1 != null) {
        tok = m_Nords.TryParse(t1, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tok != null) {
            // The result takes its end and morphology from the second part.
            res.EndToken = tok.EndToken;
            res.Morph = tok.Morph;
        }
    }
    return res;
}
/// <summary>
/// Looks up, among the Russian noun entries of a derivate group, the word whose spelling
/// matches the key token of <paramref name="mt"/>.
/// </summary>
/// <param name="mt">
/// phrase to look up; for a NounPhraseToken (or a SemanticAbstractSlave whose Source is one)
/// the last token of the noun is used, otherwise the last token of <paramref name="mt"/>
/// </param>
/// <param name="gr">group whose Words are scanned</param>
/// <returns>the first matching word, or null when an argument is null or nothing matches</returns>
public static Pullenti.Semantic.Utils.DerivateWord FindWordInGroup(Pullenti.Ner.MetaToken mt, Pullenti.Semantic.Utils.DerivateGroup gr) {
    if (gr == null || mt == null) {
        return null;
    }
    // Resolve the noun phrase, either directly or through the slave's source.
    Pullenti.Ner.Core.NounPhraseToken phrase = mt as Pullenti.Ner.Core.NounPhraseToken;
    if (phrase == null) {
        SemanticAbstractSlave wrapper = mt as SemanticAbstractSlave;
        if (wrapper != null) {
            phrase = wrapper.Source as Pullenti.Ner.Core.NounPhraseToken;
        }
    }
    Pullenti.Ner.Token key = (phrase != null ? phrase.Noun.EndToken : mt.EndToken);

    foreach (Pullenti.Semantic.Utils.DerivateWord candidate in gr.Words) {
        if (candidate.Class == null || !candidate.Class.IsNoun || !candidate.Lang.IsRu) {
            continue;
        }
        if (key.IsValue(candidate.Spelling, null)) {
            return candidate;
        }
    }
    return null;
}
/// <summary>
/// Builds a sentence item from a parsed token: stores the source and, depending on the
/// runtime type of <paramref name="mt"/>, fills Typ, Prep and the derivate groups.
/// </summary>
/// <param name="mt">source token; types not listed below leave Typ, Prep and the groups untouched</param>
public SentItem(Pullenti.Ner.MetaToken mt) {
    Source = mt;

    Pullenti.Ner.Core.NounPhraseToken nounPhrase = mt as Pullenti.Ner.Core.NounPhraseToken;
    if (nounPhrase != null) {
        Prep = (nounPhrase.Preposition != null ? nounPhrase.Preposition.Normal : "");
        Typ = SentItemType.Noun;
        // Derivate groups are looked up by the normalised (noun, singular, masculine) text of the noun.
        string nounNormal = nounPhrase.Noun.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
        if (nounNormal != null) {
            DrGroups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(nounNormal, true, null);
        }
        return;
    }
    if ((mt is Pullenti.Ner.ReferentToken) || (mt is Pullenti.Ner.Measure.Internal.NumbersWithUnitToken)) {
        Typ = SentItemType.Noun;
        return;
    }
    if (mt is AdverbToken) {
        Typ = SentItemType.Adverb;
        return;
    }
    if (mt is Pullenti.Ner.Core.ConjunctionToken) {
        Typ = SentItemType.Conj;
        return;
    }
    if (mt is DelimToken) {
        Typ = SentItemType.Delim;
        return;
    }

    Pullenti.Ner.Core.VerbPhraseToken verbPhrase = mt as Pullenti.Ner.Core.VerbPhraseToken;
    if (verbPhrase == null) {
        return;
    }
    // Groups for the first verb: full normal form, falling back to the normal case form.
    string firstNormal = null;
    if (verbPhrase.FirstVerb.VerbMorph != null) {
        firstNormal = verbPhrase.FirstVerb.VerbMorph.NormalFull ?? verbPhrase.FirstVerb.VerbMorph.NormalCase;
    }
    if (firstNormal != null) {
        DrGroups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(firstNormal, true, null);
    }
    // Groups for the last verb; a single-verb phrase reuses the first verb's groups.
    if (verbPhrase.FirstVerb != verbPhrase.LastVerb) {
        string lastNormal;
        if (verbPhrase.LastVerb.VerbMorph == null) {
            lastNormal = verbPhrase.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        }
        else {
            lastNormal = verbPhrase.LastVerb.VerbMorph.NormalFull ?? verbPhrase.LastVerb.VerbMorph.NormalCase;
        }
        DrGroups2 = Pullenti.Semantic.Utils.DerivateService.FindDerivates(lastNormal, true, null);
    }
    else {
        DrGroups2 = DrGroups;
    }
    Prep = (verbPhrase.Preposition == null ? "" : verbPhrase.Preposition.Normal);
    Typ = SentItemType.Verb;
}
/// <summary>
/// Re-links the inner tokens of a metatoken: the first inner token gets <paramref name="prev"/>
/// as its predecessor, the last one gets <paramref name="next"/> as its successor, and the
/// same is done recursively for every nested metatoken inside the span.
/// </summary>
/// <param name="mt">metatoken whose inner chain is corrected</param>
/// <param name="prev">token to store in m_Previous of the first inner token</param>
/// <param name="next">token to store in m_Next of the last inner token</param>
static void _corrPrevNext(Pullenti.Ner.MetaToken mt, Pullenti.Ner.Token prev, Pullenti.Ner.Token next) {
    mt.BeginToken.m_Previous = prev;
    mt.EndToken.m_Next = next;

    Pullenti.Ner.Token cur = mt.BeginToken;
    while (cur != null && cur.EndChar <= mt.EndChar) {
        Pullenti.Ner.MetaToken nested = cur as Pullenti.Ner.MetaToken;
        if (nested != null) {
            _corrPrevNext(nested, cur.Previous, cur.Next);
        }
        cur = cur.Next;
    }
}
/// <summary>
/// Returns the normalised key word of a phrase: for a verb phrase the normal form of its
/// last verb, for a noun phrase the singular noun form of the last token of its noun.
/// </summary>
/// <param name="mt">verb or noun phrase; any other token type (or null) yields null</param>
/// <returns>
/// the key word, or null when <paramref name="mt"/> is neither phrase type or when the
/// phrase has no verb morphology / no noun to take the word from
/// </returns>
public static string GetKeyword(Pullenti.Ner.MetaToken mt) {
    Pullenti.Ner.Core.VerbPhraseToken vpt = mt as Pullenti.Ner.Core.VerbPhraseToken;
    if (vpt != null) {
        // VerbMorph may legitimately be null (the SentItem constructor checks for that),
        // so report "no keyword" instead of throwing a NullReferenceException.
        if (vpt.LastVerb == null || vpt.LastVerb.VerbMorph == null) {
            return null;
        }
        return vpt.LastVerb.VerbMorph.NormalFull ?? vpt.LastVerb.VerbMorph.NormalCase;
    }
    Pullenti.Ner.Core.NounPhraseToken npt = mt as Pullenti.Ner.Core.NounPhraseToken;
    if (npt != null) {
        // A NounPhraseToken created directly from a token span has no Noun assigned.
        if (npt.Noun == null || npt.Noun.EndToken == null) {
            return null;
        }
        return npt.Noun.EndToken.GetNormalCaseText(Pullenti.Morph.MorphClass.Noun, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
    }
    return null;
}
/// <summary>
/// Selects the control-model item of a derivate group that corresponds to the given master token.
/// </summary>
/// <param name="mt">
/// a NounPhraseToken (matched by the last token of its noun) or a VerbPhraseItemToken
/// (matched by item type); other token types yield null
/// </param>
/// <param name="gr">derivate group whose Model and Words are inspected; null yields null</param>
/// <returns>the matching item, or null when there is none</returns>
static Pullenti.Semantic.Utils.ControlModelItem FindControlItem(Pullenti.Ner.MetaToken mt, Pullenti.Semantic.Utils.DerivateGroup gr) {
    if (gr == null) {
        return null;
    }

    Pullenti.Ner.Core.NounPhraseToken nounPhrase = mt as Pullenti.Ner.Core.NounPhraseToken;
    if (nounPhrase != null) {
        Pullenti.Ner.Token nounEnd = nounPhrase.Noun.EndToken;
        // First preference: an item bound to this exact word.
        foreach (Pullenti.Semantic.Utils.ControlModelItem item in gr.Model.Items) {
            if (item.Word != null && nounEnd.IsValue(item.Word, null)) {
                return item;
            }
        }
        // Otherwise a word flagged IsVerbNoun maps to the group's item of type Noun.
        foreach (Pullenti.Semantic.Utils.DerivateWord word in gr.Words) {
            if (word.Attrs.IsVerbNoun && nounEnd.IsValue(word.Spelling, null)) {
                return gr.Model.FindItemByTyp(Pullenti.Semantic.Utils.ControlModelItemType.Noun);
            }
        }
        return null;
    }

    Pullenti.Ner.Core.VerbPhraseItemToken verbItem = mt as Pullenti.Ner.Core.VerbPhraseItemToken;
    if (verbItem == null) {
        return null;
    }
    // Reversive or passive verbs use the Reflexive item, all other verbs the Verb item.
    Pullenti.Semantic.Utils.ControlModelItemType wanted = ((verbItem.IsVerbReversive || verbItem.IsVerbPassive)
        ? Pullenti.Semantic.Utils.ControlModelItemType.Reflexive
        : Pullenti.Semantic.Utils.ControlModelItemType.Verb);
    foreach (Pullenti.Semantic.Utils.ControlModelItem item in gr.Model.Items) {
        if (item.Typ == wanted) {
            return item;
        }
    }
    return null;
}
/// <summary>
/// Renders the item as "Rank: name-span" with the type value appended in parentheses when set.
/// </summary>
/// <returns>"?" when either name boundary token is missing, otherwise the formatted text</returns>
public override string ToString() {
    if (BeginNameToken == null || EndNameToken == null) {
        return "?";
    }
    string nameSpan = new Pullenti.Ner.MetaToken(BeginNameToken, EndNameToken).ToString();
    return (TypeValue == null
        ? string.Format("{0}: {1}", Rank, nameSpan)
        : string.Format("{0}: {1} ({2})", Rank, nameSpan, TypeValue));
}
/// <summary>
/// Checks whether the span of a metatoken (including nested metatokens) contains an isolated
/// table control character, i.e. one whose both neighbours exist, are not table control
/// characters themselves and lie inside the span.
/// </summary>
/// <param name="mt">metatoken whose inner tokens are scanned</param>
/// <returns>true when such a character is found</returns>
static bool _containsTableChar(Pullenti.Ner.MetaToken mt) {
    for (Pullenti.Ner.Token cur = mt.BeginToken; cur != null && cur.EndChar <= mt.EndChar; cur = cur.Next) {
        Pullenti.Ner.MetaToken nested = cur as Pullenti.Ner.MetaToken;
        if (nested != null) {
            if (_containsTableChar(nested)) {
                return true;
            }
            continue;
        }
        if (!cur.IsTableControlChar) {
            continue;
        }
        Pullenti.Ner.Token before = cur.Previous;
        Pullenti.Ner.Token after = cur.Next;
        if (before == null || after == null) {
            continue;
        }
        if (before.IsTableControlChar || after.IsTableControlChar) {
            continue;
        }
        if (before.BeginChar >= mt.BeginChar && after.EndChar <= mt.EndChar) {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Removes a metatoken from the token chain, restoring the original tokens it wrapped.
/// The matching occurrence of the token's referent is removed as well, and a referent left
/// without occurrences is removed from the analyzer data that contains it.
/// </summary>
/// <param name="t">the metatoken to remove from the chain</param>
/// <returns>
/// the first inner token of the removed metatoken; <paramref name="t"/> itself when it is not
/// a metatoken; null when <paramref name="t"/> is null
/// </returns>
public Pullenti.Ner.Token DebedToken(Pullenti.Ner.Token t) {
    // Nothing to remove; mirrors the null tolerance of EmbedToken instead of throwing.
    if (t == null) {
        return null;
    }
    Pullenti.Ner.Referent r = t.GetReferent();
    if (r != null) {
        foreach (Pullenti.Ner.TextAnnotation o in r.Occurrence) {
            if (o.BeginChar == t.BeginChar && o.EndChar == t.EndChar) {
                // Safe although we are enumerating: the loop is left immediately after the removal.
                r.Occurrence.Remove(o);
                break;
            }
        }
    }
    Pullenti.Ner.MetaToken mt = t as Pullenti.Ner.MetaToken;
    if (mt == null) {
        return t;
    }
    // Reconnect the neighbours to the inner chain of the metatoken.
    if (t.Next != null) {
        t.Next.Previous = mt.EndToken;
    }
    if (t.Previous != null) {
        t.Previous.Next = mt.BeginToken;
    }
    if (mt == FirstToken) {
        FirstToken = mt.BeginToken;
    }
    if (r != null && r.Occurrence.Count == 0) {
        foreach (AnalyzerData d in m_Datas.Values) {
            if (d.Referents.Contains(r)) {
                d.RemoveReferent(r);
                break;
            }
        }
    }
    return mt.BeginToken;
}
/// <summary>
/// Decides whether the last words of two list items are compatible, judging by a common
/// word ending, by the normal form, or by agreeing morphology.
/// </summary>
/// <param name="mt1">first list item</param>
/// <param name="mt2">second list item</param>
/// <returns>
/// true when either tail is not a TextToken, when the tails look alike or agree
/// morphologically; false when number, case or word class rule the pairing out
/// </returns>
static bool _compareListItemTails(Pullenti.Ner.MetaToken mt1, Pullenti.Ner.MetaToken mt2) {
    Pullenti.Ner.TextToken tail1 = mt1.EndToken as Pullenti.Ner.TextToken;
    Pullenti.Ner.TextToken tail2 = mt2.EndToken as Pullenti.Ner.TextToken;
    if (tail1 == null || tail2 == null) {
        return true;
    }

    // Count equal characters walking from the end of both terms; index 0 is never compared.
    int common = 0;
    int p1 = tail1.Term.Length - 1;
    int p2 = tail2.Term.Length - 1;
    while (p1 > 0 && p2 > 0 && tail1.Term[p1] == tail2.Term[p2]) {
        p1--;
        p2--;
        common++;
    }
    if (common >= 2) {
        return true;
    }

    string singular = tail2.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
    if (tail1.IsValue(singular, null)) {
        return true;
    }
    if ((tail1.Morph.Number & tail2.Morph.Number) == Pullenti.Morph.MorphNumber.Undefined) {
        return false;
    }
    if ((tail1.Morph.Case & tail2.Morph.Case).IsUndefined) {
        return false;
    }
    // Incompatible only when the two words differ both in "is verb" and in "is adjective".
    bool verbDiffers = tail1.Morph.Class.IsVerb != tail2.Morph.Class.IsVerb;
    bool adjectiveDiffers = tail1.Morph.Class.IsAdjective != tail2.Morph.Class.IsAdjective;
    return !(verbDiffers && adjectiveDiffers);
}
/// <summary>
/// Creates the link variant for an infinitive attached to <paramref name="master"/>: a single
/// link with the ToDo question when the master's control item declares that question.
/// </summary>
/// <param name="master">governing element, resolved to a control item via FindControlItem</param>
/// <param name="vpt2">the infinitive verb phrase (not read by this method)</param>
/// <param name="gr">derivate group of the master, may be null</param>
/// <returns>a list with zero or one link; never null</returns>
static List<SemanticLink> _tryCreateInf(Pullenti.Ner.MetaToken master, Pullenti.Ner.Core.VerbPhraseToken vpt2, Pullenti.Semantic.Utils.DerivateGroup gr) {
    Pullenti.Semantic.Utils.ControlModelItem controlItem = FindControlItem(master, gr);
    List<SemanticLink> links = new List<SemanticLink>();
    if (controlItem == null || !controlItem.Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.ToDo)) {
        return links;
    }
    SemanticRole? role = controlItem.Links[Pullenti.Semantic.Utils.ControlModelQuestion.ToDo];
    if (role == null) {
        return links;
    }
    // Any role other than Common ranks the link higher.
    SemanticLink link = new SemanticLink() {
        Rank = (role.Value != SemanticRole.Common ? 2 : 1),
        Question = Pullenti.Semantic.Utils.ControlModelQuestion.ToDo
    };
    links.Add(link);
    return links;
}
/// <summary>
/// Reads one token from the stream: a type code, then the token's own data and, for
/// metatokens, the chain of inner tokens.
/// </summary>
/// <param name="stream">source stream</param>
/// <param name="kit">analysis kit passed to the created tokens</param>
/// <param name="vers">format version forwarded to the Deserialize calls</param>
/// <returns>the token read, or null when the type code is 0</returns>
static Pullenti.Ner.Token DeserializeToken(Stream stream, Pullenti.Ner.Core.AnalysisKit kit, int vers) {
    short typ = DeserializeShort(stream);
    if (typ == 0) {
        return null;
    }
    // Type code: 1 = text, 2 = number, 3 = referent, anything else = plain metatoken.
    Pullenti.Ner.Token token;
    switch (typ) {
        case 1:
            token = new Pullenti.Ner.TextToken(null, kit);
            break;
        case 2:
            token = new Pullenti.Ner.NumberToken(null, null, null, Pullenti.Ner.NumberSpellingType.Digit, kit);
            break;
        case 3:
            token = new Pullenti.Ner.ReferentToken(null, null, null, kit);
            break;
        default:
            token = new Pullenti.Ner.MetaToken(null, null, kit);
            break;
    }
    token.Deserialize(stream, kit, vers);

    Pullenti.Ner.MetaToken meta = token as Pullenti.Ner.MetaToken;
    if (meta == null) {
        return token;
    }
    Pullenti.Ner.Token inner = DeserializeTokens(stream, kit, vers);
    if (inner != null) {
        meta.m_BeginToken = inner;
        // Walk to the end of the inner chain; the last token visited becomes the end token.
        while (inner != null) {
            meta.m_EndToken = inner;
            inner = inner.Next;
        }
    }
    return token;
}
/// <summary>
/// Embeds a metatoken into the main token chain in place of the tokens it spans.
/// For a ReferentToken with a referent, an occurrence covering the span is registered too.
/// </summary>
/// <param name="mt">the metatoken to embed; null is ignored</param>
public void EmbedToken(Pullenti.Ner.MetaToken mt) {
    if (mt == null) {
        return;
    }
    if (mt.BeginChar > mt.EndChar) {
        // Boundaries are reversed: swap them, and give up if the span is still inverted.
        Pullenti.Ner.Token formerBegin = mt.BeginToken;
        mt.BeginToken = mt.EndToken;
        mt.EndToken = formerBegin;
        if (mt.BeginChar > mt.EndChar) {
            return;
        }
    }

    if (mt.BeginToken == FirstToken) {
        FirstToken = mt;
    }
    else {
        mt.Previous = mt.BeginToken.Previous;
    }
    mt.Next = mt.EndToken.Next;

    Pullenti.Ner.ReferentToken refTok = mt as Pullenti.Ner.ReferentToken;
    if (refTok != null && refTok.Referent != null) {
        Pullenti.Ner.TextAnnotation occurrence = new Pullenti.Ner.TextAnnotation();
        occurrence.Sofa = Sofa;
        occurrence.BeginChar = mt.BeginChar;
        occurrence.EndChar = mt.EndChar;
        refTok.Referent.AddOccurence(occurrence);
    }
}
/// <summary>
/// Tries to recognise an assertion of the form "[preamble,] noun-phrase verb ... end of sentence"
/// starting at <paramref name="t"/> and to turn it into a DefinitionReferent of kind Assertation.
/// </summary>
/// <param name="t">first token of the candidate sentence</param>
/// <returns>
/// a ReferentToken from <paramref name="t"/> to the end of the statement, whose referent has the
/// noun phrase as ATTR_TERMIN and the rest of the statement as ATTR_VALUE; null when the pattern
/// does not match, the statement text is shorter than 15 characters, or no term text is obtained
/// </returns>
static Pullenti.Ner.ReferentToken TryParseThesis(Pullenti.Ner.Token t) {
    if (t == null) {
        return null;
    }
    Pullenti.Ner.Token t0 = t;
    Pullenti.Ner.Token tt = t;
    Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary();
    Pullenti.Ner.MetaToken preamb = null;
    if (mc.IsConjunction) {
        return null;
    }
    if (t.IsValue("LET", null)) {
        return null;
    }

    // Optional preamble: starts with a preposition/misc word/adverb (but not an English article)
    // and must end with a comma that is followed by something.
    if (mc.IsPreposition || mc.IsMisc || mc.IsAdverb) {
        if (!Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt)) {
            for (tt = tt.Next; tt != null; tt = tt.Next) {
                if (tt.IsComma) {
                    break;
                }
                if (tt.IsChar('(')) {
                    Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
                    if (br != null) {
                        tt = br.EndToken;
                        continue;
                    }
                }
                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt)) {
                    break;
                }
                Pullenti.Ner.Core.NounPhraseToken npt0 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun, 0, null);
                if (npt0 != null) {
                    tt = npt0.EndToken;
                    continue;
                }
                if (tt.GetMorphClassInDictionary().IsVerb) {
                    break;
                }
            }
            if (tt == null || !tt.IsComma || tt.Next == null) {
                return null;
            }
            preamb = new Pullenti.Ner.MetaToken(t0, tt.Previous);
            tt = tt.Next;
        }
    }

    // The subject: a noun phrase, or a single upper-case / capitalised text token.
    Pullenti.Ner.Token t1 = tt;
    mc = tt.GetMorphClassInDictionary();
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ReferentCanBeNoun | Pullenti.Ner.Core.NounPhraseParseAttr.ParseAdverbs, 0, null);
    if (npt == null && (tt is Pullenti.Ner.TextToken)) {
        if (tt.Chars.IsAllUpper) {
            npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
        }
        else if (!tt.Chars.IsAllLower) {
            if (mc.IsProper || preamb != null) {
                npt = new Pullenti.Ner.Core.NounPhraseToken(tt, tt);
            }
        }
    }
    if (npt == null) {
        return null;
    }
    if (mc.IsPersonalPronoun) {
        return null;
    }

    // The subject must be followed, within the same sentence, by a text token that is a verb.
    Pullenti.Ner.Token t2 = npt.EndToken.Next;
    if (t2 == null || Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t2) || !(t2 is Pullenti.Ner.TextToken)) {
        return null;
    }
    if (!t2.GetMorphClassInDictionary().IsVerb) {
        return null;
    }

    // Skip the run of verbs, then look for the end of the statement (t3): the last token of the
    // text or a sentence terminator followed by a possible sentence start. Bracket sequences are
    // jumped over as a whole.
    Pullenti.Ner.Token t3 = t2;
    for (tt = t2.Next; tt != null; tt = tt.Next) {
        if (!tt.GetMorphClassInDictionary().IsVerb) {
            break;
        }
    }
    for (; tt != null; tt = tt.Next) {
        if (tt.Next == null) {
            t3 = tt;
            break;
        }
        if (tt.IsCharOf(".;!?")) {
            if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next)) {
                t3 = tt;
                break;
            }
        }
        if (!(tt is Pullenti.Ner.TextToken)) {
            continue;
        }
        if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false)) {
            Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100);
            if (br != null) {
                tt = br.EndToken;
                continue;
            }
        }
    }

    // Value text: from the verb to the end of the statement, without the closing punctuation.
    tt = t3;
    if (t3.IsCharOf(";.!?")) {
        tt = tt.Previous;
    }
    string txt = Pullenti.Ner.Core.MiscHelper.GetTextValue(t2, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
    if (txt == null || (txt.Length < 15)) {
        return null;
    }

    // A sufficiently long preamble is appended to the value, with its first letter lower-cased.
    if (t0 != t1) {
        tt = t1.Previous;
        if (tt.IsComma) {
            tt = tt.Previous;
        }
        string txt0 = Pullenti.Ner.Core.MiscHelper.GetTextValue(t0, tt, Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
        if (txt0 != null && txt0.Length > 10) {
            if (t0.Chars.IsCapitalUpper) {
                txt0 = char.ToLower(txt0[0]) + txt0.Substring(1);
            }
            txt = string.Format("{0}, {1}", txt, txt0);
        }
    }

    // Term text: the subject without a leading English article.
    tt = t1;
    if (Pullenti.Ner.Core.MiscHelper.IsEngArticle(tt)) {
        tt = tt.Next;
    }
    string nam = Pullenti.Ner.Core.MiscHelper.GetTextValue(tt, t2.Previous, Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
    // GetTextValue may return null (its result is null-checked for txt above); without a term
    // there is nothing to define, so give up instead of throwing on nam.StartsWith.
    if (nam == null) {
        return null;
    }
    // Ordinal comparison guarantees that exactly the 9 characters of the prefix are cut off.
    if (nam.StartsWith("SO-CALLED", System.StringComparison.Ordinal)) {
        nam = nam.Substring(9).Trim();
    }

    Pullenti.Ner.Definition.DefinitionReferent dr = new Pullenti.Ner.Definition.DefinitionReferent();
    dr.Kind = Pullenti.Ner.Definition.DefinitionKind.Assertation;
    dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_TERMIN, nam, false, 0);
    dr.AddSlot(Pullenti.Ner.Definition.DefinitionReferent.ATTR_VALUE, txt, false, 0);
    return new Pullenti.Ner.ReferentToken(dr, t0, t3);
}
/// <summary>
/// Tries to build a named-entity item starting at <paramref name="t"/>. The alternatives are
/// tried in this order: an already recognised referent, a type term (m_Types), a name term
/// (m_Names), a compass-direction prefix, a capitalised adjective + type phrase, a bracketed
/// name, and finally a single non-lowercase word.
/// </summary>
/// <param name="t">first token of the candidate</param>
/// <param name="locOnto">local ontology; only forwarded to the recursive calls</param>
/// <returns>the parsed item, or null when nothing applies</returns>
public static NamedItemToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.IntOntologyCollection locOnto) {
    if (t == null) {
        return null;
    }

    // 1. A referent token is accepted only for persons, person properties, geo objects and organizations.
    if (t is Pullenti.Ner.ReferentToken) {
        Pullenti.Ner.Referent referent = t.GetReferent();
        bool accepted = referent.TypeName == "PERSON"
            || referent.TypeName == "PERSONPROPERTY"
            || (referent is Pullenti.Ner.Geo.GeoReferent)
            || referent.TypeName == "ORGANIZATION";
        if (!accepted) {
            return null;
        }
        return new NamedItemToken(t, t) { Ref = referent, Morph = t.Morph };
    }

    Pullenti.Ner.Core.TerminToken typ = m_Types.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    Pullenti.Ner.Core.TerminToken nam = m_Names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);

    // 2. A type term; it also carries a name when the name term covers the same span with the same kind.
    if (typ != null) {
        if (!(t is Pullenti.Ner.TextToken)) {
            return null;
        }
        NamedItemToken typed = new NamedItemToken(typ.BeginToken, typ.EndToken) { Morph = typ.Morph, Chars = typ.Chars };
        typed.Kind = (Pullenti.Ner.Named.NamedEntityKind)typ.Termin.Tag;
        typed.TypeValue = typ.Termin.CanonicText;
        if ((nam != null && nam.EndToken == typ.EndToken && !t.Chars.IsAllLower) && ((Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag) == typed.Kind) {
            typed.NameValue = nam.Termin.CanonicText;
            typed.IsWellknown = true;
        }
        return typed;
    }

    // 3. A name term; it is "well known" only when it is not glued to neighbouring tokens
    //    (trailing punctuation followed by whitespace is tolerated).
    if (nam != null) {
        if (nam.BeginToken.Chars.IsAllLower) {
            return null;
        }
        NamedItemToken named = new NamedItemToken(nam.BeginToken, nam.EndToken) { Morph = nam.Morph, Chars = nam.Chars };
        named.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
        named.NameValue = nam.Termin.CanonicText;
        bool gluedBefore = !t.IsWhitespaceBefore && t.Previous != null;
        bool gluedAfter = !t.IsWhitespaceAfter && t.Next != null;
        bool standalone;
        if (gluedBefore) {
            standalone = false;
        }
        else if (gluedAfter) {
            standalone = t.Next.IsCharOf(",.;!?") && t.Next.IsWhitespaceAfter;
        }
        else {
            standalone = true;
        }
        if (standalone) {
            named.IsWellknown = true;
            named.TypeValue = nam.Termin.Tag2 as string;
        }
        return named;
    }

    // 4. A compass-direction word or adjective recognised by TryAttachNordWest.
    Pullenti.Ner.MetaToken adj = Pullenti.Ner.Geo.Internal.MiscLocationHelper.TryAttachNordWest(t);
    if (adj != null) {
        if (adj.Morph.Class.IsNoun) {
            // A noun form is accepted only as a compound (more than one token) ending with "ВОСТОК".
            if (!adj.EndToken.IsValue("ВОСТОК", null)) {
                return null;
            }
            if (adj.BeginToken == adj.EndToken) {
                return null;
            }
            NamedItemToken compound = new NamedItemToken(t, adj.EndToken) { Morph = adj.Morph };
            compound.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
            compound.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, adj.EndToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
            compound.IsWellknown = true;
            return compound;
        }
        if (adj.WhitespacesAfterCount > 2) {
            return null;
        }
        Pullenti.Ner.Token afterAdj = adj.EndToken.Next;
        // Direction adjective + recognised geo referent.
        if ((afterAdj is Pullenti.Ner.ReferentToken) && (afterAdj.GetReferent() is Pullenti.Ner.Geo.GeoReferent)) {
            NamedItemToken withGeo = new NamedItemToken(t, afterAdj) { Morph = afterAdj.Morph };
            withGeo.Kind = Pullenti.Ner.Named.NamedEntityKind.Location;
            withGeo.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, afterAdj, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative);
            withGeo.IsWellknown = true;
            withGeo.Ref = afterAdj.GetReferent();
            return withGeo;
        }
        // Direction adjective + location item parsed recursively: the adjective is prepended to the name.
        NamedItemToken tail = TryParse(afterAdj, locOnto);
        if (tail != null && tail.Kind == Pullenti.Ner.Named.NamedEntityKind.Location) {
            string adjText = adj.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, tail.Morph.Gender, false);
            if (adjText != null) {
                if (tail.NameValue == null) {
                    tail.NameValue = adjText.ToUpper();
                }
                else {
                    tail.NameValue = string.Format("{0} {1}", adjText.ToUpper(), tail.NameValue);
                    tail.TypeValue = null;
                }
                tail.BeginToken = t;
                tail.Chars = t.Chars;
                tail.IsWellknown = true;
                return tail;
            }
        }
    }

    // 5. Capitalised adjective(s) + typed noun: the adjectives become the name.
    if (t.Chars.IsCapitalUpper && !Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t)) {
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt != null && npt.Adjectives.Count > 0) {
            NamedItemToken nounItem = TryParse(npt.Noun.BeginToken, locOnto);
            if (nounItem != null && nounItem.EndToken == npt.EndToken && nounItem.TypeValue != null) {
                nounItem.BeginToken = t;
                StringBuilder joined = new StringBuilder();
                foreach (Pullenti.Ner.MetaToken adjective in npt.Adjectives) {
                    string form = adjective.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, nounItem.Morph.Gender, false);
                    if (joined.Length > 0) {
                        joined.Append(' ');
                    }
                    joined.Append(form);
                }
                nounItem.NameValue = joined.ToString();
                nounItem.Chars = t.Chars;
                if (nounItem.Kind == Pullenti.Ner.Named.NamedEntityKind.Location) {
                    nounItem.IsWellknown = true;
                }
                return nounItem;
            }
        }
    }

    // 6. A bracketed sequence that starts with a non-lowercase letter.
    if ((Pullenti.Ner.Core.BracketHelper.IsBracket(t, true) && t.Next != null && t.Next.Chars.IsLetter) && !t.Next.Chars.IsAllLower) {
        Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100);
        if (br != null) {
            NamedItemToken bracketed = new NamedItemToken(t, br.EndToken);
            bracketed.IsInBracket = true;
            bracketed.NameValue = Pullenti.Ner.Core.MiscHelper.GetTextValue(t, br.EndToken, Pullenti.Ner.Core.GetTextAttr.No);
            nam = m_Names.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No);
            // A known name filling the brackets exactly overrides the raw text.
            if (nam != null && nam.EndToken == br.EndToken.Previous) {
                bracketed.Kind = (Pullenti.Ner.Named.NamedEntityKind)nam.Termin.Tag;
                bracketed.IsWellknown = true;
                bracketed.NameValue = nam.Termin.CanonicText;
            }
            return bracketed;
        }
    }

    // 7. Fallback: a single word of more than two letters that is not all lower-case,
    //    optionally extended by a hyphenated second part in the same alphabet.
    if (((t is Pullenti.Ner.TextToken) && t.Chars.IsLetter && !t.Chars.IsAllLower) && t.LengthChar > 2) {
        NamedItemToken word = new NamedItemToken(t, t) { Morph = t.Morph };
        string term = (t as Pullenti.Ner.TextToken).Term;
        bool keepAsIs = term.EndsWith("О") || term.EndsWith("И") || term.EndsWith("Ы");
        if (keepAsIs) {
            word.NameValue = term;
        }
        else {
            word.NameValue = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        }
        word.Chars = t.Chars;
        if (((!t.IsWhitespaceAfter && t.Next != null && t.Next.IsHiphen) && (t.Next.Next is Pullenti.Ner.TextToken) && !t.Next.Next.IsWhitespaceAfter) && t.Chars.IsCyrillicLetter == t.Next.Next.Chars.IsCyrillicLetter) {
            Pullenti.Ner.Token secondPart = t.Next.Next;
            word.EndToken = secondPart;
            t = secondPart;
            word.NameValue = string.Format("{0}-{1}", word.NameValue, t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false));
        }
        return word;
    }
    return null;
}
/// <summary>
/// Builds the possible semantic links from a noun phrase (master) to a slave element.
/// </summary>
/// <param name="npt1">master noun phrase</param>
/// <param name="slave">attached element: a verb phrase (handled by _tryCreateInf) or a SemanticAbstractSlave</param>
/// <param name="gr">derivate group of the master; may be null (TryCreateLinks passes null when no group was found)</param>
/// <returns>
/// null when <paramref name="npt1"/> or <paramref name="slave"/> is null; otherwise the list of
/// link variants (possibly empty)
/// </returns>
static List<SemanticLink> _tryCreateNoun(Pullenti.Ner.Core.NounPhraseToken npt1, Pullenti.Ner.MetaToken slave, Pullenti.Semantic.Utils.DerivateGroup gr) {
    if (npt1 == null || slave == null) {
        return null;
    }
    if (slave is Pullenti.Ner.Core.VerbPhraseToken) {
        return _tryCreateInf(npt1, slave as Pullenti.Ner.Core.VerbPhraseToken, gr);
    }
    SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;
    List<SemanticLink> res = new List<SemanticLink>();
    if (sla2 == null) {
        return res;
    }

    Pullenti.Semantic.Utils.ControlModelItem cit = FindControlItem(npt1, gr);
    _createRoles(cit, sla2.Preposition, sla2.Morph.Case, res, false, false);

    // A lone Agent link on the instrumental question takes its role from the group's leading
    // Verb item when that item declares the question. The group may be null here, so it is
    // checked first (the original dereferenced gr.Model unconditionally).
    if (res.Count == 1 && res[0].Role == SemanticRole.Agent && res[0].Question == Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental) {
        if (gr != null && gr.Model.Items.Count > 0 && gr.Model.Items[0].Typ == Pullenti.Semantic.Utils.ControlModelItemType.Verb && gr.Model.Items[0].Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental)) {
            res[0].Role = gr.Model.Items[0].Links[Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental];
        }
    }

    // Idiom check: the last token of the slave's source matches one of the "next words" of
    // the master's word or one of the group's Pacients entries.
    bool ok = false;
    Pullenti.Semantic.Utils.DerivateWord w = FindWordInGroup(npt1, gr);
    if (w != null && w.NextWords != null && w.NextWords.Count > 0) {
        foreach (string n in w.NextWords) {
            if (sla2.Source != null && sla2.Source.EndToken.IsValue(n, null)) {
                ok = true;
                break;
            }
        }
    }
    if (gr != null && gr.Model.Pacients.Count > 0) {
        foreach (string n in gr.Model.Pacients) {
            if (sla2.Source != null && sla2.Source.EndToken.IsValue(n, null)) {
                ok = true;
                break;
            }
        }
    }

    if (ok) {
        if (res.Count == 0) {
            res.Add(new SemanticLink() { Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive, Role = SemanticRole.Pacient, Idiom = true });
        }
        foreach (SemanticLink r in res) {
            r.Rank += 4;
            if (r.Role == SemanticRole.Common) {
                r.Role = SemanticRole.Strong;
            }
            // Extra weight when the slave immediately follows the master.
            if (npt1.EndToken.Next == sla2.BeginToken) {
                r.Rank += 2;
            }
            r.Idiom = true;
        }
    }
    return res;
}
/// <summary>
/// Tries to create semantic links between two elements.
/// An element may be a noun phrase (NounPhraseToken) or a verb phrase (VerbPhraseToken).
/// </summary>
/// <param name="master">the governing element</param>
/// <param name="slave">the element being attached (may also be a SemanticAbstractSlave)</param>
/// <param name="onto">optional additional ontology dictionary</param>
/// <returns>the list of link variants (possibly empty), ordered by SemanticLink.CompareTo</returns>
public static List<SemanticLink> TryCreateLinks(Pullenti.Ner.MetaToken master, Pullenti.Ner.MetaToken slave, ISemanticOnto onto = null) {
    List<SemanticLink> res = new List<SemanticLink>();
    Pullenti.Ner.Core.VerbPhraseToken vpt1 = master as Pullenti.Ner.Core.VerbPhraseToken;
    Pullenti.Ner.Core.VerbPhraseToken vpt2 = slave as Pullenti.Ner.Core.VerbPhraseToken;
    Pullenti.Ner.Core.NounPhraseToken npt1 = master as Pullenti.Ner.Core.NounPhraseToken;

    // A noun-phrase slave is wrapped into the abstract slave representation.
    if (slave is Pullenti.Ner.Core.NounPhraseToken) {
        slave = SemanticAbstractSlave.CreateFromNoun(slave as Pullenti.Ner.Core.NounPhraseToken);
    }
    SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;

    // A verb-phrase slave is attachable only when both its first and last verbs are infinitives.
    if (vpt2 != null && (!vpt2.FirstVerb.IsVerbInfinitive || !vpt2.LastVerb.IsVerbInfinitive)) {
        return res;
    }

    // Collect the variants, per derivate group of the master or once without a group.
    List<Pullenti.Semantic.Utils.DerivateGroup> grs = FindDerivates(master);
    if (grs == null || grs.Count == 0) {
        List<SemanticLink> found = (vpt1 != null ? _tryCreateVerb(vpt1, slave, null) : _tryCreateNoun(npt1, slave, null));
        if (found != null) {
            res.AddRange(found);
        }
    }
    else {
        foreach (Pullenti.Semantic.Utils.DerivateGroup gr in grs) {
            List<SemanticLink> found = (vpt1 != null ? _tryCreateVerb(vpt1, slave, gr) : _tryCreateNoun(npt1, slave, gr));
            if (found != null && found.Count > 0) {
                res.AddRange(found);
            }
        }
    }

    // Noun + genitive without preposition: add a low-ranked modelled genitive link unless one
    // exists already or the master noun is a personal pronoun.
    if ((npt1 != null && sla2 != null && sla2.Morph.Case.IsGenitive) && sla2.Preposition == null) {
        if (!npt1.Noun.BeginToken.GetMorphClassInDictionary().IsPersonalPronoun) {
            bool hasGenitive = false;
            foreach (SemanticLink link in res) {
                if (link.Question == Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive) {
                    hasGenitive = true;
                    break;
                }
            }
            if (!hasGenitive) {
                res.Add(new SemanticLink() { Modelled = true, Master = npt1, Slave = sla2, Rank = 0.5, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive });
            }
        }
    }

    // A pair confirmed by the external ontology boosts every variant, or creates a Strong link.
    if (onto != null) {
        string masterKey = GetKeyword(master);
        string slaveKey = GetKeyword(slave);
        if (slaveKey != null && onto.CheckLink(masterKey, slaveKey)) {
            if (res.Count > 0) {
                foreach (SemanticLink link in res) {
                    link.Rank += 3;
                    if (link.Role == SemanticRole.Common) {
                        link.Role = SemanticRole.Strong;
                    }
                }
            }
            else {
                res.Add(new SemanticLink() { Role = SemanticRole.Strong, Master = master, Slave = slave, Rank = 3 });
            }
        }
    }

    // A master that starts with a pronoun adjective or has an anaphor weakens genitive links.
    if (npt1 != null) {
        bool pronounFirst = npt1.Adjectives.Count > 0 && npt1.Adjectives[0].BeginToken.Morph.Class.IsPronoun;
        if (pronounFirst || npt1.Anafor != null) {
            foreach (SemanticLink link in res) {
                if (link.Question != Pullenti.Semantic.Utils.ControlModelQuestion.BaseGenetive) {
                    continue;
                }
                link.Rank -= 0.5;
                if (link.Role == SemanticRole.Strong) {
                    link.Role = SemanticRole.Common;
                }
            }
        }
    }

    // Each Strong link halves the rank of every other link that is not Strong.
    foreach (SemanticLink strong in res) {
        if (strong.Role != SemanticRole.Strong) {
            continue;
        }
        foreach (SemanticLink other in res) {
            if (other != strong && other.Role != SemanticRole.Strong) {
                other.Rank /= 2;
            }
        }
    }

    // Stable exchange sort by CompareTo (res.Count full passes).
    for (int pass = 0; pass < res.Count; pass++) {
        for (int k = 1; k < res.Count; k++) {
            if (res[k - 1].CompareTo(res[k]) > 0) {
                SemanticLink held = res[k - 1];
                res[k - 1] = res[k];
                res[k] = held;
            }
        }
    }

    // Every returned link refers to the (possibly wrapped) arguments of this call.
    foreach (SemanticLink link in res) {
        link.Master = master;
        link.Slave = slave;
    }
    return res;
}
/// <summary>
/// Try to parse a named measure ("name ... value(s) with units") starting at token t.
/// Returns null immediately when t is not a TextToken or is a table control character.
/// Phases, in order:
/// 1. Name head: an optional min/max marker (NumbersWithUnitToken._isMinOrMax) is skipped, then a noun phrase
///    is parsed (with ParsePreposition | IgnoreBrackets). When that fails, fallbacks are tried: a
///    _tryParseWHL group, the word "КПД", a word longer than 3 characters with an undefined dictionary class,
///    a lowercase "T" (optionally followed by '='), or, when isSubval is set, a run of letter tokens and
///    bracket sequences up to the first number-with-unit. When a noun phrase was parsed but t itself parses
///    as a real number or as a date item, null is returned.
/// 2. Scan of the rest of the line (stops at a newline or table control character): extends the name,
///    records the "НЕ" negation before linking verbs, collects nested measures after "ПРИ" into internals,
///    collects units and _tryParseWHL groups, and stops at the first number-with-unit.
/// 3. Delimiters ':', ',', '_' and hyphens are skipped (with isResctriction set, such a delimiter yields null).
///    After ':' a list of sub-measures is parsed recursively (isSubval = true); when more than one is found,
///    an aggregate token with IsEmpty = true and those Internals is returned.
/// 4. Values are parsed with NumbersWithUnitToken.TryParseMulti. Several values (and no internals) give a
///    token with Reliable = true and one internal per value, optionally followed by a '±' tolerance.
///    A single value gets previously found units, is turned into a From/To bound according to the min/max
///    marker, and must have units unless canUnitsAbsent is set.
/// 5. With canBeSet, a following single value whose units differ turns the result into a set (IsSet = true).
/// </summary>
/// <param name="t">start token</param>
/// <param name="addUnits">collection passed through to the unit/number parsers (presumably extra unit terms - confirm)</param>
/// <param name="canBeSet">allow combining with a following value of different units into a set</param>
/// <param name="canUnitsAbsent">accept a value for which no units were found</param>
/// <param name="isResctriction">parse as a restriction (used for the nested measures after "ПРИ")</param>
/// <param name="isSubval">parse as a sub-value of a ':' list; the name is then taken without nominative normalization</param>
/// <returns>the parsed measure, or null</returns>
// NOTE(review): "whd.Tag as List<string>" is dereferenced (nams.Count) without a null check - confirm Tag is always such a list.
// NOTE(review): after "if (t1 == null) return null" the later "t1 == null" tests in the mts == null branch look unreachable - confirm.
// NOTE(review): the 'if (tt.IsValue("ЯМЗ", null)) { }' statement has an empty body and looks like a debugging leftover.
public static MeasureToken TryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool canBeSet = true, bool canUnitsAbsent = false, bool isResctriction = false, bool isSubval = false) { if (!(t is Pullenti.Ner.TextToken)) { return(null); } if (t.IsTableControlChar) { return(null); } Pullenti.Ner.Token t0 = t; Pullenti.Ner.MetaToken whd = null; int minmax = 0; Pullenti.Ner.Token tt = NumbersWithUnitToken._isMinOrMax(t0, ref minmax); if (tt != null) { t = tt.Next; } Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets, 0, null); if (npt == null) { whd = NumbersWithUnitToken._tryParseWHL(t); if (whd != null) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, whd.EndToken); } else if (t0.IsValue("КПД", null)) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); } else if ((t0 is Pullenti.Ner.TextToken) && t0.LengthChar > 3 && t0.GetMorphClassInDictionary().IsUndefined) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); } else if (t0.IsValue("T", null) && t0.Chars.IsAllLower) { npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); t = t0; if (t.Next != null && t.Next.IsChar('=')) { npt.EndToken = t.Next; } } else if ((t0 is Pullenti.Ner.TextToken) && t0.Chars.IsLetter && isSubval) { if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) { return(null); } npt = new Pullenti.Ner.Core.NounPhraseToken(t0, t0); for (t = t0.Next; t != null; t = t.Next) { if (t.WhitespacesBeforeCount > 2) { break; } else if (!(t is Pullenti.Ner.TextToken)) { break; } else if (!t.Chars.IsLetter) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(t, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { npt.EndToken = (t = br.EndToken); } else { break; } } else if (NumbersWithUnitToken.TryParse(t, addUnits, false, false, false, false) != null) { break; } 
else { npt.EndToken = t; } } } else { return(null); } } else if (Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false) != null) { return(null); } else { Pullenti.Ner.Date.Internal.DateItemToken dtok = Pullenti.Ner.Date.Internal.DateItemToken.TryAttach(t, null, false); if (dtok != null) { return(null); } } Pullenti.Ner.Token t1 = npt.EndToken; t = npt.EndToken; Pullenti.Ner.MetaToken name = new Pullenti.Ner.MetaToken(npt.BeginToken, npt.EndToken) { Morph = npt.Morph }; List <UnitToken> units = null; List <UnitToken> units2 = null; List <MeasureToken> internals = new List <MeasureToken>(); bool not = false; for (tt = t1.Next; tt != null; tt = tt.Next) { if (tt.IsNewlineBefore) { break; } if (tt.IsTableControlChar) { break; } Pullenti.Ner.Token tt2 = NumbersWithUnitToken._isMinOrMax(tt, ref minmax); if (tt2 != null) { t1 = (t = (tt = tt2)); continue; } if ((tt.IsValue("БЫТЬ", null) || tt.IsValue("ДОЛЖЕН", null) || tt.IsValue("ДОЛЖНЫЙ", null)) || tt.IsValue("МОЖЕТ", null) || ((tt.IsValue("СОСТАВЛЯТЬ", null) && !tt.GetMorphClassInDictionary().IsAdjective))) { t1 = (t = tt); if (tt.Previous.IsValue("НЕ", null)) { not = true; } continue; } Pullenti.Ner.MetaToken www = NumbersWithUnitToken._tryParseWHL(tt); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); continue; } if (tt.IsValue("ПРИ", null)) { MeasureToken mt1 = TryParse(tt.Next, addUnits, false, false, true, false); if (mt1 != null) { internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } NumbersWithUnitToken n1 = NumbersWithUnitToken.TryParse(tt.Next, addUnits, false, false, false, false); if (n1 != null && n1.Units.Count > 0) { mt1 = new MeasureToken(n1.BeginToken, n1.EndToken) { Nums = n1 }; internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } } if (tt.IsValue("ПО", null) && tt.Next != null && tt.Next.IsValue("U", null)) { t1 = (t = (tt = tt.Next)); continue; } if (internals.Count > 0) { if (tt.IsChar(':')) { break; } MeasureToken mt1 = TryParse(tt.Next, addUnits, false, 
false, true, false); if (mt1 != null && mt1.Reliable) { internals.Add(mt1); t1 = (t = (tt = mt1.EndToken)); continue; } } if ((tt is Pullenti.Ner.NumberToken) && (tt as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) { Pullenti.Ner.Core.NounPhraseToken npt3 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective, 0, null); if (npt3 != null) { t1 = (tt = npt3.EndToken); if (internals.Count == 0) { name.EndToken = t1; } continue; } } if (((tt.IsHiphen && !tt.IsWhitespaceBefore && !tt.IsWhitespaceAfter) && (tt.Next is Pullenti.Ner.NumberToken) && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) { t1 = (tt = (t = tt.Next)); if (internals.Count == 0) { name.EndToken = t1; } continue; } if (((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceBefore && (tt.Previous is Pullenti.Ner.TextToken)) && tt.Previous.Chars.IsAllUpper) { t1 = (t = tt); if (internals.Count == 0) { name.EndToken = t1; } continue; } if ((((tt is Pullenti.Ner.NumberToken) && !tt.IsWhitespaceAfter && tt.Next.IsHiphen) && !tt.Next.IsWhitespaceAfter && (tt.Next.Next is Pullenti.Ner.TextToken)) && tt.Next.Next.LengthChar > 2) { t1 = (t = (tt = tt.Next.Next)); Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null); if (npt1 != null && npt1.EndChar > tt.EndChar) { t1 = (t = (tt = npt1.EndToken)); } if (internals.Count == 0) { name.EndToken = t1; } continue; } if ((tt is Pullenti.Ner.NumberToken) && tt.Previous != null) { if (tt.Previous.IsValue("USB", null)) { t1 = (t = tt); if (internals.Count == 0) { name.EndToken = t1; } for (Pullenti.Ner.Token ttt = tt.Next; ttt != null; ttt = ttt.Next) { if (ttt.IsWhitespaceBefore) { break; } if (ttt.IsCharOf(",:")) { break; } t1 = (t = (tt = ttt)); if (internals.Count == 0) { name.EndToken = t1; } } continue; } } NumbersWithUnitToken mt0 = NumbersWithUnitToken.TryParse(tt, addUnits, 
false, false, false, false); if (mt0 != null) { Pullenti.Ner.Core.NounPhraseToken npt1 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParseNumericAsAdjective | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition, 0, null); if (npt1 != null && npt1.EndChar > mt0.EndChar) { t1 = (t = (tt = npt1.EndToken)); if (internals.Count == 0) { name.EndToken = t1; } continue; } break; } if (((tt.IsComma || tt.IsChar('('))) && tt.Next != null) { www = NumbersWithUnitToken._tryParseWHL(tt.Next); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); if (tt.Next != null && tt.Next.IsComma) { t1 = (tt = tt.Next); } if (tt.Next != null && tt.Next.IsChar(')')) { t1 = (tt = tt.Next); continue; } } List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false); if (uu != null) { t1 = (t = uu[uu.Count - 1].EndToken); units = uu; if (tt.IsChar('(') && t1.Next != null && t1.Next.IsChar(')')) { t1 = (t = (tt = t1.Next)); continue; } else if (t1.Next != null && t1.Next.IsChar('(')) { uu = UnitToken.TryParseList(t1.Next.Next, addUnits, false); if (uu != null && uu[uu.Count - 1].EndToken.Next != null && uu[uu.Count - 1].EndToken.Next.IsChar(')')) { units2 = uu; t1 = (t = (tt = uu[uu.Count - 1].EndToken.Next)); continue; } www = NumbersWithUnitToken._tryParseWHL(t1.Next); if (www != null) { whd = www; t1 = (t = (tt = www.EndToken)); continue; } } if (uu != null && uu.Count > 0 && !uu[0].IsDoubt) { break; } if (t1.Next != null) { if (t1.Next.IsTableControlChar || t1.IsNewlineAfter) { break; } } units = null; } } if (Pullenti.Ner.Core.BracketHelper.CanBeStartOfSequence(tt, false, false) && !(tt.Next is Pullenti.Ner.NumberToken)) { Pullenti.Ner.Core.BracketSequenceToken br = Pullenti.Ner.Core.BracketHelper.TryParse(tt, Pullenti.Ner.Core.BracketParseAttr.No, 100); if (br != null) { t1 = (t = (tt = br.EndToken)); continue; } } if (tt.IsValue("НЕ", null) && tt.Next != null) { Pullenti.Morph.MorphClass mc = 
tt.Next.GetMorphClassInDictionary(); if (mc.IsAdverb || mc.IsMisc) { break; } continue; } if (tt.IsValue("ЯМЗ", null)) { } Pullenti.Ner.Core.NounPhraseToken npt2 = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.ParsePreposition | Pullenti.Ner.Core.NounPhraseParseAttr.IgnoreBrackets | Pullenti.Ner.Core.NounPhraseParseAttr.ParsePronouns, 0, null); if (npt2 == null) { if (tt.Morph.Class.IsPreposition || tt.Morph.Class.IsConjunction) { Pullenti.Ner.Core.TerminToken to = NumbersWithUnitToken.m_Termins.TryParse(tt, Pullenti.Ner.Core.TerminParseAttr.No); if (to != null) { if ((to.EndToken.Next is Pullenti.Ner.TextToken) && to.EndToken.Next.IsLetters) { } else { break; } } t1 = tt; continue; } Pullenti.Morph.MorphClass mc = tt.GetMorphClassInDictionary(); if (((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter && tt.LengthChar > 1) && (((tt.Chars.IsAllUpper || mc.IsAdverb || mc.IsUndefined) || mc.IsAdjective))) { List <UnitToken> uu = UnitToken.TryParseList(tt, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 1 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); break; } } t1 = (t = tt); if (internals.Count == 0) { name.EndToken = tt; } continue; } if (tt.IsComma) { continue; } if (tt.IsChar('.')) { if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(tt.Next)) { continue; } List <UnitToken> uu = UnitToken.TryParseList(tt.Next, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 2 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); break; } } } break; } t1 = (t = (tt = npt2.EndToken)); if (internals.Count > 0) { } else if (t.IsValue("ПРЕДЕЛ", null) || t.IsValue("ГРАНИЦА", null) || t.IsValue("ДИАПАЗОН", null)) { } else if (t.Chars.IsLetter) { name.EndToken = t1; } } Pullenti.Ner.Token t11 = t1; for (t1 = t1.Next; t1 != null; t1 = t1.Next) { if (t1.IsTableControlChar) { } else if (t1.IsCharOf(":,_")) { if (isResctriction) { return(null); } Pullenti.Ner.MetaToken www = 
NumbersWithUnitToken._tryParseWHL(t1.Next); if (www != null) { whd = www; t1 = (t = www.EndToken); continue; } List <UnitToken> uu = UnitToken.TryParseList(t1.Next, addUnits, false); if (uu != null) { if (uu[0].LengthChar > 1 || uu.Count > 1) { units = uu; t1 = (t = uu[uu.Count - 1].EndToken); continue; } } if (t1.IsChar(':')) { List <MeasureToken> li = new List <MeasureToken>(); for (Pullenti.Ner.Token ttt = t1.Next; ttt != null; ttt = ttt.Next) { if (ttt.IsHiphen || ttt.IsTableControlChar) { continue; } if ((ttt is Pullenti.Ner.TextToken) && !ttt.Chars.IsLetter) { continue; } MeasureToken mt1 = TryParse(ttt, addUnits, true, true, false, true); if (mt1 == null) { break; } li.Add(mt1); ttt = mt1.EndToken; if (ttt.Next != null && ttt.Next.IsChar(';')) { ttt = ttt.Next; } if (ttt.IsChar(';')) { } else if (ttt.IsNewlineAfter && mt1.IsNewlineBefore) { } else { break; } } if (li.Count > 1) { MeasureToken res0 = new MeasureToken(t0, li[li.Count - 1].EndToken) { Internals = li, IsEmpty = true }; if (internals != null && internals.Count > 0) { res0.InternalEx = internals[0]; } string nam = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative); li[0].BeginToken = t0; foreach (MeasureToken v in li) { v.Name = string.Format("{0} ({1})", nam, v.Name ?? 
"").Trim(); if (v.Nums != null && v.Nums.Units.Count == 0 && units != null) { v.Nums.Units = units; } } return(res0); } } } else if (t1.IsHiphen && t1.IsWhitespaceAfter && t1.IsWhitespaceBefore) { } else if (t1.IsHiphen && t1.Next != null && t1.Next.IsChar('(')) { } else { break; } } if (t1 == null) { return(null); } List <NumbersWithUnitToken> mts = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, not, true, isResctriction); if (mts == null) { if (units != null && units.Count > 0) { if (t1 == null || t1.Previous.IsChar(':')) { mts = new List <NumbersWithUnitToken>(); if (t1 == null) { for (t1 = t11; t1 != null && t1.Next != null; t1 = t1.Next) { } } else { t1 = t1.Previous; } mts.Add(new NumbersWithUnitToken(t0, t1) { SingleVal = double.NaN }); } } if (mts == null) { return(null); } } NumbersWithUnitToken mt = mts[0]; if (mt.BeginToken == mt.EndToken && !(mt.BeginToken is Pullenti.Ner.NumberToken)) { return(null); } if (!isSubval && name.BeginToken.Morph.Class.IsPreposition) { name.BeginToken = name.BeginToken.Next; } if (mt.WHL != null) { whd = mt.WHL; } for (int kk = 0; kk < 10; kk++) { if (whd != null && whd.EndToken == name.EndToken) { name.EndToken = whd.BeginToken.Previous; continue; } if (units != null) { if (units[units.Count - 1].EndToken == name.EndToken) { name.EndToken = units[0].BeginToken.Previous; continue; } } break; } if (mts.Count > 1 && internals.Count == 0) { if (mt.Units.Count == 0) { if (units != null) { foreach (NumbersWithUnitToken m in mts) { m.Units = units; } } } MeasureToken res1 = new MeasureToken(t0, mts[mts.Count - 1].EndToken) { Morph = name.Morph, Reliable = true }; res1.Name = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative); for (int k = 0; k < mts.Count; k++) { MeasureToken ttt = new MeasureToken(mts[k].BeginToken, mts[k].EndToken) { Nums = mts[k] }; if (whd != null) { List <string> nams = whd.Tag as List <string>; if (k < nams.Count) { ttt.Name = 
nams[k]; } } res1.Internals.Add(ttt); } Pullenti.Ner.Token tt1 = res1.EndToken.Next; if (tt1 != null && tt1.IsChar('±')) { NumbersWithUnitToken nn = NumbersWithUnitToken._tryParse(tt1, addUnits, true, false, false); if (nn != null && nn.PlusMinusPercent) { res1.EndToken = nn.EndToken; res1.Nums = nn; if (nn.Units.Count > 0 && units == null && mt.Units.Count == 0) { foreach (NumbersWithUnitToken m in mts) { m.Units = nn.Units; } } } } return(res1); } if (!mt.IsWhitespaceBefore) { if (mt.BeginToken.Previous == null) { return(null); } if (mt.BeginToken.Previous.IsCharOf(":),") || mt.BeginToken.Previous.IsTableControlChar || mt.BeginToken.Previous.IsValue("IP", null)) { } else if (mt.BeginToken.IsHiphen && mt.Units.Count > 0 && !mt.Units[0].IsDoubt) { } else { return(null); } } if (mt.Units.Count == 0 && units != null) { mt.Units = units; if (mt.DivNum != null && units.Count > 1 && mt.DivNum.Units.Count == 0) { for (int i = 1; i < units.Count; i++) { if (units[i].Pow == -1) { for (int j = i; j < units.Count; j++) { mt.DivNum.Units.Add(units[j]); units[j].Pow = -units[j].Pow; } mt.Units.RemoveRange(i, units.Count - i); break; } } } } if ((minmax < 0) && mt.SingleVal != null) { mt.FromVal = mt.SingleVal; mt.FromInclude = true; mt.SingleVal = null; } if (minmax > 0 && mt.SingleVal != null) { mt.ToVal = mt.SingleVal; mt.ToInclude = true; mt.SingleVal = null; } if (mt.Units.Count == 0) { units = UnitToken.TryParseList(mt.EndToken.Next, addUnits, true); if (units == null) { if (canUnitsAbsent) { } else { return(null); } } else { mt.Units = units; } } MeasureToken res = new MeasureToken(t0, mt.EndToken) { Morph = name.Morph, Internals = internals }; if (((!t0.IsWhitespaceBefore && t0.Previous != null && t0 == name.BeginToken) && t0.Previous.IsHiphen && !t0.Previous.IsWhitespaceBefore) && (t0.Previous.Previous is Pullenti.Ner.TextToken)) { name.BeginToken = (res.BeginToken = name.BeginToken.Previous.Previous); } res.Name = 
Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(name, (!isSubval ? Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominative : Pullenti.Ner.Core.GetTextAttr.No)); res.Nums = mt; foreach (UnitToken u in res.Nums.Units) { if (u.Keyword != null) { if (u.Keyword.BeginChar >= res.BeginChar) { res.Reliable = true; } } } res._parseInternals(addUnits); if (res.Internals.Count > 0 || !canBeSet) { return(res); } t1 = res.EndToken.Next; if (t1 != null && t1.IsCommaAnd) { t1 = t1.Next; } List <NumbersWithUnitToken> mts1 = NumbersWithUnitToken.TryParseMulti(t1, addUnits, false, false, false, false); if ((mts1 != null && mts1.Count == 1 && (t1.WhitespacesBeforeCount < 3)) && mts1[0].Units.Count > 0 && !UnitToken.CanBeEquals(mts[0].Units, mts1[0].Units)) { res.IsSet = true; res.Nums = null; res.Internals.Add(new MeasureToken(mt.BeginToken, mt.EndToken) { Nums = mt }); res.Internals.Add(new MeasureToken(mts1[0].BeginToken, mts1[0].EndToken) { Nums = mts1[0] }); res.EndToken = mts1[0].EndToken; } return(res); }
/// <summary>
/// Try to extract a noun phrase starting at the given token.
/// </summary>
/// <param name="t">starting token</param>
/// <param name="attrs">parsing attributes (may be combined as a bit mask)</param>
/// <param name="maxCharPos">maximum text position up to which to parse (0 means no limit)</param>
/// <param name="noun">a noun extracted earlier (from another phrase), for when only its adjectives are to be extracted</param>
/// <returns>the noun phrase, or null</returns>
public static NounPhraseToken TryParse(Pullenti.Ner.Token t, NounPhraseParseAttr attrs = NounPhraseParseAttr.No, int maxCharPos = 0, Pullenti.Ner.MetaToken noun = null)
{
    Pullenti.Ner.Core.Internal.NounPhraseItem nounItem = noun as Pullenti.Ner.Core.Internal.NounPhraseItem;
    bool allowPreposition = (attrs & NounPhraseParseAttr.ParsePreposition) != NounPhraseParseAttr.No;

    NounPhraseToken direct = _NounPraseHelperInt.TryParse(t, attrs, maxCharPos, nounItem);
    if (direct != null)
    {
        // Only a one-token phrase whose token may also be a preposition is worth re-reading
        // as "preposition + noun phrase"; everything else is returned as parsed.
        if (!allowPreposition || !(direct.BeginToken == direct.EndToken) || !t.Morph.Class.IsPreposition)
        {
            return direct;
        }
        PrepositionToken leading = PrepositionHelper.TryParse(t);
        if (leading == null)
        {
            return direct;
        }
        NounPhraseToken afterPreposition = _NounPraseHelperInt.TryParse(t.Next, attrs, maxCharPos, nounItem);
        if (afterPreposition == null || (leading.NextCase & afterPreposition.Morph.Case).IsUndefined)
        {
            // The following phrase is missing or its case does not agree with the preposition.
            return direct;
        }
        afterPreposition.Morph.RemoveItems(leading.NextCase);
        afterPreposition.Preposition = leading;
        afterPreposition.BeginToken = t;
        return afterPreposition;
    }

    if (!allowPreposition)
    {
        return null;
    }

    // Nothing starts at t directly: try "preposition + noun phrase".
    PrepositionToken prep = PrepositionHelper.TryParse(t);
    if (prep == null || !(prep.NewlinesAfterCount < 2))
    {
        return null;
    }
    NounPhraseToken phrase = _NounPraseHelperInt.TryParse(prep.EndToken.Next, attrs, maxCharPos, nounItem);
    if (phrase == null)
    {
        return null;
    }
    phrase.Preposition = prep;
    phrase.BeginToken = t;
    if (!(prep.NextCase & phrase.Morph.Case).IsUndefined)
    {
        phrase.Morph.RemoveItems(prep.NextCase);
    }
    else if (t.Morph.Class.IsAdverb)
    {
        // Case mismatch and the leading word may be an adverb: reject the phrase.
        return null;
    }
    return phrase;
}
/// <summary>
/// Try to parse one numeric value (or a range) with optional units, starting at <paramref name="t"/>.
/// Recognizes, in order: an optional min/max marker, an "approximately" marker ('~', "ОКОЛО", "ПРИМЕРНО"),
/// the lead-ins "В ПРЕДЕЛ..." / "В ДИАПАЗОН...", a parenthesized value, comparison keywords from m_Termins and
/// the symbols &lt; &gt; ≤ ≥ (optionally followed by '='), plus/minus markers, the number itself (or a special
/// term from m_Spec), and units written before or after the number. Unless <paramref name="second"/> is set,
/// the method then calls itself for the following tokens and merges that result as an upper bound,
/// a tolerance or a divisor.
/// </summary>
/// <param name="t">token to start from; leading tokens with IsCommaAnd and the word "НО" are skipped</param>
/// <param name="addUnits">collection passed through to UnitToken.TryParseList</param>
/// <param name="second">true when parsing the part that follows an already parsed value</param>
/// <param name="canOmitNumber">allow a value without an explicit number (a lone one-token unit longer than 3 characters is read as 1)</param>
/// <param name="canBeNan">allow a keyword without a number; the value is then double.NaN</param>
/// <returns>the parsed token, or null when no value starts at <paramref name="t"/></returns>
internal static NumbersWithUnitToken _tryParse(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool second, bool canOmitNumber, bool canBeNan)
{
    if (t == null)
    {
        return null;
    }
    while (t != null && (t.IsCommaAnd || t.IsValue("НО", null)))
    {
        t = t.Next;
    }
    Pullenti.Ner.Token t0 = t;
    bool about = false;
    bool hasKeyw = false;

    // The min/max value itself is not used here; only the marker is skipped.
    int minMax = 0;
    Pullenti.Ner.Token ttt = _isMinOrMax(t, ref minMax);
    if (ttt != null)
    {
        t = ttt.Next;
    }
    if (t == null)
    {
        return null;
    }
    if (t.IsChar('~') || t.IsValue("ОКОЛО", null) || t.IsValue("ПРИМЕРНО", null))
    {
        t = t.Next;
        about = true;
        hasKeyw = true;
        if (t == null)
        {
            return null;
        }
    }
    if (t.IsValue("В", null) && t.Next != null)
    {
        // FIX: "ДИАПАЗОН" was tested on t (already known to be "В"), so that alternative never matched.
        if (t.Next.IsValue("ПРЕДЕЛ", null) || t.Next.IsValue("ДИАПАЗОН", null))
        {
            t = t.Next.Next;
            if (t == null)
            {
                return null;
            }
        }
    }

    // "( value )" optionally followed by units.
    if (t0.IsChar('('))
    {
        NumbersWithUnitToken mt0 = _tryParse(t.Next, addUnits, false, false, false);
        if (mt0 != null && mt0.EndToken.Next != null && mt0.EndToken.Next.IsChar(')'))
        {
            if (second)
            {
                // As a second part only a symmetric "from -x to +x" range is accepted in brackets.
                if (!(mt0.FromVal != null && mt0.ToVal != null && mt0.FromVal.Value == (-mt0.ToVal.Value)))
                {
                    return null;
                }
            }
            mt0.BeginToken = t0;
            mt0.EndToken = mt0.EndToken.Next;
            List<UnitToken> uu = UnitToken.TryParseList(mt0.EndToken.Next, addUnits, false);
            if (uu != null && mt0.Units.Count == 0)
            {
                mt0.Units = uu;
                mt0.EndToken = uu[uu.Count - 1].EndToken;
            }
            return mt0;
        }
    }

    bool plusminus = false;
    bool unitBefore = false;
    bool isAge = false;
    DiapTyp dty = DiapTyp.Undefined;
    Pullenti.Ner.MetaToken whd = null;
    List<UnitToken> uni = null;

    // Comparison keyword or symbol in front of the number.
    Pullenti.Ner.Core.TerminToken tok = (m_Termins == null ? null : m_Termins.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No));
    if (tok != null)
    {
        if (tok.EndToken.IsValue("СТАРШЕ", null) || tok.EndToken.IsValue("МЛАДШЕ", null))
        {
            isAge = true;
        }
        t = tok.EndToken.Next;
        dty = (DiapTyp)tok.Termin.Tag;
        hasKeyw = true;
        if (!tok.IsWhitespaceAfter)
        {
            if (t == null)
            {
                return null;
            }
            if (t is Pullenti.Ner.NumberToken)
            {
                if (tok.BeginToken == tok.EndToken && !tok.Chars.IsAllLower)
                {
                    return null;
                }
            }
            else if (t.IsComma && t.Next != null && t.Next.IsValue("ЧЕМ", null))
            {
                t = t.Next.Next;
                if (t != null && t.Morph.Class.IsPreposition)
                {
                    t = t.Next;
                }
            }
            else if (!(t.IsCharOf(":,(") || t.IsTableControlChar))
            {
                return null;
            }
        }
        if (t != null && t.IsChar('('))
        {
            uni = UnitToken.TryParseList(t.Next, addUnits, false);
            if (uni != null)
            {
                t = uni[uni.Count - 1].EndToken.Next;
                while (t != null && t.IsCharOf("):"))
                {
                    t = t.Next;
                }
                NumbersWithUnitToken mt0 = _tryParse(t, addUnits, false, canOmitNumber, false);
                if (mt0 != null && mt0.Units.Count == 0)
                {
                    mt0.BeginToken = t0;
                    mt0.Units = uni;
                    return mt0;
                }
            }
            whd = _tryParseWHL(t);
            if (whd != null)
            {
                t = whd.EndToken.Next;
            }
        }
        else if (t != null && t.IsValue("IP", null))
        {
            uni = UnitToken.TryParseList(t, addUnits, false);
            if (uni != null)
            {
                t = uni[uni.Count - 1].EndToken.Next;
            }
        }
        if ((t != null && t.IsHiphen && t.IsWhitespaceBefore) && t.IsWhitespaceAfter)
        {
            t = t.Next;
        }
    }
    else if (t.IsChar('<'))
    {
        dty = DiapTyp.Ls;
        t = t.Next;
        hasKeyw = true;
        if (t != null && t.IsChar('='))
        {
            t = t.Next;
            dty = DiapTyp.Le;
        }
    }
    else if (t.IsChar('>'))
    {
        dty = DiapTyp.Gt;
        t = t.Next;
        hasKeyw = true;
        if (t != null && t.IsChar('='))
        {
            t = t.Next;
            dty = DiapTyp.Ge;
        }
    }
    else if (t.IsChar('≤'))
    {
        dty = DiapTyp.Le;
        hasKeyw = true;
        t = t.Next;
    }
    else if (t.IsChar('≥'))
    {
        dty = DiapTyp.Ge;
        hasKeyw = true;
        t = t.Next;
    }
    else if (t.IsValue("IP", null))
    {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null)
        {
            t = uni[uni.Count - 1].EndToken.Next;
        }
    }
    else if (t.IsValue("ЗА", null) && (t.Next is Pullenti.Ner.NumberToken))
    {
        dty = DiapTyp.Ge;
        t = t.Next;
    }

    while (t != null && (t.IsCharOf(":,") || t.IsValue("ЧЕМ", null) || t.IsTableControlChar))
    {
        t = t.Next;
    }

    // Sign / separator directly in front of the number.
    if (t != null)
    {
        Pullenti.Ner.Core.TerminToken hyphenTok = null;
        if (t.IsChar('+') || t.IsValue("ПЛЮС", null))
        {
            t = t.Next;
            if (t != null && !t.IsWhitespaceBefore)
            {
                if (t.IsHiphen)
                {
                    t = t.Next;
                    plusminus = true;
                }
                else if ((t.IsCharOf("\\/") && t.Next != null && !t.IsNewlineAfter) && t.Next.IsHiphen)
                {
                    t = t.Next.Next;
                    plusminus = true;
                }
            }
        }
        else if (second && t.IsCharOf("\\/÷…~"))
        {
            t = t.Next;
        }
        // FIX: m_Termins is null-guarded at its first use above but was dereferenced unguarded here;
        // the term after the hyphen is also parsed once instead of twice.
        else if ((t.IsHiphen && t == t0 && !second) && m_Termins != null
            && (hyphenTok = m_Termins.TryParse(t.Next, Pullenti.Ner.Core.TerminParseAttr.No)) != null)
        {
            tok = hyphenTok;
            t = tok.EndToken.Next;
            dty = (DiapTyp)tok.Termin.Tag;
        }
        else if (t.IsHiphen && t == t0 && (t.IsWhitespaceAfter || second))
        {
            t = t.Next;
        }
        else if (t.IsChar('±'))
        {
            t = t.Next;
            plusminus = true;
            hasKeyw = true;
        }
        else if ((second && t.IsChar('.') && t.Next != null) && t.Next.IsChar('.'))
        {
            t = t.Next.Next;
            if (t != null && t.IsChar('.'))
            {
                t = t.Next;
            }
        }
    }

    // The number itself; when absent, units may be written in front of it.
    Pullenti.Ner.NumberToken num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
    if (num == null)
    {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null)
        {
            unitBefore = true;
            t = uni[uni.Count - 1].EndToken.Next;
            bool delim = false;
            while (t != null)
            {
                if (t.IsCharOf(":,"))
                {
                    delim = true;
                    t = t.Next;
                }
                else if (t.IsHiphen && t.IsWhitespaceAfter)
                {
                    delim = true;
                    t = t.Next;
                }
                else
                {
                    break;
                }
            }
            if (!delim)
            {
                if (t == null)
                {
                    if (!(hasKeyw && canBeNan))
                    {
                        return null;
                    }
                }
                else if (!t.IsWhitespaceBefore)
                {
                    return null;
                }
                // FIX: t may be null here (text ended after the units while hasKeyw && canBeNan);
                // the original dereferenced t.Next unconditionally.
                if (t != null && t.Next != null && t.IsHiphen && t.IsWhitespaceAfter)
                {
                    delim = true;
                    t = t.Next;
                }
            }
            num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t, true, false);
        }
    }

    NumbersWithUnitToken res = null;
    double rval = (double)0;
    if (num == null)
    {
        // No number: a special term carrying its own value and unit, a lone unit, or a keyword-only NaN value.
        Pullenti.Ner.Core.TerminToken tt = m_Spec.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (tt != null)
        {
            rval = (double)tt.Termin.Tag;
            string unam = (string)tt.Termin.Tag2;
            foreach (Unit u in UnitsHelper.Units)
            {
                if (u.FullnameCyr == unam)
                {
                    uni = new List<UnitToken>();
                    uni.Add(new UnitToken(t, t) { Unit = u });
                    break;
                }
            }
            if (uni == null)
            {
                return null;
            }
            res = new NumbersWithUnitToken(t0, tt.EndToken) { About = about };
            t = tt.EndToken.Next;
        }
        else
        {
            if (!canOmitNumber && !hasKeyw && !canBeNan)
            {
                return null;
            }
            if ((uni != null && uni.Count == 1 && uni[0].BeginToken == uni[0].EndToken) && uni[0].LengthChar > 3)
            {
                rval = 1;
                res = new NumbersWithUnitToken(t0, uni[uni.Count - 1].EndToken) { About = about };
                t = res.EndToken.Next;
            }
            else if (hasKeyw && canBeNan)
            {
                rval = double.NaN;
                res = new NumbersWithUnitToken(t0, t0) { About = about };
                if (t != null)
                {
                    res.EndToken = t.Previous;
                }
                else
                {
                    // Text ended: extend the result to the last token.
                    for (t = t0; t != null; t = t.Next)
                    {
                        res.EndToken = t;
                    }
                }
            }
            else
            {
                return null;
            }
        }
    }
    else
    {
        // A glued leading hyphen that was read as a minus sign: re-read the number without it.
        if ((t == t0 && t0.IsHiphen && !t.IsWhitespaceBefore) && !t.IsWhitespaceAfter && (num.RealValue < 0))
        {
            num = Pullenti.Ner.Core.NumberHelper.TryParseRealNumber(t.Next, true, false);
            if (num == null)
            {
                return null;
            }
        }
        if (t == t0 && (t is Pullenti.Ner.NumberToken) && t.Morph.Class.IsAdjective)
        {
            Pullenti.Ner.TextToken nn = (t as Pullenti.Ner.NumberToken).EndToken as Pullenti.Ner.TextToken;
            if (nn == null)
            {
                return null;
            }
            string norm = nn.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
            if (!((norm.EndsWith("Ь") || norm == "ЧЕТЫРЕ" || norm == "ТРИ") || norm == "ДВА"))
            {
                // The word is probed with an artificial "КОКО" prefix; if morphology still calls it an adjective,
                // the number word is rejected (presumably an ordinal - confirm).
                Pullenti.Morph.MorphWordForm mi = Pullenti.Morph.MorphologyService.GetWordBaseInfo("КОКО" + nn.Term, null, false, false);
                if (mi.Class.IsAdjective)
                {
                    return null;
                }
            }
        }
        t = num.EndToken.Next;
        res = new NumbersWithUnitToken(t0, num.EndToken) { About = about };
        rval = num.RealValue;
    }

    // Units after the number, or reconciliation with units that were written before it.
    if (uni == null)
    {
        uni = UnitToken.TryParseList(t, addUnits, false);
        if (uni != null)
        {
            if ((plusminus && second && uni.Count >= 1) && uni[0].Unit == UnitsHelper.uPercent)
            {
                res.EndToken = uni[0].EndToken;
                res.PlusMinusPercent = true;
                Pullenti.Ner.Token tt1 = uni[0].EndToken.Next;
                uni = UnitToken.TryParseList(tt1, addUnits, false);
                if (uni != null)
                {
                    res.Units = uni;
                    res.EndToken = uni[uni.Count - 1].EndToken;
                }
            }
            else
            {
                res.Units = uni;
                res.EndToken = uni[uni.Count - 1].EndToken;
            }
            t = res.EndToken.Next;
        }
    }
    else
    {
        res.Units = uni;
        if (uni.Count > 1)
        {
            List<UnitToken> uni1 = UnitToken.TryParseList(t, addUnits, false);
            if (((uni1 != null && uni1[0].Unit == uni[0].Unit && (uni1.Count < uni.Count)) && uni[uni1.Count].Pow == -1 && uni1[uni1.Count - 1].EndToken.Next != null) && uni1[uni1.Count - 1].EndToken.Next.IsCharOf("/\\"))
            {
                NumbersWithUnitToken num2 = _tryParse(uni1[uni1.Count - 1].EndToken.Next.Next, addUnits, false, false, false);
                // FIX: an empty Units list passed the original null check and then failed on Units[0].
                if (num2 != null && num2.Units != null && num2.Units.Count > 0 && num2.Units[0].Unit == uni[uni1.Count].Unit)
                {
                    res.Units = uni1;
                    res.DivNum = num2;
                    res.EndToken = num2.EndToken;
                }
            }
        }
    }

    res.WHL = whd;
    if (dty == DiapTyp.Ge || dty == DiapTyp.From)
    {
        res.FromInclude = true;
        res.FromVal = rval;
    }
    else if (dty == DiapTyp.Gt)
    {
        res.FromInclude = false;
        res.FromVal = rval;
    }
    else if (dty == DiapTyp.Le || dty == DiapTyp.To)
    {
        res.ToInclude = true;
        res.ToVal = rval;
    }
    else if (dty == DiapTyp.Ls)
    {
        res.ToInclude = false;
        res.ToVal = rval;
    }

    // For a first part, a following marker for which _isMinOrMax reports a positive value is skipped.
    if (!second)
    {
        int iii = 0;
        ttt = _isMinOrMax(t, ref iii);
        if (ttt != null && iii > 0)
        {
            t = ttt.Next;
        }
    }

    // Try to read a following value (upper bound, tolerance or divisor).
    NumbersWithUnitToken next = (second || plusminus || (t != null && (t.IsTableControlChar || t.IsNewlineBefore))
        ? null
        : _tryParse(t, addUnits, true, false, canBeNan));
    if (next != null && (t.Previous is Pullenti.Ner.NumberToken))
    {
        if (MeasureHelper.IsMultChar((t.Previous as Pullenti.Ner.NumberToken).EndToken))
        {
            next = null;
        }
    }

    if (next != null && (next.ToVal != null || next.SingleVal != null) && next.FromVal == null)
    {
        // Asymmetric tolerance "value +a/-b".
        if ((((next.BeginToken.IsChar('+') && next.SingleVal != null && !double.IsNaN(next.SingleVal.Value)) && next.EndToken.Next != null && next.EndToken.Next.IsCharOf("\\/")) && next.EndToken.Next.Next != null && next.EndToken.Next.Next.IsHiphen) && !hasKeyw && !double.IsNaN(rval))
        {
            NumbersWithUnitToken next2 = _tryParse(next.EndToken.Next.Next.Next, addUnits, true, false, false);
            if (next2 != null && next2.SingleVal != null && !double.IsNaN(next2.SingleVal.Value))
            {
                res.FromVal = rval - next2.SingleVal.Value;
                res.FromInclude = true;
                res.ToVal = rval + next.SingleVal.Value;
                res.ToInclude = true;
                if (next2.Units != null && res.Units.Count == 0)
                {
                    res.Units = next2.Units;
                }
                res.EndToken = next2.EndToken;
                return res;
            }
        }

        // The second value is merged only when its units are compatible with the first one.
        if (next.Units.Count > 0)
        {
            if (res.Units.Count == 0)
            {
                res.Units = next.Units;
            }
            else if (!UnitToken.CanBeEquals(res.Units, next.Units))
            {
                next = null;
            }
        }
        else if (res.Units.Count > 0 && !unitBefore && !next.PlusMinusPercent)
        {
            next = null;
        }

        if (next != null)
        {
            res.EndToken = next.EndToken;
            if (next.ToVal != null)
            {
                res.ToVal = next.ToVal;
                res.ToInclude = next.ToInclude;
            }
            else if (next.SingleVal != null)
            {
                if (next.BeginToken.IsCharOf("/\\"))
                {
                    res.DivNum = next;
                    res.SingleVal = rval;
                    return res;
                }
                else if (next.PlusMinusPercent)
                {
                    res.SingleVal = rval;
                    res.PlusMinus = next.SingleVal;
                    res.PlusMinusPercent = true;
                    res.ToInclude = true;
                }
                else
                {
                    res.ToVal = next.SingleVal;
                    res.ToInclude = true;
                }
            }
            if (res.FromVal == null)
            {
                res.FromVal = rval;
                res.FromInclude = true;
            }
            return res;
        }
    }
    else if ((next != null && next.FromVal != null && next.ToVal != null) && next.ToVal.Value == (-next.FromVal.Value))
    {
        // Symmetric "±x" part after the value.
        if (next.Units.Count == 1 && next.Units[0].Unit == UnitsHelper.uPercent && res.Units.Count > 0)
        {
            res.SingleVal = rval;
            res.PlusMinus = next.ToVal.Value;
            res.PlusMinusPercent = true;
            res.EndToken = next.EndToken;
            return res;
        }
        if (next.Units.Count == 0)
        {
            res.SingleVal = rval;
            res.PlusMinus = next.ToVal.Value;
            res.EndToken = next.EndToken;
            return res;
        }
        res.FromVal = next.FromVal + rval;
        res.FromInclude = true;
        res.ToVal = next.ToVal + rval;
        res.ToInclude = true;
        res.EndToken = next.EndToken;
        if (next.Units.Count > 0)
        {
            res.Units = next.Units;
        }
        return res;
    }

    if (dty == DiapTyp.Undefined)
    {
        if (plusminus && (!res.PlusMinusPercent || !second))
        {
            res.FromInclude = true;
            res.FromVal = -rval;
            res.ToInclude = true;
            res.ToVal = rval;
        }
        else
        {
            res.SingleVal = rval;
            res.PlusMinusPercent = plusminus;
        }
    }
    if (isAge)
    {
        res.IsAge = true;
    }
    return res;
}
/// <summary>
/// Parses one measured value starting at <paramref name="t"/> and, when it is followed by a
/// multiplication sign, up to two further factors (e.g. the parts of an "A x B x C" expression).
/// A leading '(' is tried first as a bracketed group, optionally introduced by a
/// width/height/length legend recognised by <c>_tryParseWHL</c>.
/// </summary>
/// <param name="t">first token to analyse</param>
/// <param name="addUnits">additional unit termins forwarded to the single-value parser</param>
/// <param name="canOmitNumber">forwarded to <c>TryParse</c> for the first value</param>
/// <param name="not">forwarded to <c>TryParse</c></param>
/// <param name="canBeNon">forwarded to <c>TryParse</c></param>
/// <param name="isResctriction">forwarded to <c>TryParse</c></param>
/// <returns>the parsed values in text order, or null when nothing could be parsed</returns>
public static List<NumbersWithUnitToken> TryParseMulti(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection addUnits, bool canOmitNumber = false, bool not = false, bool canBeNon = false, bool isResctriction = false)
{
    if (t == null || (t is Pullenti.Ner.ReferentToken))
    {
        return null;
    }

    if (t.IsChar('('))
    {
        // Optional legend such as "(ШхВхГ" right after the bracket.
        Pullenti.Ner.MetaToken legend = _tryParseWHL(t);
        Pullenti.Ner.Token innerStart = (legend != null ? legend.EndToken : t).Next;
        List<NumbersWithUnitToken> bracketed = TryParseMulti(innerStart, addUnits, false, canOmitNumber, canBeNon, false);
        if (bracketed != null)
        {
            bracketed[0].WHL = legend;
            if (legend != null)
            {
                return bracketed;
            }
            // Without a legend the group is accepted only if it is closed by ')'
            // (an optional comma may precede the bracket).
            NumbersWithUnitToken tail = bracketed[bracketed.Count - 1];
            Pullenti.Ner.Token closer = tail.EndToken.Next;
            if (closer != null && closer.IsCharOf(","))
            {
                closer = closer.Next;
            }
            if (closer != null && closer.IsChar(')'))
            {
                tail.EndToken = closer;
                return bracketed;
            }
        }
    }

    NumbersWithUnitToken first = TryParse(t, addUnits, canOmitNumber, not, canBeNon, isResctriction);
    if (first == null)
    {
        return null;
    }
    List<NumbersWithUnitToken> result = new List<NumbersWithUnitToken>();

    Pullenti.Ner.Token secondStart = _tokenAfterMultSign(first);
    if (secondStart != null)
    {
        NumbersWithUnitToken second = NumbersWithUnitToken.TryParse(secondStart, addUnits, not, false, false, false);
        if (second != null)
        {
            NumbersWithUnitToken third = null;
            Pullenti.Ner.Token thirdStart = _tokenAfterMultSign(second);
            if (thirdStart != null)
            {
                third = NumbersWithUnitToken.TryParse(thirdStart, addUnits, false, false, false, false);
            }

            if (third == null)
            {
                // A two-factor product glued to anything but ",.;" is rejected entirely.
                Pullenti.Ner.Token after = second.EndToken.Next;
                if (after != null && !after.IsWhitespaceBefore && !after.IsCharOf(",.;"))
                {
                    return null;
                }
            }
            else if (third.Units.Count > 0 && second.Units.Count == 0)
            {
                second.Units = third.Units;
            }

            // Units written only after a later factor are shared with the earlier ones.
            if (second.Units.Count > 0 && first.Units.Count == 0)
            {
                first.Units = second.Units;
            }
            result.Add(first);
            result.Add(second);
            if (third != null)
            {
                result.Add(third);
            }
            return result;
        }
    }

    // "<number><unit+mult char><number>": the unit text is fused with a trailing multiplication character.
    Pullenti.Ner.Token fused = first.EndToken.Next;
    if (!first.IsWhitespaceAfter && MeasureHelper.IsMultCharEnd(fused) && (fused.Next is Pullenti.Ner.NumberToken) && first.Units.Count == 0)
    {
        string unitText = (fused as Pullenti.Ner.TextToken).Term;
        unitText = unitText.Substring(0, unitText.Length - 1);
        List<Pullenti.Ner.Core.Termin> found = UnitsHelper.Termins.FindTerminsByString(unitText, null);
        if (found != null && found.Count > 0)
        {
            UnitToken unit = new UnitToken(fused, fused);
            unit.Unit = found[0].Tag as Unit;
            first.Units.Add(unit);
            first.EndToken = fused;
            List<NumbersWithUnitToken> rest = TryParseMulti(fused.Next, addUnits, false, false, false, false);
            if (rest != null)
            {
                rest.Insert(0, first);
                return rest;
            }
        }
    }

    result.Add(first);
    return result;
}

/// <summary>
/// Returns the token at which the next factor starts when <paramref name="value"/> is followed
/// (with fewer than two whitespaces) by a multiplication sign, or null otherwise.
/// </summary>
private static Pullenti.Ner.Token _tokenAfterMultSign(NumbersWithUnitToken value)
{
    if (value.WhitespacesAfterCount >= 2)
    {
        return null;
    }
    if (MeasureHelper.IsMultChar(value.EndToken.Next))
    {
        return value.EndToken.Next.Next;
    }
    // The multiplication sign may also be the last inner token of a number token.
    Pullenti.Ner.NumberToken number = value.EndToken as Pullenti.Ner.NumberToken;
    if (number != null && MeasureHelper.IsMultChar(number.EndToken))
    {
        return value.EndToken.Next;
    }
    return null;
}
/// <summary>
/// Recognises a legend naming the dimensions of a following size expression: either a compact
/// five-character form with 'X'/'Х' at positions 1 and 3 (for example "ШХВХГ"), or a sequence
/// of at least two dimension words/abbreviations separated by a multiplication sign, comma or slash.
/// </summary>
/// <param name="t">token where the legend may start; leading ':' / '-' and an enclosing "( ... )" are allowed</param>
/// <returns>a meta token whose <c>Tag</c> is the list of dimension names, or null</returns>
public static Pullenti.Ner.MetaToken _tryParseWHL(Pullenti.Ner.Token t)
{
    if (!(t is Pullenti.Ner.TextToken))
    {
        return null;
    }

    if (t.IsCharOf(":-"))
    {
        Pullenti.Ner.MetaToken afterDelimiter = _tryParseWHL(t.Next);
        if (afterDelimiter != null)
        {
            return afterDelimiter;
        }
    }

    if (t.IsCharOf("("))
    {
        Pullenti.Ner.MetaToken inBrackets = _tryParseWHL(t.Next);
        if (inBrackets != null && inBrackets.EndToken.Next != null && inBrackets.EndToken.Next.IsChar(')'))
        {
            inBrackets.BeginToken = t;
            inBrackets.EndToken = inBrackets.EndToken.Next;
            return inBrackets;
        }
    }

    string txt = (t as Pullenti.Ner.TextToken).Term;
    if (txt.Length == 5 && ((txt[1] == 'Х' || txt[1] == 'X')) && ((txt[3] == 'Х' || txt[3] == 'X')))
    {
        // Compact form: letters at positions 0, 2 and 4 each name one dimension.
        List<string> compact = new List<string>();
        for (int pos = 0; pos <= 4; pos += 2)
        {
            switch (txt[pos])
            {
                case 'Г':
                    compact.Add("ГЛУБИНА");
                    break;
                case 'В':
                case 'H':
                case 'Н':
                    compact.Add("ВЫСОТА");
                    break;
                case 'Ш':
                case 'B':
                case 'W':
                    compact.Add("ШИРИНА");
                    break;
                case 'Д':
                case 'L':
                    compact.Add("ДЛИНА");
                    break;
                case 'D':
                    compact.Add("ДИАМЕТР");
                    break;
                default:
                    return null;
            }
        }
        return new Pullenti.Ner.MetaToken(t, t) { Tag = compact };
    }

    List<string> names = null;
    Pullenti.Ner.Token first = t;
    Pullenti.Ner.Token lastUsed = t;
    Pullenti.Ner.Token cur = t;
    while (cur != null)
    {
        Pullenti.Ner.TextToken text = cur as Pullenti.Ner.TextToken;
        if (text == null)
        {
            break;
        }
        if (cur != first && cur.WhitespacesBeforeCount > 1)
        {
            break;
        }

        // A trailing 'X'/'Х' is a multiplication sign glued to the abbreviation.
        string term = text.Term;
        if (term.EndsWith("X") || term.EndsWith("Х"))
        {
            term = term.Substring(0, term.Length - 1);
        }

        string name = _whlNameOf(cur, term);
        if (name == null)
        {
            break;
        }
        if (names == null)
        {
            names = new List<string>();
        }
        names.Add(name);
        lastUsed = cur;

        // Swallow the dot of an abbreviation.
        if (cur.Next != null && cur.Next.IsChar('.'))
        {
            cur = cur.Next;
            lastUsed = cur;
        }
        if (cur.Next == null)
        {
            break;
        }
        // Step over a single separator between two names.
        if (MeasureHelper.IsMultChar(cur.Next) || cur.Next.IsComma || cur.Next.IsCharOf("\\/"))
        {
            cur = cur.Next;
        }
        cur = cur.Next;
    }

    if (names == null || names.Count < 2)
    {
        return null;
    }
    return new Pullenti.Ner.MetaToken(first, lastUsed) { Tag = names };
}

/// <summary>
/// Maps a token (full word) or its already trimmed term (abbreviation) to a dimension name; null if unknown.
/// </summary>
private static string _whlNameOf(Pullenti.Ner.Token t, string term)
{
    if (((t.IsValue("ДЛИНА", null) || t.IsValue("ДЛИННА", null) || term == "Д") || term == "ДЛ" || term == "ДЛИН") || term == "L")
    {
        return "ДЛИНА";
    }
    if (((t.IsValue("ШИРИНА", null) || t.IsValue("ШИРОТА", null) || term == "Ш") || term == "ШИР" || term == "ШИРИН") || term == "W" || term == "B")
    {
        return "ШИРИНА";
    }
    if ((t.IsValue("ГЛУБИНА", null) || term == "Г" || term == "ГЛ") || term == "ГЛУБ")
    {
        return "ГЛУБИНА";
    }
    if ((t.IsValue("ВЫСОТА", null) || term == "В" || term == "ВЫС") || term == "H" || term == "Н")
    {
        return "ВЫСОТА";
    }
    if (t.IsValue("ДИАМЕТР", null) || term == "D" || term == "ДИАМ")
    {
        return "ДИАМЕТР";
    }
    return null;
}
/// <summary>
/// Tries to recover a sequence framed by quotes or brackets. Nesting is supported, as is the
/// possibility that the closing element is missing, and other irregularities.
/// </summary>
/// <param name="t">starting token (must be able to open a sequence)</param>
/// <param name="attrs">parsing options</param>
/// <param name="maxTokens">maximum number of non-bracket tokens to scan (in case the closing quote was forgotten)</param>
/// <returns>a BracketSequenceToken meta token, or null when no sequence is recognised</returns>
public static BracketSequenceToken TryParse(Pullenti.Ner.Token t, BracketParseAttr attrs = BracketParseAttr.No, int maxTokens = 100) {
Pullenti.Ner.Token t0 = t; int cou = 0; if (!CanBeStartOfSequence(t0, false, false)) { return(null); }
// brList collects every bracket/quote token met during the scan; element 0 is the opening one.
// lev is the nesting depth used only when the opening char is in m_AssymOPenChars (and is not '«').
List <Bracket> brList = new List <Bracket>(); brList.Add(new Bracket(t0)); cou = 0; int crlf = 0; Pullenti.Ner.Token last = null; int lev = 1; bool isAssim = brList[0].Char != '«' && m_AssymOPenChars.IndexOf(brList[0].Char) >= 0; bool genCase = false;
// Phase 1: scan forward, gathering brackets and deciding where the scan must stop.
for (t = t0.Next; t != null; t = t.Next) { if (t.IsTableControlChar) { break; } last = t;
if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars)) {
// A bracket that starts a new line (single-line mode) and could itself open a sequence ends the scan,
// except for '(' when the sequence being parsed was not opened by '('.
if (t.IsNewlineBefore && ((attrs & BracketParseAttr.CanBeManyLines)) == BracketParseAttr.No) { if (t.WhitespacesBeforeCount > 10 || CanBeStartOfSequence(t, false, false)) { if (t.IsChar('(') && !t0.IsChar('(')) { } else { last = t.Previous; break; } } }
Bracket bb = new Bracket(t); brList.Add(bb); if (brList.Count > 20) { break; }
// Third bracket that must close both previous ones: look ahead to decide whether to keep scanning.
if ((brList.Count == 3 && brList[1].CanBeOpen && bb.CanBeClose) && MustBeCloseChar(bb.Char, brList[1].Char) && MustBeCloseChar(bb.Char, brList[0].Char)) { bool ok = false; for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) { if (tt.IsNewlineBefore) { break; } if (tt.IsChar(',')) { break; } if (tt.IsChar('.')) { for (tt = tt.Next; tt != null; tt = tt.Next) { if (tt.IsNewlineBefore) { break; } else if (tt.IsCharOf(m_OpenChars) || tt.IsCharOf(m_CloseChars)) { Bracket bb2 = new Bracket(tt); if (BracketHelper.CanBeEndOfSequence(tt, false, null, false) && 
CanBeCloseChar(bb2.Char, brList[0].Char)) { ok = true; } break; } } break; }
// NOTE(review): the condition below tests t (loop-invariant, always a bracket here), not tt, so the
// look-ahead succeeds on the first token that is not a newline start, ',' or '.'. Possibly tt was intended - confirm.
if (t.IsCharOf(m_OpenChars) || t.IsCharOf(m_CloseChars)) { ok = true; break; } } if (!ok) { break; } }
// For asymmetric openers track nesting of the same opening char and stop when it is balanced.
if (isAssim) { if (bb.CanBeOpen && !bb.CanBeClose && bb.Char == brList[0].Char) { lev++; } else if (bb.CanBeClose && !bb.CanBeOpen && m_OpenChars.IndexOf(brList[0].Char) == m_CloseChars.IndexOf(bb.Char)) { lev--; if (lev == 0) { break; } } } }
else {
// Ordinary token: enforce the token budget and, unless CanContainsVerbs is set, stop on a lower-case verb
// (Cyrillic: dictionary verb, not adjective/passive, not ending in "СЯ"; English: verb class), and on an ADDRESS referent.
if ((++cou) > maxTokens) { break; } if (((attrs & BracketParseAttr.CanContainsVerbs)) == BracketParseAttr.No) { if (t.Morph.Language.IsCyrillic) { if (t.GetMorphClassInDictionary() == Pullenti.Morph.MorphClass.Verb) { if (!t.Morph.Class.IsAdjective && !t.Morph.ContainsAttr("страд.з.", null)) { if (t.Chars.IsAllLower) { string norm = t.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false); if (!Pullenti.Morph.LanguageHelper.EndsWith(norm, "СЯ")) { if (brList.Count > 1) { break; } if (brList[0].Char != '(') { break; } } } } } } else if (t.Morph.Language.IsEn) { if (t.Morph.Class == Pullenti.Morph.MorphClass.Verb && t.Chars.IsAllLower) { break; } } Pullenti.Ner.Referent r = t.GetReferent(); if (r != null && r.TypeName == "ADDRESS") { if (!t0.IsChar('(')) { break; } } } }
// Multi-line mode: only an empty line (more than one newline) stops the scan.
if (((attrs & BracketParseAttr.CanBeManyLines)) != BracketParseAttr.No) { if (t.IsNewlineBefore) { if (t.NewlinesBeforeCount > 1) { break; } crlf++; } continue; }
// Single-line mode: a new line ends the sequence when it is deeply indented, when it looks like a new
// sentence with no closing bracket on that line, or when the previous meta token already ends with a closer.
if (t.IsNewlineBefore) { if (t.WhitespacesBeforeCount > 15) { last = t.Previous; break; } crlf++; if (!t.Chars.IsAllLower) { if (MiscHelper.CanBeStartOfSentence(t)) { bool has = false; for (Pullenti.Ner.Token tt = t.Next; tt != null; tt = tt.Next) { if (tt.IsNewlineBefore) { break; } else if (tt.LengthChar == 1 && tt.IsCharOf(m_OpenChars) && tt.IsWhitespaceBefore) { break; } else if (tt.LengthChar == 1 && tt.IsCharOf(m_CloseChars) && !tt.IsWhitespaceBefore) { has = true; break; } } if (!has) { last = t.Previous; break; } } } if ((t.Previous is Pullenti.Ner.MetaToken) 
&& CanBeEndOfSequence((t.Previous as Pullenti.Ner.MetaToken).EndToken, false, null, false)) { last = t.Previous; break; } }
if (crlf > 1) { if (brList.Count > 1) { break; } if (crlf > 10) { break; } } if (t.IsChar(';') && t.IsNewlineAfter) { break; }
// Skip over a whole noun phrase at once; remember whether a phrase starting a line is genitive.
NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null); if (npt != null) { if (t.IsNewlineBefore) { genCase = npt.Morph.Case.IsGenitive; } last = (t = npt.EndToken); } }
// Phase 2: only the opening bracket was found - accept an unclosed sequence ending at a line end in two special cases.
if ((brList.Count == 1 && brList[0].CanBeOpen && (last is Pullenti.Ner.MetaToken)) && last.IsNewlineAfter) { if (BracketHelper.CanBeEndOfSequence((last as Pullenti.Ner.MetaToken).EndToken, false, null, false)) { return(new BracketSequenceToken(t0, last)); } } if ((brList.Count == 1 && brList[0].CanBeOpen && genCase) && last.IsNewlineAfter && crlf <= 2) { return(new BracketSequenceToken(t0, last)); } if (brList.Count < 1) { return(null); }
// Treat an adjacent '<' '>' pair as a proper open/close pair.
for (int i = 1; i < (brList.Count - 1); i++) { if (brList[i].Char == '<' && brList[i + 1].Char == '>') { brList[i].CanBeOpen = true; brList[i + 1].CanBeClose = true; } }
// Drop trailing open/close pairs that cannot close the outer bracket.
List <BracketSequenceToken> internals = null; while (brList.Count > 3) { int i = brList.Count - 1; if ((brList[i].CanBeClose && brList[i - 1].CanBeOpen && !CanBeCloseChar(brList[i].Char, brList[0].Char)) && CanBeCloseChar(brList[i].Char, brList[i - 1].Char)) { brList.RemoveRange(brList.Count - 2, 2); continue; } break; }
// Collapse unambiguous inner open/close pairs into 'internals' until nothing changes.
while (brList.Count >= 4) { bool changed = false; for (int i = 1; i < (brList.Count - 2); i++) { if ((brList[i].CanBeOpen && !brList[i].CanBeClose && brList[i + 1].CanBeClose) && !brList[i + 1].CanBeOpen) { bool ok = false; if (MustBeCloseChar(brList[i + 1].Char, brList[i].Char) || brList[i].Char != brList[0].Char) { ok = true; if ((i == 1 && ((i + 2) < brList.Count) && brList[i + 2].Char == ')') && brList[i + 1].Char != ')' && CanBeCloseChar(brList[i + 1].Char, brList[i - 1].Char)) { brList[i + 2] = brList[i + 1]; } } else if (i > 1 && ((i + 2) < brList.Count) && MustBeCloseChar(brList[i + 2].Char, 
brList[i - 1].Char)) { ok = true; } if (ok) { if (internals == null) { internals = new List <BracketSequenceToken>(); } internals.Add(new BracketSequenceToken(brList[i].Source, brList[i + 1].Source)); brList.RemoveRange(i, 2); changed = true; break; } } } if (!changed) { break; } }
// Phase 3: build the result. First the four-bracket shape: open, open, close, close.
BracketSequenceToken res = null; if ((brList.Count >= 4 && brList[1].CanBeOpen && brList[2].CanBeClose) && brList[3].CanBeClose && !brList[3].CanBeOpen) { if (CanBeCloseChar(brList[3].Char, brList[0].Char)) { res = new BracketSequenceToken(brList[0].Source, brList[3].Source); if (brList[0].Source.Next != brList[1].Source || brList[2].Source.Next != brList[3].Source) { res.Internal.Add(new BracketSequenceToken(brList[1].Source, brList[2].Source)); } if (internals != null) { res.Internal.AddRange(internals); } } }
// Three brackets where the third is a pure closer: decide whether bracket 1 or bracket 2 closes the sequence.
if ((res == null && brList.Count >= 3 && brList[2].CanBeClose) && !brList[2].CanBeOpen) { if (((attrs & BracketParseAttr.NearCloseBracket)) != BracketParseAttr.No) { if (CanBeCloseChar(brList[1].Char, brList[0].Char)) { return(new BracketSequenceToken(brList[0].Source, brList[1].Source)); } } bool ok = true; if (CanBeCloseChar(brList[2].Char, brList[0].Char) && CanBeCloseChar(brList[1].Char, brList[0].Char) && brList[1].CanBeClose) { for (t = brList[1].Source; t != brList[2].Source && t != null; t = t.Next) { if (t.IsNewlineBefore) { ok = false; break; } if (t.Chars.IsLetter && t.Chars.IsAllLower) { ok = false; break; } NounPhraseToken npt = NounPhraseHelper.TryParse(t, NounPhraseParseAttr.No, 0, null); if (npt != null) { t = npt.EndToken; } } if (ok) { for (t = brList[0].Source.Next; t != brList[1].Source && t != null; t = t.Next) { if (t.IsNewlineBefore) { return(new BracketSequenceToken(brList[0].Source, t.Previous)); } } }
// Look backwards on the same line at single-character non-letter tokens to balance brackets preceding the opener.
int lev1 = 0; for (Pullenti.Ner.Token tt = brList[0].Source.Previous; tt != null; tt = tt.Previous) { if (tt.IsNewlineAfter || tt.IsTableControlChar) { break; } if (!(tt is Pullenti.Ner.TextToken)) { continue; } if (tt.Chars.IsLetter || tt.LengthChar > 1) { 
continue; } char ch = (tt as Pullenti.Ner.TextToken).Term[0]; if (CanBeCloseChar(ch, brList[0].Char)) { lev1++; } else if (CanBeCloseChar(brList[1].Char, ch)) { lev1--; if (lev1 < 0) { return(new BracketSequenceToken(brList[0].Source, brList[1].Source)); } } } } if (ok && CanBeCloseChar(brList[2].Char, brList[0].Char)) { BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source); res = new BracketSequenceToken(brList[0].Source, brList[2].Source); res.Internal.Add(intern); } else if (ok && CanBeCloseChar(brList[2].Char, brList[1].Char) && brList[0].CanBeOpen) { if (CanBeCloseChar(brList[2].Char, brList[0].Char)) { BracketSequenceToken intern = new BracketSequenceToken(brList[1].Source, brList[2].Source); res = new BracketSequenceToken(brList[0].Source, brList[2].Source); res.Internal.Add(intern); } else if (brList.Count == 3) { return(null); } } }
// Simple cases: the second bracket closes the first.
if (res == null && brList.Count > 1 && brList[1].CanBeClose) { res = new BracketSequenceToken(brList[0].Source, brList[1].Source); } if (res == null && brList.Count > 1 && CanBeCloseChar(brList[1].Char, brList[0].Char)) { res = new BracketSequenceToken(brList[0].Source, brList[1].Source); } if (res == null && brList.Count == 2 && brList[0].Char == brList[1].Char) { res = new BracketSequenceToken(brList[0].Source, brList[1].Source); }
// Attach collapsed inner sequences that begin before the end of the result.
if (res != null && internals != null) { foreach (BracketSequenceToken i in internals) { if (i.BeginChar < res.EndChar) { res.Internal.Add(i); } } }
// Fallback: the closing bracket may be hidden as the last inner token of a following meta token.
if (res == null) { cou = 0; for (Pullenti.Ner.Token tt = t0.Next; tt != null; tt = tt.Next, cou++) { if (tt.IsTableControlChar) { break; } if (MiscHelper.CanBeStartOfSentence(tt)) { break; } if (maxTokens > 0 && cou > maxTokens) { break; } Pullenti.Ner.MetaToken mt = tt as Pullenti.Ner.MetaToken; if (mt == null) { continue; } if (mt.EndToken is Pullenti.Ner.TextToken) { if ((mt.EndToken as Pullenti.Ner.TextToken).IsCharOf(m_CloseChars)) { Bracket bb = new Bracket(mt.EndToken as Pullenti.Ner.TextToken); if 
(bb.CanBeClose && CanBeCloseChar(bb.Char, brList[0].Char)) { return(new BracketSequenceToken(t0, tt)); } } } } } return(res); }
/// <summary>
/// Creates the semantic object for one adjective-position element of a noun phrase and
/// registers it in the graph. Pronouns become (personal) pronoun objects, other non-verb
/// elements become adjective objects; elements whose morphology class is a verb are ignored.
/// </summary>
/// <param name="gr">graph that receives the new object</param>
/// <param name="npt">noun phrase the element belongs to (its morphology is used for agreement)</param>
/// <param name="a">the element itself</param>
/// <returns>the created object, or null for a (non-pronoun) verb-class element</returns>
public static Pullenti.Semantic.SemObject CreateNptAdj(Pullenti.Semantic.SemGraph gr, Pullenti.Ner.Core.NounPhraseToken npt, Pullenti.Ner.MetaToken a)
{
    bool isPronoun = a.Morph.Class.IsPronoun;
    if (!isPronoun && a.Morph.Class.IsVerb)
    {
        return null;
    }

    Pullenti.Semantic.SemObject sem = new Pullenti.Semantic.SemObject(gr);
    gr.Objects.Add(sem);
    sem.Tokens.Add(a);

    if (isPronoun)
    {
        if (a.BeginToken.Morph.Class.IsPersonalPronoun)
        {
            sem.Typ = Pullenti.Semantic.SemObjectType.PersonalPronoun;
        }
        else
        {
            sem.Typ = Pullenti.Semantic.SemObjectType.Pronoun;
        }

        // Take the first word form whose case is compatible with the noun phrase.
        foreach (Pullenti.Morph.MorphBaseInfo info in a.BeginToken.Morph.Items)
        {
            Pullenti.Morph.MorphWordForm form = info as Pullenti.Morph.MorphWordForm;
            if (form == null)
            {
                continue;
            }
            if (!npt.Morph.Case.IsUndefined && ((npt.Morph.Case & form.Case)).IsUndefined)
            {
                continue;
            }
            _setMorph(sem, form);
            if (sem.Morph.NormalFull == "КАКОВ")
            {
                sem.Morph.NormalFull = "КАКОЙ";
            }
            break;
        }

        if (sem.Morph.NormalFull == null)
        {
            string normal = a.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
            sem.Morph.NormalCase = normal;
            sem.Morph.NormalFull = normal;
        }
        return sem;
    }

    sem.Typ = Pullenti.Semantic.SemObjectType.Adjective;

    // Take the first adjective word form that agrees with the noun phrase.
    foreach (Pullenti.Morph.MorphBaseInfo info in a.BeginToken.Morph.Items)
    {
        if (!info.CheckAccord(npt.Morph, false, false))
        {
            continue;
        }
        if (!info.Class.IsAdjective)
        {
            continue;
        }
        Pullenti.Morph.MorphWordForm form = info as Pullenti.Morph.MorphWordForm;
        if (form == null)
        {
            continue;
        }
        _setMorph(sem, form);
        break;
    }

    if (sem.Morph.NormalCase == null)
    {
        // No agreeing form was found: fall back to normalised text of the element.
        sem.Morph.NormalCase = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Undefined, Pullenti.Morph.MorphGender.Undefined, false);
        sem.Morph.NormalFull = a.GetNormalCaseText(Pullenti.Morph.MorphClass.Adjective, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Masculine, false);
        _setMorph0(sem, a.BeginToken.Morph);
    }

    List<Pullenti.Semantic.Utils.DerivateGroup> groups = Pullenti.Semantic.Utils.DerivateService.FindDerivates(sem.Morph.NormalFull, true, null);
    if (groups != null && groups.Count > 0)
    {
        sem.Concept = groups[0];
    }
    return sem;
}
/// <summary>
/// Builds the candidate semantic links between a verb phrase and a dependent element.
/// </summary>
/// <param name="vpt1">the governing verb phrase</param>
/// <param name="slave">the dependent element; a verb phrase is delegated to <c>_tryCreateInf</c>,
/// anything that is not a <c>SemanticAbstractSlave</c> yields an empty list</param>
/// <param name="gr">derivate group used to look up the control model of the verb (may be null)</param>
/// <returns>for non-verb dependents a non-null, possibly empty list of links</returns>
static List<SemanticLink> _tryCreateVerb(Pullenti.Ner.Core.VerbPhraseToken vpt1, Pullenti.Ner.MetaToken slave, Pullenti.Semantic.Utils.DerivateGroup gr)
{
    if (slave is Pullenti.Ner.Core.VerbPhraseToken)
    {
        return _tryCreateInf(vpt1, slave as Pullenti.Ner.Core.VerbPhraseToken, gr);
    }
    SemanticAbstractSlave sla2 = slave as SemanticAbstractSlave;
    List<SemanticLink> res = new List<SemanticLink>();
    if (sla2 == null)
    {
        return res;
    }

    Pullenti.Semantic.Utils.ControlModelItem cit = FindControlItem(vpt1.LastVerb, gr);
    string prep = sla2.Preposition;
    Pullenti.Morph.MorphBaseInfo morph = (Pullenti.Morph.MorphBaseInfo)sla2.Morph;
    bool isRev1 = vpt1.LastVerb.IsVerbReversive || vpt1.LastVerb.IsVerbPassive;
    bool noNomin = false;
    bool noInstr = false;

    // 1. Nominative dependent without preposition: subject-like link (agent, or pacient for reflexive/passive verbs).
    if (prep == null && morph.Case.IsNominative && !vpt1.FirstVerb.IsParticiple)
    {
        bool ok = true;
        Pullenti.Morph.MorphWordForm vm = vpt1.FirstVerb.VerbMorph;
        if (vm == null)
        {
            return res;
        }
        // Number / agreement / person checks are waived for infinitives.
        if (vm.Number == Pullenti.Morph.MorphNumber.Singular && morph.Number == Pullenti.Morph.MorphNumber.Plural)
        {
            if (!vpt1.FirstVerb.IsVerbInfinitive)
            {
                ok = false;
            }
        }
        if (!CheckMorphAccord(morph, false, vm, false))
        {
            if (!vpt1.FirstVerb.IsVerbInfinitive)
            {
                ok = false;
            }
        }
        else if (vm.Misc.Person != Pullenti.Morph.MorphPerson.Undefined)
        {
            if (((vm.Misc.Person & Pullenti.Morph.MorphPerson.Third)) == Pullenti.Morph.MorphPerson.Undefined)
            {
                if (((vm.Misc.Person & Pullenti.Morph.MorphPerson.First)) == Pullenti.Morph.MorphPerson.First)
                {
                    if (!morph.ContainsAttr("1 л.", null))
                    {
                        ok = false;
                    }
                }
                if (((vm.Misc.Person & Pullenti.Morph.MorphPerson.Second)) == Pullenti.Morph.MorphPerson.Second)
                {
                    if (!morph.ContainsAttr("2 л.", null))
                    {
                        ok = false;
                    }
                }
            }
        }
        noNomin = true;
        if (ok)
        {
            Pullenti.Semantic.Utils.ControlModelItem cit00 = cit;
            bool isRev0 = isRev1;
            // For a compound phrase whose first verb is reflexive/passive/"ИМЕТЬ", the first verb's own control model is used.
            if (vpt1.FirstVerb != vpt1.LastVerb && ((vpt1.FirstVerb.IsVerbReversive || vpt1.FirstVerb.IsVerbPassive || vpt1.FirstVerb.Normal == "ИМЕТЬ")))
            {
                cit00 = null;
                isRev0 = true;
                List<Pullenti.Semantic.Utils.DerivateGroup> grs = FindDerivates(vpt1.FirstVerb);
                if (grs != null)
                {
                    foreach (Pullenti.Semantic.Utils.DerivateGroup gg in grs)
                    {
                        if ((((cit00 = FindControlItem(vpt1.FirstVerb, gg)))) != null)
                        {
                            break;
                        }
                    }
                }
            }
            SemanticLink sl = null;
            bool addagent = false;
            if (cit00 == null)
            {
                // No control model: model the link from the voice of the verb.
                sl = new SemanticLink() { Modelled = true, Role = (isRev0 ? SemanticRole.Pacient : SemanticRole.Agent), Rank = 1, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseNominative, IsPassive = isRev0 };
            }
            else
            {
                foreach (KeyValuePair<Pullenti.Semantic.Utils.ControlModelQuestion, SemanticRole> kp in cit00.Links)
                {
                    Pullenti.Semantic.Utils.ControlModelQuestion q = kp.Key;
                    if (q.Check(null, Pullenti.Morph.MorphCase.Nominative))
                    {
                        sl = new SemanticLink() { Role = kp.Value, Rank = 2, Question = q, IsPassive = isRev0 };
                        if (sl.Role == SemanticRole.Agent)
                        {
                            sl.IsPassive = false;
                        }
                        else if (sl.Role == SemanticRole.Pacient && cit00.NominativeCanBeAgentAndPacient && vpt1.LastVerb.IsVerbReversive)
                        {
                            addagent = true;
                        }
                        break;
                    }
                }
            }
            if (sl != null)
            {
                // Each ambiguity or unusual word order lowers the rank by half a point.
                if (cit00 == null && morph.Case.IsInstrumental && isRev0)
                {
                    sl.Rank -= 0.5;
                }
                if (morph.Case.IsAccusative)
                {
                    sl.Rank -= 0.5;
                }
                if (sla2.BeginChar > vpt1.BeginChar)
                {
                    sl.Rank -= 0.5;
                }
                res.Add(sl);
                if (addagent)
                {
                    res.Add(new SemanticLink() { Role = SemanticRole.Agent, Rank = sl.Rank, Question = sl.Question });
                }
            }
        }
    }

    // 2. Instrumental dependent of a reflexive/passive verb: agent-like link.
    if (prep == null && isRev1 && morph.Case.IsInstrumental)
    {
        noInstr = true;
        Pullenti.Semantic.Utils.ControlModelItem cit00 = cit;
        SemanticLink sl = null;
        if (cit00 == null)
        {
            sl = new SemanticLink() { Modelled = true, Role = SemanticRole.Agent, Rank = 1, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental, IsPassive = true };
        }
        else
        {
            foreach (KeyValuePair<Pullenti.Semantic.Utils.ControlModelQuestion, SemanticRole> kp in cit00.Links)
            {
                Pullenti.Semantic.Utils.ControlModelQuestion q = kp.Key;
                if (q.Check(null, Pullenti.Morph.MorphCase.Instrumental))
                {
                    sl = new SemanticLink() { Role = kp.Value, Rank = 2, Question = q };
                    if (sl.Role == SemanticRole.Agent)
                    {
                        sl.IsPassive = true;
                    }
                    break;
                }
            }
        }
        if (sl != null)
        {
            if (cit00 == null && morph.Case.IsNominative)
            {
                sl.Rank -= 0.5;
            }
            if (morph.Case.IsAccusative)
            {
                sl.Rank -= 0.5;
            }
            if (sla2.BeginChar < vpt1.BeginChar)
            {
                sl.Rank -= 0.5;
            }
            res.Add(sl);
            // If the group's own verb model answers the instrumental question, that role is put first
            // and the link built above is demoted to rank 0.
            if ((gr != null && gr.Model.Items.Count > 0 && gr.Model.Items[0].Typ == Pullenti.Semantic.Utils.ControlModelItemType.Verb) && gr.Model.Items[0].Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental))
            {
                sl.Rank = 0;
                SemanticLink sl0 = new SemanticLink() { Question = sl.Question, Rank = 1, Role = gr.Model.Items[0].Links[Pullenti.Semantic.Utils.ControlModelQuestion.BaseInstrumental] };
                res.Insert(0, sl0);
            }
        }
    }

    // 3. Dative dependent not covered by the control model: generic strong link.
    if (prep == null && morph.Case.IsDative && ((cit == null || !cit.Links.ContainsKey(Pullenti.Semantic.Utils.ControlModelQuestion.BaseDative))))
    {
        SemanticLink sl = new SemanticLink() { Modelled = cit == null, Role = SemanticRole.Strong, Rank = 1, Question = Pullenti.Semantic.Utils.ControlModelQuestion.BaseDative };
        if (morph.Case.IsAccusative || morph.Case.IsNominative)
        {
            sl.Rank -= 0.5;
        }
        if (vpt1.EndToken.Next != sla2.BeginToken)
        {
            sl.Rank -= 0.5;
        }
        if (cit != null)
        {
            sl.Rank -= 0.5;
        }
        res.Add(sl);
    }

    // 4. Remaining roles from the control model.
    _createRoles(cit, prep, morph.Case, res, noNomin, noInstr);

    // 5. Idiomatic pacients listed in the group's model: create or boost the links.
    if (gr != null && gr.Model.Pacients.Count > 0)
    {
        bool ok = false;
        foreach (string n in gr.Model.Pacients)
        {
            if (sla2.Source != null)
            {
                if (sla2.Source.EndToken.IsValue(n, null))
                {
                    ok = true;
                    break;
                }
            }
            else if (sla2.EndToken.IsValue(n, null))
            {
                ok = true;
                break;
            }
        }
        if (ok)
        {
            if (res.Count == 0)
            {
                ok = false;
                if (prep == null && isRev1 && morph.Case.IsNominative)
                {
                    ok = true;
                }
                else if (prep == null && !isRev1 && morph.Case.IsAccusative)
                {
                    ok = true;
                }
                if (ok)
                {
                    res.Add(new SemanticLink() { Role = SemanticRole.Pacient, Question = (isRev1 ? Pullenti.Semantic.Utils.ControlModelQuestion.BaseNominative : Pullenti.Semantic.Utils.ControlModelQuestion.BaseAccusative), Idiom = true });
                }
            }
            else
            {
                foreach (SemanticLink r in res)
                {
                    r.Rank += 4;
                    if (r.Role == SemanticRole.Common)
                    {
                        r.Role = SemanticRole.Strong;
                    }
                    if (vpt1.EndToken.Next == sla2.BeginToken)
                    {
                        r.Rank += 2;
                    }
                    r.Idiom = true;
                }
            }
        }
    }
    return res;
}
/// <summary>
/// Creates the extended attribute and remembers the meta token it was built from.
/// </summary>
/// <param name="mt">source meta token stored in <c>Token</c></param>
public SemAttributeEx(Pullenti.Ner.MetaToken mt)
{
    this.Token = mt;
}
/// <summary>
/// Analyses the fragment <c>chi[ind]</c> and, if it contains a list, adds child fragments to it
/// (ListItem / ListHead / Content). A fragment carrying a multi-line change value instead gets a
/// Citation child with the parsed quoted content.
/// </summary>
/// <param name="chi">sibling fragments</param>
/// <param name="ind">index of the fragment to analyse</param>
/// <returns>-1 when nothing was done (index out of range, unsupported kind, fewer than two lines or
/// list items); otherwise the number of consecutive elements of <paramref name="chi"/> covered
/// (1, or more when following Content fragments were merged into the list)</returns>
static int _analizeListItems(List <FragToken> chi, int ind) { if (ind >= chi.Count) { return(-1); } FragToken res = chi[ind]; Pullenti.Ner.Instrument.InstrumentKind ki = res.Kind;
// Only these structural kinds may contain a list.
if (((ki == Pullenti.Ner.Instrument.InstrumentKind.Chapter || ki == Pullenti.Ner.Instrument.InstrumentKind.Clause || ki == Pullenti.Ner.Instrument.InstrumentKind.Content) || ki == Pullenti.Ner.Instrument.InstrumentKind.Item || ki == Pullenti.Ner.Instrument.InstrumentKind.Subitem) || ki == Pullenti.Ner.Instrument.InstrumentKind.ClausePart || ki == Pullenti.Ner.Instrument.InstrumentKind.Indention) { } else { return(-1); }
// Case A: the fragment holds a multi-line change (quoted new text). Wrap it in a Citation child,
// extending the citation over surrounding brackets and a trailing ';' or '.'.
if (res.HasChanges && res.MultilineChangesValue != null) { Pullenti.Ner.MetaToken ci = res.MultilineChangesValue; FragToken cit = new FragToken(ci.BeginToken, ci.EndToken) { Kind = Pullenti.Ner.Instrument.InstrumentKind.Citation }; res.Children.Add(cit); if (Pullenti.Ner.Core.BracketHelper.IsBracket(cit.BeginToken.Previous, true)) { cit.BeginToken = cit.BeginToken.Previous; } if (Pullenti.Ner.Core.BracketHelper.IsBracket(cit.EndToken.Next, true)) { cit.EndToken = cit.EndToken.Next; if (cit.EndToken.Next != null && cit.EndToken.Next.IsCharOf(";.")) { cit.EndToken = cit.EndToken.Next; } } res.FillByContentChildren(); if (res.Children[0].HasChanges) { }
// Determine what kind of structural part the quoted text represents: from the first new item of the
// change value, or (for a New change) from the slot of the owning part with the greatest non-zero rank.
Pullenti.Ner.Instrument.InstrumentKind citKind = Pullenti.Ner.Instrument.InstrumentKind.Undefined; if (ci.Tag is Pullenti.Ner.Decree.DecreeChangeReferent) { Pullenti.Ner.Decree.DecreeChangeReferent dcr = ci.Tag as Pullenti.Ner.Decree.DecreeChangeReferent; if (dcr.Value != null && dcr.Value.NewItems.Count > 0) { string mnem = dcr.Value.NewItems[0]; int i; if ((((i = mnem.IndexOf(' ')))) > 0) { mnem = mnem.Substring(0, i); } citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(mnem)); } else if (dcr.Owners.Count > 0 && (dcr.Owners[0] is Pullenti.Ner.Decree.DecreePartReferent) && dcr.Kind == Pullenti.Ner.Decree.DecreeChangeKind.New) { 
Pullenti.Ner.Decree.DecreePartReferent pat = dcr.Owners[0] as Pullenti.Ner.Decree.DecreePartReferent; int min = 0; foreach (Pullenti.Ner.Slot s in pat.Slots) { Pullenti.Ner.Decree.Internal.PartToken.ItemType ty = Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(s.TypeName); if (ty == Pullenti.Ner.Decree.Internal.PartToken.ItemType.Undefined) { continue; } int l = Pullenti.Ner.Decree.Internal.PartToken._getRank(ty); if (l == 0) { continue; } if (l > min || min == 0) { min = l; citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(ty); } } } }
// Parse the quoted text either as content of the detected kind or as a whole document,
// then attach the result (or its children when they cover it exactly) to the citation.
FragToken sub = null; if (citKind != Pullenti.Ner.Instrument.InstrumentKind.Undefined && citKind != Pullenti.Ner.Instrument.InstrumentKind.Appendix) { sub = new FragToken(ci.BeginToken, ci.EndToken); ContentAnalyzeWhapper wr = new ContentAnalyzeWhapper(); wr.Analyze(sub, null, true, citKind); sub.Kind = Pullenti.Ner.Instrument.InstrumentKind.Content; } else { sub = FragToken.CreateDocument(ci.BeginToken, ci.EndChar, citKind); } if (sub == null || sub.Children.Count == 0) { } else if ((sub.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && sub.Children.Count > 0 && sub.Children[0].BeginToken == sub.BeginToken) && sub.Children[sub.Children.Count - 1].EndToken == sub.EndToken) { cit.Children.AddRange(sub.Children); } else { cit.Children.Add(sub); } return(1); }
// Case B: split the fragment into lines and look for list items.
int endChar = res.EndChar; if (res.Itok == null) { res.Itok = InstrToken1.Parse(res.BeginToken, true, null, 0, null, false, res.EndChar, false, false); } List <LineToken> lines = LineToken.ParseList(res.BeginToken, endChar, null); if (lines == null || (lines.Count < 1)) { return(-1); } int ret = 1;
// A Content fragment may continue in following Content siblings (Editions and Comment siblings are
// skipped over); their lines are appended while they start with a list item or a lower-case "...:" head.
if (res.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content) { for (int j = ind + 1; j < chi.Count; j++) { if (chi[j].Kind == Pullenti.Ner.Instrument.InstrumentKind.Content) { List <LineToken> lines2 = LineToken.ParseList(chi[j].BeginToken, chi[j].EndChar, lines[lines.Count - 1]); if (lines2 == null || (lines2.Count < 1)) { break; } if 
(!lines2[0].IsListItem) { if ((lines2.Count > 1 && lines2[1].IsListItem && lines2[0].EndToken.IsCharOf(":")) && !lines2[0].BeginToken.Chars.IsCapitalUpper) { lines2[0].IsListItem = true; } else { break; } } lines.AddRange(lines2); ret = (j - ind) + 1; } else if (chi[j].Kind != Pullenti.Ner.Instrument.InstrumentKind.Editions && chi[j].Kind != Pullenti.Ner.Instrument.InstrumentKind.Comment) { break; } } } if (lines.Count < 2) { return(-1); }
// Two leading items not starting from number 1 and not followed by a third item are not a list.
if ((lines.Count > 1 && lines[0].IsListItem && lines[1].IsListItem) && lines[0].Number != 1) { if (lines.Count == 2 || !lines[2].IsListItem) { lines[0].IsListItem = (lines[1].IsListItem = false); } }
// Validate each run of list items: a lone item is unmarked; a run is kept only if one of its items after
// the first starts on a new line or the run is preceded by a line ending with ':'.
for (int i = 0; i < lines.Count; i++) { if (lines[i].IsListItem) { if (i > 0 && lines[i - 1].IsListItem) { continue; } if (((i + 1) < lines.Count) && lines[i + 1].IsListItem) { } else { lines[i].IsListItem = false; continue; } int j; bool newLine = false; for (j = i + 1; j < lines.Count; j++) { if (!lines[j].IsListItem) { break; } else if (lines[j].IsNewlineBefore) { newLine = true; } } if (newLine) { continue; } if (i > 0 && lines[i - 1].EndToken.IsChar(':')) { continue; } for (j = i; j < lines.Count; j++) { if (!lines[j].IsListItem) { break; } else { lines[j].IsListItem = false; } } } }
// A final "....": line after an item ending with ';' is treated as the last list item when it is short or lower-case.
if (lines.Count > 2) { LineToken last = lines[lines.Count - 1]; LineToken last2 = lines[lines.Count - 2]; if ((!last.IsListItem && last.EndToken.IsChar('.') && last2.IsListItem) && last2.EndToken.IsChar(';')) { if ((last.LengthChar < (last2.LengthChar * 2)) || last.BeginToken.Chars.IsAllLower) { last.IsListItem = true; } } }
// Merge adjacent non-list lines, but keep a "...:" head that directly precedes a list item separate.
for (int i = 0; i < (lines.Count - 1); i++) { if (!lines[i].IsListItem && !lines[i + 1].IsListItem) { if (((i + 2) < lines.Count) && lines[i + 2].IsListItem && lines[i + 1].EndToken.IsChar(':')) { } else { lines[i].EndToken = lines[i + 1].EndToken; lines.RemoveAt(i + 1); i--; } } }
// Numbered list (1, 2, 3 ...) mixed with unnumbered items: unnumbered items become sub-items
// (stored in Tag) of the preceding numbered item.
for (int i = 0; i < (lines.Count - 1); i++) { if (lines[i].IsListItem) { if (lines[i].Number == 1) { bool ok = true; int num = 1; int nonum = 0; for 
(int j = i + 1; j < lines.Count; j++) { if (!lines[j].IsListItem) { ok = false; break; } else if (lines[j].Number > 0) { num++; if (lines[j].Number != num) { ok = false; break; } } else { nonum++; } } if (!ok || nonum == 0 || (num < 2)) { break; } LineToken lt = lines[i]; for (int j = i + 1; j < lines.Count; j++) { if (lines[j].Number > 0) { lt = lines[j]; } else { List <LineToken> chli = lt.Tag as List <LineToken>; if (chli == null) { lt.Tag = (chli = new List <LineToken>()); } lt.EndToken = lines[j].EndToken; chli.Add(lines[j]); lines.RemoveAt(j); j--; } } } } }
// At least two list items must remain.
int cou = 0; foreach (LineToken li in lines) { if (li.IsListItem) { cou++; } } if (cou < 2) { return(-1); }
// For every run of items: reset numbers that are not exactly 1..n; and if items 2..n all start with the
// same marker text that differs from the first item's start but occurs inside the first item, split the
// first item at that marker (the text before it becomes a separate line).
for (int i = 0; i < lines.Count; i++) { if (lines[i].IsListItem) { int i0 = i; bool ok = true; cou = 1; for (; i < lines.Count; i++, cou++) { if (!lines[i].IsListItem) { break; } else if (lines[i].Number != cou) { ok = false; } } if (!ok) { for (i = i0; i < lines.Count; i++) { if (!lines[i].IsListItem) { break; } else { lines[i].Number = 0; } } } if (cou > 3 && lines[i0].BeginToken.GetSourceText() != lines[i0 + 1].BeginToken.GetSourceText() && lines[i0 + 1].BeginToken.GetSourceText() == lines[i0 + 2].BeginToken.GetSourceText()) { string pref = lines[i0 + 1].BeginToken.GetSourceText(); ok = true; for (int j = i0 + 2; j < i; j++) { if (pref != lines[j].BeginToken.GetSourceText()) { ok = false; break; } } if (!ok) { continue; } Pullenti.Ner.Token tt = null; ok = false; for (tt = lines[i0].EndToken.Previous; tt != null && tt != lines[i0].BeginToken; tt = tt.Previous) { if (tt.GetSourceText() == pref) { ok = true; break; } } if (ok) { LineToken li0 = new LineToken(lines[i0].BeginToken, tt.Previous); lines[i0].BeginToken = tt; lines.Insert(i0, li0); i++; } } } }
// Materialise the lines as child fragments: list items, list heads (content ending with ':') and plain
// content; sub-items collected in Tag become ListItem children, preceded by a Content child for the item's own text.
foreach (LineToken li in lines) { li.CorrectBeginToken(); FragToken ch = new FragToken(li.BeginToken, li.EndToken) { Kind = (li.IsListItem ? 
Pullenti.Ner.Instrument.InstrumentKind.ListItem : Pullenti.Ner.Instrument.InstrumentKind.Content), Number = li.Number }; if (ch.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && ch.EndToken.IsChar(':')) { ch.Kind = Pullenti.Ner.Instrument.InstrumentKind.ListHead; } res.Children.Add(ch); List <LineToken> chli = li.Tag as List <LineToken>; if (chli != null) { foreach (LineToken lt in chli) { ch.Children.Add(new FragToken(lt.BeginToken, lt.EndToken) { Kind = Pullenti.Ner.Instrument.InstrumentKind.ListItem }); } if (ch.BeginChar < ch.Children[0].BeginChar) { ch.Children.Insert(0, new FragToken(ch.BeginToken, ch.Children[0].BeginToken.Previous) { Kind = Pullenti.Ner.Instrument.InstrumentKind.Content }); } } } return(ret); }