/// <summary>
/// Scans the token chain starting at <paramref name="t"/> and collects the block titles
/// recognised by <see cref="TryAttach"/>.
/// </summary>
/// <remarks>
/// The work is done in four passes:
/// 1. Find an anchor: the first Index title, the first Intro title (which is promoted to the
///    "content" anchor when one of the next five lines looks like an index entry), or
///    Literature titles collected along the way.
/// 2. If a content anchor was found, absorb the following content items into it (extending its
///    EndToken) and remember their values as known chapter names.
/// 3. Scan from the chosen start token and collect the titles, using the known chapter names.
/// 4. Collapse directly adjacent Literature titles into one.
/// </remarks>
/// <param name="t">First token to analyse; a null token yields null.</param>
/// <returns>
/// The collected titles, or null when no Index/Intro anchor was found and the number of
/// Literature titles found is not exactly one.
/// </returns>
public static List<BlockTitleToken> TryAttachList(Pullenti.Ner.Token t)
{
    BlockTitleToken content = null;
    BlockTitleToken intro = null;
    List<BlockTitleToken> lits = null;

    // Pass 1: look for an anchor title; only tokens that start a line are candidates.
    for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
    {
        if (!tt.IsNewlineBefore)
        {
            continue;
        }
        BlockTitleToken btt = TryAttach(tt, false, null);
        if (btt == null)
        {
            continue;
        }
        if (btt.Typ == BlkTyps.Index)
        {
            content = btt;
            break;
        }
        if (btt.Typ == BlkTyps.Intro)
        {
            // Inspect up to five lines after the Intro title: if they look like index entries,
            // the Intro title is treated as the content anchor instead.
            Pullenti.Ner.Token tt2 = btt.EndToken.Next;
            for (int k = 0; k < 5; k++)
            {
                BlockLine li = BlockLine.Create(tt2, null);
                if (li == null)
                {
                    break;
                }
                if (li.HasContentItemTail || li.Typ == BlkTyps.IndexItem)
                {
                    content = btt;
                    break;
                }
                if (li.HasVerb)
                {
                    break;
                }
                if (li.Typ != BlkTyps.Undefined && (li.BeginChar - btt.EndChar) < 400)
                {
                    content = btt;
                    break;
                }
                tt2 = li.EndToken.Next;
            }
            if (content == null)
            {
                intro = btt;
            }
            break;
        }
        if (btt.Typ == BlkTyps.Literature)
        {
            if (lits == null)
            {
                lits = new List<BlockTitleToken>();
            }
            lits.Add(btt);
        }
    }

    if (content == null && intro == null && (lits == null || lits.Count != 1))
    {
        return null;
    }

    List<BlockTitleToken> res = new List<BlockTitleToken>();
    Pullenti.Ner.Core.TerminCollection chapterNames = new Pullenti.Ner.Core.TerminCollection();
    Pullenti.Ner.Token t0;

    if (content != null)
    {
        // Pass 2: absorb the content items that follow the anchor into the anchor itself.
        res.Add(content);
        int cou = 0;
        for (Pullenti.Ner.Token tt = content.EndToken.Next; tt != null; tt = tt.Next)
        {
            if (!tt.IsNewlineBefore)
            {
                continue;
            }
            BlockLine li = BlockLine.Create(tt, null);
            if (li == null)
            {
                break;
            }
            // A line with a verb that ends with '.' or is longer than 100 chars ends the scan.
            if (li.HasVerb && (li.EndToken.IsChar('.') || li.LengthChar > 100))
            {
                break;
            }
            BlockTitleToken btt = TryAttach(tt, true, null);
            if (btt == null)
            {
                continue;
            }
            if (btt.Typ == BlkTyps.Intro && (content.Typ == BlkTyps.Intro || cou > 2))
            {
                break;
            }
            cou++;
            content.EndToken = btt.EndToken;
            tt = btt.EndToken;
            if (btt.Value != null)
            {
                chapterNames.AddString(btt.Value, null, null, false);
            }
        }
        content.Typ = BlkTyps.Index;
        t0 = content.EndToken.Next;
    }
    else if (intro != null)
    {
        t0 = intro.BeginToken;
    }
    else
    {
        // Reaching here means the guard above passed only because exactly one Literature
        // title was found, so lits is non-null.
        t0 = t;
    }

    // Pass 3: collect the titles starting from t0.
    bool first = true;
    for (Pullenti.Ner.Token tt = t0; tt != null; tt = tt.Next)
    {
        if (!tt.IsNewlineBefore)
        {
            continue;
        }
        BlockTitleToken btt = TryAttach(tt, false, chapterNames);
        if (btt == null)
        {
            continue;
        }
        // Jump to the end of the title even when it ends up being skipped below.
        tt = btt.EndToken;
        if (content != null && btt.Typ == BlkTyps.Index)
        {
            continue;
        }
        if (res.Count > 0 && res[res.Count - 1].Typ == BlkTyps.Literature)
        {
            bool allowedAfterLiterature = btt.Typ == BlkTyps.Appendix
                || btt.Typ == BlkTyps.Misc
                || btt.Typ == BlkTyps.Literature;
            if (!allowedAfterLiterature)
            {
                // A Chapter is still accepted when the preceding Literature title ends within
                // the first three quarters of the source text.
                bool chapterAfterEarlyLiterature = btt.Typ == BlkTyps.Chapter
                    && (res[res.Count - 1].EndChar < ((tt.Kit.Sofa.Text.Length * 3) / 4));
                if (!chapterAfterEarlyLiterature)
                {
                    continue;
                }
            }
        }
        if (first && (tt.BeginChar - t0.BeginChar) > 300)
        {
            // The first accepted title lies more than 300 chars past the start token:
            // insert a synthetic Chapter in front of it (the value means "looks like the beginning").
            BlockTitleToken btt0 = new BlockTitleToken(t0, (t0.Previous == null ? t0 : t0.Previous));
            btt0.Typ = BlkTyps.Chapter;
            btt0.Value = "Похоже на начало";
            res.Add(btt0);
        }
        res.Add(btt);
        first = false;
    }

    // Pass 4: keep only the first of directly adjacent Literature titles.
    for (int i = 0; i < (res.Count - 1); i++)
    {
        if (res[i].Typ == BlkTyps.Literature && res[i + 1].Typ == res[i].Typ)
        {
            res.RemoveAt(i + 1);
            i--;
        }
    }
    return res;
}
/// <summary>
/// Builds a <see cref="BlockLine"/> for the text line that starts at <paramref name="t"/>:
/// determines the line end, an optional leading number, the block type (via the ontology,
/// the optional <paramref name="names"/> collection, or noun-phrase heuristics) and simple
/// word statistics (Words, NotWords, IsAllUpper, HasVerb, HasContentItemTail).
/// </summary>
/// <param name="t">First token of the line; null yields null.</param>
/// <param name="names">
/// Optional collection of already known names; when a name from it matches and ends the line,
/// the result is flagged with IsExistName and returned immediately.
/// </param>
/// <returns>
/// The analysed line, or null when <paramref name="t"/> is null or when the line consists only
/// of one of the "list / index / position / conclusion / result" noun phrases (see the body).
/// </returns>
public static BlockLine Create(Pullenti.Ner.Token t, Pullenti.Ner.Core.TerminCollection names)
{
    if (t == null)
    {
        return null;
    }

    // The line runs from t up to the last token before the next line start.
    BlockLine res = new BlockLine(t, t);
    for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
    {
        if (tt != t && tt.IsNewlineBefore)
        {
            break;
        }
        res.EndToken = tt;
    }

    // Leading numbering: a sequence of number tokens (or roman numerals), each followed by
    // a '.' or by a text token that is not all-lowercase.
    while (t != null && t.Next != null && t.EndChar <= res.EndChar)
    {
        if (!(t is Pullenti.Ner.NumberToken))
        {
            Pullenti.Ner.NumberToken rom = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t);
            if (rom == null || rom.EndToken.Next == null)
            {
                break;
            }
            t = rom.EndToken;
        }
        bool numberIsFollowedProperly = t.Next.IsChar('.')
            || ((t.Next is Pullenti.Ner.TextToken) && !t.Next.Chars.IsAllLower);
        if (!numberIsFollowedProperly)
        {
            break;
        }
        res.NumberEnd = t;
        t = t.Next;
        if (t.IsChar('.') && t.Next != null)
        {
            res.NumberEnd = t;
            t = t.Next;
        }
        if (t.IsNewlineBefore)
        {
            return res;
        }
    }

    // Try to recognise a typed keyword directly, or at the noun of a multi-token noun phrase.
    Pullenti.Ner.Core.TerminToken tok = m_Ontology.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
    if (tok == null)
    {
        Pullenti.Ner.Core.NounPhraseToken headPhrase = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (headPhrase != null && headPhrase.EndToken != headPhrase.BeginToken)
        {
            tok = m_Ontology.TryParse(headPhrase.Noun.BeginToken, Pullenti.Ner.Core.TerminParseAttr.No);
        }
    }
    // A keyword right after ':' is not accepted.
    if (tok != null && t.Previous != null && t.Previous.IsChar(':'))
    {
        tok = null;
    }

    if (tok != null)
    {
        BlkTyps typ = (BlkTyps)tok.Termin.Tag;

        if (typ == BlkTyps.Conslusion && !t.IsNewlineAfter)
        {
            // Not alone on the line: keep it only when followed by a preposition and a Chapter keyword.
            bool keepConclusion = false;
            if (t.Next != null && t.Next.Morph.Class.IsPreposition && t.Next.Next != null)
            {
                Pullenti.Ner.Core.TerminToken afterPrep = m_Ontology.TryParse(t.Next.Next, Pullenti.Ner.Core.TerminParseAttr.No);
                keepConclusion = afterPrep != null && ((BlkTyps)afterPrep.Termin.Tag) == BlkTyps.Chapter;
            }
            if (!keepConclusion)
            {
                tok = null;
            }
        }

        if (t.Kit.BaseLanguage != t.Morph.Language)
        {
            tok = null;
        }

        if (typ == BlkTyps.Index && !t.IsValue("ОГЛАВЛЕНИЕ", null))
        {
            if (!t.IsNewlineAfter && t.Next != null)
            {
                Pullenti.Ner.Core.NounPhraseToken afterIndex = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (afterIndex == null || (afterIndex.IsNewlineAfter && afterIndex.Morph.Case.IsGenitive))
                {
                    tok = null;
                }
            }
        }

        if ((typ == BlkTyps.Intro && tok != null && !tok.IsNewlineAfter) && t.IsValue("ВВЕДЕНИЕ", null))
        {
            Pullenti.Ner.Core.NounPhraseToken afterIntro = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t.Next, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (afterIntro != null && afterIntro.Morph.Case.IsGenitive)
            {
                tok = null;
            }
        }

        if (tok != null)
        {
            if (res.NumberEnd == null)
            {
                res.NumberEnd = tok.EndToken;
                if (res.NumberEnd.EndChar > res.EndChar)
                {
                    res.EndToken = res.NumberEnd;
                }
            }
            res.Typ = typ;
            t = tok.EndToken;
            if (t.Next != null && t.Next.IsCharOf(":."))
            {
                t = t.Next;
                res.EndToken = t;
            }
            if (t.IsNewlineAfter || t.Next == null)
            {
                return res;
            }
            t = t.Next;
        }
    }

    if (t.IsChar('§') && (t.Next is Pullenti.Ner.NumberToken))
    {
        res.Typ = BlkTyps.Chapter;
        res.NumberEnd = t;
        t = t.Next;
    }

    // A known name that ends the line settles the result immediately.
    if (names != null)
    {
        Pullenti.Ner.Core.TerminToken known = names.TryParse(t, Pullenti.Ner.Core.TerminParseAttr.No);
        if (known != null && known.EndToken.IsNewlineAfter)
        {
            res.EndToken = known.EndToken;
            res.IsExistName = true;
            if (res.Typ == BlkTyps.Undefined)
            {
                BlockLine li2 = Create((res.NumberEnd == null ? null : res.NumberEnd.Next), null);
                if (li2 != null && (li2.Typ == BlkTyps.Literature || li2.Typ == BlkTyps.Intro || li2.Typ == BlkTyps.Conslusion))
                {
                    res.Typ = li2.Typ;
                }
                else
                {
                    res.Typ = BlkTyps.Chapter;
                }
            }
            return res;
        }
    }

    // Detect a "....<number>" style tail and step t1 back over the run of dots.
    Pullenti.Ner.Token t1 = res.EndToken;
    if (((t1 is Pullenti.Ner.NumberToken) || t1.IsChar('.')) && t1.Previous != null)
    {
        t1 = t1.Previous;
        if (t1.IsChar('.'))
        {
            res.HasContentItemTail = true;
            for (; t1 != null && t1.BeginChar > res.BeginChar; t1 = t1.Previous)
            {
                if (!t1.IsChar('.'))
                {
                    break;
                }
            }
        }
    }

    // Word statistics over the tokens from t up to t1.
    res.IsAllUpper = true;
    for (; t != null && t.EndChar <= t1.EndChar; t = t.Next)
    {
        Pullenti.Ner.TextToken word = t as Pullenti.Ner.TextToken;
        if (word == null || !t.Chars.IsLetter)
        {
            res.NotWords++;
            continue;
        }
        Pullenti.Morph.MorphClass mc = t.GetMorphClassInDictionary();
        if (mc.IsUndefined)
        {
            res.NotWords++;
        }
        else if (t.LengthChar > 2)
        {
            res.Words++;
        }
        if (!t.Chars.IsAllUpper)
        {
            res.IsAllUpper = false;
        }
        // Ordinal comparison: the "ING" suffix test must not depend on the current culture.
        if (word.IsPureVerb && !word.Term.EndsWith("ING", System.StringComparison.Ordinal))
        {
            res.HasVerb = true;
        }
    }

    if (res.Typ != BlkTyps.Undefined)
    {
        return res;
    }

    // No keyword matched: classify by the leading noun phrase of the title text.
    Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse((res.NumberEnd == null ? res.BeginToken : res.NumberEnd.Next), Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
    if (npt == null)
    {
        return res;
    }

    if (npt.Noun.IsValue("ХАРАКТЕРИСТИКА", null) || npt.Noun.IsValue("СОДЕРЖАНИЕ", "ЗМІСТ"))
    {
        // Intro: every following noun phrase on the line must be in the genitive case.
        bool isIntro = true;
        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
        {
            if (tt.IsChar('.'))
            {
                continue;
            }
            Pullenti.Ner.Core.NounPhraseToken part = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            if (part == null || !part.Morph.Case.IsGenitive)
            {
                isIntro = false;
                break;
            }
            tt = part.EndToken;
            _extendLineToToken(res, tt);
        }
        if (isIntro)
        {
            res.Typ = BlkTyps.Intro;
            res.IsExistName = true;
        }
    }
    else if (npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК") || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
    {
        // Conclusion: the rest of the line may only contain the listed nouns, ',', '.' and "and".
        bool isConclusion = true;
        for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
        {
            if (tt.IsCharOf(",.") || tt.IsAnd)
            {
                continue;
            }
            Pullenti.Ner.Core.NounPhraseToken part = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
            bool accepted = part != null
                && (part.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")
                    || part.Noun.IsValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ")
                    || part.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"));
            if (!accepted)
            {
                isConclusion = false;
                break;
            }
            tt = part.EndToken;
            _extendLineToToken(res, tt);
        }
        if (isConclusion)
        {
            res.Typ = BlkTyps.Conslusion;
            res.IsExistName = true;
        }
    }

    if (res.Typ == BlkTyps.Undefined && npt.EndChar <= res.EndChar)
    {
        // Literature (or Misc): a publication-like head, or one of the listed head nouns.
        bool isLiterature = false;
        int publ = 0;
        if (_isPub(npt))
        {
            isLiterature = true;
            publ = 1;
        }
        else if (npt.Noun.IsValue("СПИСОК", null)
            || npt.Noun.IsValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК")
            || npt.Noun.IsValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")
            || npt.Noun.IsValue("ВЫВОД", "ВИСНОВОК")
            || npt.Noun.IsValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ"))
        {
            // The head noun phrase alone on the line: no BlockLine is produced at all.
            if (npt.EndChar == res.EndChar)
            {
                return null;
            }
            isLiterature = true;
        }

        if (isLiterature)
        {
            if (npt.BeginToken == npt.EndToken && npt.Noun.IsValue("СПИСОК", null) && npt.EndChar == res.EndChar)
            {
                isLiterature = false;
            }
            for (Pullenti.Ner.Token tt = npt.EndToken.Next; tt != null && tt.EndChar <= res.EndChar; tt = tt.Next)
            {
                if (tt.IsCharOf(",.:") || tt.IsAnd || tt.Morph.Class.IsPreposition)
                {
                    continue;
                }
                if (tt.IsValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ"))
                {
                    continue;
                }
                npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(tt, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                if (npt == null)
                {
                    isLiterature = false;
                    break;
                }
                bool accepted = _isPub(npt)
                    || npt.Noun.IsValue("РАБОТА", "РОБОТА")
                    || npt.Noun.IsValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")
                    || npt.Noun.IsValue("АВТОР", null)
                    || npt.Noun.IsValue("ТРУД", "ПРАЦЯ")
                    || npt.Noun.IsValue("ТЕМА", null)
                    || npt.Noun.IsValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ");
                if (!accepted)
                {
                    isLiterature = false;
                    break;
                }
                tt = npt.EndToken;
                if (_isPub(npt))
                {
                    publ++;
                }
                _extendLineToToken(res, tt);
            }
            if (isLiterature)
            {
                res.Typ = BlkTyps.Literature;
                res.IsExistName = true;
                // Without any publication-like phrase, a title ending within the first two
                // thirds of the text is downgraded: Misc when numbered, Undefined otherwise.
                if (publ == 0 && (res.EndChar < ((res.Kit.Sofa.Text.Length * 2) / 3)))
                {
                    res.Typ = (res.NumberEnd != null ? BlkTyps.Misc : BlkTyps.Undefined);
                }
            }
        }
    }
    return res;
}

/// <summary>
/// When <paramref name="last"/> ends beyond the current end of <paramref name="line"/>, moves
/// the line end to it and then on to the last token before the next line break.
/// </summary>
private static void _extendLineToToken(BlockLine line, Pullenti.Ner.Token last)
{
    if (last.EndChar <= line.EndChar)
    {
        return;
    }
    line.EndToken = last;
    if (last.IsNewlineAfter)
    {
        return;
    }
    for (; line.EndToken.Next != null; line.EndToken = line.EndToken.Next)
    {
        if (line.EndToken.IsNewlineAfter)
        {
            break;
        }
    }
}
/// <summary>
/// Tries to recognise a block title that starts at <paramref name="t"/>.
/// </summary>
/// <param name="t">Candidate first token; it must start a line and must not be all-lowercase.</param>
/// <param name="isContentItem">
/// When true, the all-uppercase and word/non-word ratio requirements are relaxed, and a line
/// with a content-item tail is typed as <c>BlkTyps.IndexItem</c>.
/// </param>
/// <param name="names">Optional collection of known names, forwarded to <c>BlockLine.Create</c>.</param>
/// <returns>The recognised title token, or null when the line is not accepted as a title.</returns>
public static BlockTitleToken TryAttach(Pullenti.Ner.Token t, bool isContentItem = false, Pullenti.Ner.Core.TerminCollection names = null)
{
    if (t == null)
    {
        return null;
    }
    if (!t.IsNewlineBefore)
    {
        return null;
    }
    if (t.Chars.IsAllLower)
    {
        return null;
    }
    BlockLine li = BlockLine.Create(t, names);
    if (li == null)
    {
        return null;
    }
    if (li.Words == 0 && li.Typ == BlkTyps.Undefined)
    {
        return null;
    }

    // The line was flagged as an existing name: accept it as is.
    if (li.IsExistName)
    {
        return new BlockTitleToken(t, li.EndToken) { Typ = li.Typ };
    }

    // The line consists only of the number/keyword part (optionally followed by '.' or ':').
    if (li.EndToken == li.NumberEnd || (li.EndToken.IsCharOf(".:") && li.EndToken.Previous == li.NumberEnd))
    {
        BlockTitleToken res2 = new BlockTitleToken(t, li.EndToken) { Typ = li.Typ };
        if (li.Typ == BlkTyps.Chapter || li.Typ == BlkTyps.Appendix)
        {
            // Append the following untyped, all-uppercase lines that contain words.
            BlockLine li2 = BlockLine.Create(li.EndToken.Next, names);
            if ((li2 != null && li2.Typ == BlkTyps.Undefined && li2.IsAllUpper) && li2.Words > 0)
            {
                res2.EndToken = li2.EndToken;
                for (Pullenti.Ner.Token tt = res2.EndToken.Next; tt != null; tt = tt.Next)
                {
                    li2 = BlockLine.Create(tt, names);
                    if (li2 == null)
                    {
                        break;
                    }
                    if (li2.Typ != BlkTyps.Undefined || !li2.IsAllUpper || li2.Words == 0)
                    {
                        break;
                    }
                    res2.EndToken = li2.EndToken;
                    tt = li2.EndToken;
                }
            }
        }
        return res2;
    }

    if (li.NumberEnd == null)
    {
        return null;
    }

    BlockTitleToken res = new BlockTitleToken(t, li.EndToken) { Typ = li.Typ };
    if (res.Typ == BlkTyps.Undefined)
    {
        if (li.Words < 1)
        {
            return null;
        }
        if (li.HasVerb)
        {
            return null;
        }
        if (!isContentItem)
        {
            if (!li.IsAllUpper || li.NotWords > (li.Words / 2))
            {
                return null;
            }
        }
        res.Typ = BlkTyps.Chapter;
        // NOTE(review): a 7-char-wide number part followed by a hyphen is left Undefined;
        // the reason is not visible here - confirm against the original intent.
        if ((li.NumberEnd.EndChar - t.BeginChar) == 7 && li.NumberEnd.Next != null && li.NumberEnd.Next.IsHiphen)
        {
            res.Typ = BlkTyps.Undefined;
        }
    }
    if (li.HasContentItemTail && isContentItem)
    {
        res.Typ = BlkTyps.IndexItem;
    }

    if (res.Typ == BlkTyps.Chapter || res.Typ == BlkTyps.Appendix)
    {
        if (li.HasVerb)
        {
            return null;
        }
        if (li.NotWords > li.Words && !isContentItem)
        {
            return null;
        }
        // Append continuation lines: untyped, unnumbered, verb-free lines with words
        // (which must also be all-uppercase unless this is a content item).
        for (Pullenti.Ner.Token cur = li.EndToken.Next; cur != null; cur = cur.Next)
        {
            BlockLine li2 = BlockLine.Create(cur, names);
            if (li2 == null)
            {
                break;
            }
            if (li2.HasVerb || (li2.Words < 1))
            {
                break;
            }
            if (!li2.IsAllUpper && !isContentItem)
            {
                break;
            }
            if (li2.Typ != BlkTyps.Undefined || li2.NumberEnd != null)
            {
                break;
            }
            res.EndToken = li2.EndToken;
            cur = li2.EndToken;
            if (isContentItem && li2.HasContentItemTail)
            {
                res.Typ = BlkTyps.IndexItem;
                break;
            }
        }
    }

    // The value is the text after the number part up to the last letter token of the title.
    for (Pullenti.Ner.Token tt = res.EndToken; tt != null && tt.BeginChar > li.NumberEnd.EndChar; tt = tt.Previous)
    {
        if ((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter)
        {
            res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(li.NumberEnd.Next, tt, Pullenti.Ner.Core.GetTextAttr.No);
            break;
        }
    }

    if (res.Typ == BlkTyps.Index || res.Typ == BlkTyps.Intro || res.Typ == BlkTyps.Conslusion || res.Typ == BlkTyps.Literature)
    {
        if (res.Value != null && res.Value.Length > 100)
        {
            return null;
        }
        if (li.Words < li.NotWords)
        {
            return null;
        }
    }
    return res;
}
} // closes the enclosing type, whose declaration starts before this view