/// <summary>
/// Tells whether two tokens may denote the same numbered element.
/// </summary>
/// <param name="i1">First token to compare.</param>
/// <param name="i2">Second token to compare.</param>
/// <returns>
/// <c>true</c> when both tokens have the same <c>Typ</c>, their number lists match
/// element by element (checked only when both lists are non-empty) and
/// <c>MiscHelper.CanBeEqualsEx</c> accepts their values with non-letters and
/// letter case ignored; otherwise <c>false</c>.
/// </returns>
static bool _canBeEquals(InstrToken1 i1, InstrToken1 i2)
{
    if (i1.Typ != i2.Typ)
    {
        return false;
    }

    int count1 = i1.Numbers.Count;
    int count2 = i2.Numbers.Count;

    // Numbers take part in the comparison only when both sides have some;
    // a numbered token may still match an unnumbered one by its text.
    if (count1 > 0 && count2 > 0)
    {
        if (count1 != count2)
        {
            return false;
        }
        for (int k = 0; k < count1; k++)
        {
            if (i1.Numbers[k] != i2.Numbers[k])
            {
                return false;
            }
        }
    }

    Pullenti.Ner.Core.CanBeEqualsAttr flags =
        Pullenti.Ner.Core.CanBeEqualsAttr.IgnoreNonletters | Pullenti.Ner.Core.CanBeEqualsAttr.IgnoreUppercase;
    return Pullenti.Ner.Core.MiscHelper.CanBeEqualsEx(i1.Value, i2.Value, flags);
}
/// <summary>
/// Parses the numbering that starts at <paramref name="t"/> into <paramref name="res"/>
/// and then post-processes the result.
/// </summary>
/// <remarks>
/// Two corrections follow the raw parse done by <c>_parseNumber</c>:
/// <list type="number">
/// <item>If the number is followed on the same line by a hyphen and a second number of the
/// same depth whose leading components are equal and whose last component is larger, the
/// two are merged: the first last component goes to <c>MinNumber</c>, the second replaces it.</item>
/// <item>For tokens of type <c>Line</c> the number is discarded when it is followed by a
/// hyphen, or when it is glued (no whitespace) to something that is neither a
/// capitalised / all-upper token nor a <c>ReferentToken</c>.</item>
/// </list>
/// </remarks>
/// <param name="t">Token where the number is expected to start.</param>
/// <param name="res">Token object that receives the parsed numbering.</param>
/// <param name="prev">Previously parsed token, passed through to <c>_parseNumber</c>.</param>
public static void ParseNumber(Pullenti.Ner.Token t, InstrToken1 res, InstrToken1 prev)
{
    _parseNumber(t, res, prev);

    bool hyphenAfterNumber = res.Numbers.Count > 0
        && res.NumEndToken != null
        && !res.IsNewlineAfter
        && res.NumEndToken.Next != null
        && res.NumEndToken.Next.IsHiphen;
    if (hyphenAfterNumber)
    {
        // Try to read a second number right after the hyphen.
        Pullenti.Ner.Token afterHyphen = res.NumEndToken.Next.Next;
        InstrToken1 upper = new InstrToken1(afterHyphen, afterHyphen);
        _parseNumber(upper.BeginToken, upper, res);

        if (upper.Numbers.Count == res.Numbers.Count)
        {
            int last = res.Numbers.Count - 1;

            // Count how many leading components (all but the last) coincide.
            int matched = 0;
            while (matched < last && res.Numbers[matched] == upper.Numbers[matched])
            {
                matched++;
            }

            if (matched >= last && res.LastNumber < upper.LastNumber && upper.NumEndToken != null)
            {
                res.MinNumber = res.Numbers[last];
                res.Numbers[last] = upper.Numbers[last];
                res.NumSuffix = upper.NumSuffix;

                Pullenti.Ner.Token rangeEnd = upper.NumEndToken;
                res.NumEndToken = rangeEnd;
                res.EndToken = rangeEnd;
            }
        }
    }

    bool plainLineWithNumber = res.Numbers.Count > 0
        && res.NumEndToken != null
        && res.Typ == InstrToken1.Types.Line;
    if (!plainLineWithNumber)
    {
        return;
    }

    Pullenti.Ner.Token numEnd = res.NumEndToken;
    Pullenti.Ner.Token following = numEnd.Next;

    bool keepNumber;
    if (following != null && following.IsHiphen)
    {
        keepNumber = false;
    }
    else if (numEnd.IsWhitespaceAfter)
    {
        keepNumber = true;
    }
    else
    {
        // Glued to the next token: accept only a capitalised / upper-case token or a referent.
        keepNumber = following != null
            && (following.Chars.IsCapitalUpper
                || following.Chars.IsAllUpper
                || (following is Pullenti.Ner.ReferentToken));
    }

    if (!keepNumber)
    {
        res.Numbers.Clear();
        res.NumBeginToken = null;
        res.NumEndToken = null;
    }
}
/// <summary>
/// Adds a <c>Number</c> child fragment to <paramref name="owner"/> for the numbering found in
/// <paramref name="itok"/> and copies the numeric components into the owner.
/// </summary>
/// <remarks>
/// Does nothing when <paramref name="itok"/> has no number begin or end token.
/// For two-, three- and four-level numbers (the latter only when exactly four components
/// are present) the components are spread over <c>Number</c>, <c>SubNumber</c>,
/// <c>SubNumber2</c> and <c>SubNumber3</c>; in every other case only <c>Number</c> is set,
/// from the last component.
/// </remarks>
/// <param name="owner">Fragment that receives the child and the number fields.</param>
/// <param name="itok">Parsed line token holding the numbering.</param>
public static void CreateNumber(FragToken owner, InstrToken1 itok)
{
    if (itok.NumBeginToken == null || itok.NumEndToken == null)
    {
        return;
    }

    FragToken numberFrag = new FragToken(itok.NumBeginToken, itok.NumEndToken);
    numberFrag.Kind = Pullenti.Ner.Instrument.InstrumentKind.Number;
    numberFrag.DefVal = true;
    numberFrag.Itok = itok;
    owner.Children.Add(numberFrag);

    NumberTypes numKind = itok.NumTyp;
    bool twoLevel = numKind == NumberTypes.TwoDigits;
    bool threeLevel = numKind == NumberTypes.ThreeDigits;
    bool fourLevel = numKind == NumberTypes.FourDigits && itok.Numbers.Count == 4;

    if (!(twoLevel || threeLevel || fourLevel))
    {
        // Single-level (or irregular) numbering: only the last component matters.
        owner.Number = itok.LastNumber;
    }
    else
    {
        owner.Number = itok.FirstNumber;
        if (twoLevel)
        {
            owner.SubNumber = itok.LastNumber;
        }
        else if (threeLevel)
        {
            owner.SubNumber = itok.MiddleNumber;
            owner.SubNumber2 = itok.LastNumber;
        }
        else
        {
            owner.SubNumber = Pullenti.Ner.Decree.Internal.PartToken.GetNumber(itok.Numbers[1]);
            owner.SubNumber2 = Pullenti.Ner.Decree.Internal.PartToken.GetNumber(itok.Numbers[2]);
            owner.SubNumber3 = itok.LastNumber;
        }
    }

    owner.MinNumber = itok.LastMinNumber;
    owner.Itok = itok;
}
/// <summary>
/// Detects a table of contents at the start of <paramref name="lines"/> and collapses it
/// into a single token of type <c>Index</c> (flagged <c>IndexNoKeyword</c>).
/// </summary>
/// <remarks>
/// Applies only when there are at least ten lines and the first one is a <c>Clause</c> or
/// <c>Chapter</c>. Lines of that same type are counted up to the first one that
/// <c>_canBeEquals</c> the first line; that line is taken as the start of the real content.
/// The leading part is replaced when it has as many such headings as the remaining part,
/// more than two of them, and its other lines hold less than a tenth of the text of the
/// other lines in the remaining part.
/// </remarks>
/// <param name="lines">Parsed lines; modified in place when an index is detected.</param>
public static void CorrectIndex(List<InstrToken1> lines)
{
    if (lines.Count < 10)
    {
        return;
    }

    InstrToken1 head = lines[0];
    bool headIsHeading = head.Typ == InstrToken1.Types.Clause || head.Typ == InstrToken1.Types.Chapter;
    if (!headIsHeading)
    {
        return;
    }

    // Leading part: headings up to the first repetition of the very first heading.
    int indexEntries = 1;
    int indexTextLength = 0;
    int pos = 1;
    while (pos < lines.Count)
    {
        InstrToken1 current = lines[pos];
        if (current.Typ != head.Typ)
        {
            indexTextLength += current.LengthChar;
        }
        else if (_canBeEquals(current, head))
        {
            break;
        }
        else
        {
            indexEntries++;
        }
        pos++;
    }
    int contentStart = pos;

    // Remaining part: the same statistics for everything from the repetition onwards.
    int contentEntries = 0;
    int contentTextLength = 0;
    for (; pos < lines.Count; pos++)
    {
        InstrToken1 current = lines[pos];
        if (current.Typ == head.Typ)
        {
            contentEntries++;
        }
        else
        {
            contentTextLength += current.LengthChar;
        }
    }

    if (indexEntries != contentEntries || indexEntries <= 2)
    {
        return;
    }
    if ((indexTextLength * 10) >= contentTextLength)
    {
        return;
    }

    InstrToken1 indexToken = new InstrToken1(head.BeginToken, lines[contentStart - 1].EndToken);
    indexToken.IndexNoKeyword = true;
    indexToken.Typ = InstrToken1.Types.Index;
    lines[0] = indexToken;
    lines.RemoveRange(1, contentStart - 1);
}
/// <summary>
/// Analyses the fragment <c>chi[ind]</c> for an embedded list (or a multi-line citation of
/// changes) and appends the resulting child fragments to it.
/// </summary>
/// <remarks>
/// Outline of the processing, in source order:
/// 1. Rejects an out-of-range index and any fragment whose kind is not Chapter, Clause,
///    Content, Item, Subitem, ClausePart or Indention.
/// 2. If the fragment has changes with a multi-line value, a Citation child is built over that
///    value (widened to surrounding brackets and a trailing ';' or '.'), its structure kind is
///    derived from the attached DecreeChangeReferent, the cited text is analysed as a
///    sub-document and the method returns 1.
/// 3. Otherwise the fragment is split into lines with LineToken.ParseList; for a Content
///    fragment the lines of following Content siblings are appended while they continue the list.
/// 4. Several passes adjust the IsListItem flags, merge adjacent non-list lines, fold unnumbered
///    items under the preceding numbered item and reset inconsistent numbers.
/// 5. One child FragToken per remaining line is added (ListItem, Content or ListHead).
/// </remarks>
/// <param name="chi">Sibling fragments.</param>
/// <param name="ind">Index in <paramref name="chi"/> of the fragment to analyse.</param>
/// <returns>
/// -1 when nothing was produced; 1 for the citation case; otherwise the number of consecutive
/// entries of <paramref name="chi"/>, starting at <paramref name="ind"/>, whose lines were consumed.
/// </returns>
static int _analizeListItems(List <FragToken> chi, int ind) { if (ind >= chi.Count) { return(-1); } FragToken res = chi[ind]; Pullenti.Ner.Instrument.InstrumentKind ki = res.Kind; if (((ki == Pullenti.Ner.Instrument.InstrumentKind.Chapter || ki == Pullenti.Ner.Instrument.InstrumentKind.Clause || ki == Pullenti.Ner.Instrument.InstrumentKind.Content) || ki == Pullenti.Ner.Instrument.InstrumentKind.Item || ki == Pullenti.Ner.Instrument.InstrumentKind.Subitem) || ki == Pullenti.Ner.Instrument.InstrumentKind.ClausePart || ki == Pullenti.Ner.Instrument.InstrumentKind.Indention) { } else { return(-1); } if (res.HasChanges && res.MultilineChangesValue != null) { Pullenti.Ner.MetaToken ci = res.MultilineChangesValue; FragToken cit = new FragToken(ci.BeginToken, ci.EndToken) { Kind = Pullenti.Ner.Instrument.InstrumentKind.Citation }; res.Children.Add(cit); if (Pullenti.Ner.Core.BracketHelper.IsBracket(cit.BeginToken.Previous, true)) { cit.BeginToken = cit.BeginToken.Previous; } if (Pullenti.Ner.Core.BracketHelper.IsBracket(cit.EndToken.Next, true)) { cit.EndToken = cit.EndToken.Next; if (cit.EndToken.Next != null && cit.EndToken.Next.IsCharOf(";.")) { cit.EndToken = cit.EndToken.Next; } } res.FillByContentChildren(); if (res.Children[0].HasChanges) { } Pullenti.Ner.Instrument.InstrumentKind citKind = Pullenti.Ner.Instrument.InstrumentKind.Undefined; if (ci.Tag is Pullenti.Ner.Decree.DecreeChangeReferent) { Pullenti.Ner.Decree.DecreeChangeReferent dcr = ci.Tag as Pullenti.Ner.Decree.DecreeChangeReferent; if (dcr.Value != null && dcr.Value.NewItems.Count > 0) { string mnem = dcr.Value.NewItems[0]; int i; if ((((i = mnem.IndexOf(' ')))) > 0) { mnem = mnem.Substring(0, i); } citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(mnem)); } else if (dcr.Owners.Count > 0 && (dcr.Owners[0] is Pullenti.Ner.Decree.DecreePartReferent) && dcr.Kind == Pullenti.Ner.Decree.DecreeChangeKind.New) { 
// Citation case, continued: for a "New" change whose first owner is a part referent, the slot type
// with the largest non-zero rank determines citKind. The cited range is then analysed
// (ContentAnalyzeWhapper for a known non-Appendix kind, FragToken.CreateDocument otherwise) and the
// result is attached to the citation; the method returns 1.
// After that begins the ordinary path: make sure res.Itok exists, parse the fragment into lines and,
// for a Content fragment, start scanning the following siblings.
Pullenti.Ner.Decree.DecreePartReferent pat = dcr.Owners[0] as Pullenti.Ner.Decree.DecreePartReferent; int min = 0; foreach (Pullenti.Ner.Slot s in pat.Slots) { Pullenti.Ner.Decree.Internal.PartToken.ItemType ty = Pullenti.Ner.Decree.Internal.PartToken._getTypeByAttrName(s.TypeName); if (ty == Pullenti.Ner.Decree.Internal.PartToken.ItemType.Undefined) { continue; } int l = Pullenti.Ner.Decree.Internal.PartToken._getRank(ty); if (l == 0) { continue; } if (l > min || min == 0) { min = l; citKind = Pullenti.Ner.Decree.Internal.PartToken._getInstrKindByTyp(ty); } } } } FragToken sub = null; if (citKind != Pullenti.Ner.Instrument.InstrumentKind.Undefined && citKind != Pullenti.Ner.Instrument.InstrumentKind.Appendix) { sub = new FragToken(ci.BeginToken, ci.EndToken); ContentAnalyzeWhapper wr = new ContentAnalyzeWhapper(); wr.Analyze(sub, null, true, citKind); sub.Kind = Pullenti.Ner.Instrument.InstrumentKind.Content; } else { sub = FragToken.CreateDocument(ci.BeginToken, ci.EndChar, citKind); } if (sub == null || sub.Children.Count == 0) { } else if ((sub.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && sub.Children.Count > 0 && sub.Children[0].BeginToken == sub.BeginToken) && sub.Children[sub.Children.Count - 1].EndToken == sub.EndToken) { cit.Children.AddRange(sub.Children); } else { cit.Children.Add(sub); } return(1); } int endChar = res.EndChar; if (res.Itok == null) { res.Itok = InstrToken1.Parse(res.BeginToken, true, null, 0, null, false, res.EndChar, false, false); } List <LineToken> lines = LineToken.ParseList(res.BeginToken, endChar, null); if (lines == null || (lines.Count < 1)) { return(-1); } int ret = 1; if (res.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content) { for (int j = ind + 1; j < chi.Count; j++) { if (chi[j].Kind == Pullenti.Ner.Instrument.InstrumentKind.Content) { List <LineToken> lines2 = LineToken.ParseList(chi[j].BeginToken, chi[j].EndChar, lines[lines.Count - 1]); if (lines2 == null || (lines2.Count < 1)) { break; } if 
// (The `if` above continues here.) A sibling whose first line is not a list item is accepted only
// when its second line is one and the first ends with ':' and does not start with a capital;
// siblings of kind Editions or Comment are skipped, any other kind stops the scan.
// Then the flag-correcting passes follow:
//  - fewer than two lines -> -1;
//  - two leading items not starting at number 1 and not followed by a third item are unmarked;
//  - a run of items is kept only if some item after the first starts on a new line or the line
//    before the run ends with ':' (a lone item is always unmarked);
//  - a final '.'-terminated line after a ';'-terminated item is marked as an item when it is
//    shorter than twice that item or starts with a lower-case token;
//  - adjacent non-item lines are merged, except a ':'-terminated line directly followed by an item;
//  - then starts the pass looking for an item numbered 1 that heads a numbered list.
(!lines2[0].IsListItem) { if ((lines2.Count > 1 && lines2[1].IsListItem && lines2[0].EndToken.IsCharOf(":")) && !lines2[0].BeginToken.Chars.IsCapitalUpper) { lines2[0].IsListItem = true; } else { break; } } lines.AddRange(lines2); ret = (j - ind) + 1; } else if (chi[j].Kind != Pullenti.Ner.Instrument.InstrumentKind.Editions && chi[j].Kind != Pullenti.Ner.Instrument.InstrumentKind.Comment) { break; } } } if (lines.Count < 2) { return(-1); } if ((lines.Count > 1 && lines[0].IsListItem && lines[1].IsListItem) && lines[0].Number != 1) { if (lines.Count == 2 || !lines[2].IsListItem) { lines[0].IsListItem = (lines[1].IsListItem = false); } } for (int i = 0; i < lines.Count; i++) { if (lines[i].IsListItem) { if (i > 0 && lines[i - 1].IsListItem) { continue; } if (((i + 1) < lines.Count) && lines[i + 1].IsListItem) { } else { lines[i].IsListItem = false; continue; } int j; bool newLine = false; for (j = i + 1; j < lines.Count; j++) { if (!lines[j].IsListItem) { break; } else if (lines[j].IsNewlineBefore) { newLine = true; } } if (newLine) { continue; } if (i > 0 && lines[i - 1].EndToken.IsChar(':')) { continue; } for (j = i; j < lines.Count; j++) { if (!lines[j].IsListItem) { break; } else { lines[j].IsListItem = false; } } } } if (lines.Count > 2) { LineToken last = lines[lines.Count - 1]; LineToken last2 = lines[lines.Count - 2]; if ((!last.IsListItem && last.EndToken.IsChar('.') && last2.IsListItem) && last2.EndToken.IsChar(';')) { if ((last.LengthChar < (last2.LengthChar * 2)) || last.BeginToken.Chars.IsAllLower) { last.IsListItem = true; } } } for (int i = 0; i < (lines.Count - 1); i++) { if (!lines[i].IsListItem && !lines[i + 1].IsListItem) { if (((i + 2) < lines.Count) && lines[i + 2].IsListItem && lines[i + 1].EndToken.IsChar(':')) { } else { lines[i].EndToken = lines[i + 1].EndToken; lines.RemoveAt(i + 1); i--; } } } for (int i = 0; i < (lines.Count - 1); i++) { if (lines[i].IsListItem) { if (lines[i].Number == 1) { bool ok = true; int num = 1; int nonum = 0; for 
// (The `for` above continues here.) From an item numbered 1, all following lines must be items
// and the numbered ones must count up by one; at least one unnumbered item and a second number
// are required, otherwise the whole pass is abandoned (`break` leaves the outer loop).
// Unnumbered items are then moved into the Tag list of the preceding numbered item.
// Next: fewer than two items overall -> -1. For each run of items the numbers must be 1, 2, 3, ...
// or they are all reset to 0. If the first item of a run of at least three starts with a different
// token text than the following items, which all share one text (`pref`), and `pref` occurs inside
// the first item (searched backwards), the first line is split at that occurrence.
// Finally one child fragment is created per line.
(int j = i + 1; j < lines.Count; j++) { if (!lines[j].IsListItem) { ok = false; break; } else if (lines[j].Number > 0) { num++; if (lines[j].Number != num) { ok = false; break; } } else { nonum++; } } if (!ok || nonum == 0 || (num < 2)) { break; } LineToken lt = lines[i]; for (int j = i + 1; j < lines.Count; j++) { if (lines[j].Number > 0) { lt = lines[j]; } else { List <LineToken> chli = lt.Tag as List <LineToken>; if (chli == null) { lt.Tag = (chli = new List <LineToken>()); } lt.EndToken = lines[j].EndToken; chli.Add(lines[j]); lines.RemoveAt(j); j--; } } } } } int cou = 0; foreach (LineToken li in lines) { if (li.IsListItem) { cou++; } } if (cou < 2) { return(-1); } for (int i = 0; i < lines.Count; i++) { if (lines[i].IsListItem) { int i0 = i; bool ok = true; cou = 1; for (; i < lines.Count; i++, cou++) { if (!lines[i].IsListItem) { break; } else if (lines[i].Number != cou) { ok = false; } } if (!ok) { for (i = i0; i < lines.Count; i++) { if (!lines[i].IsListItem) { break; } else { lines[i].Number = 0; } } } if (cou > 3 && lines[i0].BeginToken.GetSourceText() != lines[i0 + 1].BeginToken.GetSourceText() && lines[i0 + 1].BeginToken.GetSourceText() == lines[i0 + 2].BeginToken.GetSourceText()) { string pref = lines[i0 + 1].BeginToken.GetSourceText(); ok = true; for (int j = i0 + 2; j < i; j++) { if (pref != lines[j].BeginToken.GetSourceText()) { ok = false; break; } } if (!ok) { continue; } Pullenti.Ner.Token tt = null; ok = false; for (tt = lines[i0].EndToken.Previous; tt != null && tt != lines[i0].BeginToken; tt = tt.Previous) { if (tt.GetSourceText() == pref) { ok = true; break; } } if (ok) { LineToken li0 = new LineToken(lines[i0].BeginToken, tt.Previous); lines[i0].BeginToken = tt; lines.Insert(i0, li0); i++; } } } } foreach (LineToken li in lines) { li.CorrectBeginToken(); FragToken ch = new FragToken(li.BeginToken, li.EndToken) { Kind = (li.IsListItem ? 
// (The object initializer above continues here.) Item lines become ListItem, others Content; a
// Content line ending with ':' is re-labelled ListHead. Lines folded into Tag earlier become nested
// ListItem children, preceded by a Content child for any text before the first nested item.
Pullenti.Ner.Instrument.InstrumentKind.ListItem : Pullenti.Ner.Instrument.InstrumentKind.Content), Number = li.Number }; if (ch.Kind == Pullenti.Ner.Instrument.InstrumentKind.Content && ch.EndToken.IsChar(':')) { ch.Kind = Pullenti.Ner.Instrument.InstrumentKind.ListHead; } res.Children.Add(ch); List <LineToken> chli = li.Tag as List <LineToken>; if (chli != null) { foreach (LineToken lt in chli) { ch.Children.Add(new FragToken(lt.BeginToken, lt.EndToken) { Kind = Pullenti.Ner.Instrument.InstrumentKind.ListItem }); } if (ch.BeginChar < ch.Children[0].BeginChar) { ch.Children.Insert(0, new FragToken(ch.BeginToken, ch.Children[0].BeginToken.Previous) { Kind = Pullenti.Ner.Instrument.InstrumentKind.Content }); } } } return(ret); }
/// <summary>
/// Parses a numbering marker starting at <paramref name="t"/> and stores it in
/// <paramref name="res"/> (Numbers, NumTyp, NumBeginToken, NumEndToken, EndToken, NumSuffix, MinNumber).
/// </summary>
/// <remarks>
/// Four kinds of marker are tried in this order; each branch returns once it has matched:
/// 1. a digit-spelled number below 3000, optionally followed by up to three sub-numbers;
/// 2. a word-spelled number, only when res.TypContainerRank is positive and no number is stored yet;
/// 3. a Roman numeral (NumberHelper.TryParseRoman), optionally with digit sub-numbers;
/// 4. a single letter that is the first token of <paramref name="res"/>, followed by '.' or ')'.
/// When nothing matches, <paramref name="res"/> is left without a number.
/// </remarks>
/// <param name="t">Token at which the marker is expected.</param>
/// <param name="res">Line token being filled in.</param>
/// <param name="prev">Previously parsed line token; read only in the letter branch to check that
/// the letter continues the previous number. May be null.</param>
static void _parseNumber(Pullenti.Ner.Token t, InstrToken1 res, InstrToken1 prev) { if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).IntValue != null && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) && ((t as Pullenti.Ner.NumberToken).IntValue.Value < 3000)) { if (res.Numbers.Count >= 4) { } if (t.Morph.Class.IsAdjective && res.TypContainerRank == 0) { return; } Pullenti.Ner.Core.NumberExToken nwp = Pullenti.Ner.Core.NumberHelper.TryParseNumberWithPostfix(t); if (nwp != null) { if (nwp.EndToken.IsWhitespaceBefore) { } else { return; } } if ((t.Next != null && (t.WhitespacesAfterCount < 3) && t.Next.Chars.IsLetter) && t.Next.Chars.IsAllLower) { if (!t.IsWhitespaceAfter && t.Next.LengthChar == 1) { } else if (res.Numbers.Count == 0) { res.NumTyp = NumberTypes.Digit; res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumBeginToken = (res.NumEndToken = (res.EndToken = t)); return; } else { return; } } if (res.NumTyp == NumberTypes.Undefined) { res.NumTyp = NumberTypes.Digit; } else { res.NumTyp = NumberTypes.Combo; } if (res.Numbers.Count > 0 && t.IsWhitespaceBefore) { return; } if (res.Numbers.Count == 0) { res.NumBeginToken = t; } if ((t.Next != null && t.Next.IsHiphen && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value) { res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString(); t = t.Next.Next; } else if (((t.Next != null && t.Next.IsCharOf(")") && t.Next.Next != null) && t.Next.Next.IsHiphen && (t.Next.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue != null && (t.Next.Next.Next as Pullenti.Ner.NumberToken).IntValue.Value > (t as Pullenti.Ner.NumberToken).IntValue.Value) { res.MinNumber = (t as Pullenti.Ner.NumberToken).Value.ToString(); t = t.Next.Next.Next; } 
// Digit branch, continued. At this point `t` is the number to store (the upper bound when an
// "N-M" or "N)-M" range was recognised above, with N saved in MinNumber).
// The loop then collects sub-numbers while fewer than four are stored: either '.' or '_' directly
// followed by a number, or a number enclosed in "( )" / "< >". A single letter glued to a lone
// number is stored as a second component with type Combo. A trailing ')' or '.' becomes NumSuffix.
// After the digit branch returns, the word-spelled branch begins at the end of this line.
res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.EndToken = (res.NumEndToken = t); res.NumSuffix = null; for (Pullenti.Ner.Token ttt = t.Next; ttt != null && (res.Numbers.Count < 4); ttt = ttt.Next) { bool ok1 = false; bool ok2 = false; if ((ttt.IsCharOf("._") && !ttt.IsWhitespaceAfter && (ttt.Next is Pullenti.Ner.NumberToken)) && (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit || (((ttt.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words) && ttt.Next.Chars.IsLatinLetter && !ttt.IsWhitespaceAfter)))) { ok1 = true; } else if ((ttt.IsCharOf("(<") && (ttt.Next is Pullenti.Ner.NumberToken) && ttt.Next.Next != null) && ttt.Next.Next.IsCharOf(")>")) { ok2 = true; } if (ok1 || ok2) { ttt = ttt.Next; res.Numbers.Add((ttt as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = (res.Numbers.Count == 2 ? NumberTypes.TwoDigits : (res.Numbers.Count == 3 ? NumberTypes.ThreeDigits : NumberTypes.FourDigits)); if ((ttt.Next != null && ttt.Next.IsCharOf(")>") && ttt.Next.Next != null) && ttt.Next.Next.IsChar('.')) { ttt = ttt.Next; } else if (ok2) { ttt = ttt.Next; } t = (res.EndToken = (res.NumEndToken = ttt)); continue; } if (((ttt is Pullenti.Ner.TextToken) && ttt.LengthChar == 1 && ttt.Chars.IsLetter) && !ttt.IsWhitespaceBefore && res.Numbers.Count == 1) { res.Numbers.Add((ttt as Pullenti.Ner.TextToken).Term); res.NumTyp = NumberTypes.Combo; t = (res.EndToken = (res.NumEndToken = ttt)); continue; } break; } if (t.Next != null && t.Next.IsCharOf(").")) { res.NumSuffix = t.Next.GetSourceText(); t = (res.EndToken = (res.NumEndToken = t.Next)); } return; } if (((t is Pullenti.Ner.NumberToken) && (t as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Words && res.TypContainerRank > 0) && res.Numbers.Count == 0) { res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = NumberTypes.Digit; res.NumBeginToken = t; if (t.Next != null && t.Next.IsChar('.')) { t 
// (The assignment above continues here.) The word-spelled branch finishes, then the Roman branch:
// a result of "10" followed by ')' and any result of "100" are discarded
// (NOTE(review): presumably because a lone X or C is more likely a letter marker - confirm).
// For tokens of type Chapter, Section or Line a Roman numeral may be followed by up to two digit
// sub-numbers introduced by '.', '_' or '<'. A trailing ')' or '.' becomes NumSuffix.
// The single-letter branch starts at the end of this line.
= t.Next; res.NumSuffix = "."; } res.EndToken = (res.NumEndToken = t); return; } Pullenti.Ner.NumberToken nt = Pullenti.Ner.Core.NumberHelper.TryParseRoman(t); if ((nt != null && nt.Value == "10" && t.Next != null) && t.Next.IsChar(')')) { nt = null; } if (nt != null && nt.Value == "100") { nt = null; } if (nt != null && nt.Typ == Pullenti.Ner.NumberSpellingType.Roman) { if (res.NumTyp == NumberTypes.Undefined) { res.NumTyp = NumberTypes.Roman; } else { res.NumTyp = NumberTypes.Combo; } if (res.Numbers.Count > 0 && t.IsWhitespaceBefore) { return; } if (res.Numbers.Count == 0) { res.NumBeginToken = t; } res.Numbers.Add(nt.Value.ToString()); t = (res.EndToken = (res.NumEndToken = nt.EndToken)); if (res.NumTyp == NumberTypes.Roman && ((res.Typ == InstrToken1.Types.Chapter || res.Typ == InstrToken1.Types.Section || res.Typ == InstrToken1.Types.Line))) { if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) { t = t.Next.Next; res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = NumberTypes.TwoDigits; if (t.Next != null && t.Next.IsChar('>')) { t = t.Next; } res.EndToken = (res.NumEndToken = t); if ((t.Next != null && t.Next.IsCharOf("._<") && (t.Next.Next is Pullenti.Ner.NumberToken)) && (t.Next.Next as Pullenti.Ner.NumberToken).Typ == Pullenti.Ner.NumberSpellingType.Digit) { t = t.Next.Next; res.Numbers.Add((t as Pullenti.Ner.NumberToken).Value.ToString()); res.NumTyp = NumberTypes.ThreeDigits; if (t.Next != null && t.Next.IsChar('>')) { t = t.Next; } res.EndToken = (res.NumEndToken = t); } } } if (t.Next != null && t.Next.IsCharOf(").")) { res.NumSuffix = t.Next.GetSourceText(); t = (res.EndToken = (res.NumEndToken = t.Next)); } return; } if (((t is Pullenti.Ner.TextToken) && t.LengthChar == 1 && t.Chars.IsLetter) && t == res.BeginToken) { if ((!t.IsWhitespaceAfter && (t.Next is Pullenti.Ner.NumberToken) && 
// (The condition above continues here.) Letter branch, three shapes:
//  - letter glued to a number followed by '.': the number is stored with type Digit and the
//    suffix is the letter plus ".";
//  - letter '.' number ')' without inner whitespace: stored as TwoDigits (letter, number), suffix ")";
//  - letter followed by '.' or ')': stored with type Letter, unless it is followed by '.' and is
//    upper-case or followed by a number, or its numeric value (LastNumber of the temporary token)
//    is above 1, the separator is '.', and it does not directly continue `prev`.
t.Next.Next != null) && t.Next.Next.IsChar('.')) { res.NumBeginToken = t; res.NumTyp = NumberTypes.Digit; res.Numbers.Add((t.Next as Pullenti.Ner.NumberToken).Value.ToString()); res.NumSuffix = (t as Pullenti.Ner.TextToken).Term + "."; t = (res.EndToken = (res.NumEndToken = t.Next.Next)); return; } if (t.Next != null && t.Next.IsCharOf(".)")) { if (((t.Next.IsChar('.') && (t.Next.Next is Pullenti.Ner.NumberToken) && t.Next.Next.Next != null) && t.Next.Next.Next.IsChar(')') && !t.Next.IsWhitespaceAfter) && !t.Next.Next.IsWhitespaceAfter) { res.NumTyp = NumberTypes.TwoDigits; res.Numbers.Add((t as Pullenti.Ner.TextToken).Term); res.Numbers.Add((t.Next.Next as Pullenti.Ner.NumberToken).Value.ToString()); res.NumSuffix = ")"; res.NumBeginToken = t; t = (res.EndToken = (res.NumEndToken = t.Next.Next.Next)); return; } if (t.Next.IsChar('.') && ((t.Chars.IsAllUpper || (t.Next.Next is Pullenti.Ner.NumberToken)))) { } else { InstrToken1 tmp1 = new InstrToken1(t, t.Next); tmp1.Numbers.Add((t as Pullenti.Ner.TextToken).Term); if (tmp1.LastNumber > 1 && t.Next.IsCharOf(".") && ((prev == null || (prev.LastNumber + 1) != tmp1.LastNumber))) { } else { if (res.Numbers.Count == 0) { res.NumBeginToken = t; } res.NumTyp = NumberTypes.Letter; res.Numbers.Add((t as Pullenti.Ner.TextToken).Term); res.NumBeginToken = t; t = (res.EndToken = (res.NumEndToken = t.Next)); res.NumSuffix = t.GetSourceText(); return; } } } } }
/// <summary>
/// Selects from <paramref name="lines"/> the numbered lines of type <c>Line</c> that form one
/// consistent numbering sequence.
/// </summary>
/// <remarks>
/// Outline, in source order:
/// 1. Candidate collection: lines with numbers and type Line are taken; later candidates must agree
///    with the first one in suffix, number depth (or be an accepted sub-number step per CalcDelta)
///    and numbering type. An all-upper standard title line is stored in the Tag of the last
///    candidate when that Tag is still empty.
/// 2. Gap filling: where two neighbours differ by 2, an in-between numbered line that fits is inserted.
/// 3. Iterative pruning (`while (ch)`): entries that break the sequence are removed until a pass
///    makes no change.
/// 4. Plausibility checks on the remainder (markers present, share of covered text, nested restarts).
/// </remarks>
/// <param name="lines">All parsed lines of the analysed range.</param>
/// <param name="checkSpecTexts">When true, the result is rejected if more than half of the
/// candidates have HasManySpecChars set.</param>
/// <param name="canSubNumbers">When false, a candidate with a different number depth is accepted
/// only if the line after it continues both the sequence and the candidate.</param>
/// <returns>The selected sequence, or null when no acceptable sequence was found.</returns>
public static List <InstrToken1> ExtractMainSequence(List <InstrToken1> lines, bool checkSpecTexts, bool canSubNumbers) { List <InstrToken1> res = null; int manySpecCharLines = 0; for (int i = 0; i < lines.Count; i++) { InstrToken1 li = lines[i]; if (li.AllUpper && li.TitleTyp != InstrToken1.StdTitleType.Undefined) { if (res != null && res.Count > 0 && res[res.Count - 1].Tag == null) { res[res.Count - 1].Tag = li; } } if (li.Numbers.Count == 0) { continue; } if (li.LastNumber == 901) { } if (li.NumTyp == NumberTypes.Letter) { } if (li.Typ != InstrToken1.Types.Line) { continue; } if (res == null) { res = new List <InstrToken1>(); if (li.Numbers.Count == 1 && li.Numbers[0] == "1" && li.NumTyp == NumberTypes.Digit) { if ((((i + 1) < lines.Count) && lines[i + 1].Numbers.Count == 1 && lines[i + 1].Numbers[0] == "1") && lines[i + 1].NumTyp == NumberTypes.Digit) { for (int ii = i + 2; ii < lines.Count; ii++) { if (lines[ii].NumTyp == NumberTypes.Roman && lines[ii].Numbers.Count > 0) { if (lines[ii].Numbers[0] == "2") { li.NumTyp = NumberTypes.Roman; } break; } } } } } else { if (res[0].NumSuffix != null) { if (li.NumSuffix != null && li.NumSuffix != res[0].NumSuffix) { continue; } } if (res[0].Numbers.Count != li.Numbers.Count) { if (li.BeginToken.Previous != null && li.BeginToken.Previous.IsChar(':')) { continue; } if (res[0].NumSuffix == null || CalcDelta(res[res.Count - 1], li, true) != 1) { continue; } if (!canSubNumbers) { if (((i + 1) < lines.Count) && CalcDelta(res[res.Count - 1], lines[i + 1], false) == 1 && CalcDelta(li, lines[i + 1], true) == 1) { } else { continue; } } } else { if (res[0].NumTyp == NumberTypes.Roman && li.NumTyp != NumberTypes.Roman) { continue; } if (res[0].NumTyp != NumberTypes.Roman && li.NumTyp == NumberTypes.Roman) { if (li.Numbers.Count == 1 && li.Numbers[0] == "1" && res.Count == 1) { res.Clear(); res.Add(li); continue; } continue; } if (res[0].NumTyp != NumberTypes.Letter && li.NumTyp == NumberTypes.Letter) { continue; } } } 
// End of the collection loop body: the line passed every filter and becomes a candidate.
// After the loop: no candidates -> null; too many "special character" lines -> null (if requested).
// Then the gap-filling pass, followed by the pruning loop. Inside the pruning loop `d` is the step
// from the previous entry to the current one (0 means "does not follow"); neighbours with equal
// suffix and step 1 are accepted as they are, other cases are resolved by looking one entry ahead.
res.Add(li); if (li.HasManySpecChars) { manySpecCharLines++; } } if (res == null) { return(null); } if (checkSpecTexts) { if (manySpecCharLines > (res.Count / 2)) { return(null); } } for (int i = 0; i < (res.Count - 1); i++) { if (CalcDelta(res[i], res[i + 1], false) == 2) { int ii0 = lines.IndexOf(res[i]); int ii1 = lines.IndexOf(res[i + 1], ii0); for (int j = ii0 + 1; j < ii1; j++) { if (lines[j].Numbers.Count > 0) { if (CalcDelta(res[i], lines[j], true) == 1 && NumberingHelper.CalcDelta(lines[j], res[i + 1], true) == 1) { res.Insert(i + 1, lines[j]); break; } } } } } bool ch = true; while (ch) { ch = false; for (int i = 1; i < res.Count; i++) { int d = CalcDelta(res[i - 1], res[i], false); if (res[i - 1].NumSuffix == res[i].NumSuffix) { if (d == 1) { continue; } if (((d > 1 && (d < 20))) || ((d == 0 && res[i - 1].NumTyp == res[i].NumTyp && res[i - 1].Numbers.Count == res[i].Numbers.Count))) { if (CalcDelta(res[i], res[i - 1], false) > 0) { if (res[i - 1].Tag != null && i > 2) { res.RemoveRange(i, res.Count - i); ch = true; i--; continue; } } if ((i + 1) < res.Count) { int dd = CalcDelta(res[i], res[i + 1], false); if (dd == 1) { if (res[i].LastNumber == 1 && res[i].Numbers.Count == res[i - 1].Numbers.Count) { } else { continue; } } else { dd = CalcDelta(res[i - 1], res[i + 1], false); if (dd == 1) { res.RemoveAt(i); i--; ch = true; continue; } } } else if (d > 3) { res.RemoveAt(i); i--; ch = true; continue; } else { continue; } } } int j; for (j = i + 1; j < res.Count; j++) { int dd = CalcDelta(res[j - 1], res[j], false); if (dd != 1 && dd != 2) { break; } if (res[j - 1].NumSuffix != res[j].NumSuffix) { break; } } if ((d == 0 && CalcDelta(res[i - 1], res[i], true) == 1 && res[i - 1].NumSuffix != null) && res[i].NumSuffix == res[i - 1].NumSuffix) { d = 1; } if (d != 1 && j > (i + 1)) { res.RemoveRange(i, j - i); i--; ch = true; continue; } if (d == 1) { if ((i + 1) >= res.Count) { continue; } int dd = CalcDelta(res[i], res[i + 1], false); if (dd == 1 && res[i - 
// (The index expression above continues here.) Rest of the pruning loop: an entry sitting between
// two neighbours that share a suffix has its own suffix aligned with them; an entry that the next
// one skips over (previous -> next is a step of 1) is removed; a trailing entry with step 0, a step
// above 10 or a different suffix is removed.
// After pruning: `hasSuf` counts entries that carry a suffix, a container rank, several number
// components, all-upper text or a Roman number; with none of these, fewer than five entries -> null.
// For two or more entries, when the sequence does not start at the first line, the text it covers
// (`blk`, extended by a recursively extracted following sequence) is compared with the text outside
// it (`tot`). Then, for a sequence starting with a single "1", the code tries to build `res0`.
1].NumSuffix == res[i + 1].NumSuffix) { if (res[i].NumSuffix != res[i - 1].NumSuffix) { res[i].NumSuffix = res[i - 1].NumSuffix; res[i].IsNumDoubt = false; ch = true; } continue; } } if ((i + 1) < res.Count) { int dd = CalcDelta(res[i - 1], res[i + 1], false); if (dd == 1 && res[i - 1].NumSuffix == res[i + 1].NumSuffix) { if (d == 1 && CalcDelta(res[i], res[i + 1], true) == 1) { } else { res.RemoveAt(i); ch = true; continue; } } } else if (d == 0 || d > 10 || res[i - 1].NumSuffix != res[i].NumSuffix) { res.RemoveAt(i); ch = true; continue; } } } int hasSuf = 0; foreach (InstrToken1 r in res) { if ((r.NumSuffix != null || r.TypContainerRank > 0 || r.Numbers.Count > 1) || r.AllUpper || r.NumTyp == NumberTypes.Roman) { hasSuf++; } } if (hasSuf == 0) { if (res.Count < 5) { return(null); } } if (res.Count >= 2) { if (res[0] != lines[0]) { int tot = res[0].BeginToken.BeginChar - lines[0].BeginToken.BeginChar; tot += (lines[lines.Count - 1].EndToken.EndChar - res[res.Count - 1].EndToken.EndChar); int blk = res[res.Count - 1].EndToken.EndChar - res[0].BeginToken.BeginChar; int i = lines.IndexOf(res[res.Count - 1]); if (i > 0) { List <InstrToken1> lines1 = new List <InstrToken1>(lines); lines1.RemoveRange(0, i + 1); List <InstrToken1> res1 = ExtractMainSequence(lines1, checkSpecTexts, canSubNumbers); if (res1 != null && res1.Count > 2) { blk += (res1[res1.Count - 1].EndChar - res1[0].BeginChar); } } if ((blk * 3) < tot) { if ((blk * 5) < tot) { return(null); } foreach (InstrToken1 r in res) { if (!r.AllUpper && !r.HasChanges) { return(null); } } } } if (res[0].LastNumber == 1 && res[0].Numbers.Count == 1) { List <InstrToken1> res0 = new List <InstrToken1>(); res0.Add(res[0]); int i; for (i = 1; i < res.Count; i++) { int j; for (j = i + 1; j < res.Count; j++) { if (res[j].LastNumber == 1 && res[j].Numbers.Count == 1) { break; } } if ((j - i) < 3) { break; } j--; int jj; int errs = 0; for (jj = i + 1; jj < j; jj++) { int d = CalcDelta(res[jj - 1], res[jj], false); if (d == 1) 
// (The `if` above continues here.) `res0` collects entries that each follow the previous `res0`
// entry by a step of 1 and are separated by runs of at least three entries in which consecutive
// steps are 1 (at most one step of 2 is tolerated per run). `res0` is returned only when the scan
// consumed the whole list and found more than one entry.
// NOTE(review): this looks like detection of an outer sequence with nested lists that restart at 1 - confirm.
// Otherwise: more than 500 entries -> null, else the pruned list is returned.
// A single-entry result is accepted only when it is the first line and either counts in `hasSuf`
// or the second line has one more number component and repeats the first line's components.
{ } else if (d > 1 && (d < 3)) { errs++; } else { break; } } if ((jj < j) || errs > 1) { break; } if (j < (res.Count - 1)) { if (CalcDelta(res0[res0.Count - 1], res[j], false) != 1) { break; } res0.Add(res[j]); } i = j; } if (i >= res.Count && res0.Count > 1) { return(res0); } } if (res.Count > 500) { return(null); } return(res); } if (res.Count == 1 && lines[0] == res[0]) { if (hasSuf > 0) { return(res); } if (lines.Count > 1 && lines[1].Numbers.Count == (lines[0].Numbers.Count + 1)) { for (int i = 0; i < lines[0].Numbers.Count; i++) { if (lines[1].Numbers[i] != lines[0].Numbers[i]) { return(null); } } return(res); } } return(null); }
/// <summary>
/// Computes the numbering step from <paramref name="prev"/> to <paramref name="next"/>.
/// </summary>
/// <param name="prev">Token that comes first.</param>
/// <param name="next">Token that comes second. When its <c>LastMinNumber</c> is positive, that
/// value is used instead of <c>LastNumber</c>.</param>
/// <param name="canSubNumbers">Whether a transition between numbers with a different count of
/// components may be accepted.</param>
/// <returns>
/// 0 when <paramref name="next"/> does not follow <paramref name="prev"/>. Otherwise:
/// for equal depth, the positive difference of the last components;
/// for <paramref name="next"/> one level deeper, the last number of <paramref name="next"/>;
/// for <paramref name="next"/> one to three levels shallower, 1 (only an increment by exactly
/// one over the matching component of <paramref name="prev"/> is accepted).
/// </returns>
public static int CalcDelta(InstrToken1 prev, InstrToken1 next, bool canSubNumbers)
{
    int prevNum = prev.LastNumber;
    int nextNum = next.LastNumber;
    if (next.LastMinNumber > 0)
    {
        nextNum = next.LastMinNumber;
    }

    int prevDepth = prev.Numbers.Count;
    int nextDepth = next.Numbers.Count;

    // Same depth: types must agree (or both sit in the same container rank), all leading
    // components must be equal and the last component must grow.
    if (prevDepth == nextDepth)
    {
        bool sameContainer = prev.TypContainerRank > 0 && prev.TypContainerRank == next.TypContainerRank;
        if (!sameContainer && prev.NumTyp != next.NumTyp)
        {
            return 0;
        }
        for (int k = 0; k + 1 < prevDepth; k++)
        {
            if (prev.Numbers[k] != next.Numbers[k])
            {
                return 0;
            }
        }
        return nextNum > prevNum ? nextNum - prevNum : 0;
    }

    if (!canSubNumbers)
    {
        return 0;
    }

    // One level deeper: "2" -> "2.1". The whole of prev must be a prefix of next.
    if (prevDepth + 1 == nextDepth && nextDepth > 0)
    {
        bool deeperAllowed =
            (prev.TypContainerRank > 0 && prev.TypContainerRank == next.TypContainerRank)
            || (prev.NumTyp == NumberTypes.Digit && next.NumTyp == NumberTypes.TwoDigits)
            || (prev.NumTyp == NumberTypes.TwoDigits && next.NumTyp == NumberTypes.ThreeDigits)
            || (prev.NumTyp == NumberTypes.ThreeDigits && next.NumTyp == NumberTypes.FourDigits)
            || (prev.NumTyp == NumberTypes.Letter && next.NumTyp == NumberTypes.TwoDigits && char.IsLetter(next.Numbers[0][0]));
        if (!deeperAllowed)
        {
            return 0;
        }
        for (int k = 0; k < prevDepth; k++)
        {
            if (prev.Numbers[k] != next.Numbers[k])
            {
                return 0;
            }
        }
        return nextNum;
    }

    // One to three levels shallower: "2.3.1" -> "2.4" or "3". Nothing else is supported.
    int drop = prevDepth - nextDepth;
    if (drop < 1 || drop > 3 || prevDepth <= drop)
    {
        return 0;
    }

    bool shallowerAllowed = prev.TypContainerRank > 0 && prev.TypContainerRank == next.TypContainerRank;
    if (!shallowerAllowed)
    {
        if (drop == 1)
        {
            if (prev.NumTyp == NumberTypes.TwoDigits)
            {
                // After a two-level number any successor type is accepted. The check below does
                // not influence the outcome; it is evaluated only to mirror the established flow.
                if (next.NumTyp == NumberTypes.Letter && char.IsLetter(prev.Numbers[0][0]))
                {
                }
                shallowerAllowed = true;
            }
            else
            {
                shallowerAllowed =
                    (prev.NumTyp == NumberTypes.ThreeDigits && next.NumTyp == NumberTypes.TwoDigits)
                    || (prev.NumTyp == NumberTypes.FourDigits && next.NumTyp == NumberTypes.ThreeDigits);
            }
        }
        else if (drop == 2)
        {
            shallowerAllowed =
                (prev.NumTyp == NumberTypes.ThreeDigits && next.NumTyp == NumberTypes.Digit)
                || (prev.NumTyp == NumberTypes.FourDigits && next.NumTyp == NumberTypes.TwoDigits);
        }
        else
        {
            shallowerAllowed = prev.NumTyp == NumberTypes.FourDigits && next.NumTyp == NumberTypes.Digit;
        }
    }
    if (!shallowerAllowed)
    {
        return 0;
    }

    // Index of the component of prev that lines up with the last component of next.
    int pivot = prevDepth - drop - 1;
    for (int k = 0; k < pivot; k++)
    {
        if (prev.Numbers[k] != next.Numbers[k])
        {
            return 0;
        }
    }

    int pivotNum;
    if (!int.TryParse(prev.Numbers[pivot], out pivotNum))
    {
        // A non-numeric first component is tolerated only for a two-component prev.
        if (drop == 1 && prevDepth == 2)
        {
            pivotNum = prev.FirstNumber;
        }
        else
        {
            return 0;
        }
    }

    if ((pivotNum + 1) != nextNum)
    {
        return 0;
    }
    return nextNum - pivotNum;
}