Exemple #1
0
        /// <summary>
        /// Scans the token chain that starts at <paramref name="t"/> and collects the block
        /// titles recognised by <see cref="TryAttach"/> at line starts.
        /// </summary>
        /// <remarks>
        /// The work is done in four passes:
        /// 1. find an anchor: the first Index title, the first Intro title, or Literature titles;
        /// 2. when an index anchor exists, absorb the lines that follow it into that anchor and
        ///    register their values as known chapter names;
        /// 3. walk the remaining text and collect every title found there;
        /// 4. collapse runs of consecutive Literature titles into one.
        /// </remarks>
        /// <param name="t">Token from which the scan starts.</param>
        /// <returns>
        /// The list of titles found, or <c>null</c> when there is no Index title, no Intro title
        /// and the number of Literature titles is not exactly one.
        /// </returns>
        public static List <BlockTitleToken> TryAttachList(Pullenti.Ner.Token t)
        {
            BlockTitleToken        content = null;
            BlockTitleToken        intro   = null;
            List <BlockTitleToken> lits    = null;

            // Pass 1: look for the anchor title. The scan stops at the first Index or Intro title;
            // Literature titles met before that are only remembered.
            for (Pullenti.Ner.Token tt = t; tt != null; tt = tt.Next)
            {
                if (!tt.IsNewlineBefore)
                {
                    continue;
                }
                BlockTitleToken btt = TryAttach(tt, false, null);
                if (btt == null)
                {
                    continue;
                }
                if (btt.Typ == BlkTyps.Index)
                {
                    content = btt;
                    break;
                }
                if (btt.Typ == BlkTyps.Intro)
                {
                    // Inspect at most 5 lines after the Intro title to decide whether it is
                    // handled as the index anchor (content) or as a plain introduction.
                    Pullenti.Ner.Token tt2 = btt.EndToken.Next;
                    for (int k = 0; k < 5; k++)
                    {
                        BlockLine li = BlockLine.Create(tt2, null);
                        if (li == null)
                        {
                            break;
                        }
                        if (li.HasContentItemTail || li.Typ == BlkTyps.IndexItem)
                        {
                            content = btt;
                            break;
                        }
                        if (li.HasVerb)
                        {
                            break;
                        }
                        // A typed line that begins less than 400 characters after the title
                        // also promotes the title to the index anchor.
                        if (li.Typ != BlkTyps.Undefined && (li.BeginChar - btt.EndChar) < 400)
                        {
                            content = btt;
                            break;
                        }
                        tt2 = li.EndToken.Next;
                    }
                    if (content == null)
                    {
                        intro = btt;
                    }
                    break;
                }
                if (btt.Typ == BlkTyps.Literature)
                {
                    if (lits == null)
                    {
                        lits = new List <BlockTitleToken>();
                    }
                    lits.Add(btt);
                }
            }
            if (content == null && intro == null && ((lits == null || lits.Count != 1)))
            {
                return(null);
            }
            List <BlockTitleToken> res = new List <BlockTitleToken>();

            Pullenti.Ner.Core.TerminCollection chapterNames = new Pullenti.Ner.Core.TerminCollection();
            Pullenti.Ner.Token t0 = null;
            if (content != null)
            {
                // Pass 2: extend the index anchor over the titled lines that follow it.
                res.Add(content);
                int cou = 0;
                for (Pullenti.Ner.Token tt = content.EndToken.Next; tt != null; tt = tt.Next)
                {
                    if (!tt.IsNewlineBefore)
                    {
                        continue;
                    }
                    BlockLine li = BlockLine.Create(tt, null);
                    if (li == null)
                    {
                        break;
                    }
                    // A line with a verb that ends with '.' or is longer than 100 characters
                    // terminates the absorption.
                    if (li.HasVerb && (li.EndToken.IsChar('.') || li.LengthChar > 100))
                    {
                        break;
                    }
                    BlockTitleToken btt = TryAttach(tt, true, null);
                    if (btt == null)
                    {
                        continue;
                    }
                    // An Intro title stops the absorption when the anchor itself was an Intro
                    // title or when more than two lines have already been absorbed.
                    if (btt.Typ == BlkTyps.Intro && (content.Typ == BlkTyps.Intro || cou > 2))
                    {
                        break;
                    }
                    cou++;
                    tt = (content.EndToken = btt.EndToken);
                    if (btt.Value != null)
                    {
                        chapterNames.AddString(btt.Value, null, null, false);
                    }
                }
                content.Typ = BlkTyps.Index;
                t0          = content.EndToken.Next;
            }
            else if (intro != null)
            {
                t0 = intro.BeginToken;
            }
            else if (lits != null)
            {
                t0 = t;
            }
            else
            {
                // Not reachable after the guard above; kept as a defensive fallback.
                return(null);
            }

            // Pass 3: collect the titles from t0 onwards, using the names gathered in pass 2.
            bool first = true;
            for (Pullenti.Ner.Token tt = t0; tt != null; tt = tt.Next)
            {
                if (!tt.IsNewlineBefore)
                {
                    continue;
                }
                BlockTitleToken btt = TryAttach(tt, false, chapterNames);
                if (btt == null)
                {
                    continue;
                }
                // Skip over the recognised title whether or not it ends up in the result.
                tt = btt.EndToken;
                if (content != null && btt.Typ == BlkTyps.Index)
                {
                    continue;
                }
                if (res.Count > 0 && res[res.Count - 1].Typ == BlkTyps.Literature)
                {
                    if (btt.Typ != BlkTyps.Appendix && btt.Typ != BlkTyps.Misc && btt.Typ != BlkTyps.Literature)
                    {
                        // After a Literature title only a Chapter is still accepted, and only when
                        // that Literature title ends before three quarters of the source text length.
                        bool chapterAfterEarlyLiterature = btt.Typ == BlkTyps.Chapter && (res[res.Count - 1].EndChar < ((tt.Kit.Sofa.Text.Length * 3) / 4));
                        if (!chapterAfterEarlyLiterature)
                        {
                            continue;
                        }
                    }
                }
                if (first && (tt.BeginChar - t0.BeginChar) > 300)
                {
                    // The first accepted title ends more than 300 characters after t0:
                    // insert a placeholder Chapter title positioned at t0 before it.
                    BlockTitleToken btt0 = new BlockTitleToken(t0, (t0.Previous == null ? t0 : t0.Previous));
                    btt0.Typ   = BlkTyps.Chapter;
                    btt0.Value = "Похоже на начало";
                    res.Add(btt0);
                }
                res.Add(btt);
                first = false;
            }

            // Pass 4: keep only the first title of every run of consecutive Literature titles.
            int i = 0;
            while (i < (res.Count - 1))
            {
                if (res[i].Typ == BlkTyps.Literature && res[i + 1].Typ == res[i].Typ)
                {
                    res.RemoveAt(i + 1);
                }
                else
                {
                    i++;
                }
            }
            return(res);
        }
Exemple #2
0
        /// <summary>
        /// Tries to recognise a block title that starts at token <paramref name="t"/>.
        /// </summary>
        /// <param name="t">
        /// Candidate first token. It must start a new line and must not be all lower-case,
        /// otherwise nothing is recognised.
        /// </param>
        /// <param name="isContentItem">
        /// When <c>true</c> the upper-case and word/non-word ratio requirements are relaxed and
        /// the result may be typed <c>BlkTyps.IndexItem</c> if the line has a content-item tail.
        /// </param>
        /// <param name="names">Collection passed through to every <c>BlockLine.Create</c> call.</param>
        /// <returns>The recognised title token, or <c>null</c> when the line is not a title.</returns>
        public static BlockTitleToken TryAttach(Pullenti.Ner.Token t, bool isContentItem = false, Pullenti.Ner.Core.TerminCollection names = null)
        {
            if (t == null || !t.IsNewlineBefore || t.Chars.IsAllLower)
            {
                return(null);
            }
            BlockLine li = BlockLine.Create(t, names);

            if (li == null)
            {
                return(null);
            }
            if (li.Words == 0 && li.Typ == BlkTyps.Undefined)
            {
                return(null);
            }
            if (li.IsExistName)
            {
                // The line was flagged by BlockLine as an existing name: accept it as is.
                return new BlockTitleToken(t, li.EndToken)
                {
                    Typ = li.Typ
                };
            }
            if (li.EndToken == li.NumberEnd || ((li.EndToken.IsCharOf(".:") && li.EndToken.Previous == li.NumberEnd)))
            {
                // The line ends at NumberEnd (optionally followed by '.' or ':').
                BlockTitleToken res2 = new BlockTitleToken(t, li.EndToken)
                {
                    Typ = li.Typ
                };

                if (li.Typ == BlkTyps.Chapter || li.Typ == BlkTyps.Appendix)
                {
                    // Append the following untyped, all-upper-case lines that contain words.
                    BlockLine li2 = BlockLine.Create(li.EndToken.Next, names);
                    if ((li2 != null && li2.Typ == BlkTyps.Undefined && li2.IsAllUpper) && li2.Words > 0)
                    {
                        res2.EndToken = li2.EndToken;
                        for (Pullenti.Ner.Token tt = res2.EndToken.Next; tt != null; tt = tt.Next)
                        {
                            li2 = BlockLine.Create(tt, names);
                            if (li2 == null)
                            {
                                break;
                            }
                            if (li2.Typ != BlkTyps.Undefined || !li2.IsAllUpper || li2.Words == 0)
                            {
                                break;
                            }
                            tt = (res2.EndToken = li2.EndToken);
                        }
                    }
                }
                return(res2);
            }
            if (li.NumberEnd == null)
            {
                return(null);
            }
            BlockTitleToken res = new BlockTitleToken(t, li.EndToken)
            {
                Typ = li.Typ
            };

            if (res.Typ == BlkTyps.Undefined)
            {
                // An untyped line is promoted to Chapter only if it has words, has no verb and,
                // outside content-item mode, is all upper-case with NotWords <= Words / 2.
                if (li.Words < 1)
                {
                    return(null);
                }
                if (li.HasVerb)
                {
                    return(null);
                }
                if (!isContentItem && (!li.IsAllUpper || li.NotWords > (li.Words / 2)))
                {
                    return(null);
                }
                res.Typ = BlkTyps.Chapter;
                // NOTE(review): NumberEnd ending 7 characters after the line start and followed by a
                // hyphen cancels the promotion; the meaning of the constant 7 is not visible here - confirm.
                if ((li.NumberEnd.EndChar - t.BeginChar) == 7 && li.NumberEnd.Next != null && li.NumberEnd.Next.IsHiphen)
                {
                    res.Typ = BlkTyps.Undefined;
                }
            }
            if (li.HasContentItemTail && isContentItem)
            {
                res.Typ = BlkTyps.IndexItem;
            }
            if (res.Typ == BlkTyps.Chapter || res.Typ == BlkTyps.Appendix)
            {
                if (li.HasVerb)
                {
                    return(null);
                }
                if (li.NotWords > li.Words && !isContentItem)
                {
                    return(null);
                }
                // Extend the title over continuation lines. A local cursor is used so that the
                // parameter t keeps pointing at the first token of the title.
                for (Pullenti.Ner.Token cur = li.EndToken.Next; cur != null; cur = cur.Next)
                {
                    BlockLine li2 = BlockLine.Create(cur, names);
                    if (li2 == null)
                    {
                        break;
                    }
                    if (li2.HasVerb || (li2.Words < 1))
                    {
                        break;
                    }
                    if (!li2.IsAllUpper && !isContentItem)
                    {
                        break;
                    }
                    if (li2.Typ != BlkTyps.Undefined || li2.NumberEnd != null)
                    {
                        break;
                    }
                    cur = (res.EndToken = li2.EndToken);
                    if (isContentItem && li2.HasContentItemTail)
                    {
                        res.Typ = BlkTyps.IndexItem;
                        break;
                    }
                }
            }
            // The value is the text from the token after NumberEnd up to the last letter token
            // of the title, searching backwards from the end.
            for (Pullenti.Ner.Token tt = res.EndToken; tt != null && tt.BeginChar > li.NumberEnd.EndChar; tt = tt.Previous)
            {
                if ((tt is Pullenti.Ner.TextToken) && tt.Chars.IsLetter)
                {
                    res.Value = Pullenti.Ner.Core.MiscHelper.GetTextValue(li.NumberEnd.Next, tt, Pullenti.Ner.Core.GetTextAttr.No);
                    break;
                }
            }
            if ((res.Typ == BlkTyps.Index || res.Typ == BlkTyps.Intro || res.Typ == BlkTyps.Conslusion) || res.Typ == BlkTyps.Literature)
            {
                // These title types are rejected when the value is longer than 100 characters
                // or when the first line has fewer words than non-words.
                if (res.Value != null && res.Value.Length > 100)
                {
                    return(null);
                }
                if (li.Words < li.NotWords)
                {
                    return(null);
                }
            }
            return(res);
        }
    }