예제 #1
0
        /// <summary>
        /// Splits the text of a TXT book into chapters.
        /// </summary>
        /// <remarks>
        /// The returned list always starts with a chapter whose index and start offset are 0
        /// and whose title is <paramref name="bookTitle"/>. One further chapter is added for
        /// every non-blank line accepted by IsExtra or IsTitle; the value passed as its start
        /// position is the character offset in <paramref name="total"/> at which that line
        /// begins. Lines may be terminated by "\r\n", "\n" or "\r", so the offsets stay correct
        /// even when the text does not use the platform's Environment.NewLine.
        /// A null or empty <paramref name="total"/> yields only the leading chapter.
        /// </remarks>
        /// <param name="total">Full text content of the book.</param>
        /// <param name="bookTitle">Book title, used as the title of the leading chapter.</param>
        /// <returns>The detected chapters in document order.</returns>
        private List <Chapter> GetTxtChapters(string total, string bookTitle = "")
        {
            List <Chapter> chapters = new List <Chapter>();
            Chapter        root     = new Chapter();

            root.Title       = bookTitle;
            root.Index       = 0;
            root.StartLength = 0;
            chapters.Add(root);

            // Nothing to scan: only the leading chapter exists.
            if (string.IsNullOrEmpty(total))
            {
                return(chapters);
            }

            char[] lineBreaks = { '\r', '\n' };
            int    count      = 0;
            int    lineStart  = 0;

            // Walk the text line by line, tracking real character offsets instead of
            // buffering the lines; only the position of each title line is needed.
            while (lineStart < total.Length)
            {
                int lineEnd = total.IndexOfAny(lineBreaks, lineStart);
                int nextStart;

                if (lineEnd < 0)
                {
                    // Last line without a trailing terminator.
                    lineEnd   = total.Length;
                    nextStart = total.Length;
                }
                else if (total[lineEnd] == '\r' && lineEnd + 1 < total.Length && total[lineEnd + 1] == '\n')
                {
                    // "\r\n" counts as a single two-character terminator.
                    nextStart = lineEnd + 2;
                }
                else
                {
                    nextStart = lineEnd + 1;
                }

                string line = total.Substring(lineStart, lineEnd - lineStart);

                // Blank and whitespace-only lines can never be chapter titles.
                if (!string.IsNullOrWhiteSpace(line))
                {
                    string title = "";
                    if (IsExtra(line))
                    {
                        // Lines accepted by IsExtra are used verbatim as the title.
                        title = line;
                    }
                    else if (IsTitle(line))
                    {
                        // For regular headings only the matched part of the line is kept.
                        title = ChapterDivisionRegex.Match(line).Value;
                    }

                    if (!string.IsNullOrEmpty(title))
                    {
                        count++;
                        chapters.Add(new Chapter(count, title, lineStart));
                    }
                }

                lineStart = nextStart;
            }

            return(chapters);
        }
예제 #2
0
        /// <summary>
        /// Decides whether a line should be treated as a chapter heading.
        /// </summary>
        /// <remarks>
        /// A line qualifies when ChapterDivisionRegex matches it, the matched text is not
        /// longer than MAX_CHAPTER_LENGTH, and no character in the range U+4E00..U+9FBB
        /// (CJK ideographs) appears before the match. The last rule rejects ordinary
        /// sentences that merely contain something that looks like a chapter name.
        /// </remarks>
        /// <param name="line">A single line of the text being split.</param>
        /// <returns>true when the line is accepted as a chapter heading; otherwise false.</returns>
        private bool IsTitle(string line)
        {
            // Run the regex once and reuse the match for success, length and position.
            var match = ChapterDivisionRegex.Match(line);

            if (!match.Success)
            {
                return(false);
            }
            if (match.Value.Length > MAX_CHAPTER_LENGTH)
            {
                return(false);
            }

            // Use the real match position rather than a culture-sensitive text search,
            // so the inspected prefix is exactly what precedes the matched heading.
            for (int i = 0; i < match.Index; i++)
            {
                char c = line[i];
                if (c >= 0x4e00 && c <= 0x9fbb)
                {
                    // Ideographs before the match: this is body text, not a heading.
                    return(false);
                }
            }

            return(true);
        }
예제 #3
0
        /// <summary>
        /// Splits a TXT book file into chapters.
        /// </summary>
        /// <remarks>
        /// The returned list always starts with a chapter whose index and start offset are 0
        /// and whose title is the file's display name with any ".txt" (case-insensitive)
        /// removed. One further chapter is added for every non-blank line accepted by IsExtra
        /// or IsTitle; the value passed as its start position is the character offset in the
        /// loaded text at which that line begins. Lines may be terminated by "\r\n", "\n" or
        /// "\r", so the offsets stay correct even when the file does not use the platform's
        /// Environment.NewLine. Empty content yields only the leading chapter.
        /// </remarks>
        /// <param name="book">The TXT file to split.</param>
        /// <returns>The detected chapters in document order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="book"/> is null.</exception>
        private async Task <List <Chapter> > GetTxtChapters(StorageFile book)
        {
            if (book == null)
            {
                throw new ArgumentNullException(nameof(book));
            }

            List <Chapter> chapters = new List <Chapter>();
            var            total    = await GetTxtContent(book);
            Chapter        root     = new Chapter();

            root.Title       = book.DisplayName.Replace(".txt", "", StringComparison.OrdinalIgnoreCase);
            root.Index       = 0;
            root.StartLength = 0;
            chapters.Add(root);

            // Nothing to scan: only the leading chapter exists.
            if (string.IsNullOrEmpty(total))
            {
                return(chapters);
            }

            char[] lineBreaks = { '\r', '\n' };
            int    count      = 0;
            int    lineStart  = 0;

            // Walk the text line by line, tracking real character offsets instead of
            // buffering the lines; only the position of each title line is needed.
            while (lineStart < total.Length)
            {
                int lineEnd = total.IndexOfAny(lineBreaks, lineStart);
                int nextStart;

                if (lineEnd < 0)
                {
                    // Last line without a trailing terminator.
                    lineEnd   = total.Length;
                    nextStart = total.Length;
                }
                else if (total[lineEnd] == '\r' && lineEnd + 1 < total.Length && total[lineEnd + 1] == '\n')
                {
                    // "\r\n" counts as a single two-character terminator.
                    nextStart = lineEnd + 2;
                }
                else
                {
                    nextStart = lineEnd + 1;
                }

                string line = total.Substring(lineStart, lineEnd - lineStart);

                // Blank and whitespace-only lines can never be chapter titles.
                if (!string.IsNullOrWhiteSpace(line))
                {
                    string title = "";
                    if (IsExtra(line))
                    {
                        // Lines accepted by IsExtra are used verbatim as the title.
                        title = line;
                    }
                    else if (IsTitle(line))
                    {
                        // For regular headings only the matched part of the line is kept.
                        title = ChapterDivisionRegex.Match(line).Value;
                    }

                    if (!string.IsNullOrEmpty(title))
                    {
                        count++;
                        chapters.Add(new Chapter(count, title, lineStart));
                    }
                }

                lineStart = nextStart;
            }

            return(chapters);
        }