/// <summary>
/// Splits the text of a TXT book into chapters.
/// </summary>
/// <remarks>
/// The first entry of the result is always a chapter with index 0 and start offset 0
/// whose title is <paramref name="bookTitle"/>. Every following entry corresponds to a
/// non-blank line accepted by <c>IsExtra</c> or <c>IsTitle</c>; its start offset is the
/// character position of that line inside <paramref name="total"/>, computed on the
/// assumption that lines are separated by <see cref="Environment.NewLine"/>.
/// NOTE(review): text that uses a different line terminator is not split into lines
/// here, so no headings will be found in it — confirm that callers normalize line endings.
/// </remarks>
/// <param name="total">Full text of the book; <c>null</c> is treated like an empty text.</param>
/// <param name="bookTitle">Title of the book, used as the title of the leading chapter.</param>
/// <returns>The detected chapters, in the order in which they appear in the text.</returns>
private List<Chapter> GetTxtChapters(string total, string bookTitle = "")
{
    Chapter leading = new Chapter();
    leading.Title = bookTitle;
    leading.Index = 0;
    leading.StartLength = 0;

    List<Chapter> chapters = new List<Chapter>();
    chapters.Add(leading);

    // Nothing to scan: the result consists of the leading chapter only.
    if (string.IsNullOrEmpty(total))
    {
        return chapters;
    }

    int chapterCount = 0;

    // Character position of the current line inside `total`. Only this running offset is
    // needed to place a chapter, so the text itself is never copied into a buffer.
    int offset = 0;

    foreach (var line in total.Split(Environment.NewLine))
    {
        // Blank lines can never be headings; they only advance the offset.
        if (!string.IsNullOrWhiteSpace(line))
        {
            string title = "";
            if (IsExtra(line))
            {
                // Lines accepted by IsExtra are used as the title in full.
                title = line;
            }
            else if (IsTitle(line))
            {
                // For regular headings only the part matched by the regex is the title.
                title = ChapterDivisionRegex.Match(line).Value;
            }

            if (!string.IsNullOrEmpty(title))
            {
                chapterCount++;
                chapters.Add(new Chapter(chapterCount, title, offset));
            }
        }

        // Advance past this line and the separator that followed it.
        offset += line.Length + Environment.NewLine.Length;
    }

    return chapters;
}
/// <summary>
/// Decides whether a line of text is a chapter heading.
/// </summary>
/// <remarks>
/// A line is accepted when <c>ChapterDivisionRegex</c> matches it, the matched text is
/// not longer than <c>MAX_CHAPTER_LENGTH</c> characters, and no character in the range
/// U+4E00..U+9FBB occurs in front of the match.
/// </remarks>
/// <param name="line">The line to test.</param>
/// <returns><c>true</c> if the line is accepted as a chapter heading; otherwise <c>false</c>.</returns>
private bool IsTitle(string line)
{
    // Bounds of the ideograph range that must not appear in front of the match.
    const char IdeographFirst = '\u4e00';
    const char IdeographLast = '\u9fbb';

    // Run the regex once; the match carries both the text length and its position.
    var match = ChapterDivisionRegex.Match(line);
    if (!match.Success)
    {
        return false;
    }

    if (match.Length > MAX_CHAPTER_LENGTH)
    {
        return false;
    }

    // An ideograph in front of the match rejects the line; presumably such a line is
    // body text that merely contains something resembling a chapter name.
    // The position comes from the match itself rather than from searching the line for
    // the matched text again, which could locate a different occurrence.
    for (int i = 0; i < match.Index; i++)
    {
        char current = line[i];
        if (current >= IdeographFirst && current <= IdeographLast)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Reads a TXT file and splits its text into chapters.
/// </summary>
/// <remarks>
/// The text is obtained through <c>GetTxtContent</c> and split by the
/// <c>GetTxtChapters(string, string)</c> overload. The leading chapter is titled with the
/// file's display name from which every occurrence of ".txt" (compared case-insensitively)
/// has been removed.
/// </remarks>
/// <param name="book">The TXT file to split.</param>
/// <returns>The detected chapters, in the order in which they appear in the file.</returns>
/// <exception cref="ArgumentNullException"><paramref name="book"/> is <c>null</c>.</exception>
private async Task<List<Chapter>> GetTxtChapters(StorageFile book)
{
    if (book == null)
    {
        throw new ArgumentNullException(nameof(book));
    }

    string total = await GetTxtContent(book);
    string bookTitle = book.DisplayName.Replace(".txt", "", StringComparison.OrdinalIgnoreCase);

    // The splitting rules live in the string overload so both entry points stay in sync.
    return GetTxtChapters(total, bookTitle);
}