/// <summary>
/// Parses one chapter: derives a cleaned-up title from <c>contents[1]</c> and a
/// normalized HTML body from <c>contents[2]</c>. <c>contents[0]</c> is not read here.
/// </summary>
/// <param name="contents">Raw chapter parts; must hold at least three items.</param>
/// <returns>
/// <c>null</c> when <paramref name="contents"/> is null or has fewer than three items;
/// otherwise a two-item list: the title followed by the body (the body is an empty
/// string when nothing but an empty paragraph break is left).
/// </returns>
List<string> ParseChapter(List<string> contents) {
    if (contents == null || contents.Count < 3) {
        return null;
    }

    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

    // ---------------------------------------------------------------- title
    var title = UtilityService.RemoveWhitespaces(contents[1].Trim()).Replace("\r", "").Replace("\n", "").Replace("\t", "");

    // Prefer the inner content of the h4 element when there is one.
    var start = title.PositionOf("<h4");
    start = title.PositionOf(">", start + 1);
    var end = title.PositionOf("</h4>", start + 1);
    if (start > 0 && end > 0) {
        title = title.Substring(start + 1, end - start - 1).Trim().Replace("♦", " ");

        // Drop credit segments (translator, editor, publisher, ...): each one runs from
        // the keyword up to and including the next <br>, or to the end of the title.
        "Đồng tác giả|Dịch giả|Người dịch|Dịch viện|Chuyển ngữ|Dịch ra|Anh dịch|Dịch thuật|Bản dịch|Hiệu đính|Biên Tập|Biên soạn|đánh máy bổ sung|Nguyên tác|Nguyên bản|Dịch theo|Dịch từ|Theo bản|Biên dịch|Tổng Hợp|Tủ Sách|Tuyển tập|Sách Xuất Bản Tại|Chủ biên|Chủ nhiệm".Split('|')
            .ForEach(excluded => {
                start = title.IndexOf(excluded, StringComparison.OrdinalIgnoreCase);
                if (start > -1) {
                    end = title.IndexOf("<br>", start, StringComparison.OrdinalIgnoreCase);
                    if (end < 0) {
                        // No <br> follows: with the "+ 4" below this removes up to the end of the title.
                        end = title.Length - 4;
                    }
                    title = title.Remove(start, end - start + 4).Trim();
                }
            });

        while (title.IsStartsWith("<br>")) {
            title = title.Substring(4).Trim();
        }
        while (title.IsEndsWith("<br>")) {
            title = title.Substring(0, title.Length - 4).Trim();
        }
        title = title.Replace("<br>", ": ").Replace("<BR>", " - ").Trim();
    }

    // Alternative layout: the title parts sit in <span> elements after a div whose class starts with "hr".
    start = title.PositionOf("<div class=\"hr");
    if (start > 0) {
        var tit = "";
        do {
            start = title.PositionOf("<span", start + 1);
            start = start < 0 ? -1 : title.PositionOf(">", start + 1) + 1;
            end = start < 0 ? -1 : title.PositionOf("</span>", start);
            var t = start > 0 && end > 0 ? title.Substring(start, end - start).Trim() : "";
            // Skip spans that merely repeat the book title or the author.
            if (!t.Equals("") && !t.IsEquals(this.Title) && !t.IsEquals(this.Author)) {
                tit += (tit != "" ? "<br>" : "") + t;
            }
        }
        while (start > 0 && end > 0);
        title = tit.Replace("<br>", ": ").Replace("<BR>", " - ").Trim();
    }

    title = UtilityService.ClearTag(title, "img").Trim();
    title = UtilityService.RemoveTag(title, "br").Trim();
    title = UtilityService.RemoveTag(title, "p").Trim();
    title = UtilityService.RemoveTag(title, "i").Trim();
    title = UtilityService.RemoveTag(title, "b").Trim();
    title = UtilityService.RemoveTag(title, "em").Trim();
    title = UtilityService.RemoveTag(title, "strong").Trim();

    // Collapse runs of spaces. The searched text must be longer than its replacement
    // (two spaces -> one), otherwise the loop can never make progress. The guards use an
    // ordinal search so they always agree with string.Replace, which is ordinal too.
    while (title.IndexOf("  ", StringComparison.Ordinal) > 0) {
        title = title.Replace("  ", " ");
    }
    while (title.IndexOf("- -", StringComparison.Ordinal) > 0) {
        title = title.Replace("- -", "-");
    }
    while (title.IndexOf(": -", StringComparison.Ordinal) > 0) {
        title = title.Replace(": -", ":");
    }

    title = title.Trim().Replace("( ", "(").Replace(" )", ")").Replace("- (", "(").Replace(": :", ":").GetNormalized();

    // Strip orphaned brackets and colons left at either end by the removals above.
    while (title.StartsWith(")") || title.StartsWith("]")) {
        title = title.Right(title.Length - 1).Trim();
    }
    while (title.EndsWith("(") || title.EndsWith("[")) {
        title = title.Left(title.Length - 1).Trim();
    }
    while (title.StartsWith(":")) {
        title = title.Right(title.Length - 1).Trim();
    }
    while (title.EndsWith(":")) {
        title = title.Left(title.Length - 1).Trim();
    }

    // An all-uppercase title is lower-cased and then re-normalized.
    if (title.Equals(title.ToUpper())) {
        title = title.ToLower().GetNormalized();
    }

    // ----------------------------------------------------------------- body
    var body = UtilityService.RemoveWhitespaces(contents[2].Trim()).Replace(ignoreCase, "\r", "").Replace(ignoreCase, "\n", "").Replace(ignoreCase, "\t", "");
    body = UtilityService.RemoveTagAttributes(body, "p");
    body = UtilityService.RemoveTagAttributes(body, "div");
    body = UtilityService.ClearTag(body, "script");
    body = UtilityService.ClearComments(body);
    body = UtilityService.RemoveMsOfficeTags(body);
    body = body.Replace(ignoreCase, "<div></div>", "</p><p>").Trim();

    // An opening div without its closing counterpart: replace the opener and wrap the rest in a paragraph.
    if (body.IsStartsWith("<div") && !body.IsEndsWith("</div>")) {
        body = body.Remove(0, body.IndexOf(">") + 1);
        body = "<p>" + body + "</p>";
    }
    while (body.IsStartsWith("<div>")) {
        body = body.Substring(5).Trim();
    }
    while (body.IsEndsWith("</div>")) {
        body = body.Substring(0, body.Length - 6).Trim();
    }

    // Remove "<?xml ...>" declarations.
    start = body.PositionOf("<?xml");
    while (start > -1) {
        end = body.PositionOf(">", start);
        if (end < 0) {
            // Unterminated declaration: no '>' exists after it, so drop the dangling
            // remainder instead of calling Remove with a negative count.
            body = body.Remove(start);
            break;
        }
        body = body.Remove(start, end - start + 1);
        start = body.PositionOf("<?xml");
    }

    // Case-insensitive search with a lower-case replacement: normalizes the casing of these tags.
    "strong|em|p|img".Split('|')
        .ForEach(tag => body = body.Replace(ignoreCase, "<" + tag, "<" + tag).Replace(ignoreCase, "</" + tag + ">", "</" + tag + ">"));

    body = body.Replace(ignoreCase, "<DIV class=\"truyen_text\"></DIV></STRONG>", "</STRONG>\n<p>");
    body = body.Replace(ignoreCase, "<DIV class=\"truyen_text\"></DIV></EM>", "</EM>\n<p>");

    var headingTags = "h1|h2|h3|h4|h5|h6".Split('|');
    headingTags.ForEach(tag => {
        body = body.Replace(ignoreCase, "<" + tag + "><div class=\"truyen_text\"></div>", "<" + tag + "> ").Replace(ignoreCase, "<" + tag + "><div class=\"truyen_text\"> </div>", "<" + tag + ">");
        body = UtilityService.RemoveTagAttributes(body, tag);
    });

    // Turn the remaining div markup into paragraphs and tidy list/inline leftovers.
    body = body.Replace(ignoreCase, "<div class=\"truyen_text\"></div>", "</p><p>").Replace(ignoreCase, "<div class=\"truyen_text\"> </div>", "</p><p>");
    body = body.Replace(ignoreCase, "<div class=\"truyen_text\">", "<p>").Replace(ignoreCase, "<div", "<p").Replace(ignoreCase, "</div>", "</p>");
    body = body.Replace(ignoreCase, "</li></p>", "</li>").Replace(ignoreCase, "<p><li>", "<li>");
    body = body.Replace(ignoreCase, "<p></ul></p>", "</ul>").Replace(ignoreCase, "<p></ol></p>", "</ol>");
    body = body.Replace(ignoreCase, "<i class=\"calibre7\"", "<i").Replace(ignoreCase, "<img class=\"calibre1\"", "<img").Replace(ignoreCase, "<b class=\"calibre4\"", "<b");
    body = body.Replace(ignoreCase, "<p> <b>", "<p><b>").Replace(ignoreCase, ". </b>", ".</b> ").Replace(ignoreCase, ". </i>", ".</i> ");
    body = body.Replace(ignoreCase, "<p align=\"center\"> <", "<p align=\"center\"><").Replace(ignoreCase, "<p> <", "<p><").Replace(ignoreCase, "<p> ", "<p>");
    body = body.Replace(ignoreCase, "<p><p>", "<p>").Replace(ignoreCase, "</p></p>", "</p>").Replace(ignoreCase, ". </p> ", ".</p>");

    headingTags.ForEach(tag => {
        body = body.Replace(ignoreCase, "<" + tag + "> <", "<" + tag + "><").Replace(ignoreCase, "> </" + tag + ">", "></" + tag + ">");
        body = body.Replace(ignoreCase, "<" + tag + "></" + tag + ">", "").Replace(ignoreCase, "<" + tag + "> </" + tag + ">", "");
        body = body.Replace(ignoreCase, "<" + tag + "></p>", "<" + tag + ">").Replace(ignoreCase, "<p></" + tag + ">", "</" + tag + ">");
        body = body.Replace(ignoreCase, "<" + tag + "><strong>", "<" + tag + ">").Replace(ignoreCase, "</strong></" + tag + ">", "</" + tag + ">");
        body = body.Replace(ignoreCase, "<" + tag + "><em>", "<" + tag + ">").Replace(ignoreCase, "</em></" + tag + ">", "</" + tag + ">");
        body = body.Replace(ignoreCase, "<p><" + tag + ">", "<" + tag + ">").Replace(ignoreCase, "</" + tag + "></p>", "</" + tag + ">").Replace(ignoreCase, "<" + tag + "></p>", "");
    });

    // Strip every nested tag from the inside of each heading.
    headingTags.ForEach(tag => {
        body = body.Replace(ignoreCase, "<p><" + tag + ">", "<" + tag + ">").Replace(ignoreCase, "</" + tag + "></p>", "</" + tag + ">");
        start = body.PositionOf("<" + tag + ">");
        while (start > -1) {
            end = body.PositionOf("</" + tag + ">", start + 1);
            if (end < 0) {
                // Heading never closes: nothing can be sliced out safely, stop with this tag.
                break;
            }

            // "<hN>" is 4 characters long, "</hN>" is 5.
            var heading = body.Substring(start + 4, end - start - 4);
            body = body.Remove(start, end - start + 5);

            var pos = heading.PositionOf("<");
            while (pos > -1) {
                var close = heading.PositionOf(">", pos);
                if (close < 0) {
                    // A stray '<' without a matching '>' would otherwise be found again forever.
                    break;
                }
                heading = heading.Remove(pos, close - pos + 1);
                pos = heading.PositionOf("<");
            }

            body = body.Insert(start, "<" + tag + ">" + heading + "</" + tag + ">");
            start = body.PositionOf("<" + tag + ">", start + 1);
        }
    });

    // Drop-cap blocks (<p id="chuhoain" ... </span><p>): swap the block for the
    // result of GetImageCharacter on the image URL.
    start = body.PositionOf("<p id=\"chuhoain\"");
    while (start > -1) {
        end = body.PositionOf("</span><p>", start);
        var img = body.PositionOf("<img", start);
        if (end > start && img > start) {
            // Check the position BEFORE adding the 5-character offset of src=" / src=' ;
            // testing (position + 5) < 0 could never detect a miss.
            var quote = "\"";
            var srcAt = body.PositionOf("src=\"", img);
            if (srcAt < 0) {
                quote = "'";
                srcAt = body.PositionOf("src='", img);
            }
            var urlStart = srcAt + 5;
            var urlEnd = srcAt < 0 ? -1 : body.PositionOf(quote, urlStart);

            // Only rewrite when the URL could actually be delimited.
            if (urlEnd >= urlStart) {
                var imgChar = body.Substring(urlStart, urlEnd - urlStart);
                // 10 == "</span><p>".Length
                body = body.Remove(start, end - start + 10);
                body = body.Insert(start, "<p>" + this.GetImageCharacter(imgChar));
            }
        }
        start = body.PositionOf("<p id=\"chuhoain\"", start + 1);
    }

    // Images from the "chu cai/cotich" folder: replace the image (through the next <p>, when
    // there is one, else through the end of the tag) with the result of GetImageCharacter.
    start = body.PositionOf("<img");
    while (start > -1) {
        end = body.PositionOf(">", start + 1);
        var img = body.PositionOf("src=\"https://vnthuquan.net/userfiles/images/chu%20cai/cotich", start);
        if (img < 0) {
            img = body.PositionOf("src='https://vnthuquan.net/userfiles/images/chu%20cai/cotich", start);
        }
        if (img > -1 && end > img) {
            // body[img + 4] is the quote character that opened the attribute value;
            // the value ends at the next occurrence of that same character.
            var urlStart = img + 5;
            var urlEnd = body.PositionOf(body[img + 4].ToString(), urlStart);
            if (urlEnd >= urlStart) {
                var imgChar = body.Substring(urlStart, urlEnd - urlStart);
                end = body.PositionOf("<p>", start);
                if (end < 0) {
                    end = body.PositionOf(">", start) + 1;
                }
                else {
                    end += 3;
                }
                body = body.Remove(start, end - start);
                body = body.Insert(start, this.GetImageCharacter(imgChar));
            }
        }
        start = body.PositionOf("<img", start + 1);
    }

    if (body.Equals("</p><p>")) {
        // Only an empty paragraph break is left: treat the chapter as having no body.
        body = "";
    }
    else {
        body = this.NormalizeChapterBody(body);
        body = body.Replace(ignoreCase, "<h1>", "<h2>").Replace(ignoreCase, "</h1>", "</h2>");
    }

    return new List<string> { title, body };
}