Exemple #1
0
        /// <summary>
        /// Extracts the chapter title and the chapter body markup from a chapter page.
        /// </summary>
        /// <param name="html">Raw page markup to scan. Must not be null.</param>
        /// <returns>
        /// A list of exactly two strings: the title (empty when none is found) at index 0,
        /// followed by the body as returned by <c>ISach.NormalizeBody</c> at index 1.
        /// </returns>
        public static List <string> ParseChapter(string html)
        {
            const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

            // Find the navigation block that precedes the chapter heading, trying the
            // desktop navigator, then the mobile navigator, then the story detail block.
            int start = html.IndexOf("<div class='chapter_navigator'>", ignoreCase);
            if (start < 0)
            {
                int mobileNavigator = html.IndexOf("<div class='mobile_chapter_navigator'>", ignoreCase);
                start = mobileNavigator > 0
                    ? mobileNavigator
                    : html.IndexOf("<div id='story_detail'", ignoreCase);
            }

            // The title candidate is the text between the end of the tag that carries
            // "ms_chapter" and the next closing </div>.
            start = html.IndexOf("ms_chapter", start + 1, ignoreCase);
            start = start < 0 ? -1 : html.IndexOf(">", start + 1, ignoreCase);
            int end = start < 0 ? -1 : html.IndexOf("</div>", start + 1, ignoreCase);

            string title = (start > -1 && end > -1 ? html.Substring(start + 1, end - start - 1).Trim() : "").GetNormalized();

            // Collapse runs of spaces. The test is ordinal so that it always agrees with
            // string.Replace (which is ordinal); a culture-sensitive test could report a
            // match that Replace is unable to change, and the loop would never finish.
            while (title.IndexOf("  ", StringComparison.Ordinal) > -1)
            {
                title = title.Replace("  ", " ");
            }

            if (!title.Equals(""))
            {
                start = html.IndexOf("<div", start + 1, ignoreCase);

                // A captured "title" that is really dropcap markup, or the literal text
                // "null", is discarded.
                bool isDropcapMarkup = title.IndexOf("<div id='dropcap", ignoreCase) > -1 ||
                                       title.IndexOf("<div id ='dropcap", ignoreCase) > -1;
                if (isDropcapMarkup || title.Equals("null", ignoreCase))
                {
                    title = "";
                }
            }
            else
            {
                start = html.IndexOf("<span class='dropcap", start + 1, ignoreCase);
                if (start < 0)
                {
                    if (html.StartsWith("<div class='ms_text", ignoreCase))
                    {
                        start = 0;
                    }
                    else
                    {
                        // Prefer the "ms_chapter" marker; otherwise use the 50px spacer,
                        // but only when it appears before the first text block.
                        int chapterMarker = html.IndexOf("ms_chapter", start + 1, ignoreCase);
                        if (chapterMarker > 0)
                        {
                            start = chapterMarker;
                        }
                        else
                        {
                            int spacer    = html.IndexOf("<div style='height: 50px;'></div>", end + 1, ignoreCase);
                            int firstText = html.IndexOf("<div class='ms_text'>", end + 1, ignoreCase);
                            start = spacer < firstText ? spacer : -1;
                        }

                        if (start < 0)
                        {
                            start = html.IndexOf("<div class='ms_text'>", end + 1, ignoreCase);
                        }
                        else
                        {
                            // The body begins right after the </div> that closes the marker.
                            // Keep -1 when there is no closing tag instead of turning the
                            // not-found sentinel into a bogus offset of 5.
                            int closingDiv = html.IndexOf("</div>", start + 1, ignoreCase);
                            start = closingDiv < 0 ? -1 : closingDiv + 6;
                        }
                    }
                }
            }

            // start may equal html.Length when the closing </div> is the last thing in the
            // page; clamp so IndexOf is never handed a start index past the end.
            int endSearchFrom = Math.Min(start + 1, html.Length);

            end = html.IndexOf("<div style='height: 50px;'></div>", endSearchFrom, ignoreCase);
            if (end < 0)
            {
                end = html.IndexOf("<div class='navigator_bottom'>", endSearchFrom, ignoreCase);
                if (end < 0)
                {
                    end = html.IndexOf("<div class='mobile_chapter_navigator'>", endSearchFrom, ignoreCase);
                }
                if (end < 0)
                {
                    end = html.IndexOf("</form>", endSearchFrom, ignoreCase);
                }
            }

            string body = start > -1 && end > -1 ? html.Substring(start, end - start).Trim() : "";

            // Turn every <div> of the body into a paragraph.
            body = body.Replace(StringComparison.OrdinalIgnoreCase, "<div class='ms_text'>", "<p>").Replace(StringComparison.OrdinalIgnoreCase, "<div", "<p").Replace(StringComparison.OrdinalIgnoreCase, "</div>", "</p>");

            if (body.StartsWith("<span class='dropcap", ignoreCase))
            {
                body = "<p>" + body;
            }

            // Walk the paragraphs; each paragraph that contains a dropcap marker is replaced
            // by its plain text, or by the dropcap letter when it has no text of its own.
            start = body.IndexOf("<p", ignoreCase);
            end   = body.IndexOf("</p>", start + 1, ignoreCase);
            while (start > -1 && end > -1)
            {
                int dropcap = body.IndexOf("'dropcap", start + 1, ignoreCase);
                if (dropcap > -1 && dropcap < end)
                {
                    string paragraph = body.Substring(start, end - start + 4);
                    body = body.Remove(start, end - start + 4);

                    // The dropcap letter is taken to be the last character of the first
                    // class attribute value in the paragraph.
                    string dropcapChar = "";
                    int classAttribute = paragraph.IndexOf("class=", StringComparison.Ordinal);
                    if (classAttribute > 0)
                    {
                        int    valueStart = classAttribute + 7;
                        string quote      = paragraph.Substring(valueStart - 1, 1);
                        int    valueEnd   = paragraph.IndexOf(quote, valueStart + 1, StringComparison.Ordinal);

                        // Without a closing quote there is no attribute value to read.
                        if (valueEnd > -1)
                        {
                            dropcapChar = paragraph[valueEnd - 1].ToString();
                        }
                    }

                    paragraph = Utility.RemoveTag(Utility.RemoveTag(paragraph, "p"), "span").Trim();
                    if (paragraph.Equals(""))
                    {
                        paragraph = dropcapChar;
                    }
                    body = body.Insert(start, (body.StartsWith("<p>", StringComparison.Ordinal) ? "" : "<p>") + paragraph);
                }

                // Removing a trailing paragraph and inserting nothing can leave start equal
                // to body.Length, so clamp the next search position.
                start = body.IndexOf("<p", Math.Min(start + 1, body.Length), ignoreCase);
                end   = body.IndexOf("</p>", start + 1, ignoreCase);
            }

            body = ISach.NormalizeBody(body.Replace(" \n", "").Replace("\r", "").Replace("\n", ""));

            // With no explicit title, promote a leading "Quyển/Phần/Chương ..." paragraph
            // to be the title and drop it from the body.
            if (title.Equals("") &&
                (body.StartsWith("<p>Quyển ", ignoreCase) ||
                 body.StartsWith("<p>Phần ", ignoreCase) ||
                 body.StartsWith("<p>Chương ", ignoreCase)))
            {
                int closingTag = body.IndexOf("</p>", StringComparison.Ordinal);

                // Only split when the heading paragraph is actually closed; otherwise
                // leave the title and body untouched.
                if (closingTag > -1)
                {
                    int headingLength = closingTag + 4;
                    title = Utility.RemoveTag(body.Substring(0, headingLength), "p").Trim();
                    body  = body.Remove(0, headingLength);
                }
            }

            return new List <string>()
            {
                title, body
            };
        }
Exemple #2
0
 /// <summary>
 /// Convenience overload: forwards <paramref name="input"/> to the two-argument
 /// <c>ISach.NormalizeBody</c> overload, passing -1 as the second argument.
 /// </summary>
 /// <param name="input">Body text to normalize.</param>
 /// <returns>Whatever the two-argument overload returns for <paramref name="input"/>.</returns>
 public static string NormalizeBody(string input)
 {
     string normalized = ISach.NormalizeBody(input, -1);
     return normalized;
 }