コード例 #1
0
ファイル: Volume.cs プロジェクト: EusthEnoptEron/BakaPrince
        /// <summary>
        /// Builds the volume from a list of links: every list item that contains
        /// a link becomes either the illustrations page or a chapter.
        /// </summary>
        /// <param name="ul">Selection whose <c>li</c> descendants hold the links to the volume's pages.</param>
        public Volume(CQ ul)
        {
            ul.Find("li").Each((el) =>
            {
                var link = new CQ(el).Find("a").First();
                if (link.Length == 0) return;

                // An anchor without an href cannot be mapped to a page, and passing
                // null to Regex.Replace would throw ArgumentNullException.
                var href = link.Attr("href");
                if (href == null) return;

                // Drop everything up to and including "title=" to obtain the page name.
                var name = Regex.Replace(href, "^.+title=", "");

                // Only the first link mentioning "Illustrations" becomes the
                // illustrations page; any later one is added as a regular chapter.
                if (name.Contains("Illustrations") && _illustrationsPage == null)
                {
                    _illustrationsPage = new IllustrationsPage(name);
                }
                else
                {
                    _chapters.Add(new Chapter(name, link.Text()));
                }
            });
        }
コード例 #2
0
        /// <summary>
        /// Imports every image referenced by an <c>img</c> element in the given HTML.
        /// </summary>
        /// <param name="content">HTML markup to scan for images.</param>
        /// <param name="postTitle">Passed through unchanged to <c>ImportImage</c>.</param>
        /// <param name="blogTitle">Passed through unchanged to <c>ImportImage</c>.</param>
        private void ImportContentImages(string content, string postTitle, string blogTitle)
        {
            CQ document = content;

            // Nothing to scan when the parsed content renders to nothing.
            if (String.IsNullOrWhiteSpace(document.Render()))
            {
                return;
            }

            document.Select("img").Each((element) =>
            {
                string imageSrc = new CQ(element).Attr("src");

                // An <img> without a source has nothing to import.
                if (String.IsNullOrWhiteSpace(imageSrc))
                {
                    return;
                }

                ImportImage(imageSrc, postTitle, blogTitle);
            });
        }
コード例 #3
0
ファイル: Page.cs プロジェクト: EusthEnoptEron/BakaPrince
        /// <summary>
        /// Post-processes a page's HTML: prepends title/marker spans, replaces linked
        /// thumbnails with full images, converts references into inline footnote
        /// spans, strips navigation/edit chrome, applies typographic quotes and
        /// rewrites span-based ruby markup into <c>ruby</c> elements.
        /// </summary>
        /// <remarks>Every image found is appended to <c>_images</c>.</remarks>
        /// <param name="html">HTML fragment to process.</param>
        /// <returns>The rendered HTML of the processed fragment.</returns>
        private string PrepareHtml(string html)
        {
            // Make title
            if (!Notitle)
            {
                html = "<h2>" + Title + "</h2>" + html;
            }
            html = "<span class=\"invisible chapterstart\">" + (Noheader ? "" : Title) + "</span>" + html;

            // Make sure page break is set
            if (Pagebreak)
            {
                html = "<span class=\"invisible pagebreak\"></span>" + html;
            }

            CQ dom = CQ.CreateFragment("<div class=\"content\">" + html + "</div>");

            // Remove next/prev table
            dom.Find("table:contains('Forward'):contains('Back')").Last().Remove();

            // Find images
            foreach (IDomElement aNode in dom.Find("a.image"))
            {
                var a = new CQ(aNode);
                var thumbnailSrc = a.Find("img").Attr("src");

                // An image link without an <img src="..."> gives us nothing to
                // resolve; leave it untouched instead of dereferencing null.
                if (thumbnailSrc == null)
                {
                    continue;
                }

                // Turn the thumbnail URL into the URL of the original file:
                // drop "/thumb" and everything after the file extension.
                var src = thumbnailSrc.Replace("/thumb", "");
                src = Regex.Replace(src, @"[.](jpg|png|gif)\/.+$", @".$1", RegexOptions.IgnoreCase);

                var image = new Image(src, new Uri(Wiki)) {Sashie = true};

                // Replace the enclosing ".thumb" container when there is one,
                // otherwise the link itself (whichever comes first in the selection).
                CQ node = a.Closest(".thumb").Add(a).First();

                if (_images.Count == 0 && EntryPicture)
                {
                    // We can view it as a full-fledged image since we don't need to worry about text-flow
                    image.Sashie = false;
                    dom.Before(image.Html);
                }
                else
                {
                    node.Before(image.Html);
                }

                node.Remove();

                _images.Add(image);
            }

            // Catch references
            foreach (IDomElement supNode in dom.Find("sup.reference"))
            {
                var sup = new CQ(supNode);
                var referenceId = sup.Attr("id");

                // Without an id there is no way to locate the matching note.
                if (referenceId == null)
                {
                    continue;
                }

                CQ footnote = "<span class=\"fn\"></span>";
                CQ oldFootnote = dom.Find("#" + referenceId.Replace("_ref-", "_note-"));

                // Move the note text inline, next to where it is referenced.
                footnote.Html(oldFootnote.Find(".reference-text").Html());

                oldFootnote.Remove();
                sup.Before(footnote).Remove();
            }
            // Remove possible reference title
            dom.Find(".references").Prev(":header").Remove();

            // Remove edit links
            dom.Find(".editsection, #toc").Remove();

            // Make smart quotes
            dom.Find("p:contains('\"'), p:contains(\"'\"), li:contains('\"'), li:contains(\"'\")").Each((el) =>
            {
                CQ p = new CQ(el);
                string pHtml = p.Html();

                // Replace double quotes, but only when they pair up; an odd count
                // means we cannot tell opening from closing quotes.
                if (Regex.Matches(pHtml, "&quot;").Count % 2 == 0)
                {
                    pHtml = Regex.Replace(pHtml, "&quot;(.+?)&quot;", "“$1”");
                }
                else
                {
                    Console.WriteLine("NOTICE: possible quotes problem ({0})", pHtml.Trim());
                }

                // Replace single quotes (\b doesn't work)
                pHtml = Regex.Replace(pHtml, "(?<!\\w)'(.+?)'(?!\\w)", "‘$1’");
                // Whatever is left must be an apostrophe
                pHtml = pHtml.Replace("'", "’");

                p.Html(pHtml);
            });

            // Parse Ruby
            dom.Find("span > span > span").Each(el =>
            {
                var rubySpan = new CQ(el);
                if (rubySpan.Css("position") != "relative" || rubySpan.Css("left") != "-50%")
                {
                    return;
                }

                var textSpan = rubySpan.Parent().Siblings("span");
                var containerSpan = textSpan.Parent();
                if (textSpan.Length == 1 && containerSpan.Css("white-space") == "nowrap")
                {
                    // Okay, this is ruby: base text followed by the annotation,
                    // with <rp> parentheses around the annotation.
                    var ruby = new CQ("<ruby>");
                    ruby.Html(textSpan.Html());
                    ruby.Append(new CQ("<rp>(</rp>"));
                    ruby.Append(new CQ("<rt>").Html(rubySpan.Html()));
                    ruby.Append(new CQ("<rp>)</rp>"));

                    containerSpan.ReplaceWith(ruby);
                }
            });

            // Hakomari specific: paragraphs containing the star glyph are replaced by an image
            foreach (IDomElement star in dom.Find("p:contains(✵)"))
            {
                star.InnerHTML = "<img src=\"" + (new Uri( Helper.GetAssetsPath() + "blackstar.jpg" )) + "\">";
            }

            return dom.Render();
        }
コード例 #4
0
ファイル: Program.cs プロジェクト: lifk/ScraperTest
        /// <summary>
        /// Completion handler for a string download: parses the downloaded HTML,
        /// and for every matching chapter link prints the URL of its first page
        /// and passes it to <c>GetEpisodeImageUrls</c> with a 1-based index.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Download result; <c>Result</c> holds the page HTML on success.</param>
        public void getListOfEpisodes(object sender, DownloadStringCompletedEventArgs e)
        {
            // Reading e.Result after a cancelled or failed download throws,
            // so report the problem and stop instead.
            if (e.Cancelled)
            {
                Console.WriteLine("Episode list download was cancelled.");
                return;
            }

            if (e.Error != null)
            {
                Console.WriteLine("Episode list download failed: " + e.Error.Message);
                return;
            }

            var dom = CQ.Create(e.Result);

            var links = dom[".chapters_list tbody .lang_English td:first-child a"];
            int i = 0;
            foreach (var element in links)
            {
                i++;

                // "/1" addresses the first page of the chapter.
                var firstPageUrl = new CQ(element).Attr("href") + "/1";
                Console.WriteLine(firstPageUrl);
                GetEpisodeImageUrls(firstPageUrl, i);
            }
        }
コード例 #5
0
        /// <summary>
        /// Calls <c>BuildSitefinityReference</c> on every <c>img</c> element and on
        /// every link whose target ends in .jpg, .png, .gif or .tiff, then returns
        /// the resulting markup.
        /// </summary>
        /// <param name="content">HTML markup to process.</param>
        /// <returns>The rendered document after processing.</returns>
        public static string LinkContentImages(string content)
        {
            CQ document = content;

            if (!String.IsNullOrWhiteSpace(document.Render()))
            {
                document.Select("img").Each((element) =>
                {
                    new CQ(element).BuildSitefinityReference(LinkedImageTagType.Image);
                });

                // Repeat this process for any links that point to image
                // resources.
                document.Select("a").Each((element) =>
                {
                    CQ cqElement = new CQ(element);

                    string imageSrc = cqElement.Attr("href");

                    // Anchors without an href have no target to inspect.
                    if (imageSrc == null)
                    {
                        return;
                    }

                    // Ordinal comparison: URLs are not linguistic text.
                    if (imageSrc.EndsWith(".jpg", StringComparison.Ordinal) ||
                        imageSrc.EndsWith(".png", StringComparison.Ordinal) ||
                        imageSrc.EndsWith(".gif", StringComparison.Ordinal) ||
                        imageSrc.EndsWith(".tiff", StringComparison.Ordinal))
                    {
                        cqElement.BuildSitefinityReference(LinkedImageTagType.Anchor);
                    }
                });
            }

            return document.Render();
        }
コード例 #6
0
        /// <summary>
        /// Imports every image referenced by the given HTML: the source of each
        /// <c>img</c> element and the target of each link ending in .jpg, .png,
        /// .gif or .tiff.
        /// </summary>
        /// <param name="content">HTML markup to scan for images.</param>
        /// <param name="postTitle">Passed through unchanged to <c>ImportImage</c>.</param>
        /// <param name="blogTitle">Passed through unchanged to <c>ImportImage</c>.</param>
        private void ImportContentImages(string content, string postTitle, string blogTitle)
        {
            CQ document = content;

            // Nothing to scan when the parsed content renders to nothing.
            if (String.IsNullOrWhiteSpace(document.Render()))
            {
                return;
            }

            document.Select("img").Each((element) =>
            {
                string imageSrc = new CQ(element).Attr("src");

                // An <img> without a source has nothing to import.
                if (String.IsNullOrWhiteSpace(imageSrc))
                {
                    return;
                }

                ImportImage(imageSrc, postTitle, blogTitle);
            });

            // Repeat this process for any links that point to image
            // resources.
            document.Select("a").Each((element) =>
            {
                string linkTarget = new CQ(element).Attr("href");

                // Anchors without an href have no target to inspect.
                if (linkTarget == null)
                {
                    return;
                }

                // Ordinal comparison: URLs are not linguistic text.
                if (linkTarget.EndsWith(".jpg", StringComparison.Ordinal) ||
                    linkTarget.EndsWith(".png", StringComparison.Ordinal) ||
                    linkTarget.EndsWith(".gif", StringComparison.Ordinal) ||
                    linkTarget.EndsWith(".tiff", StringComparison.Ordinal))
                {
                    ImportImage(linkTarget, postTitle, blogTitle);
                }
            });
        }
コード例 #7
0
        /// <summary>
        /// Applies a single step to the given selection.
        /// </summary>
        /// <remarks>
        /// "select" renders the selection of each filter (trimmed, one per line),
        /// "remove" deletes the elements matched by each filter and returns the
        /// input, "attr" returns the value of the attribute named by the first
        /// filter; any other action returns the input unchanged.
        /// </remarks>
        /// <param name="step">Step providing the action name and the filter string.</param>
        /// <param name="html">Selection the step operates on.</param>
        /// <returns>The result of the step, as described above.</returns>
        private CQ Eval(Step step, CQ html)
        {
            // A filter string may hold several filters separated by spaces.
            var filterList = step.Filter.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

            if (step.Action == "select")
            {
                var rendered = new System.Text.StringBuilder();
                foreach (var selector in filterList)
                {
                    rendered.Append(html[selector].RenderSelection().Trim()).Append("\n");
                }

                // The string is converted to CQ by the return.
                return rendered.ToString();
            }

            if (step.Action == "remove")
            {
                foreach (var selector in filterList)
                {
                    html[selector].Remove();
                }

                return html;
            }

            if (step.Action == "attr")
            {
                string attributeValue = html.Attr(filterList[0]);
                return attributeValue;
            }

            // Unknown action: pass the input through untouched.
            return html;
        }