Ejemplo n.º 1
0
        // Get the interactive's tagline or short description
        // Previously this has been difficult to pin-point
        // However I found this on 11/01/2019, they've got it in a META tag at the top of the HTML
        // E.g. <META NAME="description" content="How will young James fare alone with his mature, womanly neighbors? ">
        // Returns the content attribute of the page's description META tag, passed through
        // WdcUtil.CleanHtmlSymbols and then HTML-decoded.
        // A page without the tag is not treated as an error: a warning is logged and the
        // empty match value goes through the same clean-up path.
        public string GetInteractiveStoryShortDescription(WdcResponse wdcPayload)
        {
            Match descriptionMatch = Regex.Match(
                wdcPayload.WebResponse,
                "(?<=<META NAME=\"description\" content=\").+?(?=\">)",
                RegexOptions.IgnoreCase);

            if (descriptionMatch.Success == false)
            {
                // Just a warning, don't throw an exception over it
                log.Warn($"Couldn't find the short description for interactive story '{wdcPayload.Address}'");
            }

            // Match.Value is an empty string when the match failed
            string rawDescription     = descriptionMatch.Value;
            string cleanedDescription = WdcUtil.CleanHtmlSymbols(rawDescription);

            return HttpUtility.HtmlDecode(cleanedDescription);
        }
Ejemplo n.º 2
0
        // Get the interactive story's description
        // Returns the contents of the first <td align=left class="norm"> cell found in the page,
        // passed through WdcUtil.CleanHtmlSymbols and then HTML-decoded.
        // Throws WritingClientHtmlParseException when no such cell is found.
        public string GetInteractiveStoryDescription(WdcResponse wdcPayload)
        {
            // Singleline lets '.' match line breaks, so a description spanning several lines is captured whole
            Match descriptionMatch = Regex.Match(
                wdcPayload.WebResponse,
                "(?<=<td align=left class=\"norm\">).+?(?=<\\/td>)",
                RegexOptions.IgnoreCase | RegexOptions.Singleline);

            if (descriptionMatch.Success)
            {
                string cleanedDescription = WdcUtil.CleanHtmlSymbols(descriptionMatch.Value);
                return HttpUtility.HtmlDecode(cleanedDescription);
            }

            throw new WritingClientHtmlParseException($"Couldn't find the description for interactive story '{wdcPayload.Address}'", wdcPayload.Address, wdcPayload.WebResponse);
        }
Ejemplo n.º 3
0
        // Get the interactive story's title
        // This method grabs it from within the <title> element, not sure if it gets truncated or not.
        // Returns the text of the page's <title> element up to (not including) the " - Writing.Com" suffix,
        // passed through WdcUtil.CleanHtmlSymbols and then HTML-decoded.
        // Throws WritingClientHtmlParseException when the page has no title of that form.
        public string GetInteractiveStoryTitle(WdcResponse wdcPayload)
        {
            Match titleMatch = Regex.Match(
                wdcPayload.WebResponse,
                "(?<=<title>).+?(?= - Writing\\.Com<\\/title>)",
                RegexOptions.IgnoreCase);

            if (titleMatch.Success)
            {
                string cleanedTitle = WdcUtil.CleanHtmlSymbols(titleMatch.Value);
                return HttpUtility.HtmlDecode(cleanedTitle);
            }

            throw new WritingClientHtmlParseException($"Couldn't find the title for interactive story '{wdcPayload.Address}'", wdcPayload.Address, wdcPayload.WebResponse);
        }
Ejemplo n.º 4
0
        // Get the available choices
        // This one is going to be complicated, because none of the divs or whatnot have ID's
        // First, get a chunk of the HTML that contains the choices, we'll break them down later
        // Returns the choices offered at the end of an interactive chapter page.
        //
        // Returns null when IsInteractiveChapterEnd reports that the page is an ending (callers test for
        // null to detect this); otherwise returns an array holding one entry per <a ...> link found between
        // the "You have the following choice(s):" heading and the "end_of_choices" div.
        //
        // Throws WritingClientHtmlParseException when that section cannot be located, or when a matched
        // link has no extractable href value.
        public IEnumerable <WdcInteractiveChapterChoice> GetInteractiveChapterChoices(WdcResponse payload)
        {
            if (IsInteractiveChapterEnd(payload))
            {
                return(null);
            }

            // First, cut out the chunk of HTML that contains the choices; it is broken down further below
            Regex chapterChoicesChunkRegex = new Regex("(?<=<b>You have the following choice(s)?:<\\/b>).*?(?=<\\/div><div id=\"end_of_choices\")",
                                                       RegexOptions.Singleline | RegexOptions.IgnoreCase);
            Match chapterChoicesChunkMatch = chapterChoicesChunkRegex.Match(payload.WebResponse);

            if (!chapterChoicesChunkMatch.Success)
            {
                throw new WritingClientHtmlParseException($"Couldn't find the HTML chunk containing choices for interactive chapter '{payload.Address}'", payload.Address, payload.WebResponse);
            }
            string chapterChoicesChunkHtml = chapterChoicesChunkMatch.Value;

            // Both patterns are loop-invariant, so build them once instead of once per choice
            Regex chapterChoicesRegex = new Regex("<a .*?href=\".+?\">.+?<\\/a>", RegexOptions.IgnoreCase);

            // IgnoreCase keeps this in step with the link pattern above: a link written as HREF="..." is
            // accepted there, so it must be readable here rather than ending in a parse exception
            Regex choiceUrlRegex = new Regex("(?<=href=\").+?(?=\")", RegexOptions.IgnoreCase);

            var choices = new List<WdcInteractiveChapterChoice>();

            foreach (Match match in chapterChoicesRegex.Matches(chapterChoicesChunkHtml))
            {
                string choiceHtml = match.Value;

                // Get the URL
                Match choiceUrlMatch = choiceUrlRegex.Match(choiceHtml);
                if (!choiceUrlMatch.Success)
                {
                    throw new WritingClientHtmlParseException($"Could not find the URL of choice '{choiceHtml}' on interactive chapter '{payload.Address}'", payload.Address, payload.WebResponse);
                }

                var newChoice = new WdcInteractiveChapterChoice();

                // Keep only the final part of the URL
                newChoice.PathLink = WdcUtil.GetFinalParmFromUrl(choiceUrlMatch.Value);

                // The choice name is everything between the first '>' (end of the opening tag)
                // and the last '<' (start of the closing tag)
                int nameStart = choiceHtml.IndexOf('>') + 1;
                int nameEnd   = choiceHtml.LastIndexOf('<');
                newChoice.Name = HttpUtility.HtmlDecode(choiceHtml.Substring(nameStart, nameEnd - nameStart));

                choices.Add(newChoice);
            }

            return(choices.ToArray());
        }
Ejemplo n.º 5
0
        // Scans the page for chapter links belonging to the given interactive and returns them as an
        // array of Uri, in the order they are matched (duplicates are not removed).
        // The link pattern is built from pathToRoot and interactiveID and then passed through
        // WdcUtil.RegexSafeUrl before being used as a case-insensitive regex.
        public IEnumerable <Uri> GetInteractiveChapterList(string interactiveID, Uri pathToRoot, WdcResponse wdcPayload)
        {
            // The finished pattern is meant to look like:
            // https:\/\/www\.writing\.com\/main\/interact\/item_id\/1824771-short-stories-by-the-people\/map\/(\d)+
            string rawLinkPattern  = pathToRoot.ToString() + "main/interact/item_id/" + interactiveID + "/map/" + @"(\d)+";
            string safeLinkPattern = WdcUtil.RegexSafeUrl(rawLinkPattern);

            Regex           linkRegex   = new Regex(safeLinkPattern, RegexOptions.IgnoreCase);
            MatchCollection linkMatches = linkRegex.Matches(wdcPayload.WebResponse);

            // One Uri per matched link
            var chapterUris = new Uri[linkMatches.Count];
            for (int i = 0; i < chapterUris.Length; i++)
            {
                chapterUris[i] = new Uri(linkMatches[i].Value);
            }

            return chapterUris;
        }
Ejemplo n.º 6
0
        // Breaks a page title into a story name and, when present, a page name.
        // A null or empty title yields an untouched WdcTitleReaderResult.
        // Otherwise the " - Writing.com" text is stripped and the remainder is split on TITLE_SEPARATOR
        // (declared elsewhere; presumably ": " - confirm). Only the first two parts of the split are used.
        public WdcTitleReaderResult ReadPageTitle(string pageTitle)
        {
            var result = new WdcTitleReaderResult();

            if (!string.IsNullOrEmpty(pageTitle))
            {
                // Start by removing the " - Writing.com" text, whatever its casing
                string strippedTitle = Regex.Replace(pageTitle, " - writing\\.com", "", RegexOptions.IgnoreCase);

                if (strippedTitle.IndexOf(TITLE_SEPARATOR) >= 0)
                {
                    // Found the separator, so there are (at least) 2 parts
                    var titleParts = strippedTitle.Split(TITLE_SEPARATOR);

                    // The recent chapters page puts the story title second: "Recent Chapters: (story title)"
                    int storyIndex = (titleParts[0] == "Recent Chapters") ? 1 : 0;
                    int pageIndex  = 1 - storyIndex;

                    result.StoryName = WdcUtil.CleanHtmlSymbols(titleParts[storyIndex].Trim());
                    result.PageName  = WdcUtil.CleanHtmlSymbols(titleParts[pageIndex].Trim());
                }
                else
                {
                    // No separator, the whole title is just a simple page name
                    result.StoryName = WdcUtil.CleanHtmlSymbols(strippedTitle.Trim());
                }
            }

            return result;
        }
Ejemplo n.º 7
0
        // Builds a WdcInteractiveChapter from a fetched chapter page by calling the individual
        // GetInteractiveChapter* readers on the payload.
        // Throws ArgumentException when WdcUtil.IsValidChapterPath rejects chapterPath.
        // Note: interactiveID is accepted but not used by this method.
        public WdcInteractiveChapter GetInteractiveChaper(string interactiveID, string chapterPath, WdcResponse payload)
        {
            bool pathIsValid = WdcUtil.IsValidChapterPath(chapterPath);
            if (pathIsValid == false)
            {
                throw new ArgumentException($"Chapter '{chapterPath}' is not a valid chapter path", nameof(chapterPath));
            }

            var chapter = new WdcInteractiveChapter
            {
                Path    = chapterPath,
                Title   = GetInteractiveChapterTitle(payload),
                Content = GetInteractiveChapterContent(payload),
            };

            if (chapterPath == "1")
            {
                // The first chapter has no source choice to read
                chapter.SourceChoiceTitle = "";
            }
            else
            {
                chapter.SourceChoiceTitle = GetInteractiveChapterSourceChoice(payload);
            }

            // Stamped with the local time at which the page was parsed
            chapter.LastUpdated = DateTime.Now;
            chapter.Author      = GetInteractiveChapterAuthor(payload);

            // A null choice list is how GetInteractiveChapterChoices signals an ending chapter
            var availableChoices = GetInteractiveChapterChoices(payload);
            if (availableChoices != null)
            {
                chapter.Choices.AddRange(availableChoices);
            }
            else
            {
                chapter.IsEnd = true;
            }

            return chapter;
        }