// Reads the chapter author (display name and username) out of a chapter page.
// The author is rendered as an anchor whose title attribute carries the username, e.g.
// <a title="Username: rpcity Member Since: July 4th, 2002 Click for links!" style="font-size:1em; font-weight:bold; cursor: pointer;">SmittySmith</a>
// Throws WritingClientHtmlParseException when that anchor cannot be located.
// NOTE(review): the chunk pattern expects a space between the opening quote and "Username:",
// which the sample above does not show - confirm against the current page markup.
public WdcAuthor GetInteractiveChapterAuthor(WdcResponse payload)
{
    // Step 1: isolate the whole anchor element; the lookups below only search inside it.
    Match anchorMatch = Regex.Match(
        payload.WebResponse,
        "<a title=\" Username: .*?<\\/a>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    if (anchorMatch.Success == false)
    {
        throw new WritingClientHtmlParseException($"Couldn't find the HTML chunk containing the author for the interactive chapter '{payload.Address}'", payload.Address, payload.WebResponse);
    }

    string anchorHtml = anchorMatch.Value;

    // Step 2: the username is the run of ASCII letters right after "Username: ".
    // There is no success check here; a failed match simply yields an empty string.
    string username = Regex.Match(anchorHtml, "(?<=Username: )[a-zA-Z]+").Value;

    // Step 3: the display name is the first text found between a '>' and the next '<'.
    // Again, a failed match yields an empty string.
    string displayName = Regex.Match(anchorHtml, "(?<=>).+?(?=<)").Value;

    var author = new WdcAuthor()
    {
        Name = displayName,
        Username = username
    };
    return author;
}
// Tells whether the chapter page is the end of a story branch.
// Matching on "<big>THE END.</big>" was abandoned because the site's HTML tagging is sloppy
// and overlaps (e.g. <i><b>THE END.</i></b>); the "You have come to the end of the story"
// prompt is matched instead.
public bool IsInteractiveChapterEnd(WdcResponse payload)
{
    const string endOfStoryPattern = ">You have come to the end of the story. You can:<\\/";
    return Regex.IsMatch(payload.WebResponse, endOfStoryPattern);
}
// Verifies that the short description is read from the logged-in story homepage sample.
public async Task WdcReaderInteractiveStoryShortDescription()
{
    // Arrange
    var payload = new WdcResponse
    {
        WebResponse = TestUtil.GetDataFile("sample_set_13_06_2019.Looking for adventure - homepage - logged in.html"),
    };

    // Act
    var actual = _reader.GetInteractiveStoryShortDescription(payload);

    // Assert
    Assert.AreEqual("Four people looking for adventure, and dealing with past demons", actual);
}
// Verifies that the story title is read from the logged-in story homepage sample.
public async Task WdcReaderInteractiveStoryTitle()
{
    // Arrange
    var payload = new WdcResponse
    {
        WebResponse = TestUtil.GetDataFile("sample_set_13_06_2019.Looking for adventure - homepage - logged in.html"),
    };

    // Act
    var actual = _reader.GetInteractiveStoryTitle(payload);

    // Assert
    Assert.AreEqual("Looking for adventure", actual);
}
// Parses the logged-in sample of chapter 1 and compares the result with a hand-built expected chapter.
public async Task WdcReaderInteractiveChapterFirstPageLoggedIn()
{
    // Expected results
    var expectedChapter = new WdcInteractiveChapter
    {
        Path = "1",
        Title = "The Great War",
        SourceChoiceTitle = string.Empty,
        Content = TestUtil.GetDataFile("expected_set_13_06_2019.WdcReaderInteractiveChapter1_Content.txt"),
        IsEnd = false,
        Author = new WdcAuthor()
        {
            Name = "The Nameless Hermit",
            Username = "******",
        },
        Choices =
        {
            new WdcInteractiveChapterChoice() { PathLink = "11", Name = "Be Jace" },
            new WdcInteractiveChapterChoice() { PathLink = "12", Name = "Be Rhea" },
            new WdcInteractiveChapterChoice() { PathLink = "13", Name = "Be Marek" },
            new WdcInteractiveChapterChoice() { PathLink = "14", Name = "Be Tara" },
        },
    };

    // Input page
    var payload = new WdcResponse
    {
        WebResponse = TestUtil.GetDataFile("sample_set_13_06_2019.Looking for adventure - chapter 1 - logged in.html"),
        Address = "https://www.writing.com/main/interact/item_id/209084-Looking-for-adventure/map/1",
    };

    WdcInteractiveChapter testChapter = _reader.GetInteractiveChaper("TEST", expectedChapter.Path, payload);

    // The comparison of the two chapters is delegated to the shared helper.
    CompareInteractiveChapters(expectedChapter, testChapter);
}
// Chapter content, method 2.
// WDC changed the layout and the page no longer contains "KonaBody". The content now appears to
// sit in a <div class=""> element, believed to be the only element with an empty class attribute.
// Returns the HTML-decoded text between that opening tag and the first following </div>.
// Throws WritingClientHtmlParseException when no such div is found.
private string GetInteractiveChapterContentM2(WdcResponse payload)
{
    const string contentPattern = "(?<=<div class=\"\">).+?(?=<\\/div>)";

    Match contentMatch = Regex.Match(
        payload.WebResponse,
        contentPattern,
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    if (contentMatch.Success)
    {
        return HttpUtility.HtmlDecode(contentMatch.Value);
    }

    throw new WritingClientHtmlParseException($"Couldn't find the content for the interactive chapter '{payload.Address}'", payload.Address, payload.WebResponse);
}
// Verifies the story author (display name and username) read from the logged-in homepage sample.
// NOTE(review): the chapter tests in this fixture expect the name "The Nameless Hermit" with a
// capital H - confirm which casing the sample page really contains.
public async Task WdcReaderInteractiveStoryAuthor()
{
    // Arrange
    var payload = new WdcResponse
    {
        WebResponse = TestUtil.GetDataFile("sample_set_13_06_2019.Looking for adventure - homepage - logged in.html"),
    };

    // Act
    WdcAuthor author = _reader.GetInteractiveStoryAuthor(payload);

    // Assert
    Assert.AreEqual("The Nameless hermit", author.Name, "The author's name is not what was expected");
    Assert.AreEqual("blackdragon", author.Username, "The author's username is not what was expected");
}
// Finds the text of the choice that led to this chapter ("This choice: <b>...</b>").
// This is usually the more fleshed-out title, as the real chapter title can be truncated.
// This method does not check which chapter it is given; it throws
// WritingClientHtmlParseException whenever the marker is missing.
public string GetInteractiveChapterSourceChoice(WdcResponse payload)
{
    Match sourceChoiceMatch = Regex.Match(
        payload.WebResponse,
        @"(?<=This choice: <b>).*?(?=<\/b>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    if (sourceChoiceMatch.Success)
    {
        return HttpUtility.HtmlDecode(sourceChoiceMatch.Value);
    }

    throw new WritingClientHtmlParseException($"Couldn't find the interactive chapter's source choice and this isn't the first chapter, for chapter '{payload.Address}'", payload.Address, payload.WebResponse);
}
// Reads the interactive story's title from the page's <title> element, i.e. everything
// before the " - Writing.Com" suffix. It is not known whether the site truncates it there.
// The value is passed through WdcUtil.CleanHtmlSymbols and then HTML-decoded.
// Throws WritingClientHtmlParseException when the title element does not match.
public string GetInteractiveStoryTitle(WdcResponse wdcPayload)
{
    const string titlePattern = "(?<=<title>).+?(?= - Writing\\.Com<\\/title>)";

    Match titleMatch = Regex.Match(wdcPayload.WebResponse, titlePattern, RegexOptions.IgnoreCase);
    if (titleMatch.Success == false)
    {
        throw new WritingClientHtmlParseException($"Couldn't find the title for interactive story '{wdcPayload.Address}'", wdcPayload.Address, wdcPayload.WebResponse);
    }

    string cleanedTitle = WdcUtil.CleanHtmlSymbols(titleMatch.Value);
    return HttpUtility.HtmlDecode(cleanedTitle);
}
// Reads the interactive story's long description: the contents of the first
// <td align=left class="norm"> cell, up to its closing </td>.
// The value is passed through WdcUtil.CleanHtmlSymbols and then HTML-decoded.
// Throws WritingClientHtmlParseException when the cell is not found.
public string GetInteractiveStoryDescription(WdcResponse wdcPayload)
{
    const string descriptionPattern = "(?<=<td align=left class=\"norm\">).+?(?=<\\/td>)";

    Match descriptionMatch = Regex.Match(
        wdcPayload.WebResponse,
        descriptionPattern,
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    if (descriptionMatch.Success == false)
    {
        throw new WritingClientHtmlParseException($"Couldn't find the description for interactive story '{wdcPayload.Address}'", wdcPayload.Address, wdcPayload.WebResponse);
    }

    string cleanedDescription = WdcUtil.CleanHtmlSymbols(descriptionMatch.Value);
    return HttpUtility.HtmlDecode(cleanedDescription);
}
// Reads the interactive's tagline / short description.
// This used to be hard to pin-point; as of 11/01/2019 it is available in a META tag
// near the top of the HTML, in the form <META NAME="description" content="...">.
// A missing tag is only logged as a warning; the empty match value is then cleaned and
// decoded like any other value.
public string GetInteractiveStoryShortDescription(WdcResponse wdcPayload)
{
    const string shortDescPattern = "(?<=<META NAME=\"description\" content=\").+?(?=\">)";

    Match shortDescMatch = Regex.Match(wdcPayload.WebResponse, shortDescPattern, RegexOptions.IgnoreCase);
    if (shortDescMatch.Success == false)
    {
        // Just a warning, don't throw an exception over it
        log.Warn($"Couldn't find the short description for interactive story '{wdcPayload.Address}'");
    }

    string cleanedShortDesc = WdcUtil.CleanHtmlSymbols(shortDescMatch.Value);
    return HttpUtility.HtmlDecode(cleanedShortDesc);
}
// Verifies that the long description read from the logged-in homepage sample
// equals the stored expected-result file.
public async Task WdcReaderInteractiveStoryDescription()
{
    // Arrange
    var payload = new WdcResponse
    {
        WebResponse = TestUtil.GetDataFile("sample_set_13_06_2019.Looking for adventure - homepage - logged in.html"),
    };

    // Act
    var actual = _reader.GetInteractiveStoryDescription(payload);

    // Assert
    var expected = TestUtil.GetDataFile("expected_set_13_06_2019.WdcReaderInteractiveStoryDescription-ExpectedResult_13_06_2019.txt");
    Assert.AreEqual(expected, actual);
}
// Downloads the "recent additions" page of an interactive story.
// The returned response carries the page address and the fetched page body.
public async Task<WdcResponse> GetInteractiveRecentAdditions(string interactiveID, CancellationToken ct)
{
    Uri pageUri = GetPathToInteractiveRecentAdditions(interactiveID);
    _log.DebugFormat("Getting interactive story recent additions: {0}", pageUri);

    var response = new WdcResponse
    {
        Address = pageUri.ToString(),
        WebResponse = await GetWdcPage(pageUri, ct),
    };
    return response;
}
// Downloads the outline page of an interactive story.
// The returned response carries the page address and the fetched page body.
public async Task<WdcResponse> GetInteractiveOutline(string interactiveID, CancellationToken ct)
{
    Uri pageUri = GetPathToInteractiveOutline(interactiveID);
    _log.DebugFormat("Getting interactive story outline: {0}", pageUri);

    var response = new WdcResponse
    {
        Address = pageUri.ToString(),
        WebResponse = await GetWdcPage(pageUri, ct),
    };
    return response;
}
// Downloads a single chapter page of an interactive story.
// The returned response carries the page address and the fetched page body.
public async Task<WdcResponse> GetInteractiveChapter(string interactiveID, string chapterID, CancellationToken ct)
{
    Uri pageUri = GetPathToInteractiveChapter(interactiveID, chapterID);
    _log.DebugFormat("Getting interactive story chapter: {0}", pageUri);

    var response = new WdcResponse
    {
        Address = pageUri.ToString(),
        WebResponse = await GetWdcPage(pageUri, ct),
    };
    return response;
}
// Chapter title, method 2: take the text between <big><big><b> and </b></big></big>.
// Other parts of the page use <big><b> too, but only the chapter title is wrapped in two <big> tags.
// Not perfect, and it depends on the website layout staying the same.
// TODO: It doesn't work anymore :( chapter titles aren't being read.
// Throws WritingClientHtmlParseException when the wrapper is not found.
private string GetInteractiveChapterTitleM2(WdcResponse payload)
{
    Match titleMatch = Regex.Match(
        payload.WebResponse,
        @"(?<=<big><big><b>).*?(?=<\/b><\/big><\/big>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    if (titleMatch.Success)
    {
        return HttpUtility.HtmlDecode(titleMatch.Value);
    }

    throw new WritingClientHtmlParseException($"Couldn't find the chapter title for chapter '{payload.Address}'", payload.Address, payload.WebResponse);
}
// Builds a WdcInteractiveStory from the story's homepage response: id, url, title,
// short description and long description, stamped with the current local time.
public WdcInteractiveStory GetInteractiveStory(string interactiveID, WdcResponse wdcPayload)
{
    log.DebugFormat("Getting interactive story: {0}", interactiveID);

    return new WdcInteractiveStory
    {
        ID = interactiveID,
        Url = wdcPayload.Address,
        Name = GetInteractiveStoryTitle(wdcPayload),
        ShortDescription = GetInteractiveStoryShortDescription(wdcPayload),
        Description = GetInteractiveStoryDescription(wdcPayload),
        LastUpdatedInfo = DateTime.Now,
    };
}
// Parses the logged-out sample of chapter 1 and compares the result with a hand-built expected chapter.
public async Task WdcReaderInteractiveChapterFirstPageLoggedOut()
{
    // Expected results
    var expectedChapter = new WdcInteractiveChapter
    {
        Path = "1",
        Title = "The Great War",
        SourceChoiceTitle = string.Empty,
        Content = TestUtil.GetDataFile("expected_set_13_06_2019.WdcReaderInteractiveChapter1_Content.txt"),
        IsEnd = false,
        Author = new WdcAuthor()
        {
            Name = "The Nameless Hermit",
            Username = "******",
        },
        Choices =
        {
            new WdcInteractiveChapterChoice() { PathLink = "11", Name = "Be Jace" },
            new WdcInteractiveChapterChoice() { PathLink = "12", Name = "Be Rhea" },
            new WdcInteractiveChapterChoice() { PathLink = "13", Name = "Be Marek" },
            new WdcInteractiveChapterChoice() { PathLink = "14", Name = "Be Tara" },
        },
    };

    // Set things up
    var payload = new WdcResponse
    {
        WebResponse = TestUtil.GetDataFile("sample_set_13_06_2019.Looking for adventure - chapter 1 - logged out.html"),
        Address = "https://www.writing.com/main/interact/item_id/209084-Looking-for-adventure/map/1",
    };

    WdcInteractiveChapter testChapter = _reader.GetInteractiveChaper("TEST", expectedChapter.Path, payload);

    // Compare
    CompareInteractiveChapters(expectedChapter, testChapter);
}
// Dummy-client version of the outline fetch: returns canned content instead of downloading.
// The response address is set to the interactive ID itself. If DoInteractivesUnavailable()
// yields a non-empty page, that page is returned; otherwise the logged-in or logged-out
// outline sample is chosen from the client's mode.
public async Task<WdcResponse> GetInteractiveOutline(string interactiveID, CancellationToken ct)
{
    var response = new WdcResponse();
    response.Address = interactiveID;
    response.WebResponse = DoInteractivesUnavailable();

    if (!string.IsNullOrEmpty(response.WebResponse))
    {
        return response;
    }

    bool isLoggedIn = _mode == DummyWdcClientMode.LoggedIn || _mode == DummyWdcClientMode.LoggedInPaid;
    response.WebResponse = isLoggedIn
        ? GetPage("Looking for adventure - outline - logged in.html")
        : GetPage("Looking for adventure - outline - logged out.html");

    return response;
}
// Chapter title, method 3: derive it from the page's <title> element.
// Paid users appear to get dynamic reading pages that AJAX-load chapters instead of
// serving static pages; reading the page title is meant to work for those pages too.
// Throws WritingClientHtmlParseException when the <title> element is missing, or when
// ReadPageTitle yields a null or empty PageName for it.
private string GetInteractiveChapterTitleM3(WdcResponse payload)
{
    Match pageTitleMatch = Regex.Match(
        payload.WebResponse,
        @"(?<=<title>).*?(?=<\/title>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    if (pageTitleMatch.Success == false)
    {
        throw new WritingClientHtmlParseException($"Couldn't find the page title on page '{payload.Address}'", payload.Address, payload.WebResponse);
    }

    // Got the raw page title; let the shared parser split it into its parts.
    var parsedTitle = ReadPageTitle(pageTitleMatch.Value);
    if (!string.IsNullOrEmpty(parsedTitle.PageName))
    {
        return parsedTitle.PageName;
    }

    throw new WritingClientHtmlParseException($"Couldn't find the chapter title in the page title for chapter '{payload.Address}'", payload.Address, payload.WebResponse);
}
// Story author lookup - not implemented yet; always throws NotImplementedException.
// TODO: get the author. Telling the chapter author apart from the story author
// looks like it will be painful.
public WdcAuthor GetInteractiveStoryAuthor(WdcResponse wdcPayload)
{
    throw new NotImplementedException();
}
// Fetches a chapter page through the given client and parses it into a WdcInteractiveChapter.
// Parsing is done by the payload-based overload of this method.
public async Task<WdcInteractiveChapter> GetInteractiveChaper(string interactiveID, string chapterPath, IWdcClient wdcClient, CancellationToken ct)
{
    WdcResponse chapterPage = await wdcClient.GetInteractiveChapter(interactiveID, chapterPath, ct);

    WdcInteractiveChapter parsedChapter = GetInteractiveChaper(interactiveID, chapterPath, chapterPage);
    return parsedChapter;
}
// Parses an already-downloaded chapter page into a WdcInteractiveChapter:
// title, content, source choice, author and the available choices.
// Throws ArgumentException when chapterPath is not a valid chapter path.
public WdcInteractiveChapter GetInteractiveChaper(string interactiveID, string chapterPath, WdcResponse payload)
{
    bool pathIsValid = WdcUtil.IsValidChapterPath(chapterPath);
    if (pathIsValid == false)
    {
        throw new ArgumentException($"Chapter '{chapterPath}' is not a valid chapter path", nameof(chapterPath));
    }

    var chapter = new WdcInteractiveChapter
    {
        Path = chapterPath,
        Title = GetInteractiveChapterTitle(payload),
        Content = GetInteractiveChapterContent(payload),
        // Only the first chapter ("1") has no source choice; every other chapter is looked up.
        SourceChoiceTitle = chapterPath != "1" ? GetInteractiveChapterSourceChoice(payload) : "",
        LastUpdated = DateTime.Now,
        Author = GetInteractiveChapterAuthor(payload),
    };

    // A null choice list is how the choices parser signals the end of a story branch.
    var choices = GetInteractiveChapterChoices(payload);
    if (choices != null)
    {
        chapter.Choices.AddRange(choices);
    }
    else
    {
        chapter.IsEnd = true;
    }

    return chapter;
}
// Returns the chapter's title. Currently delegates to method 3 (title taken from the page title).
public string GetInteractiveChapterTitle(WdcResponse payload)
{
    return GetInteractiveChapterTitleM3(payload);
}
// Returns the chapter's content (its body). Currently delegates to method 2 (the <div class=""> lookup).
public string GetInteractiveChapterContent(WdcResponse payload)
{
    return GetInteractiveChapterContentM2(payload);
}
// Gets the choices available at the end of a chapter.
// None of the surrounding divs have IDs, so this works in two passes: first cut out the chunk
// of HTML between the "You have the following choice(s):" heading and the end_of_choices div,
// then pick the individual anchors out of that chunk.
// Returns null (not an empty collection) when the page is an end-of-story page.
// Throws WritingClientHtmlParseException when the choices chunk, or the href of one of the
// choice anchors, cannot be found.
public IEnumerable<WdcInteractiveChapterChoice> GetInteractiveChapterChoices(WdcResponse payload)
{
    if (IsInteractiveChapterEnd(payload))
    {
        return null;
    }

    var choices = new List<WdcInteractiveChapterChoice>();

    // Pass 1: the chunk of HTML that contains all of the choices.
    Regex chapterChoicesChunkRegex = new Regex("(?<=<b>You have the following choice(s)?:<\\/b>).*?(?=<\\/div><div id=\"end_of_choices\")", RegexOptions.Singleline | RegexOptions.IgnoreCase);
    Match chapterChoicesChunkMatch = chapterChoicesChunkRegex.Match(payload.WebResponse);
    if (!chapterChoicesChunkMatch.Success)
    {
        throw new WritingClientHtmlParseException($"Couldn't find the HTML chunk containing choices for interactive chapter '{payload.Address}'", payload.Address, payload.WebResponse);
    }

    string chapterChoicesChunkHtml = chapterChoicesChunkMatch.Value;

    // Pass 2: the individual choice anchors inside the chunk.
    Regex chapterChoicesRegex = new Regex("<a .*?href=\".+?\">.+?<\\/a>", RegexOptions.IgnoreCase);
    MatchCollection chapterChoicesMatches = chapterChoicesRegex.Matches(chapterChoicesChunkHtml);

    // The href pattern never changes, so build it once rather than once per choice.
    Regex choiceUrlRegex = new Regex("(?<=href=\").+?(?=\")");

    foreach (Match match in chapterChoicesMatches)
    {
        string anchorHtml = match.Value;

        // Get the URL
        Match choiceUrlMatch = choiceUrlRegex.Match(anchorHtml);
        if (!choiceUrlMatch.Success)
        {
            throw new WritingClientHtmlParseException($"Could not find the URL of choice '{match.Value}' on interactive chapter '{payload.Address}'", payload.Address, payload.WebResponse);
        }

        var newChoice = new WdcInteractiveChapterChoice();

        // The chapter path is the final parameter of the choice URL.
        newChoice.PathLink = WdcUtil.GetFinalParmFromUrl(choiceUrlMatch.Value);

        // The choice name / description is everything between the first '>' and the last '<'.
        int nameStart = anchorHtml.IndexOf('>') + 1;
        int nameEnd = anchorHtml.LastIndexOf('<');
        newChoice.Name = HttpUtility.HtmlDecode(anchorHtml.Substring(nameStart, nameEnd - nameStart));

        choices.Add(newChoice);
    }

    return choices.ToArray();
}
// Collects every chapter link found in the given page.
// A link is any occurrence of <pathToRoot>main/interact/item_id/<interactiveID>/map/<digits>, e.g.
// https://www.writing.com/main/interact/item_id/1824771-short-stories-by-the-people/map/123
// Matches are returned in page order; repeated links are not removed.
public IEnumerable<Uri> GetInteractiveChapterList(string interactiveID, Uri pathToRoot, WdcResponse wdcPayload)
{
    // Build the raw link pattern, then let WdcUtil.RegexSafeUrl turn it into the final regex pattern.
    string relativeLinkPattern = string.Format("main/interact/item_id/{0}/map/{1}", interactiveID, @"(\d)+");
    string linkPattern = WdcUtil.RegexSafeUrl(pathToRoot.ToString() + relativeLinkPattern);

    var linkRegex = new Regex(linkPattern, RegexOptions.IgnoreCase);

    var chapterLinks = new List<Uri>();
    foreach (Match linkMatch in linkRegex.Matches(wdcPayload.WebResponse))
    {
        var chapterUri = new Uri(linkMatch.Value);
        chapterLinks.Add(chapterUri);
    }

    return chapterLinks.ToArray();
}