// Get the interactive story's description.
// The description is read from the first <td align=left class="norm"> cell found in
// wdcPayload.WebResponse (matched case-insensitively; the cell content may span lines).
// The captured text is passed through WdcUtil.CleanHtmlSymbols and then HTML-decoded.
// Throws WritingClientHtmlParseException if no such cell can be found.
public string GetInteractiveStoryDescription(WdcResponse wdcPayload)
{
    var descriptionPattern = new Regex(
        "(?<=<td align=left class=\"norm\">).+?(?=<\\/td>)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    var found = descriptionPattern.Match(wdcPayload.WebResponse);

    if (found.Success)
    {
        var cleaned = WdcUtil.CleanHtmlSymbols(found.Value);
        return HttpUtility.HtmlDecode(cleaned);
    }

    // Nothing matched: report the address and the raw HTML so the failure can be diagnosed
    throw new WritingClientHtmlParseException(
        $"Couldn't find the description for interactive story '{wdcPayload.Address}'",
        wdcPayload.Address,
        wdcPayload.WebResponse);
}
// Get the interactive story's title.
// The title is taken from the page's <title> element, minus the trailing " - Writing.Com".
// Not sure whether the site truncates long titles inside <title> or not.
// The captured text is passed through WdcUtil.CleanHtmlSymbols and then HTML-decoded.
// Throws WritingClientHtmlParseException if the <title> element cannot be matched.
public string GetInteractiveStoryTitle(WdcResponse wdcPayload)
{
    var titlePattern = new Regex(
        "(?<=<title>).+?(?= - Writing\\.Com<\\/title>)",
        RegexOptions.IgnoreCase);
    var found = titlePattern.Match(wdcPayload.WebResponse);

    if (found.Success)
    {
        var cleaned = WdcUtil.CleanHtmlSymbols(found.Value);
        return HttpUtility.HtmlDecode(cleaned);
    }

    // Nothing matched: report the address and the raw HTML so the failure can be diagnosed
    throw new WritingClientHtmlParseException(
        $"Couldn't find the title for interactive story '{wdcPayload.Address}'",
        wdcPayload.Address,
        wdcPayload.WebResponse);
}
// Get the interactive's tagline or short description.
// Previously this has been difficult to pin-point, but as of 11/01/2019 the site
// exposes it in a META tag at the top of the HTML, e.g.
//   <META NAME="description" content="How will young James fare alone with his mature, womanly neighbors? ">
// A missing tag is only logged as a warning; no exception is thrown for it.
public string GetInteractiveStoryShortDescription(WdcResponse wdcPayload)
{
    var metaPattern = new Regex(
        "(?<=<META NAME=\"description\" content=\").+?(?=\">)",
        RegexOptions.IgnoreCase);
    var found = metaPattern.Match(wdcPayload.WebResponse);

    if (found.Success == false)
    {
        // Just a warning, don't throw an exception over it
        log.Warn($"Couldn't find the short description for interactive story '{wdcPayload.Address}'");
    }

    // A failed match has an empty Value, so in that case the empty string is cleaned and decoded
    var cleaned = WdcUtil.CleanHtmlSymbols(found.Value);
    return HttpUtility.HtmlDecode(cleaned);
}
// Splits a page title into a story name and, when present, a page name.
//
// A null or empty title returns a freshly constructed result with nothing assigned by this method.
// Every occurrence of " - writing.com" (any casing) is removed before the title is parsed.
// A title without TITLE_SEPARATOR is treated as a bare story name.
// Otherwise the title is cut at the FIRST separator only, so any later separators stay
// inside the second part instead of being silently discarded.
public WdcTitleReaderResult ReadPageTitle(string pageTitle)
{
    var r = new WdcTitleReaderResult();

    if (string.IsNullOrEmpty(pageTitle))
    {
        return r;
    }

    // Start by trimming off the " - Writing.com"
    Regex titleTailPattern = new Regex(" - writing\\.com", RegexOptions.IgnoreCase);
    pageTitle = titleTailPattern.Replace(pageTitle, "");

    // Look for the ": " and split on the first occurrence only (count = 2).
    // A single ordinal split decides both whether a separator exists and where it is,
    // and keeps text after a second separator in the last part.
    var pageTitleSplit = pageTitle.Split(new[] { TITLE_SEPARATOR }, 2, System.StringSplitOptions.None);

    if (pageTitleSplit.Length < 2)
    {
        // Didn't find separator, is just a simple page name
        r.StoryName = WdcUtil.CleanHtmlSymbols(pageTitle.Trim());
    }
    else
    {
        // Found separator, there are 2 parts.
        // The recent chapters page uses "Recent chapters: (story title)", i.e. the parts are swapped
        bool backwards = pageTitleSplit[0] == "Recent Chapters";

        r.StoryName = WdcUtil.CleanHtmlSymbols(pageTitleSplit[backwards ? 1 : 0].Trim());
        r.PageName = WdcUtil.CleanHtmlSymbols(pageTitleSplit[backwards ? 0 : 1].Trim());
    }

    return r;
}