/// <summary>
/// Rebuilds <paramref name="html"/>, substituting stored values for placeholder spans.
/// </summary>
/// <remarks>
/// A placeholder is a span found by seeking the pattern
/// &lt;span class='PRESERVE_CLASS'&gt; whose id attribute has the form
/// "preserve" followed by an alphanumeric key, and whose key is present in
/// <c>preserved</c>. Spans that do not satisfy both conditions, and all other
/// markup, are copied through unchanged.
/// </remarks>
/// <param name="html">The markup to scan.</param>
/// <returns>The markup with every recognized placeholder replaced by its stored value.</returns>
public string RestorePreserved(string html)
{
    StringBuilder output = new StringBuilder();
    HtmlExtractor extractor = new HtmlExtractor(html);
    string spanPattern = "<span class='" + PRESERVE_CLASS + "'>";

    // Index into html of the first character not yet written to output.
    int cursor = 0;

    while (extractor.Seek(spanPattern).Success)
    {
        // Flush the text that lies between the cursor and this span's opening tag.
        int tagStart = extractor.Element.Offset;
        output.Append(html, cursor, tagStart - cursor);
        cursor = tagStart;

        BeginTag spanTag = (BeginTag)extractor.Element;
        string idValue = spanTag.GetAttributeValue("id") ?? "";
        Match idMatch = Regex.Match(idValue, @"^preserve([a-zA-Z0-9]+)$");
        if (!idMatch.Success)
        {
            // Not a placeholder id: the cursor stays on the tag, so it is copied verbatim later.
            continue;
        }

        string replacement;
        if (!preserved.TryGetValue(idMatch.Groups[1].Value, out replacement))
        {
            // Unknown key: likewise leave the span in the output untouched.
            continue;
        }

        output.Append(replacement);

        // Skip the span's own content (presumably up to its end tag -- confirm against
        // HtmlExtractor.CollectTextUntil). A null Element afterwards is treated as
        // "nothing left to copy"; otherwise copying resumes at the parser position.
        extractor.CollectTextUntil("span");
        cursor = extractor.Element == null ? html.Length : extractor.Parser.Position;
    }

    // Copy whatever follows the last processed span.
    output.Append(html, cursor, html.Length - cursor);
    return output.ToString();
}
/// <summary>
/// Extracts the post title and body from <paramref name="xmlNode"/> and stores them
/// on <paramref name="blogPost"/>.
/// </summary>
/// <param name="xmlNode">
/// Node holding the post content, either as inner text or inside a "base64" child node.
/// </param>
/// <param name="blogPost">The post that receives the title and contents.</param>
private void ParsePostContent(XmlNode xmlNode, BlogPost blogPost)
{
    // Raw content: decode the "base64" child when there is one, otherwise use the node text as-is.
    XmlNode encodedNode = xmlNode.SelectSingleNode("base64");
    string body = encodedNode != null
        ? _utf8EncodingNoBOM.GetString(Convert.FromBase64String(encodedNode.InnerText))
        : xmlNode.InnerText;

    // When a <title> tag is found, its text becomes the post title and the body is
    // reduced to whatever follows the parser position, minus leading CR/LF characters.
    HtmlExtractor titleExtractor = new HtmlExtractor(body);
    if (titleExtractor.Seek("<title>").Success)
    {
        string titleText = titleExtractor.CollectTextUntil("title");
        SetPostTitleFromXmlValue(blogPost, titleText);
        body = body.Substring(titleExtractor.Parser.Position).TrimStart('\r', '\n');
    }

    // A body that is blank after trimming leaves the post's contents untouched.
    if (body.Trim() == string.Empty)
    {
        return;
    }

    HtmlExtractor cutExtractor = new HtmlExtractor(body);
    bool hasCut = Options.SupportsExtendedEntries && cutExtractor.Seek("<lj-cut>").Success;
    if (!hasCut)
    {
        blogPost.Contents = body;
        return;
    }

    // Split around the <lj-cut> tag: the text before it and the text after it are
    // handed to SetContents as two separate parts; the tag itself is dropped.
    int cutStart = cutExtractor.Element.Offset;
    int cutEnd = cutStart + cutExtractor.Element.Length;
    blogPost.SetContents(body.Substring(0, cutStart), body.Substring(cutEnd));
}
/// <summary>
/// Looks in the head of a blog homepage for a link with rel='service' and
/// type='application/atomsvc+xml' and, when one is accepted, configures this
/// detector to use the generic Atom provider with the link's href as the post API URL.
/// </summary>
/// <param name="url">The homepage URL. Not read by this method.</param>
/// <param name="html">The homepage markup; a null value yields false.</param>
/// <param name="preferredOnly">
/// When true, the link is accepted only if its class attribute contains the word "preferred".
/// </param>
/// <returns>
/// true when a link was accepted and the detector's fields were populated; false when
/// no acceptable link was found.
/// </returns>
private bool AttemptGenericAtomLinkDetection(string url, string html, bool preferredOnly)
{
    const string GENERIC_ATOM_PROVIDER_ID = "D48F1B5A-06E6-4f0f-BD76-74F34F520792";

    if (html == null)
    {
        return false;
    }

    // Restrict the search to the <head> section, then look for the service link inside it.
    HtmlExtractor ex = new HtmlExtractor(html);
    bool linkFound = ex
        .SeekWithin("<head>", "<body>")
        .SeekWithin("<link href rel='service' type='application/atomsvc+xml'>", "</head>")
        .Success;
    if (!linkFound)
    {
        return false;
    }

    IBlogProvider atomProvider = BlogProviderManager.FindProvider(GENERIC_ATOM_PROVIDER_ID);

    // "as" yields null when the matched element is not a BeginTag; treat that as
    // "no usable link" instead of failing later with a NullReferenceException.
    BeginTag bt = ex.Element as BeginTag;
    if (bt == null)
    {
        return false;
    }

    if (preferredOnly)
    {
        string classes = bt.GetAttributeValue("class");
        if (classes == null || !Regex.IsMatch(classes, @"\bpreferred\b"))
        {
            return false;
        }
    }

    string linkUrl = bt.GetAttributeValue("href");

    Debug.WriteLine("Atom service link detected in the blog homepage");
    _providerId = atomProvider.Id;
    _serviceName = atomProvider.Name;
    _clientType = atomProvider.ClientType;
    _blogName = string.Empty;
    _postApiUrl = linkUrl;

    // Verify the credentials against the service, then fetch the user's blogs.
    IBlogClient client = BlogClientManager.CreateClient(atomProvider.ClientType, _postApiUrl, _credentials);
    client.VerifyCredentials();

    _usersBlogs = client.GetUsersBlogs();
    if (_usersBlogs.Length == 1)
    {
        // Exactly one blog: adopt its id and name.
        // NOTE: the blog's HomepageUrl is not copied to _homepageUrl here; that
        // assignment was disabled (commented out) in the original code.
        _hostBlogId = _usersBlogs[0].Id;
        _blogName = _usersBlogs[0].Name;
    }

    // attempt to read the blog name from the homepage title
    if (string.IsNullOrEmpty(_blogName))
    {
        HtmlExtractor ex2 = new HtmlExtractor(html);
        if (ex2.Seek("<title>").Success)
        {
            _blogName = ex2.CollectTextUntil("title");
        }
    }

    return true;
}