/// <summary>
/// Rebuilds <paramref name="html"/>, replacing each placeholder span (a span whose class is
/// PRESERVE_CLASS and whose id has the form "preserve" + alphanumeric key) with the value
/// stored under that key in <c>preserved</c>.
/// </summary>
/// <param name="html">The HTML text to scan for placeholder spans.</param>
/// <returns>
/// The HTML with every recognised placeholder substituted. Spans whose id does not match the
/// expected form, or whose key is not present in <c>preserved</c>, are copied through unchanged.
/// </returns>
public string RestorePreserved(string html)
 {
     string placeholderPattern = "<span class='" + PRESERVE_CLASS + "'>";
     HtmlExtractor extractor = new HtmlExtractor(html);
     StringBuilder result = new StringBuilder();

     // index into html of the first character that has not yet been emitted (or skipped)
     int copiedUpTo = 0;

     while (extractor.Seek(placeholderPattern).Success)
     {
         // flush everything that precedes the span's begin tag
         int tagStart = extractor.Element.Offset;
         result.Append(html, copiedUpTo, tagStart - copiedUpTo);
         copiedUpTo = tagStart;

         BeginTag spanTag = (BeginTag)extractor.Element;
         string spanId = spanTag.GetAttributeValue("id");
         Match idMatch = Regex.Match(spanId ?? "", @"^preserve([a-zA-Z0-9]+)$");
         if (!idMatch.Success)
         {
             // not one of our placeholders: it gets copied verbatim by a later append
             continue;
         }

         string preservedValue;
         if (!preserved.TryGetValue(idMatch.Groups[1].Value, out preservedValue))
         {
             // unknown key: leave the span in the output as-is
             continue;
         }

         result.Append(preservedValue);

         // skip over the placeholder's own content; presumably this advances the
         // extractor to the closing </span> -- confirm against HtmlExtractor
         extractor.CollectTextUntil("span");

         // a null Element after collecting is treated as "nothing left to copy"
         copiedUpTo = extractor.Element == null
             ? html.Length
             : extractor.Parser.Position;
     }

     // emit whatever trails the last processed span
     result.Append(html, copiedUpTo, html.Length - copiedUpTo);
     return result.ToString();
 }
        /// <summary>
        /// Reads the raw post text from <paramref name="xmlNode"/> and uses it to populate the
        /// title and contents of <paramref name="blogPost"/>.
        /// </summary>
        /// <param name="xmlNode">
        /// Node holding the post text, either directly as inner text or base64-encoded inside a
        /// child element named "base64".
        /// </param>
        /// <param name="blogPost">The post whose title and contents are set.</param>
        private void ParsePostContent(XmlNode xmlNode, BlogPost blogPost)
        {
            // prefer the base64 child when present (decoded with _utf8EncodingNoBOM),
            // otherwise fall back to the node's own text
            XmlNode encodedNode = xmlNode.SelectSingleNode("base64");
            string body = encodedNode != null
                ? _utf8EncodingNoBOM.GetString(Convert.FromBase64String(encodedNode.InnerText))
                : xmlNode.InnerText;

            // a leading <title> element supplies the post title; the body is whatever follows
            // the parser position after collecting it, minus leading CR/LF characters
            HtmlExtractor titleExtractor = new HtmlExtractor(body);
            if (titleExtractor.Seek("<title>").Success)
            {
                string titleText = titleExtractor.CollectTextUntil("title");
                SetPostTitleFromXmlValue(blogPost, titleText);

                string afterTitle = body.Substring(titleExtractor.Parser.Position);
                body = afterTitle.TrimStart('\r', '\n');
            }

            // whitespace-only body: leave the post contents untouched
            if (body.Trim() == string.Empty)
            {
                return;
            }

            HtmlExtractor cutExtractor = new HtmlExtractor(body);
            if (!Options.SupportsExtendedEntries || !cutExtractor.Seek("<lj-cut>").Success)
            {
                blogPost.Contents = body;
                return;
            }

            // split the body around the <lj-cut> tag, dropping the tag itself
            int cutStart = cutExtractor.Element.Offset;
            int cutEnd = cutStart + cutExtractor.Element.Length;
            blogPost.SetContents(body.Substring(0, cutStart), body.Substring(cutEnd));
        }
        /// <summary>
        /// Searches the supplied homepage HTML for a
        /// <c>&lt;link rel='service' type='application/atomsvc+xml'&gt;</c> element carrying an
        /// href and, when one is found, configures this instance for the generic Atom provider
        /// using that href as the post API URL.
        /// </summary>
        /// <param name="url">Not referenced by this method.</param>
        /// <param name="html">Homepage HTML to inspect; <c>null</c> yields <c>false</c>.</param>
        /// <param name="preferredOnly">
        /// When <c>true</c>, the link is accepted only if its <c>class</c> attribute contains
        /// the whole word "preferred".
        /// </param>
        /// <returns>
        /// <c>true</c> if a suitable link was found and the provider, client and blog fields
        /// were populated; otherwise <c>false</c>.
        /// </returns>
        private bool AttemptGenericAtomLinkDetection(string url, string html, bool preferredOnly)
        {
            const string GENERIC_ATOM_PROVIDER_ID = "D48F1B5A-06E6-4f0f-BD76-74F34F520792";

            if (html == null)
                return false;

            // look for the service link; the two SeekWithin calls presumably confine the
            // search to the document head -- confirm against HtmlExtractor
            HtmlExtractor ex = new HtmlExtractor(html);
            bool serviceLinkFound = ex
                .SeekWithin("<head>", "<body>")
                .SeekWithin("<link href rel='service' type='application/atomsvc+xml'>", "</head>")
                .Success;
            if (!serviceLinkFound)
                return false;

            IBlogProvider atomProvider = BlogProviderManager.FindProvider(GENERIC_ATOM_PROVIDER_ID);

            // 'as' yields null when the matched element is not a BeginTag; treat that as
            // "no usable link" rather than failing with a NullReferenceException below
            BeginTag bt = ex.Element as BeginTag;
            if (bt == null)
                return false;

            if (preferredOnly)
            {
                string classes = bt.GetAttributeValue("class");
                if (classes == null)
                    return false;
                if (!Regex.IsMatch(classes, @"\bpreferred\b"))
                    return false;
            }

            string linkUrl = bt.GetAttributeValue("href");

            Debug.WriteLine("Atom service link detected in the blog homepage");

            _providerId = atomProvider.Id;
            _serviceName = atomProvider.Name;
            _clientType = atomProvider.ClientType;
            _blogName = string.Empty;
            _postApiUrl = linkUrl;

            // verify the credentials against the detected endpoint, then list the user's blogs
            IBlogClient client = BlogClientManager.CreateClient(atomProvider.ClientType, _postApiUrl, _credentials);
            client.VerifyCredentials();
            _usersBlogs = client.GetUsersBlogs();
            if (_usersBlogs.Length == 1)
            {
                // exactly one blog: select it without further input
                _hostBlogId = _usersBlogs[0].Id;
                _blogName = _usersBlogs[0].Name;
                /*
                if (_usersBlogs[0].HomepageUrl != null && _usersBlogs[0].HomepageUrl.Length > 0)
                    _homepageUrl = _usersBlogs[0].HomepageUrl;
                */
            }

            // attempt to read the blog name from the homepage title
            if (string.IsNullOrEmpty(_blogName))
            {
                HtmlExtractor ex2 = new HtmlExtractor(html);
                if (ex2.Seek("<title>").Success)
                {
                    _blogName = ex2.CollectTextUntil("title");
                }
            }

            return true;
        }