/// <summary>
/// Tests whether an element is a <c>meta</c> begin tag whose <c>name</c> attribute is exactly
/// "generator" and whose <c>content</c> attribute equals "blogger" (compared case-insensitively).
/// </summary>
/// <param name="e">The parsed element to test.</param>
/// <returns>true when the element is such a generator tag; otherwise false.</returns>
public bool IsMatch(Element e)
            {
                BeginTag metaTag = e as BeginTag;

                // The "name" comparison is an exact (case-sensitive) string match.
                bool isGeneratorMeta = metaTag != null &&
                                       metaTag.NameEquals("meta") &&
                                       metaTag.GetAttributeValue("name") == "generator";

                if (!isGeneratorMeta)
                {
                    return false;
                }

                string content = metaTag.GetAttributeValue("content");

                return content != null &&
                       CaseInsensitiveComparer.DefaultInvariant.Compare("blogger", content) == 0;
            }
        /// <summary>
        /// Creates the form control that corresponds to an <c>input</c> tag and attaches it to
        /// <paramref name="parentForm"/> (each control is handed the form in its constructor).
        /// </summary>
        /// <param name="parentForm">The form the new control is constructed with.</param>
        /// <param name="inputTag">The <c>input</c> begin tag being processed.</param>
        private void HandleInput(HtmlForm parentForm, BeginTag inputTag)
        {
            // Normalize the type so the comparisons below ignore case and padding.
            string rawType = inputTag.GetAttributeValue("type");
            string kind    = (rawType == null) ? null : rawType.Trim().ToLowerInvariant();

            string name  = inputTag.GetAttributeValue("name");
            string value = inputTag.GetAttributeValue("value");

            if (kind == "checkbox" || kind == "radio")
            {
                // Presence of the "checked" attribute is what matters, not its value.
                int  unused;
                bool isChecked = inputTag.GetAttribute("checked", true, 0, out unused) != null;

                if (kind == "checkbox")
                {
                    new Checkbox(parentForm, name, value, isChecked);
                }
                else
                {
                    new Radio(parentForm, name, value, isChecked);
                }
            }
            else if (kind == "submit")
            {
                new SubmitButton(parentForm, name, value);
            }
            else if (kind == "image")
            {
                new ImageButton(parentForm, name, value, inputTag.GetAttributeValue("src"));
            }
            else if (kind == "hidden")
            {
                new Hidden(parentForm, name, value);
            }
            else
            {
                // "password", "text", a missing type, and any unrecognized type all become a textbox.
                new Textbox(parentForm, name, value);
            }
        }
        /// <summary>
        /// Heuristically decides whether the homepage belongs to a Blogger blog.
        /// </summary>
        /// <remarks>
        /// A match is reported when any of the following holds:
        /// the homepage URL is on a <c>*.blogspot.com</c> host or on <c>blogger.com</c> (http or https);
        /// the page contains an element accepted by <c>BloggerGeneratorCriterion</c>;
        /// or the page has a <c>service.post</c> atom link pointing at a blogger/blogspot host.
        /// </remarks>
        /// <returns>true if the page looks like a Blogger blog; otherwise false.</returns>
        public bool IsBlogger()
        {
            // Accept both http and https for blogspot, matching what is already done for blogger.com.
            if (Regex.IsMatch(homepageUrl, @"^https?://.+\.blogspot\.com($|/)", RegexOptions.IgnoreCase) ||
                Regex.IsMatch(homepageUrl, @"^http(s)?://(www\.)?blogger\.com($|/)", RegexOptions.IgnoreCase) ||
                new HtmlExtractor(html).Seek(new BloggerGeneratorCriterion()).Success)
            {
                return(true);
            }

            HtmlExtractor ex = new HtmlExtractor(html);

            while (ex.Seek("<link href rel='service.post' type='application/atom+xml'>").Success)
            {
                BeginTag bt       = (BeginTag)ex.Element;
                string   atomHref = bt.GetAttributeValue("href");

                // Defensive: a link whose href has no usable value cannot identify anything,
                // and both checks below would throw on a null string.
                if (atomHref == null)
                {
                    continue;
                }

                // these obsolete Blogger atom links can't be used, but are
                // still a good indication that it's Blogger
                if (atomHref.StartsWith("https://www.blogger.com/atom/", StringComparison.OrdinalIgnoreCase))
                {
                    return(true);
                }

                // any other blogger or blogspot atom link will be considered a match
                if (Regex.IsMatch(atomHref, @"^https?\:\/\/.+\.blog(ger|spot)\.com\/.*", RegexOptions.IgnoreCase))
                {
                    return(true);
                }
            }

            return(false);
        }
Exemple #4
0
        /// <summary>
        /// Looks for the first <c>embed</c> or <c>object</c> tag in <paramref name="input"/> that carries
        /// both <c>width</c> and <c>height</c> attributes and returns those dimensions.
        /// </summary>
        /// <param name="input">HTML to inspect; may be null or empty.</param>
        /// <returns>
        /// The size read from the tag, or a size built from <c>_width</c>/<c>_height</c> when the input is
        /// empty, no tag qualifies, or reading the attributes throws.
        /// </returns>
        Size FindSizeAttribute(string input)
        {
            Size result = new Size(_width, _height);

            if (!string.IsNullOrEmpty(input))
            {
                try
                {
                    RequiredAttribute[] dimensionAttrs = new RequiredAttribute[]
                    {
                        new RequiredAttribute("width"),
                        new RequiredAttribute("height")
                    };
                    IElementPredicate embedOrObject = new OrPredicate(
                        new BeginTagPredicate("embed", dimensionAttrs),
                        new BeginTagPredicate("object", dimensionAttrs));

                    HtmlExtractor extractor = new HtmlExtractor(input);
                    if (extractor.Seek(embedOrObject).Success)
                    {
                        BeginTag sizedTag = (BeginTag)extractor.Element;
                        int      width    = Convert.ToInt32(sizedTag.GetAttributeValue("width"), CultureInfo.InvariantCulture);
                        int      height   = Convert.ToInt32(sizedTag.GetAttributeValue("height"), CultureInfo.InvariantCulture);
                        result = new Size(width, height);
                    }
                }
                catch (Exception error)
                {
                    // Any failure (e.g. a non-numeric dimension) is reported and the default size is kept.
                    Trace.Fail("Exception thrown while trying to find video size: " + error);
                }
            }

            return result;
        }
        /// <summary>
        /// Creates a <c>Textarea</c> control for a <c>textarea</c> tag, using the text the parser
        /// collects up to the matching "textarea" boundary as the control's value.
        /// </summary>
        /// <param name="parentForm">The form the new control is constructed with.</param>
        /// <param name="textareaTag">The <c>textarea</c> begin tag being processed.</param>
        private void HandleTextarea(HtmlForm parentForm, BeginTag textareaTag)
        {
            string fieldName   = textareaTag.GetAttributeValue("name");
            string initialText = parser.CollectTextUntil("textarea");

            new Textarea(parentForm, fieldName, initialText);
        }
        /// <summary>
        /// Replaces each preserve-marker <c>span</c> (class <c>PRESERVE_CLASS</c>, id "preserve" + key)
        /// with the text stored under that key in <c>preserved</c>.
        /// </summary>
        /// <remarks>
        /// Marker spans whose id does not match, or whose key is not in <c>preserved</c>, are copied
        /// through unchanged.
        /// </remarks>
        /// <param name="html">The HTML containing marker spans.</param>
        /// <returns>The HTML with known markers swapped for their preserved content.</returns>
        public string RestorePreserved(string html)
        {
            string        markerPattern = "<span class='" + PRESERVE_CLASS + "'>";
            StringBuilder output        = new StringBuilder();
            HtmlExtractor extractor     = new HtmlExtractor(html);
            int           copiedUpTo    = 0;

            while (extractor.Seek(markerPattern).Success)
            {
                // Flush everything before the marker; the marker itself stays pending until we
                // know whether it will be replaced.
                int markerStart = extractor.Element.Offset;
                output.Append(html, copiedUpTo, markerStart - copiedUpTo);
                copiedUpTo = markerStart;

                BeginTag marker  = (BeginTag)extractor.Element;
                Match    idMatch = Regex.Match(marker.GetAttributeValue("id") ?? "", @"^preserve([a-zA-Z0-9]+)$");
                if (!idMatch.Success)
                {
                    continue;
                }

                string replacement;
                if (!preserved.TryGetValue(idMatch.Groups[1].Value, out replacement))
                {
                    continue;
                }

                output.Append(replacement);

                // Skip the span's own content; resume copying after it (or stop if the
                // extractor has no current element afterwards).
                extractor.CollectTextUntil("span");
                copiedUpTo = (extractor.Element == null) ? html.Length : extractor.Parser.Position;
            }

            output.Append(html, copiedUpTo, html.Length - copiedUpTo);
            return output.ToString();
        }
Exemple #7
0
        /// <summary>
        /// Scans the <c>script</c> tags of a page and returns the first known image viewer whose
        /// pattern matches a script's source URL.
        /// </summary>
        /// <param name="html">The page HTML to scan.</param>
        /// <param name="sourceUrl">The page URL, used to resolve relative script sources.</param>
        /// <returns>The matching viewer from <c>imageViewers</c>, or null if none matches.</returns>
        public static ImageViewer DetectImageViewer(string html, string sourceUrl)
        {
            List<ImageViewer> candidates = imageViewers;

            // One regex per candidate, in the same order, so a regex index maps back to its viewer.
            // NOTE(review): LazyLoader presumably defers running the delegate until .Value is read - confirm.
            LazyLoader<List<Regex> > compiledPatterns = new LazyLoader<List<Regex> >(delegate
            {
                List<Regex> compiled = new List<Regex>(candidates.Count);
                foreach (ImageViewer candidate in candidates)
                {
                    compiled.Add(new Regex(candidate.Pattern, RegexOptions.CultureInvariant));
                }
                return compiled;
            });

            HtmlExtractor extractor = new HtmlExtractor(html);

            while (extractor.Seek("<script src>").Success)
            {
                string scriptSrc = ((BeginTag)extractor.Element).GetAttributeValue("src");

                if (String.IsNullOrEmpty(scriptSrc))
                {
                    continue;
                }

                try
                {
                    // We need absolute URLs.
                    if (!UrlHelper.IsUrl(scriptSrc))
                    {
                        scriptSrc = UrlHelper.EscapeRelativeURL(sourceUrl, scriptSrc);
                    }

                    Uri parsed = new Uri(scriptSrc);
                    if (parsed.IsAbsoluteUri)
                    {
                        // WinLive 248276: keep only the path portion, since an additional query or
                        // fragment on the URL is something the regexes can't handle.
                        scriptSrc = parsed.GetLeftPart(UriPartial.Path);
                    }
                }
                catch (UriFormatException)
                {
                    // Fall back to matching against whatever value we have at this point.
                }

                int index = 0;
                foreach (Regex pattern in compiledPatterns.Value)
                {
                    if (pattern.IsMatch(scriptSrc))
                    {
                        return candidates[index];
                    }
                    index++;
                }
            }

            return null;
        }
        /// <summary>
        /// Builds an <c>HtmlForm</c> from a <c>form</c> tag and consumes parser elements until the
        /// closing <c>form</c> tag (or end of input), dispatching nested input/select/textarea tags
        /// to their handlers.
        /// </summary>
        /// <param name="formTag">The <c>form</c> begin tag.</param>
        /// <returns>The form that was constructed and handed to the control handlers.</returns>
        private HtmlForm HandleForm(BeginTag formTag)
        {
            HtmlForm form = new HtmlForm(
                formTag.GetAttributeValue("name"),
                formTag.GetAttributeValue("action"),
                formTag.GetAttributeValue("method"));

            for (Element current = parser.Next(); current != null; current = parser.Next())
            {
                if (current is EndTag && ((EndTag)current).NameEquals("form"))
                {
                    break;
                }

                BeginTag child = current as BeginTag;
                if (child == null)
                {
                    continue;
                }

                string childName = child.Name.ToLowerInvariant();

                if (childName == "input")
                {
                    HandleInput(form, child);
                }
                else if (childName == "select")
                {
                    HandleSelect(form, child);
                }
                else if (childName == "textarea")
                {
                    HandleTextarea(form, child);
                }
                // Any other tag inside the form is ignored.
            }

            return form;
        }
        /// <summary>
        /// Copies <paramref name="html"/> to a new string, wrapping every object/embed/noembed/script
        /// element in a marker <c>span</c> and recording the element's original markup in
        /// <c>preserved</c> under a freshly generated id.
        /// </summary>
        /// <remarks>
        /// Smart-content <c>div</c> blocks are copied through whole, without scanning their contents.
        /// </remarks>
        /// <param name="html">The HTML to scan.</param>
        /// <returns>The HTML with marker spans inserted around preserved elements.</returns>
        public string ScanAndPreserve(string html)
        {
            StringBuilder    output     = new StringBuilder(html.Length);
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

            for (Element element = htmlParser.Next(); element != null; element = htmlParser.Next())
            {
                BeginTag beginTag = element as BeginTag;
                if (beginTag == null)
                {
                    // Not a begin tag: copy the source text verbatim.
                    output.Append(html, element.Offset, element.Length);
                    continue;
                }

                if (beginTag.NameEquals("div"))
                {
                    string cssClass = beginTag.GetAttributeValue("class");
                    if (cssClass == ContentSourceManager.EDITABLE_SMART_CONTENT ||
                        cssClass == ContentSourceManager.SMART_CONTENT)
                    {
                        // Emit the whole smart-content block as-is.
                        output.Append(html, element.Offset, element.Length);
                        output.Append(htmlParser.CollectHtmlUntil("div"));
                        output.Append("</div>");
                        continue;
                    }
                }

                bool mustPreserve = beginTag.NameEquals("object") ||
                                    beginTag.NameEquals("embed") ||
                                    beginTag.NameEquals("noembed") ||
                                    beginTag.NameEquals("script");
                if (!mustPreserve)
                {
                    output.Append(html, element.Offset, element.Length);
                    continue;
                }

                // Rebuild the element's full markup and remember it under a new id.
                string innerHtml      = htmlParser.CollectHtmlUntil(beginTag.Name);
                string originalMarkup = beginTag.RawText + innerHtml + "</" + beginTag.Name + ">";
                string preserveId     = Guid.NewGuid().ToString("N");

                preserved[preserveId] = originalMarkup;

                output.AppendFormat("<span id=\"preserve{0}\" class=\"{1}\">", preserveId, PRESERVE_CLASS);
                output.Append(originalMarkup);
                output.Append("</span>");
            }

            return output.ToString();
        }
Exemple #10
0
        /// <summary>
        /// Detects a meaningless element such as <c>&lt;p&gt;&lt;/p&gt;</c>, <c>&lt;a href="..."&gt;&lt;/a&gt;</c>
        /// or <c>&lt;a href="..."&gt;&amp;nbsp;&lt;/a&gt;</c> and, when found, consumes the rest of it
        /// (optional whitespace text plus the end tag) from the parser.
        /// </summary>
        /// <param name="htmlParser">Parser positioned just after <paramref name="bt"/>.</param>
        /// <param name="bt">The begin tag that was just read.</param>
        /// <returns>true if the element was meaningless and its remainder was consumed; otherwise false.</returns>
        private static bool RemoveMeaninglessTags(SimpleHtmlParser htmlParser, BeginTag bt)
        {
            // Case 1: a <p> with no attributes and no residue, immediately closed.
            bool isBareParagraph = bt.NameEquals("p") && bt.Attributes.Length == 0 && !bt.HasResidue;
            if (isBareParagraph)
            {
                EndTag paragraphEnd = htmlParser.Peek(0) as EndTag;
                if (paragraphEnd != null && paragraphEnd.NameEquals("p"))
                {
                    // eat up the end tag
                    htmlParser.Next();
                    return true;
                }
            }

            // Case 2: an <a> that has an href but no name/style/id, so nothing else can refer to it.
            bool isPlainLink = bt.NameEquals("a") &&
                               bt.GetAttribute("name") == null &&
                               bt.GetAttributeValue("style") == null &&
                               bt.GetAttributeValue("id") == null &&
                               bt.GetAttributeValue("href") != null;
            if (!isPlainLink)
            {
                return false;
            }

            Element upcoming          = htmlParser.Peek(0);
            bool    skippedWhitespace = false;

            // A text node that is empty after unescaping and trimming does not make the link meaningful.
            if (upcoming is Text && HtmlUtils.UnEscapeEntities(upcoming.RawText, HtmlUtils.UnEscapeMode.NonMarkupText).Trim().Length == 0)
            {
                upcoming          = htmlParser.Peek(1);
                skippedWhitespace = true;
            }

            EndTag linkEnd = upcoming as EndTag;
            if (linkEnd == null || !linkEnd.NameEquals("a"))
            {
                return false;
            }

            if (skippedWhitespace)
            {
                // eat up the whitespace text
                htmlParser.Next();
            }

            // eat up the end tag
            htmlParser.Next();
            return true;
        }
Exemple #11
0
 /// <summary>
 /// Extracts an id from the first <c>embed</c> tag of <paramref name="input"/> by trying each
 /// entry of <c>_embedPatterns</c> against the attribute that entry names.
 /// </summary>
 /// <param name="input">HTML expected to contain an <c>embed</c> tag.</param>
 /// <returns>
 /// The value of the "id" group of the first pattern that matches, or <see cref="String.Empty"/>
 /// when no pattern yields one.
 /// </returns>
 private string IdFromEmbed(string input)
 {
     foreach (EmbedPattern check in _embedPatterns)
     {
         IElementPredicate predicate = new BeginTagPredicate("embed", new RequiredAttribute[] { new RequiredAttribute(check.Attr) });
         HtmlExtractor     ex        = new HtmlExtractor(input).Seek(predicate);
         if (!ex.Success)
         {
             continue;
         }

         BeginTag bt     = ex.Element as BeginTag;
         string   srcRef = (bt == null) ? null : bt.GetAttributeValue(check.Attr);

         // Regex.Match rejects a null input with ArgumentNullException; an attribute without a
         // usable value simply cannot supply an id, so move on to the next pattern.
         if (srcRef == null)
         {
             continue;
         }

         Match m = Regex.Match(srcRef, check.Pattern, RegexOptions.IgnoreCase);
         if (m.Success && m.Groups["id"].Success)
         {
             return(m.Groups["id"].Value);
         }
     }
     return(String.Empty);
 }
        /// <summary>
        /// Reads the <c>option</c> tags of a <c>select</c> element from the parser and, on reaching the
        /// closing <c>select</c> tag, creates a <c>Select</c> control with the collected options.
        /// </summary>
        /// <remarks>
        /// If the input ends before a closing <c>select</c> tag is seen, no control is created.
        /// </remarks>
        /// <param name="parentForm">The form the new control is constructed with.</param>
        /// <param name="selectTag">The <c>select</c> begin tag being processed.</param>
        private void HandleSelect(HtmlForm parentForm, BeginTag selectTag)
        {
            string selectName = selectTag.GetAttributeValue("name");
            int    unused;
            bool   allowsMultiple = selectTag.GetAttribute("multiple", true, 0, out unused) != null;

            ArrayList options = new ArrayList();

            Element current = parser.Next();

            while (current != null)
            {
                BeginTag optionTag = current as BeginTag;
                if (optionTag == null || !optionTag.NameEquals("option"))
                {
                    if (current is EndTag && ((EndTag)current).NameEquals("select"))
                    {
                        new Select(parentForm, selectName, allowsMultiple, (OptionInfo[])options.ToArray(typeof(OptionInfo)));
                        return;
                    }

                    current = parser.Next();
                    continue;
                }

                string optionValue = optionTag.GetAttributeValue("value");
                bool   isSelected  = optionTag.GetAttribute("selected", true, 0, out unused) != null;
                string optionLabel = string.Empty;

                // The label is the text node directly after the option tag, if there is one.
                // When the next element is not text it is left in "current" so the loop examines it.
                current = parser.Next();
                if (current != null && current is Text)
                {
                    optionLabel = HtmlUtils.UnEscapeEntities(current.ToString(), HtmlUtils.UnEscapeMode.NonMarkupText).TrimEnd(' ', '\r', '\n', '\t');
                    current     = parser.Next();
                }

                options.Add(new OptionInfo(optionValue, optionLabel, isSelected));
            }
        }
Exemple #13
0
 /// <summary>
 /// Checks whether <paramref name="input"/> satisfies every entry of <c>_embedPatterns</c>: for each
 /// entry an <c>embed</c> tag carrying the entry's attribute must exist and that attribute's value
 /// must match the entry's pattern (case-insensitively).
 /// </summary>
 /// <param name="input">HTML expected to contain an <c>embed</c> tag.</param>
 /// <returns>true when all patterns are satisfied; false as soon as one is not.</returns>
 public bool MatchesEmbed(string input)
 {
     foreach (EmbedPattern check in _embedPatterns)
     {
         IElementPredicate predicate = new BeginTagPredicate("embed", new RequiredAttribute[] { new RequiredAttribute(check.Attr) });
         HtmlExtractor     ex        = new HtmlExtractor(input).Seek(predicate);
         if (!ex.Success)
         {
             return(false); //didn't find embed tag with the attr
         }

         BeginTag bt     = ex.Element as BeginTag;
         string   srcRef = (bt == null) ? null : bt.GetAttributeValue(check.Attr);

         // A missing value cannot match; testing for null first also avoids the
         // ArgumentNullException Regex.IsMatch throws on null input.
         if (srcRef == null || !Regex.IsMatch(srcRef, check.Pattern, RegexOptions.IgnoreCase))
         {
             return(false);
         }
     }
     return(true); //found all predicates
 }
Exemple #14
0
        /// <summary>
        /// Any setting that is derived from the homepage html needs to be in this function.  This function is turned
        /// on and off when detecting blog settings through the IncludeHomePageSettings.  None of these checks will be run
        /// if the internet is not active.  The detected values are collected in a local dictionary and only published
        /// at the end, by assigning it to <c>_context.HomePageOverrides</c>.
        /// </summary>
        /// <remarks>
        /// Detected settings: character set, whether the doctype mentions XHTML, whether the html/body
        /// direction is RTL, and the name of any recognized DHTML image viewer.
        /// Does nothing when the homepage document is unavailable.
        /// </remarks>
        private void DetectHomePageSettings()
        {
            if (_homepageAccessor.HtmlDocument == null)
            {
                return;
            }

            IDictionary homepageSettings = new Hashtable();

            Debug.Assert(!UseManifestCache, "This code will not run correctly under the manifest cache, due to option overrides not being set");

            // One metadata wrapper serves both the charset and the doctype lookups.
            LightWeightHTMLMetaData metaData = new LightWeightHTMLMetaData(_homepageAccessor.HtmlDocument);

            if (metaData.Charset != null)
            {
                try
                {
                    homepageSettings.Add(BlogClientOptions.CHARACTER_SET, metaData.Charset);
                }
                catch (NotSupportedException)
                {
                    //not an actual encoding
                }
            }

            string docType = metaData.DocType;

            if (docType != null && docType.IndexOf("xhtml", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                homepageSettings.Add(BlogClientOptions.REQUIRES_XHTML, true.ToString(CultureInfo.InvariantCulture));
            }

            //checking whether blog is rtl
            HtmlExtractor extractor = new HtmlExtractor(_homepageAccessor.HtmlDocument.RawHtml);

            if (extractor.Seek(new OrPredicate(
                                   new SmartPredicate("<html dir>"),
                                   new SmartPredicate("<body dir>"))).Success)
            {
                BeginTag tag = (BeginTag)extractor.Element;
                string   dir = tag.GetAttributeValue("dir");
                if (String.Equals(dir, "rtl", StringComparison.OrdinalIgnoreCase))
                {
                    homepageSettings.Add(BlogClientOptions.TEMPLATE_IS_RTL, true.ToString(CultureInfo.InvariantCulture));
                }
            }

            // The document is known to be non-null here (checked on entry and dereferenced above),
            // so the image viewer detection runs unconditionally.
            string      html   = _homepageAccessor.OriginalHtml;
            ImageViewer viewer = DhtmlImageViewers.DetectImageViewer(html, _context.HomepageUrl);
            if (viewer != null)
            {
                homepageSettings.Add(BlogClientOptions.DHTML_IMAGE_VIEWER, viewer.Name);
            }

            _context.HomePageOverrides = homepageSettings;
        }
Exemple #15
0
        /// <summary>
        /// Looks in the page head for an atom service-document link and, when one is found, configures
        /// this detector for the generic atom provider, verifies credentials and loads the user's blogs.
        /// </summary>
        /// <param name="url">Base URL used to make the link's href absolute.</param>
        /// <param name="html">Homepage HTML; when null the method returns false.</param>
        /// <param name="preferredOnly">
        /// When true, the link is accepted only if its <c>class</c> attribute contains the word "preferred".
        /// </param>
        /// <returns>true if a service link was found (and accepted) and the detector was configured; otherwise false.</returns>
        private bool AttemptGenericAtomLinkDetection(string url, string html, bool preferredOnly)
        {
            const string GENERIC_ATOM_PROVIDER_ID = "D48F1B5A-06E6-4f0f-BD76-74F34F520792";

            if (html == null)
            {
                return false;
            }

            HtmlExtractor extractor = new HtmlExtractor(html);

            bool serviceLinkFound = extractor
                                    .SeekWithin("<head>", "<body>")
                                    .SeekWithin("<link href rel='service' type='application/atomsvc+xml'>", "</head>")
                                    .Success;
            if (!serviceLinkFound)
            {
                return false;
            }

            IBlogProvider atomProvider = BlogProviderManager.FindProvider(GENERIC_ATOM_PROVIDER_ID);
            BeginTag      linkTag      = extractor.Element as BeginTag;

            if (preferredOnly)
            {
                string classes = linkTag.GetAttributeValue("class");
                if (classes == null || !Regex.IsMatch(classes, @"\bpreferred\b"))
                {
                    return false;
                }
            }

            string linkUrl = linkTag.GetAttributeValue("href");

            Debug.WriteLine("Atom service link detected in the blog homepage");

            _providerId  = atomProvider.Id;
            _serviceName = atomProvider.Name;
            _clientType  = atomProvider.ClientType;
            _blogName    = string.Empty;
            _postApiUrl  = GetAbsoluteUrl(url, linkUrl);

            IBlogClient client = BlogClientManager.CreateClient(atomProvider.ClientType, _postApiUrl, _credentials);
            client.VerifyCredentials();
            _usersBlogs = client.GetUsersBlogs();

            // With exactly one blog there is no ambiguity, so adopt it directly.
            // NOTE: an assignment of _homepageUrl from _usersBlogs[0].HomepageUrl used to follow here
            // but is disabled.
            if (_usersBlogs.Length == 1)
            {
                _hostBlogId = _usersBlogs[0].Id;
                _blogName   = _usersBlogs[0].Name;
            }

            // attempt to read the blog name from the homepage title
            if (String.IsNullOrEmpty(_blogName))
            {
                HtmlExtractor titleExtractor = new HtmlExtractor(html);
                if (titleExtractor.Seek("<title>").Success)
                {
                    _blogName = titleExtractor.CollectTextUntil("title");
                }
            }

            return true;
        }
Exemple #16
0
        /// <summary>
        /// Walks the current contents to find smart content areas.  When one is found, it calls the operation on the smart content.  The operation has a chance
        /// to return new content through its ref argument, which then replaces the block's inner HTML.
        /// </summary>
        /// <param name="contents">the raw HTML string whose structured blocks will be replaced.</param>
        /// <param name="operation">Delegate for generating replacement content.</param>
        /// <param name="editMode">If true, then the element's stylename will be activated for editing</param>
        /// <param name="sourceContext">Context used to look up the content source and the smart content item for each block.</param>
        /// <param name="continueOnError">
        /// true - if the plugin throws an exception, it keeps crawling the DOM
        /// false - if a plugin throws an exception, it stops processing the DOM and return empty string
        /// null - if a plugin throws an exception, this function will rethrow it
        /// </param>
        /// <returns>the contents with structured blocks replaced.</returns>
        internal static string PerformOperation(string contents, SmartContentOperation operation, bool editMode, IContentSourceSidebarContext sourceContext, bool?continueOnError)
        {
            StringBuilder    sb     = new StringBuilder();
            SimpleHtmlParser parser = new SimpleHtmlParser(contents);

            for (Element e = parser.Next(); e != null; e = parser.Next())
            {
                BeginTag beginTag = e as BeginTag;
                if (beginTag == null || !ContentSourceManager.IsSmartContentClass(beginTag.GetAttributeValue("class")))
                {
                    // Anything that is not a smart content container is copied through untouched.
                    sb.Append(e.ToString());
                    continue;
                }

                ISmartContent sContent = null;

                // Inner HTML already consumed from the parser for this block (null if none was).
                // Kept so the block can still be written out intact if the operation fails.
                string originalInnerHtml = null;

                try
                {
                    string blockId = beginTag.GetAttributeValue("id");
                    if (blockId != null)
                    {
                        string contentSourceId, contentItemId;
                        ContentSourceManager.ParseContainingElementId(blockId, out contentSourceId, out contentItemId);

                        ContentSourceInfo contentSource = sourceContext.FindContentSource(contentSourceId);
                        if (contentSource != null && contentSource.Instance is SmartContentSource)
                        {
                            SmartContentSource sSource = (SmartContentSource)contentSource.Instance;
                            sContent = sourceContext.FindSmartContent(contentItemId);
                            if (sContent != null)
                            {
                                //write the div with the appropriate className
                                string newClassName = editMode ? ContentSourceManager.EDITABLE_SMART_CONTENT : ContentSourceManager.SMART_CONTENT;
                                beginTag.GetAttribute("class").Value = newClassName;

                                //replace the inner HTML of the div with the source's editor HTML
                                string content = parser.CollectHtmlUntil("div");
                                originalInnerHtml = content;

                                operation(sourceContext, sSource, sContent, ref content);

                                // Only write the block once the operation has succeeded, so a failure
                                // cannot leave a half-written block in the output.
                                sb.Append(e.ToString());
                                sb.Append(content);
                                sb.Append("</div>");
                                continue;
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Trace.WriteLine(String.Format(CultureInfo.InvariantCulture, "Error loading smart content item\r\n{0}", ex));
                    sContent = null;

                    if (continueOnError == null)
                    {
                        throw;
                    }

                    if (!continueOnError.Value)
                    {
                        return(String.Empty);
                    }
                }

                if (sContent == null)
                {
                    //this element references an unknown smart content, so it should not be editable
                    Attr classAttr = beginTag.GetAttribute("class");
                    classAttr.Value = ContentSourceManager.SMART_CONTENT;
                }

                sb.Append(e.ToString());

                if (originalInnerHtml != null)
                {
                    // The operation failed after the block's contents were consumed from the parser:
                    // restore the original inner HTML and the closing tag so nothing is lost.
                    sb.Append(originalInnerHtml);
                    sb.Append("</div>");
                }
            }

            return(sb.ToString());
        }