SimpleHtmlParser C# (CSharp) примеры использования

Пример #1

0

Показать файл

Файл: TitleEncodingTest.cs Проект: chonghua/OpenLiveWriter-zh-CN

        /// <summary>
        /// Walks the text elements of the downloaded page looking for content wrapped between
        /// <c>guid1</c> and <c>guid2</c>, and records a "requiresHtmlTitles" result based on how
        /// many times the captured text appears to have been entity-escaped.
        /// </summary>
        /// <param name="homepageHtml">HTML to scan.</param>
        /// <param name="results">Sink that receives the "requiresHtmlTitles" result.</param>
        /// <exception cref="InvalidOperationException">
        /// No text element contained the guid-delimited marker.
        /// </exception>
        protected internal override void HandleResult(string homepageHtml, ITestResults results)
        {
            Regex markerPattern = new Regex(Regex.Escape(guid1) + "(.*?)" + Regex.Escape(guid2));

            SimpleHtmlParser htmlParser = new SimpleHtmlParser(homepageHtml);

            Element current;
            while ((current = htmlParser.Next()) != null)
            {
                // Only text elements can carry the marker.
                if (!(current is Text))
                {
                    continue;
                }

                Match match = markerPattern.Match(current.ToString());
                if (!match.Success)
                {
                    continue;
                }

                string captured = match.Groups[1].Value;

                if (captured == HtmlUtils.EscapeEntities(TEST_STRING))
                {
                    // Escaped exactly once.
                    results.AddResult("requiresHtmlTitles", YES);
                    return;
                }

                if (captured == HtmlUtils.EscapeEntities(HtmlUtils.EscapeEntities(TEST_STRING)))
                {
                    // Escaped twice.
                    results.AddResult("requiresHtmlTitles", NO);
                    return;
                }

                // Neither form matched: report the raw value for diagnosis.
                results.AddResult("requiresHtmlTitles", "[ERROR] (value was: " + captured + ")");
                return;
            }

            throw new InvalidOperationException("Title encoding test failed--title was not detected");
        }

Пример #2

0

Показать файл

        /// <summary>
        /// Parses the HTML in <c>stream</c>, serializes the resulting document into an in-memory
        /// buffer, re-reads it with an <see cref="XmlReader"/> and collects the value of every
        /// non-empty text or whitespace node.
        /// </summary>
        /// <returns>The collected node values, in the order the reader encounters them.</returns>
        public List <string> ExtractTextsHtmlParserSharp()
        {
            stream.Seek(0, SeekOrigin.Begin);

            var simpleHtmlparser = new SimpleHtmlParser();
            // NOTE(review): the StreamReader is intentionally left undisposed, because disposing it
            // would also close `stream`; the Seek above suggests the stream is reused -- confirm.
            var document = simpleHtmlparser.Parse(new StreamReader(stream));
            var texts    = new List <string>();

            // Both the buffer and the reader are IDisposable; release them deterministically.
            using (var memoryStream = new MemoryStream())
            {
                document.Save(memoryStream);
                memoryStream.Seek(0, SeekOrigin.Begin);

                var settings = new XmlReaderSettings {
                    DtdProcessing = DtdProcessing.Parse
                };

                using (var reader = XmlReader.Create(memoryStream, settings))
                {
                    while (reader.Read())
                    {
                        if (reader.NodeType != XmlNodeType.Text && reader.NodeType != XmlNodeType.Whitespace)
                        {
                            continue;
                        }

                        var value = reader.Value;
                        if (value == "")
                        {
                            continue;
                        }

                        texts.Add(value);
                    }
                }
            }

            return(texts);
        }

Пример #3

0

Показать файл

Файл: PostBodyEditingElementBehavior.cs Проект: zcatt/OpenLiveWriter

        /// <summary>
        /// Rewrites <c>args.Html</c> so that the div carrying <c>EXTENDED_ENTRY_ID</c>, together with
        /// its immediately following closing div tag, is replaced by
        /// <c>BlogPost.ExtendedEntryBreak</c>. Does nothing when the id does not occur in the HTML.
        /// </summary>
        /// <param name="args">Fixup arguments whose <c>Html</c> is read and, if changed, overwritten.</param>
        private static void DetachExtendedEntryBehavior(TemporaryFixupArgs args)
        {
            string source = args.Html;

            // Cheap textual pre-check so the common case avoids parsing entirely.
            if (!source.Contains(EXTENDED_ENTRY_ID))
            {
                return;
            }

            StringBuilder    rewritten  = new StringBuilder(source.Length);
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(source);
            string           divPattern = String.Format(CultureInfo.InvariantCulture, "<div id='{0}'>", EXTENDED_ENTRY_ID);
            SmartPredicate   isSplitDiv = new SmartPredicate(divPattern);

            Element current = htmlParser.Next();
            while (current != null)
            {
                if (!isSplitDiv.IsMatch(current))
                {
                    // Everything else is copied verbatim from the original string.
                    rewritten.Append(source, current.Offset, current.Length);
                }
                else
                {
                    // The marker div's begin tag is never copied. The break is emitted only when
                    // the very next element is the closing </div>, which is then consumed too.
                    EndTag closer = htmlParser.Peek(0) as EndTag;
                    if (closer != null && closer.NameEquals("div"))
                    {
                        rewritten.Append(BlogPost.ExtendedEntryBreak);
                        htmlParser.Next();
                    }
                }

                current = htmlParser.Next();
            }

            args.Html = rewritten.ToString();
        }

Пример #4

0

Показать файл

Файл: BlogPostExtensionDataList.cs Проект: zcatt/OpenLiveWriter

        /// <summary>
        /// Finds the smart-content elements in <paramref name="content"/> and returns the extension
        /// data registered for each distinct smart content id that could be resolved.
        /// </summary>
        /// <param name="content">HTML to scan.</param>
        /// <returns>One entry per referenced smart content id for which extension data was found.</returns>
        public IExtensionData[] CalculateReferencedExtensionData(string content)
        {
            // Keyed by smart content id so repeated references collapse into one entry.
            Hashtable referenced = new Hashtable();

            ContentSourceManager.SmartContentPredicate isSmartContent = new ContentSourceManager.SmartContentPredicate();
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(content);

            Element current;
            while ((current = htmlParser.Next()) != null)
            {
                if (!isSmartContent.IsMatch(current))
                {
                    continue;
                }

                BeginTag tag    = current as BeginTag;
                Attr     idAttr = tag.GetAttribute("id");
                if (idAttr == null)
                {
                    // Synchronized WP posts will strip ID attrs (bug 488143)
                    continue;
                }

                string sourceId;
                string contentId;
                ContentSourceManager.ParseContainingElementId(idAttr.Value, out sourceId, out contentId);

                IExtensionData found = GetExtensionData(contentId);
                if (found != null)
                {
                    referenced[contentId] = found;
                }
            }

            return (IExtensionData[])ArrayHelper.CollectionToArray(referenced.Values, typeof(IExtensionData));
        }

Пример #5

0

Показать файл

Файл: HtmlCleaner.cs Проект: chonghua/OpenLiveWriter-zh-CN

        /// <summary>
        /// Returns <paramref name="html"/> with every tag whose name contains a colon, every comment
        /// and every markup directive dropped, and with attributes whose names match
        /// <c>ILLEGAL_ATTR_REGEX</c> removed from the remaining begin tags.
        /// Namespaced tags come with Office 2007 clipboard data and result in weird
        /// namespace declarations being inserted as text into the DOM.
        /// </summary>
        /// <param name="html">HTML to clean.</param>
        /// <returns>The cleaned HTML, rebuilt from each kept element's <c>ToString()</c>.</returns>
        public static string StripNamespacedTagsAndCommentsAndMarkupDirectives(string html)
        {
            StringBuilder    cleaned    = new StringBuilder(html.Length);
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

            for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
            {
                Tag  tag          = current as Tag;
                bool isNamespaced = tag != null && tag.Name.IndexOf(':') >= 0;

                if (isNamespaced || current is Comment || current is MarkupDirective)
                {
                    continue;
                }

                BeginTag opener = current as BeginTag;
                if (opener != null)
                {
                    // NOTE(review): attributes are removed while Attributes is being enumerated;
                    // this is only safe if Attributes is an array/snapshot -- confirm.
                    foreach (Attr attribute in opener.Attributes)
                    {
                        if (ILLEGAL_ATTR_REGEX.IsMatch(attribute.Name))
                        {
                            opener.RemoveAttribute(attribute.Name);
                        }
                    }
                }

                cleaned.Append(current.ToString());
            }

            return cleaned.ToString();
        }

Пример #6

0

Показать файл

Файл: FormFactory.cs Проект: evilpaladique/OpenLiveWriter

 /// <summary>
 /// Creates a factory over the HTML read from <paramref name="s"/>. The stream is read to the
 /// end, and the reader wrapping it is disposed before the parser is built.
 /// </summary>
 /// <param name="s">Stream containing the HTML text.</param>
 public FormFactory(Stream s)
 {
     string html;
     using (StreamReader htmlReader = new StreamReader(s))
     {
         html = htmlReader.ReadToEnd();
     }

     parser = new SimpleHtmlParser(html);
 }

Пример #7

0

Показать файл

Файл: HubwayRentals.cs Проект: roland-chernov-akvelon/Moyeu

        /* Logging in is a two-step process:
         *  1st: we need to visit the login form page to get two important pieces of data
         *    - the session ID cookie which will stay in our cache
         *    - the CSRF token that is generated on the fly in the form
         *  2nd: armed with those two things, we can then post to the login check page which
         *  will simply stamp our session ID as valid
         *
         * Returns true only when the login check answers with a 302 (Found) redirect to
         * HubwayProfileUrl. Returns false when the login form or its CSRF token input cannot
         * be located in the downloaded page, instead of failing with a NullReferenceException
         * or InvalidOperationException.
         */
        async Task <bool> LoginToHubway()
        {
            var loginPage = await Client.GetStringAsync(HubwayLoginUrl).ConfigureAwait(false);

            var parser = new SimpleHtmlParser();
            var doc    = parser.ParseString(loginPage);
            var form   = doc.GetElementsByTagName("form")
                         .OfType <XmlElement> ()
                         .FirstOrDefault(n => n.GetAttribute("class") == "ed-popup-form_login__form");
            if (form == null)
            {
                // The page layout changed or an unexpected page was served: we cannot log in.
                return(false);
            }

            var tokenInput = form.GetElementsByTagName("input")
                             .OfType <XmlElement> ()
                             .FirstOrDefault(n => n.GetAttribute("name") == "_login_csrf_security_token");
            if (tokenInput == null)
            {
                // Without the CSRF token there is nothing valid to post.
                return(false);
            }
            var csrfToken = tokenInput.GetAttribute("value");

            // Both the request content and the response are disposable; release them once the
            // status and redirect location have been inspected.
            using (var content = new FormUrlEncodedContent(new Dictionary <string, string> {
                { "_username", credentials.Username },
                { "_password", credentials.Password },
                { "_failure_path", "eightd_bike_profile__login" },
                { "ed_from_login_popup", "true" },
                { "_login_csrf_security_token", csrfToken }
            }))
            using (var login = await Client.PostAsync(HubwayLoginCheckUrl, content).ConfigureAwait(false))
            {
                return(login.StatusCode == HttpStatusCode.Found && login.Headers.Location == new Uri(HubwayProfileUrl));
            }
        }

Пример #8

0

Показать файл

        /// <summary>
        /// Runs a fixed sequence of calls (culture setup, <c>SimpleHtmlParser.Create</c>, a dummy
        /// <c>FormatUrl</c>) and then constructs and immediately disposes a <c>ContentEditor</c>.
        /// Judging by the method name this is warm-up work; the results of the calls are discarded.
        /// </summary>
        public void DoPreloadWork()
        {
            ContentEditorProxy.ApplyInstalledCulture();
            SimpleHtmlParser.Create();
            BlogClientHelper.FormatUrl("", "", "", "");

            // Build the throwaway editor's collaborators in the same order the arguments are evaluated.
            Panel hostPanel = new Panel();
            BlogPostHtmlEditorControl.BlogPostHtmlEditorSecurityManager securityManager =
                new BlogPostHtmlEditorControl.BlogPostHtmlEditorSecurityManager();
            ContentEditorProxy.ContentEditorTemplateStrategy templateStrategy =
                new ContentEditorProxy.ContentEditorTemplateStrategy();

            ContentEditor throwawayEditor = new ContentEditor(
                null,
                hostPanel,
                null,
                securityManager,
                templateStrategy,
                MshtmlOptions.DEFAULT_DLCTL);

            throwawayEditor.Dispose();
        }

Пример #9

0

Показать файл

        /// <summary>
        /// Parses <paramref name="html"/> and returns every element the parser yields, in order.
        /// </summary>
        /// <param name="html">HTML to tokenize.</param>
        /// <returns>All elements produced by the parser until it returns null.</returns>
        private static Element[] Elements(string html)
        {
            ArrayList        collected  = new ArrayList();
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

            for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
            {
                collected.Add(current);
            }

            return (Element[])collected.ToArray(typeof(Element));
        }

Пример #10

0

Показать файл

Файл: HtmlPreserver.cs Проект: chonghua/OpenLiveWriter-zh-CN

        /// <summary>
        /// Rebuilds <paramref name="html"/>, wrapping each object/embed/noembed/script block in a
        /// span with a fresh "preserve" id and recording the block's markup in <c>preserved</c>
        /// under that id. Smart-content divs are copied through with their content up to the
        /// closing div; all other elements are copied verbatim.
        /// </summary>
        /// <param name="html">HTML to scan.</param>
        /// <returns>The rewritten HTML.</returns>
        public string ScanAndPreserve(string html)
        {
            StringBuilder    result     = new StringBuilder(html.Length);
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

            for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
            {
                BeginTag tag = current as BeginTag;
                if (tag == null)
                {
                    // Not a begin tag: copy the original text untouched.
                    result.Append(html, current.Offset, current.Length);
                    continue;
                }

                if (tag.NameEquals("div"))
                {
                    string cssClass = tag.GetAttributeValue("class");
                    if (cssClass == ContentSourceManager.EDITABLE_SMART_CONTENT ||
                        cssClass == ContentSourceManager.SMART_CONTENT)
                    {
                        // Smart content is passed through as one unit, with an explicit closing tag.
                        result.Append(html, current.Offset, current.Length);
                        result.Append(htmlParser.CollectHtmlUntil("div"));
                        result.Append("</div>");
                        continue;
                    }
                }

                bool needsPreserving = tag.NameEquals("object") ||
                                       tag.NameEquals("embed") ||
                                       tag.NameEquals("noembed") ||
                                       tag.NameEquals("script");
                if (!needsPreserving)
                {
                    result.Append(html, current.Offset, current.Length);
                    continue;
                }

                // Capture the whole element (begin tag, content, synthesized end tag) and remember
                // it under a new id so it can be looked up later via the wrapping span.
                string inner         = htmlParser.CollectHtmlUntil(tag.Name);
                string preservedHtml = tag.RawText + inner + "</" + tag.Name + ">";

                string key = Guid.NewGuid().ToString("N");
                preserved[key] = preservedHtml;

                result.AppendFormat("<span id=\"preserve{0}\" class=\"{1}\">", key, PRESERVE_CLASS);
                result.Append(preservedHtml);
                result.Append("</span>");
            }

            return result.ToString();
        }

Пример #11

0

Показать файл

        /// <summary>
        /// Clones active smart content contained in the provided HTML, and disables unknown smart content.
        /// </summary>
        /// <remarks>
        /// A smart-content element without an id attribute cannot be resolved against
        /// <paramref name="sourceContext"/>, so it is treated as unknown and disabled rather than
        /// causing a NullReferenceException.
        /// </remarks>
        /// <param name="html">HTML that may contain smart-content elements.</param>
        /// <param name="sourceContext">Context used to look up and clone smart content.</param>
        /// <returns>The HTML rebuilt from each element's <c>ToString()</c> after the adjustments.</returns>
        public static string PrepareSmartContentHtmlForEditorInsertion(string html, IContentSourceSidebarContext sourceContext)
        {
            StringBuilder output = new StringBuilder();

            ContentSourceManager.SmartContentPredicate predicate = new ContentSourceManager.SmartContentPredicate();
            SimpleHtmlParser p = new SimpleHtmlParser(html);

            for (Element el; null != (el = p.Next());)
            {
                if (predicate.IsMatch(el))
                {
                    BeginTag bt     = el as BeginTag;
                    Attr     idAttr = bt.GetAttribute("id");

                    String        contentSourceId = null;
                    String        contentItemId   = null;
                    ISmartContent smartContent    = null;

                    // The id attribute can be missing (e.g. stripped by a round trip through the
                    // blog service); only try to resolve the content when it is present.
                    if (idAttr != null)
                    {
                        ContentSourceManager.ParseContainingElementId(idAttr.Value, out contentSourceId, out contentItemId);
                        smartContent = sourceContext.FindSmartContent(contentItemId);
                    }

                    if (smartContent != null)
                    {
                        String newId = Guid.NewGuid().ToString();
                        sourceContext.CloneSmartContent(contentItemId, newId);

                        if (RefreshableContentManager.ContentSourcesWithRefreshableContent.Contains(contentSourceId))
                        {
                            IExtensionData extensionData = sourceContext.FindExtentsionData(newId);
                            Debug.Assert(extensionData != null);

                            // Since we just made a new id for the smart content just about to be inserted
                            // we want to give it a chance to get a callback because its callback might have happened while
                            // it was on the clipboard (in the event of cut).  This means the refreshable content manager doesn't know
                            // to watch out for this smart content on paste, it only knows to look out for who created it.   Thus
                            // we just force the callback, and if it didn't need it, nothing will happen.
                            // The null guard keeps release builds (where the assert is compiled out) from crashing.
                            if (extensionData != null && extensionData.RefreshCallBack == null)
                            {
                                extensionData.RefreshCallBack = DateTime.UtcNow;
                            }
                        }

                        idAttr.Value = ContentSourceManager.MakeContainingElementId(contentSourceId, newId);
                    }
                    else
                    {
                        // Unknown (or unidentifiable) smart content: strip its smart-content attributes.
                        ContentSourceManager.RemoveSmartContentAttributes(bt);
                    }
                }
                output.Append(el.ToString());
            }
            return(output.ToString());
        }

Пример #12

0

Показать файл

Файл: HtmlCleaner.cs Проект: chonghua/OpenLiveWriter-zh-CN

        /// <summary>
        /// Returns <paramref name="html"/> with every tag whose name contains a colon removed;
        /// all other elements are re-emitted from their raw text.
        /// Namespaced tags come with Office 2007 clipboard data and result in weird
        /// namespace declarations being inserted as text into the DOM. (Bug 303784)
        /// </summary>
        /// <param name="html">HTML to filter.</param>
        /// <returns>The HTML without namespaced tags.</returns>
        private static string StripNamespacedTags(string html)
        {
            StringBuilder    kept       = new StringBuilder(html.Length);
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

            Element current = htmlParser.Next();
            while (current != null)
            {
                Tag  tag          = current as Tag;
                bool isNamespaced = tag != null && tag.Name.IndexOf(':') >= 0;
                if (!isNamespaced)
                {
                    kept.Append(current.RawText);
                }

                current = htmlParser.Next();
            }

            return kept.ToString();
        }

Пример #13

0

Показать файл

        /// <summary>
        /// Detects a meaningless element such as &lt;p&gt;&lt;/p&gt;, &lt;a href="..."&gt;&lt;/a&gt; or
        /// &lt;a href="..."&gt;&amp;nbsp;&lt;/a&gt; and, when found, consumes the rest of it from the parser.
        /// </summary>
        /// <param name="htmlParser">Parser positioned just after <paramref name="bt"/>.</param>
        /// <param name="bt">The begin tag that was just read.</param>
        /// <returns>
        /// True when the element was meaningless; its optional whitespace text and its end tag have
        /// then been consumed. False otherwise, with the parser position unchanged.
        /// </returns>
        private static bool RemoveMeaninglessTags(SimpleHtmlParser htmlParser, BeginTag bt)
        {
            // Case 1: an attribute-less <p> immediately followed by </p>.
            bool isBareParagraph = bt.NameEquals("p") && bt.Attributes.Length == 0 && !bt.HasResidue;
            if (isBareParagraph)
            {
                EndTag paragraphEnd = htmlParser.Peek(0) as EndTag;
                if (paragraphEnd != null && paragraphEnd.NameEquals("p"))
                {
                    // swallow the end tag
                    htmlParser.Next();
                    return true;
                }
            }

            // Case 2: an <a> that has an href but no name/style/id, i.e. a link that only matters
            // if it wraps some visible content.
            bool isPlainLink = bt.NameEquals("a") &&
                               bt.GetAttribute("name") == null &&
                               bt.GetAttributeValue("style") == null &&
                               bt.GetAttributeValue("id") == null &&
                               bt.GetAttributeValue("href") != null;
            if (!isPlainLink)
            {
                return false;
            }

            int     skippedTextElements = 0;
            Element upcoming            = htmlParser.Peek(0);

            // A text element that is empty after unescaping and trimming counts as no content.
            if (upcoming is Text &&
                HtmlUtils.UnEscapeEntities(upcoming.RawText, HtmlUtils.UnEscapeMode.NonMarkupText).Trim().Length == 0)
            {
                skippedTextElements = 1;
                upcoming            = htmlParser.Peek(1);
            }

            EndTag linkEnd = upcoming as EndTag;
            if (linkEnd == null || !linkEnd.NameEquals("a"))
            {
                return false;
            }

            // Consume the whitespace text (if any) followed by the end tag.
            for (int consumed = 0; consumed <= skippedTextElements; consumed++)
            {
                htmlParser.Next();
            }

            return true;
        }

Пример #14

0

Показать файл

Файл: TextHelper.cs Проект: chonghua/OpenLiveWriter-zh-CN

        /// <summary>
        /// Replaces every "\r\n" sequence inside text elements of <paramref name="html"/> with
        /// "&lt;br/&gt;". Non-text elements are emitted unchanged, and lone "\n" or "\r"
        /// characters are left as they are.
        /// </summary>
        /// <param name="html">HTML to convert.</param>
        /// <returns>The HTML rebuilt from the elements' raw text.</returns>
        public static string ConvertNewLinesToBr(string html)
        {
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);
            StringBuilder    result     = new StringBuilder();

            for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
            {
                string raw = current.RawText;
                if (current is Text)
                {
                    raw = raw.Replace("\r\n", "<br/>");
                }

                result.Append(raw);
            }

            return result.ToString();
        }

Пример #15

0

Показать файл

        /// <summary>
        /// Parses the HTML in <c>stream</c>, serializes the resulting document into an in-memory
        /// buffer, re-reads it with an <see cref="XmlReader"/> and collects the <c>href</c> value of
        /// every <c>a</c> element that has one.
        /// </summary>
        /// <returns>The href values in the order the reader encounters them.</returns>
        public List <string> ExtractLinksHtmlParserSharp()
        {
            stream.Seek(0, SeekOrigin.Begin);
            var links = new List <string>();

            var simpleHtmlparser = new SimpleHtmlParser();
            // NOTE(review): the StreamReader is intentionally left undisposed, because disposing it
            // would also close `stream`; the Seek above suggests the stream is reused -- confirm.
            var document = simpleHtmlparser.Parse(new StreamReader(stream));

            // Both the buffer and the reader are IDisposable; release them deterministically.
            using (var memoryStream = new MemoryStream())
            {
                document.Save(memoryStream);
                memoryStream.Seek(0, SeekOrigin.Begin);

                var settings = new XmlReaderSettings {
                    DtdProcessing = DtdProcessing.Parse
                };

                using (var reader = XmlReader.Create(memoryStream, settings))
                {
                    while (reader.Read())
                    {
                        if (reader.NodeType != XmlNodeType.Element)
                        {
                            continue;
                        }

                        if (reader.Name != "a")
                        {
                            continue;
                        }

                        var hrefAttributeValue = reader.GetAttribute("href");
                        if (hrefAttributeValue == null)
                        {
                            continue;
                        }

                        links.Add(hrefAttributeValue);
                    }
                }
            }

            return(links);
        }

Пример #16

0

Показать файл

Файл: SmartContentInsertionHelper.cs Проект: chonghua/OpenLiveWriter-zh-CN

        /// <summary>
        /// Reports whether the div tags in <paramref name="html"/> fail to pair up.
        /// </summary>
        /// <remarks>
        /// Besides a non-zero net count of begin and end div tags, a closing div that appears
        /// before any unmatched opening div (e.g. <c>&lt;/div&gt;&lt;div&gt;</c>) is also reported
        /// as unbalanced, even though the totals cancel out.
        /// </remarks>
        /// <param name="html">HTML to inspect.</param>
        /// <returns>True when the div tags are unbalanced; false otherwise.</returns>
        public static bool ContainsUnbalancedDivs(string html)
        {
            int openDivs       = 0;
            SimpleHtmlParser p = new SimpleHtmlParser(html);

            for (Element e; (e = p.Next()) != null;)
            {
                if (e is Tag && ((Tag)e).NameEquals("div"))
                {
                    if (e is BeginTag)
                    {
                        ++openDivs;
                    }
                    else if (--openDivs < 0)
                    {
                        // An end tag with no open div to close can never be repaired by later tags.
                        return(true);
                    }
                }
            }

            return(openDivs != 0);
        }

Пример #17

0

Показать файл

        /// <summary>
        /// Re-serializes <paramref name="html"/>, setting <c>Complete</c> on every begin tag whose
        /// name <c>ElementFilters.RequiresEndTag</c> reports as not needing an end tag.
        /// </summary>
        /// <param name="html">HTML to process.</param>
        /// <returns>The HTML rebuilt from each element's <c>ToString()</c>.</returns>
        private static string BalanceHtml(string html)
        {
            StringBuilder    result     = new StringBuilder(html.Length + 10);
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

            for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
            {
                BeginTag opener = current as BeginTag;
                if (opener != null && !ElementFilters.RequiresEndTag(opener.Name))
                {
                    opener.Complete = true;
                }

                result.Append(current.ToString());
            }

            return result.ToString();
        }

Пример #18

0

Показать файл

        /// <summary>
        /// Inserts "&amp;nbsp;" into every td element that is immediately closed, i.e. whose begin
        /// tag is directly followed by its end tag. Only runs when the HTML contains the substring
        /// "table"; otherwise <c>args.Html</c> is left untouched.
        /// </summary>
        /// <param name="args">Fixup arguments whose <c>Html</c> is read and possibly overwritten.</param>
        private static void EditorContext_PerformTemporaryFixupsToEditedHtml(TemporaryFixupArgs args)
        {
            string source = args.Html;

            // Cheap textual pre-check: without "table" there is nothing to fix.
            if (!source.Contains("table"))
            {
                return;
            }

            StringBuilder    rewritten  = new StringBuilder(source.Length);
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(source);

            Element current;
            while ((current = htmlParser.Next()) != null)
            {
                // Every element is copied verbatim; padding is only ever added after a <td>.
                rewritten.Append(source, current.Offset, current.Length);

                BeginTag opener = current as BeginTag;
                if (opener == null || !opener.NameEquals("td"))
                {
                    continue;
                }

                EndTag closer = htmlParser.Peek(0) as EndTag;
                if (closer != null && closer.NameEquals("td"))
                {
                    rewritten.Append("&nbsp;");
                }
            }

            args.Html = rewritten.ToString();
        }

Пример #19

0

Показать файл

Файл: DomExtensions.cs Проект: jflam/NReadability

        /// <summary>
        /// Replaces the content of <paramref name="element"/> with the nodes obtained by parsing
        /// <paramref name="html"/> as a fragment. <c>RemoveAll</c> is called on the element first.
        /// </summary>
        /// <param name="element">Element whose content is replaced.</param>
        /// <param name="html">HTML fragment to parse and insert.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="element"/> or <paramref name="html"/> is null.
        /// </exception>
        public static void SetInnerHtml(this XElement element, string html)
        {
            if (element == null)
            {
                throw new ArgumentNullException("element");
            }

            if (html == null)
            {
                throw new ArgumentNullException("html");
            }

            element.RemoveAll();

            var fragmentParser = new SimpleHtmlParser();
            var fragmentSource = new StringReader(html);
            var parsedNodes    = fragmentParser.ParseFragment(fragmentSource, String.Empty);

            // Nodes are added one at a time, in the order the parser returned them.
            foreach (var parsedNode in parsedNodes)
            {
                element.Add(parsedNode);
            }
        }

Пример #20

0

Показать файл

Файл: FormFactory.cs Проект: evilpaladique/OpenLiveWriter

 /// <summary>
 /// Creates a factory whose parser reads the given HTML text.
 /// </summary>
 /// <param name="html">HTML text handed to the parser.</param>
 public FormFactory(string html)
 {
     SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);
     parser = htmlParser;
 }

Пример #21

0

Показать файл

Файл: HTMLBalancer.cs Проект: chonghua/OpenLiveWriter-zh-CN

        /// <summary>
        /// Balances the HTML and safely truncates it, using a custom algorithm
        /// to determine how much each character/string counts against maxCost.
        /// </summary>
        /// <param name="html">HTML to balance.</param>
        /// <param name="maxCost">Budget that the emitted elements plus pending end tags may consume.</param>
        /// <param name="costFilter">Cost model; a <c>DefaultCostFilter</c> is used when null.</param>
        /// <param name="ellipsis">When true, "..." is appended if a text element had to be truncated.</param>
        /// <returns>The emitted HTML followed by end tags for every tag still open.</returns>
        public static string Balance(string html, int maxCost, HTMLBalancerCostFilter costFilter, bool ellipsis)
        {
            bool             textWasTruncated = false;
            SimpleHtmlParser htmlParser       = new SimpleHtmlParser(html);

            ArrayList     openTags = new ArrayList();
            StringBuilder result   = new StringBuilder();
            long          spent    = 0; // long to make sure that int32.MaxValue does not cause overflow

            if (costFilter == null)
            {
                costFilter = new DefaultCostFilter();
            }

            for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
            {
                // Style, script, comment and directive elements never reach the output.
                if (current is StyleElement ||
                    current is ScriptElement ||
                    current is Comment ||
                    current is MarkupDirective)
                {
                    continue;
                }

                // Budget left after reserving room for closing everything that is currently open.
                long remaining = Math.Max(0, maxCost - spent - LengthToClose(costFilter, openTags));

                Tag tag = current as Tag;
                if (tag != null)
                {
                    BeginTag opener = current as BeginTag;
                    if (opener != null && opener.Unterminated)
                    {
                        continue;  // skip corrupted tags
                    }

                    if (TagCost(costFilter, openTags, tag) > remaining)
                    {
                        break;  // no room for this tag; stop here
                    }

                    RegisterTag(openTags, tag);
                    result.Append(current.ToString());
                    spent += costFilter.ElementCost(current);
                    continue;
                }

                Text text = current as Text;
                if (text != null)
                {
                    if (costFilter.ElementCost(current) > remaining)
                    {
                        // Cut the text down to whatever still fits, then stop.
                        result.Append(costFilter.TruncateText(text, (int)remaining));
                        textWasTruncated = true;
                        break;
                    }

                    // plenty of room
                    result.Append(current.ToString());
                    spent += costFilter.ElementCost(current);
                    continue;
                }

                // Any other element kind is all-or-nothing.
                if (costFilter.ElementCost(current) > remaining)
                {
                    break;
                }

                result.Append(current.ToString());
                spent += costFilter.ElementCost(current);
            }

            // Append an ellipsis if we truncated text
            // We use "..." here rather than TextHelper.Ellipsis, because some mail clients don't understand "\u2026".
            if (ellipsis && textWasTruncated)
            {
                result.Append("...");
            }

            // Close the still-open tags, innermost first.
            int index = openTags.Count;
            while (--index >= 0)
            {
                result.Append(MakeEndTag((string)openTags[index]));
            }

            return result.ToString();
        }

Пример #22

0

Показать файл

Файл: LightWeightHTMLDocumentIterator.cs Проект: evilpaladique/OpenLiveWriter

        /// <summary>
        /// Parses <c>_html</c> and dispatches each element to the matching <c>On*</c> callback,
        /// bracketed by <c>OnDocumentBegin</c> and <c>OnDocumentEnd</c>.
        /// </summary>
        /// <remarks>
        /// The type tests run in a fixed order and the first match wins; <c>Text</c> is tested
        /// last. An element matching none of the known types triggers <c>Debug.Fail</c> and is
        /// otherwise skipped.
        /// </remarks>
        public void Parse()
        {
            SimpleHtmlParser htmlParser = new SimpleHtmlParser(_html);

            OnDocumentBegin();

            for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
            {
                if (current is BeginTag)
                {
                    OnBeginTag((BeginTag)current);
                }
                else if (current is EndTag)
                {
                    OnEndTag((EndTag)current);
                }
                else if (current is ScriptLiteral)
                {
                    OnScriptLiteral((ScriptLiteral)current);
                }
                else if (current is Comment)
                {
                    OnComment((Comment)current);
                }
                else if (current is MarkupDirective)
                {
                    OnMarkupDirective((MarkupDirective)current);
                }
                else if (current is ScriptText)
                {
                    OnScriptText((ScriptText)current);
                }
                else if (current is ScriptComment)
                {
                    OnScriptComment((ScriptComment)current);
                }
                else if (current is StyleText)
                {
                    OnStyleText((StyleText)current);
                }
                else if (current is StyleUrl)
                {
                    OnStyleUrl((StyleUrl)current);
                }
                else if (current is StyleImport)
                {
                    OnStyleImport((StyleImport)current);
                }
                else if (current is StyleComment)
                {
                    OnStyleComment((StyleComment)current);
                }
                else if (current is StyleLiteral)
                {
                    OnStyleLiteral((StyleLiteral)current);
                }
                else if (current is Text)
                {
                    OnText((Text)current);
                }
                else
                {
                    Debug.Fail("Unrecognized element in LightWeightHTMLDocumentIterator");
                }
            }

            // The parser returned null: the document is exhausted.
            OnDocumentEnd();
        }

Пример #23

0

Показать файл

        /// <summary>
        /// Rewrites <paramref name="html"/> so that only tags listed in the replacement table survive,
        /// substituting tag names where the table asks for it and normalizing whitespace through a
        /// <c>WhitespaceBuffer</c>.
        /// </summary>
        /// <param name="html">The markup to thin.</param>
        /// <param name="preserveImages">When false, image tags are dropped from the output.</param>
        /// <param name="strict">When true, the strict tag table is used instead of the default one.</param>
        /// <param name="modifyReplacements">
        /// Optional callbacks that may adjust the tag table; they are applied to a clone of the table,
        /// so the shared table is not modified here.
        /// </param>
        /// <returns>The thinned markup.</returns>
        private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
        {
            Hashtable tagTable = _tagSpecs;
            if (strict)
            {
                tagTable = _tagSpecsStrict;
            }

            if (modifyReplacements != null)
            {
                // Work on a private copy so the callbacks cannot alter the shared table.
                tagTable = (Hashtable)tagTable.Clone();
                foreach (ModifyReplacement customize in modifyReplacements)
                {
                    customize(tagTable);
                }
            }

            // Receives whatever is written before the first element that is emitted for real.
            // It is only merged into the result when a block-level tag was seen (see the end of the method).
            StringBuilder leadingOutput = new StringBuilder(10);
            // Receives everything else.
            StringBuilder mainOutput = new StringBuilder(html.Length);
            // Points at whichever of the two buffers is currently being written to.
            StringBuilder output = leadingOutput;

            SimpleHtmlParser parser = new SimpleHtmlParser(html);

            // True while inside a <pre> block, where whitespace must be kept as-is.
            bool insidePre = false;
            WhitespaceBuffer whitespaceBuffer = new WhitespaceBuffer();

            // Insert an implicit <p> unless the first non-whitespace element is a block
            whitespaceBuffer.Promote(WhitespaceClass.Paragraph);
            bool hasBlock = false;

            for (Element element = parser.Next(); element != null; element = parser.Next())
            {
                Tag tag = element as Tag;
                if (tag != null)
                {
                    string lowerName = tag.Name.ToLower(CultureInfo.InvariantCulture);

                    // Tags that are missing from the table are dropped.
                    TagDesc desc = (TagDesc)tagTable[lowerName];
                    if (desc == null)
                    {
                        continue;
                    }

                    // Use the substitute tag when one is configured (e.g. <DIV> becomes <P>)
                    string tagName = desc.Substitute ?? lowerName;

                    // Images are only kept on request.
                    if (tagName == TAG_IMG && !preserveImages)
                    {
                        continue;
                    }

                    ElementClass tagClass = WhitespaceBuffer.ClassifyTag(tagName, desc.TagType);
                    if (tagClass == ElementClass.Block || tagClass == ElementClass.Paragraph || tagClass == ElementClass.Break)
                    {
                        hasBlock = true;
                    }

                    if (!insidePre && WhitespaceBuffer.ProcessElementClass(ref whitespaceBuffer, output, tagClass, true))
                    {
                        continue;
                    }

                    output = mainOutput;

                    BeginTag opening = element as BeginTag;
                    if (opening != null)
                    {
                        WriteBeginTag(desc, tagName, opening.Attributes, output);
                        if (tagName == TAG_PRE)
                        {
                            insidePre = true;
                        }
                    }
                    else
                    {
                        EndTag closing = element as EndTag;
                        if (closing != null)
                        {
                            // Implicit end tags and end tags of empty elements are not written out.
                            if (!closing.Implicit && desc.TagType != TagType.Empty)
                            {
                                output.AppendFormat(CultureInfo.InvariantCulture, "</{0}>", tagName);
                            }
                            if (tagName == TAG_PRE)
                            {
                                insidePre = false;
                            }
                        }
                    }
                }
                else if (element is Text)
                {
                    // Unescape and re-escape so the entities in the output are normalized.
                    string text = HtmlUtils.EscapeEntities(
                        HtmlUtils.UnEscapeEntities(element.RawText, HtmlUtils.UnEscapeMode.NonMarkupText));

                    if (!insidePre && WhitespaceBuffer.ProcessElementClass(ref whitespaceBuffer, output, WhitespaceBuffer.ClassifyText(text), false))
                    {
                        continue;
                    }

                    output = mainOutput;
                    output.Append(text);
                }
            }

            // The leading buffer is only kept when a block-level tag was seen and the main buffer is in use.
            if (hasBlock && ReferenceEquals(mainOutput, output))
            {
                output.Insert(0, leadingOutput.ToString());
            }

            // The whitespace buffer may not be empty at this point.  That's OK--we want to drop trailing whitespace
            return output.ToString();
        }

Пример #24

0

Показать файл

Файл: BaseHtmlParserTests.cs Проект: simonech/XliffLib

 // Assigns a fresh SimpleHtmlParser to _htmlParser.
 // NOTE(review): presumably the per-test setup hook for this fixture; no setup attribute is visible here -- confirm.
 public void Init()
 {
     _htmlParser = new SimpleHtmlParser();
 }

Пример #25

0

Показать файл

        /// <summary>
        /// Parses a self-closing <c>svg</c> tag that carries an unquoted attribute value.
        /// There are no assertions: the test passes as soon as <c>Parse</c> returns.
        /// NOTE(review): judging by the name this guards against a parser hang -- confirm.
        /// </summary>
        public void SvgHang()
        {
            SimpleHtmlParser htmlParser = new SimpleHtmlParser();
            StringReader     markup     = new StringReader("<svg x=y/>");

            htmlParser.Parse(markup);
        }

Пример #26

0

Показать файл

 /// <summary>
 /// Creates a text source that keeps a reference to the given parser.
 /// </summary>
 /// <param name="parser">The parser stored in the <c>_parser</c> field; it is not validated here.</param>
 public HtmlTextSource(SimpleHtmlParser parser)
 {
     _parser = parser;
 }

Пример #27

0

Показать файл

        /// <summary>
        /// Walks the current contents to find smart content areas.  When one is found, it calls the operation on the smart content.  The operation has a chance
        /// to return new content.  If the content is non-null it will replace the current content.
        /// </summary>
        /// <param name="contents">the raw HTML string whose structured blocks will be replaced.</param>
        /// <param name="operation">Delegate for generating replacement content.</param>
        /// <param name="editMode">If true, then the element's stylename will be activated for editing</param>
        /// <param name="sourceContext">Context used to look up the content source and the smart content item referenced by a block's id.</param>
        /// <param name="continueOnError">
        /// true - if the plugin throws an exception, it keeps crawling the DOM; the failing block is written back
        ///        unchanged (with the non-editable class) so the surrounding markup stays well-formed
        /// false - if a plugin throws an exception, it stops processing the DOM and return empty string
        /// null - if a plugin throws an exception, this function will rethrow it
        /// </param>
        /// <returns>the contents with structured blocks replaced.</returns>
        internal static string PerformOperation(string contents, SmartContentOperation operation, bool editMode, IContentSourceSidebarContext sourceContext, bool? continueOnError)
        {
            //replace all structured content blocks with their editor HTML
            StringBuilder    sb     = new StringBuilder();
            SimpleHtmlParser parser = new SimpleHtmlParser(contents);

            for (Element e = parser.Next(); e != null; e = parser.Next())
            {
                if (e is BeginTag)
                {
                    BeginTag beginTag         = (BeginTag)e;
                    string   elementClassName = beginTag.GetAttributeValue("class");
                    if (ContentSourceManager.IsSmartContentClass(elementClassName))
                    {
                        ISmartContent sContent = null;

                        // Inner HTML of the block once it has been pulled out of the parser.  The parser has
                        // then moved past the closing </div>, so on failure this is the only copy left.
                        string consumedInnerHtml = null;
                        try
                        {
                            string contentSourceId, contentItemId;
                            string blockId = beginTag.GetAttributeValue("id");
                            if (blockId != null)
                            {
                                ContentSourceManager.ParseContainingElementId(blockId, out contentSourceId, out contentItemId);

                                ContentSourceInfo contentSource = sourceContext.FindContentSource(contentSourceId);
                                if (contentSource != null && contentSource.Instance is SmartContentSource)
                                {
                                    SmartContentSource sSource = (SmartContentSource)contentSource.Instance;
                                    sContent = sourceContext.FindSmartContent(contentItemId);
                                    if (sContent != null)
                                    {
                                        //write the div with the appropriate className
                                        string newClassName = editMode ? ContentSourceManager.EDITABLE_SMART_CONTENT : ContentSourceManager.SMART_CONTENT;
                                        beginTag.GetAttribute("class").Value = newClassName;

                                        //replace the inner HTML of the div with the source's editor HTML
                                        string content = parser.CollectHtmlUntil("div");
                                        consumedInnerHtml = content;

                                        operation(sourceContext, sSource, sContent, ref content);

                                        // Only write the block once the operation has succeeded; writing the begin
                                        // tag earlier would leave a dangling duplicate tag behind when it throws.
                                        sb.Append(e.ToString());
                                        sb.Append(content);
                                        sb.Append("</div>");
                                        continue;
                                    }
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            Trace.WriteLine(String.Format(CultureInfo.InvariantCulture, "Error loading smart content item\r\n{0}", ex));
                            sContent = null;

                            if (continueOnError == null)
                            {
                                throw;
                            }

                            if (!continueOnError.Value)
                            {
                                return(String.Empty);
                            }
                        }

                        if (sContent == null)
                        {
                            //this element references an unknown smart content, so it should not be editable
                            Attr classAttr = beginTag.GetAttribute("class");
                            classAttr.Value = ContentSourceManager.SMART_CONTENT;
                        }

                        if (consumedInnerHtml != null)
                        {
                            // The operation failed after the block body was consumed: put the block back as it
                            // was (begin tag once, original inner HTML, closing tag) instead of dropping it.
                            sb.Append(e.ToString());
                            sb.Append(consumedInnerHtml);
                            sb.Append("</div>");
                            continue;
                        }
                    }
                }
                sb.Append(e.ToString());
            }

            return(sb.ToString());
        }

Пример #28

0

Показать файл

Файл: Basic.cs Проект: antrampa/HtmlParserSharp

 /// <summary>
 /// Feeds a self-closing <c>svg</c> tag with an unquoted attribute value to the parser.
 /// Nothing is asserted; the test succeeds when <c>Parse</c> returns.
 /// NOTE(review): the name suggests a regression test for a hang -- confirm.
 /// </summary>
 public void SvgHang()
 {
     new SimpleHtmlParser().Parse(new StringReader("<svg x=y/>"));
 }

Пример #29

0

Показать файл

Файл: HubwayRentals.cs Проект: roland-chernov-akvelon/Moyeu

        /// <summary>
        /// Downloads one page of the rentals listing and converts each trip row into a <c>Rental</c>.
        /// Up to four attempts are made; an attempt is repeated after a failed login, a missing user id
        /// or an <c>HttpRequestException</c>.
        /// </summary>
        /// <param name="page">Page to request; the <c>pageNumber</c> query parameter is only sent when it is greater than zero.</param>
        /// <returns>
        /// The rentals found on the page, or <c>null</c> when every attempt failed or an unexpected
        /// exception was logged.
        /// </returns>
        public async Task <Rental[]> GetRentals(int page)
        {
            bool mustLogin = false;

            for (int attempt = 0; attempt < 4; attempt++)
            {
                try
                {
                    if (mustLogin)
                    {
                        if (!await LoginToHubway().ConfigureAwait(false))
                        {
                            continue;
                        }
                        mustLogin = false;
                    }

                    if (string.IsNullOrEmpty(credentials.UserId))
                    {
                        credentials.UserId = await GetHubwayUserId().ConfigureAwait(false);

                        // Still no user id: log in on the next attempt.
                        if (string.IsNullOrEmpty(credentials.UserId))
                        {
                            mustLogin = true;
                            continue;
                        }
                    }

                    var url = HubwayRentalsUrl + credentials.UserId;
                    if (page > 0)
                    {
                        url += "?pageNumber=" + page;
                    }
                    var html = await Client.GetStringAsync(url).ConfigureAwait(false);

                    var htmlParser = new SimpleHtmlParser();
                    var document   = htmlParser.ParseString(html);

                    var contentSection = document.GetElementsByTagName("section")
                                         .OfType <XmlElement> ()
                                         .First(section => section.GetAttribute("class") == "ed-profile-page__content");
                    var tripRows = contentSection.GetElementsByTagName("div")
                                   .OfType <XmlElement> ()
                                   .Where(candidate => candidate.ContainsClass("ed-table__item_trip"));

                    return(tripRows.Select(tripRow => {
                        var cells = tripRow.GetElementsByTagName("div").OfType <XmlElement> ().ToList();

                        /* Layout of the div elements inside a trip row:
                         * 0 <div>
                         * 1   <div>time start
                         * 2   <div>station start
                         *   </div>
                         * 3 <div>
                         * 4   <div>time end
                         * 5   <div>station end
                         *   </div>
                         * 6 <div>duration
                         * 7 <div>billed
                         */
                        var trip = new Rental {
                            FromStationName = cells[2].InnerText.Trim(),
                            ToStationName = cells[5].InnerText.Trim(),
                            Duration = ParseRentalDuration(cells[6].InnerText.Trim()),
                            Price = ParseRentalPrice(cells[7].InnerText.Trim()),
                            DepartureTime = DateTime.Parse(cells[1].InnerText,
                                                           System.Globalization.CultureInfo.InvariantCulture),
                            ArrivalTime = DateTime.Parse(cells[4].InnerText,
                                                         System.Globalization.CultureInfo.InvariantCulture)
                        };

                        // The id packs the departure hash into the upper 32 bits and the arrival hash into the lower 32.
                        long departureBits = ((long)trip.DepartureTime.GetHashCode()) << 32;
                        trip.Id = departureBits;
                        trip.Id |= (uint)trip.ArrivalTime.GetHashCode();
                        return trip;
                    }).ToArray());
                }
                catch (HttpRequestException requestError)
                {
                    // Super hacky but oh well: a "302" in the message is taken as "login required".
                    mustLogin = mustLogin || requestError.Message.Contains("302");
                }
                catch (Exception unexpected)
                {
                    AnalyticsHelper.LogException("RentalsGenericError", unexpected);
                    Log.Error("RentalsGenericError", unexpected.ToString());
                    break;
                }
            }

            return(null);
        }

Пример #30

0

Показать файл

        /// <summary>
        /// Re-serializes the markup with lower-cased tag names, attribute names and style text.
        /// The <c>contenteditable</c>, <c>atomicselection</c> and <c>unselectable</c> attributes are left out,
        /// and the whole pass is repeated for as long as <c>RemoveMeaninglessTags</c> reports a removal.
        /// </summary>
        /// <param name="html">The markup to clean up.</param>
        /// <param name="xml">When true, attribute values and text are escaped with the XML escaping helpers.</param>
        /// <returns>The cleaned-up markup.</returns>
        private string CleanupHtml(string html, bool xml)
        {
            bool removedSomething;

            do
            {
                removedSomething = false;

                StringBuilder    cleaned = new StringBuilder(html.Length);
                SimpleHtmlParser parser  = new SimpleHtmlParser(html);

                Element current;
                while ((current = parser.Next()) != null)
                {
                    BeginTag opening = current as BeginTag;
                    if (opening != null)
                    {
                        if (RemoveMeaninglessTags(parser, opening))
                        {
                            // Removing a tag may expose another removable one, so schedule one more pass.
                            removedSomething = true;
                            continue;
                        }

                        cleaned.Append("<");
                        cleaned.Append(opening.Name.ToLower(CultureInfo.InvariantCulture));

                        foreach (Attr attribute in opening.Attributes)
                        {
                            bool isEditingAttribute = attribute.NameEquals("contenteditable") ||
                                                      attribute.NameEquals("atomicselection") ||
                                                      attribute.NameEquals("unselectable");
                            if (isEditingAttribute)
                            {
                                continue;
                            }

                            cleaned.Append(" ");
                            cleaned.Append(attribute.Name.ToLower(CultureInfo.InvariantCulture));

                            string value = attribute.Value;
                            if (value == null)
                            {
                                // Attribute without a value: only the name is written.
                                continue;
                            }

                            if (attribute.NameEquals("style"))
                            {
                                value = LowerCaseCss(value);
                            }
                            else if (attribute.Name == value)
                            {
                                // Value repeats the attribute name exactly, so it is lower-cased too.
                                value = value.ToLower(CultureInfo.InvariantCulture);
                            }

                            string escapedValue = xml
                                ? HtmlUtils.EscapeEntitiesForXml(value, true)
                                : HtmlUtils.EscapeEntities(value);
                            cleaned.AppendFormat("=\"{0}\"", escapedValue);
                        }

                        if (opening.HasResidue)
                        {
                            // Without attributes nothing has separated the tag name from the residue yet.
                            if (opening.Attributes.Length == 0)
                            {
                                cleaned.Append(" ");
                            }
                            cleaned.Append(opening.Residue);
                        }

                        if (opening.Complete)
                        {
                            cleaned.Append(" /");
                        }
                        cleaned.Append(">");
                        continue;
                    }

                    EndTag closing = current as EndTag;
                    if (closing != null)
                    {
                        cleaned.AppendFormat("</{0}>", closing.Name.ToLower(CultureInfo.InvariantCulture));
                        continue;
                    }

                    if (current is Text)
                    {
                        string text = HtmlUtils.TidyNbsps(current.RawText);
                        if (xml)
                        {
                            string unescaped = HtmlUtils.UnEscapeEntities(text, HtmlUtils.UnEscapeMode.NonMarkupText);
                            text = HtmlUtils.EscapeEntitiesForXml(unescaped, false);
                        }
                        cleaned.Append(text);
                        continue;
                    }

                    if (current is StyleText)
                    {
                        cleaned.Append(current.RawText.ToLower(CultureInfo.InvariantCulture));
                    }
                    else
                    {
                        // Every other element kind is copied through verbatim.
                        cleaned.Append(current.RawText);
                    }
                }

                html = cleaned.ToString();
            } while (removedSomething);

            return html;
        }

C# (CSharp) SimpleHtmlParser примеры использования