/// <summary>
/// Looks through the text nodes of <paramref name="homepageHtml"/> for the first one that contains
/// a value wrapped between the <c>guid1</c> and <c>guid2</c> markers, and records the
/// "requiresHtmlTitles" result based on how that value was escaped.
/// </summary>
/// <param name="homepageHtml">HTML to scan for the marker-delimited title.</param>
/// <param name="results">
/// Receives "requiresHtmlTitles": YES when the value equals TEST_STRING escaped once, NO when it
/// equals TEST_STRING escaped twice, and an "[ERROR]" message carrying the value otherwise.
/// </param>
/// <exception cref="InvalidOperationException">No text node contained the two markers.</exception>
protected internal override void HandleResult(string homepageHtml, ITestResults results)
{
    // Non-greedy capture of whatever sits between the two markers.
    Regex titlePattern = new Regex(Regex.Escape(guid1) + "(.*?)" + Regex.Escape(guid2));
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(homepageHtml);

    Element current;
    while ((current = htmlParser.Next()) != null)
    {
        if (!(current is Text))
        {
            continue;
        }

        Match match = titlePattern.Match(current.ToString());
        if (!match.Success)
        {
            continue;
        }

        // Only the first matching text node is considered.
        string captured = match.Groups[1].Value;
        if (captured == HtmlUtils.EscapeEntities(TEST_STRING))
        {
            results.AddResult("requiresHtmlTitles", YES);
        }
        else if (captured == HtmlUtils.EscapeEntities(HtmlUtils.EscapeEntities(TEST_STRING)))
        {
            results.AddResult("requiresHtmlTitles", NO);
        }
        else
        {
            results.AddResult("requiresHtmlTitles", "[ERROR] (value was: " + captured + ")");
        }
        return;
    }

    throw new InvalidOperationException("Title encoding test failed--title was not detected");
}
/// <summary>
/// Parses the content of the <c>stream</c> field with <c>SimpleHtmlParser</c>, saves the resulting
/// document to an in-memory buffer, and collects the value of every non-empty text or whitespace
/// node found when reading that buffer back with an <see cref="XmlReader"/>.
/// </summary>
/// <returns>The node values in document order.</returns>
public List<string> ExtractTextsHtmlParserSharp()
{
    stream.Seek(0, SeekOrigin.Begin);

    var simpleHtmlparser = new SimpleHtmlParser();
    // The StreamReader is intentionally not disposed: disposing it would close `stream`,
    // which is rewound at the top of this method and so is presumably reused between calls.
    var document = simpleHtmlparser.Parse(new StreamReader(stream));

    var texts = new List<string>();

    // Both the buffer and the reader are owned by this method, so release them deterministically.
    using (var memoryStream = new MemoryStream())
    {
        document.Save(memoryStream);
        memoryStream.Seek(0, SeekOrigin.Begin);

        using (var reader = XmlReader.Create(memoryStream, new XmlReaderSettings { DtdProcessing = DtdProcessing.Parse }))
        {
            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Text && reader.NodeType != XmlNodeType.Whitespace)
                {
                    continue;
                }

                var value = reader.Value;
                if (value == "")
                {
                    continue;
                }

                texts.Add(value);
            }
        }
    }

    return texts;
}
/// <summary>
/// Rewrites <c>args.Html</c> so that a <c>&lt;div id='EXTENDED_ENTRY_ID'&gt;</c> begin tag that is
/// immediately followed by <c>&lt;/div&gt;</c> is replaced with <c>BlogPost.ExtendedEntryBreak</c>.
/// Nothing is changed when the HTML does not contain EXTENDED_ENTRY_ID at all.
/// </summary>
/// <param name="args">Carries the HTML to fix up; its <c>Html</c> property is overwritten.</param>
private static void DetachExtendedEntryBehavior(TemporaryFixupArgs args)
{
    string source = args.Html;
    if (!source.Contains(EXTENDED_ENTRY_ID))
    {
        return;
    }

    StringBuilder rewritten = new StringBuilder(source.Length);
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(source);
    SmartPredicate extendedEntryDiv = new SmartPredicate(String.Format(CultureInfo.InvariantCulture, "<div id='{0}'>", EXTENDED_ENTRY_ID));

    Element current;
    while ((current = htmlParser.Next()) != null)
    {
        if (!extendedEntryDiv.IsMatch(current))
        {
            // Everything else is copied through verbatim from the source string.
            rewritten.Append(source, current.Offset, current.Length);
            continue;
        }

        // The matching begin tag itself is never written. When its end tag follows directly,
        // emit the break marker in its place and consume the end tag too.
        EndTag closing = htmlParser.Peek(0) as EndTag;
        if (closing != null && closing.NameEquals("div"))
        {
            rewritten.Append(BlogPost.ExtendedEntryBreak);
            htmlParser.Next();
        }
    }

    args.Html = rewritten.ToString();
}
/// <summary>
/// Finds the smart content elements in <paramref name="content"/> and returns the extension data
/// registered for each distinct smart content id they reference.
/// </summary>
/// <param name="content">HTML to scan.</param>
/// <returns>One entry per referenced smart content id for which extension data was found.</returns>
public IExtensionData[] CalculateReferencedExtensionData(string content)
{
    Hashtable referenced = new Hashtable();
    ContentSourceManager.SmartContentPredicate isSmartContent = new ContentSourceManager.SmartContentPredicate();
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(content);

    Element current;
    while ((current = htmlParser.Next()) != null)
    {
        if (!isSmartContent.IsMatch(current))
        {
            continue;
        }

        BeginTag tag = current as BeginTag;
        Attr id = tag.GetAttribute("id");
        if (id == null)
        {
            // Synchronized WP posts will strip ID attrs (bug 488143)
            continue;
        }

        string sourceId;
        string itemId;
        ContentSourceManager.ParseContainingElementId(id.Value, out sourceId, out itemId);

        IExtensionData data = GetExtensionData(itemId);
        if (data != null)
        {
            // Keyed by id, so repeated references collapse to a single entry.
            referenced[itemId] = data;
        }
    }

    return (IExtensionData[])ArrayHelper.CollectionToArray(referenced.Values, typeof(IExtensionData));
}
/// <summary>
/// Removes namespaced tags (any tag whose name contains ':'), comments and markup directives from
/// the HTML, and removes attributes whose name matches ILLEGAL_ATTR_REGEX from the remaining begin tags.
/// Namespaced tags come with Office 2007 clipboard data and result in weird
/// namespace declarations being inserted as text into the DOM.
/// </summary>
/// <param name="html">HTML to clean.</param>
/// <returns>The HTML rebuilt from the surviving elements.</returns>
public static string StripNamespacedTagsAndCommentsAndMarkupDirectives(string html)
{
    StringBuilder cleaned = new StringBuilder(html.Length);
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

    Element current;
    while ((current = htmlParser.Next()) != null)
    {
        bool isNamespacedTag = current is Tag && ((Tag)current).Name.IndexOf(':') >= 0;
        if (isNamespacedTag || current is Comment || current is MarkupDirective)
        {
            continue;
        }

        BeginTag opening = current as BeginTag;
        if (opening != null)
        {
            foreach (Attr attribute in opening.Attributes)
            {
                if (ILLEGAL_ATTR_REGEX.IsMatch(attribute.Name))
                {
                    opening.RemoveAttribute(attribute.Name);
                }
            }
        }

        // Elements are re-serialized so that the attribute removals above take effect.
        cleaned.Append(current.ToString());
    }

    return cleaned.ToString();
}
/// <summary>
/// Creates a factory over the HTML read from <paramref name="s"/>. The stream is read to its end
/// and the wrapping <see cref="StreamReader"/> is disposed before the constructor returns.
/// </summary>
/// <param name="s">Stream containing the HTML text.</param>
public FormFactory(Stream s)
{
    string html;
    using (StreamReader reader = new StreamReader(s))
    {
        html = reader.ReadToEnd();
    }

    parser = new SimpleHtmlParser(html);
}
/* This is a two-step process:
 * 1st: we need to visit the login form page to get two important pieces of data
 *   - the session ID cookie, which will stay in our cache
 *   - the CSRF token that is generated on the fly in the form
 * 2nd: armed with those two things, we can then post to the login check page, which
 *   will simply stamp our session ID as valid
 *
 * Returns true when the login check answers with a 302 (Found) redirect to the profile page.
 * Returns false when the login form or its CSRF token input cannot be found on the login page
 * (instead of failing with a NullReferenceException / InvalidOperationException), or when the
 * login check answers with anything else.
 */
async Task<bool> LoginToHubway()
{
    var loginPage = await Client.GetStringAsync(HubwayLoginUrl).ConfigureAwait(false);
    var parser = new SimpleHtmlParser();
    var doc = parser.ParseString(loginPage);

    var form = doc.GetElementsByTagName("form")
        .OfType<XmlElement>()
        .FirstOrDefault(n => n.GetAttribute("class") == "ed-popup-form_login__form");
    if (form == null)
    {
        // The page layout changed or we were served something other than the login page.
        return false;
    }

    var tokenInput = form.GetElementsByTagName("input")
        .OfType<XmlElement>()
        .FirstOrDefault(n => n.GetAttribute("name") == "_login_csrf_security_token");
    if (tokenInput == null)
    {
        return false;
    }
    var csrfToken = tokenInput.GetAttribute("value");

    // Both the request content and the response are disposable; release them once the
    // status code and Location header have been inspected.
    using (var content = new FormUrlEncodedContent(new Dictionary<string, string> {
        { "_username", credentials.Username },
        { "_password", credentials.Password },
        { "_failure_path", "eightd_bike_profile__login" },
        { "ed_from_login_popup", "true" },
        { "_login_csrf_security_token", csrfToken }
    }))
    using (var login = await Client.PostAsync(HubwayLoginCheckUrl, content).ConfigureAwait(false))
    {
        return login.StatusCode == HttpStatusCode.Found
            && login.Headers.Location == new Uri(HubwayProfileUrl);
    }
}
/// <summary>
/// Applies the installed culture, calls <c>SimpleHtmlParser.Create</c> and
/// <c>BlogClientHelper.FormatUrl</c> with throwaway arguments, and constructs and immediately
/// disposes a <c>ContentEditor</c>. All results are discarded.
/// NOTE(review): presumably this exists to warm up types ahead of first real use, as the method
/// name suggests; confirm against the caller.
/// </summary>
public void DoPreloadWork()
{
    ContentEditorProxy.ApplyInstalledCulture();
    SimpleHtmlParser.Create();
    BlogClientHelper.FormatUrl("", "", "", "");

    // Throwaway collaborators for the editor instance that is created only to be disposed.
    Panel hostPanel = new Panel();
    BlogPostHtmlEditorControl.BlogPostHtmlEditorSecurityManager securityManager =
        new BlogPostHtmlEditorControl.BlogPostHtmlEditorSecurityManager();
    ContentEditorProxy.ContentEditorTemplateStrategy templateStrategy =
        new ContentEditorProxy.ContentEditorTemplateStrategy();

    ContentEditor throwawayEditor = new ContentEditor(
        null,
        hostPanel,
        null,
        securityManager,
        templateStrategy,
        MshtmlOptions.DEFAULT_DLCTL);
    throwawayEditor.Dispose();
}
/// <summary>
/// Parses <paramref name="html"/> and returns every element the parser yields, in order.
/// </summary>
/// <param name="html">HTML to tokenize.</param>
/// <returns>All parsed elements; empty when the parser yields none.</returns>
private static Element[] Elements(string html)
{
    ArrayList collected = new ArrayList();
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

    for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
    {
        collected.Add(current);
    }

    return (Element[])collected.ToArray(typeof(Element));
}
/// <summary>
/// Copies <paramref name="html"/> to the result, wrapping every object, embed, noembed and script
/// element (begin tag, inner HTML, synthesized end tag) in a
/// <c>&lt;span id="preserve{guid}" class="PRESERVE_CLASS"&gt;</c> and recording the wrapped markup
/// in <c>preserved</c> under the generated id. Smart content divs are copied through as a whole,
/// so nothing inside them is wrapped.
/// </summary>
/// <param name="html">HTML to scan.</param>
/// <returns>The HTML with the preserved elements wrapped in marker spans.</returns>
public string ScanAndPreserve(string html)
{
    StringBuilder result = new StringBuilder(html.Length);
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

    for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
    {
        BeginTag opening = current as BeginTag;
        if (opening == null)
        {
            result.Append(html, current.Offset, current.Length);
            continue;
        }

        if (opening.NameEquals("div"))
        {
            string cssClass = opening.GetAttributeValue("class");
            if (cssClass == ContentSourceManager.EDITABLE_SMART_CONTENT
                || cssClass == ContentSourceManager.SMART_CONTENT)
            {
                // Pass the entire smart content div through without looking inside it.
                result.Append(html, current.Offset, current.Length);
                result.Append(htmlParser.CollectHtmlUntil("div"));
                result.Append("</div>");
                continue;
            }
        }

        bool mustPreserve = opening.NameEquals("object")
            || opening.NameEquals("embed")
            || opening.NameEquals("noembed")
            || opening.NameEquals("script");
        if (!mustPreserve)
        {
            result.Append(html, current.Offset, current.Length);
            continue;
        }

        string innerHtml = htmlParser.CollectHtmlUntil(opening.Name);
        string wholeElement = opening.RawText + innerHtml + "</" + opening.Name + ">";
        string key = Guid.NewGuid().ToString("N");
        preserved[key] = wholeElement;

        result.AppendFormat("<span id=\"preserve{0}\" class=\"{1}\">", key, PRESERVE_CLASS);
        result.Append(wholeElement);
        result.Append("</span>");
    }

    return result.ToString();
}
/// <summary>
/// Clones active smart content contained in the provided HTML, and disables unknown smart content.
/// A smart content element without an id attribute cannot be resolved and is treated as unknown
/// (previously it caused a NullReferenceException).
/// </summary>
/// <param name="html">HTML that is about to be inserted into the editor.</param>
/// <param name="sourceContext">Context used to look up and clone smart content and its extension data.</param>
/// <returns>The HTML with smart content ids re-pointed at the clones and unknown smart content disabled.</returns>
public static string PrepareSmartContentHtmlForEditorInsertion(string html, IContentSourceSidebarContext sourceContext)
{
    StringBuilder output = new StringBuilder();
    ContentSourceManager.SmartContentPredicate predicate = new ContentSourceManager.SmartContentPredicate();
    SimpleHtmlParser p = new SimpleHtmlParser(html);
    for (Element el; null != (el = p.Next());)
    {
        if (predicate.IsMatch(el))
        {
            BeginTag bt = el as BeginTag;
            Attr idAttr = bt.GetAttribute("id");

            String contentSourceId = null;
            String contentItemId = null;
            ISmartContent smartContent = null;

            // The id attribute can be missing from smart content elements; without it there is
            // nothing to look up, so fall through to the "unknown smart content" handling below.
            if (idAttr != null)
            {
                ContentSourceManager.ParseContainingElementId(idAttr.Value, out contentSourceId, out contentItemId);
                smartContent = sourceContext.FindSmartContent(contentItemId);
            }

            if (smartContent != null)
            {
                String newId = Guid.NewGuid().ToString();
                sourceContext.CloneSmartContent(contentItemId, newId);

                if (RefreshableContentManager.ContentSourcesWithRefreshableContent.Contains(contentSourceId))
                {
                    IExtensionData extensionData = sourceContext.FindExtentsionData(newId);
                    Debug.Assert(extensionData != null);

                    // Since we just made a new id for the smart content just about to be inserted
                    // we want to give it a chance to get a callback because its callback might have happened while
                    // it was on the clipboard (in the event of cut). This means the refreshable content manager doesn't know
                    // to watch out for this smart content on paste, it only knows to look out for who created it. Thus
                    // we just force the callback, and if it didn't need it, nothing will happen.
                    if (extensionData.RefreshCallBack == null)
                    {
                        extensionData.RefreshCallBack = DateTime.UtcNow;
                    }
                }

                idAttr.Value = ContentSourceManager.MakeContainingElementId(contentSourceId, newId);
            }
            else
            {
                ContentSourceManager.RemoveSmartContentAttributes(bt);
            }
        }
        output.Append(el.ToString());
    }
    return output.ToString();
}
/// <summary>
/// Removes every tag whose name contains ':' and copies all other elements through as raw text.
/// Namespaced tags come with Office 2007 clipboard data and result in weird
/// namespace declarations being inserted as text into the DOM. (Bug 303784)
/// </summary>
/// <param name="html">HTML to clean.</param>
/// <returns>The HTML without namespaced tags.</returns>
private static string StripNamespacedTags(string html)
{
    StringBuilder kept = new StringBuilder(html.Length);
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

    Element current;
    while ((current = htmlParser.Next()) != null)
    {
        bool isNamespacedTag = current is Tag && ((Tag)current).Name.IndexOf(':') >= 0;
        if (!isNamespacedTag)
        {
            kept.Append(current.RawText);
        }
    }

    return kept.ToString();
}
/// <summary>
/// Detects a meaningless element such as <c>&lt;p&gt;&lt;/p&gt;</c>, <c>&lt;a href="..."&gt;&lt;/a&gt;</c>
/// or an <c>&lt;a href="..."&gt;</c> that contains only whitespace, and when one is found consumes
/// the rest of it (the whitespace text, if any, and the end tag) from the parser.
/// </summary>
/// <param name="htmlParser">Parser positioned just after <paramref name="bt"/>.</param>
/// <param name="bt">The begin tag that was just read.</param>
/// <returns>
/// true when the element was meaningless and its remainder has been consumed, so the caller should
/// drop <paramref name="bt"/>; false when the parser was left untouched.
/// </returns>
private static bool RemoveMeaninglessTags(SimpleHtmlParser htmlParser, BeginTag bt)
{
    // A <p> with no attributes and no residue, directly followed by </p>.
    if (bt.NameEquals("p") && bt.Attributes.Length == 0 && !bt.HasResidue)
    {
        EndTag closingParagraph = htmlParser.Peek(0) as EndTag;
        if (closingParagraph != null && closingParagraph.NameEquals("p"))
        {
            htmlParser.Next(); // eat up the end tag
            return true;
        }
    }

    // An <a> with an href but no name/style/id, meaning an empty link is not useful as an anchor.
    bool isPlainLink = bt.NameEquals("a")
        && bt.GetAttribute("name") == null
        && bt.GetAttributeValue("style") == null
        && bt.GetAttributeValue("id") == null
        && bt.GetAttributeValue("href") != null;
    if (!isPlainLink)
    {
        return false;
    }

    Element upcoming = htmlParser.Peek(0);
    bool whitespaceInside = false;
    if (upcoming is Text && HtmlUtils.UnEscapeEntities(upcoming.RawText, HtmlUtils.UnEscapeMode.NonMarkupText).Trim().Length == 0)
    {
        // Whitespace-only content: look one element further for the end tag.
        whitespaceInside = true;
        upcoming = htmlParser.Peek(1);
    }

    EndTag closingLink = upcoming as EndTag;
    if (closingLink == null || !closingLink.NameEquals("a"))
    {
        return false;
    }

    if (whitespaceInside)
    {
        htmlParser.Next(); // eat up the whitespace text
    }
    htmlParser.Next(); // eat up the end tag
    return true;
}
/// <summary>
/// Replaces every "\r\n" inside text elements with <c>&lt;br/&gt;</c>. All other elements are
/// copied through unchanged as raw text.
/// </summary>
/// <param name="html">HTML to convert.</param>
/// <returns>The converted HTML.</returns>
public static string ConvertNewLinesToBr(string html)
{
    SimpleHtmlParser parser = new SimpleHtmlParser(html);
    StringBuilder converted = new StringBuilder();

    for (Element current = parser.Next(); current != null; current = parser.Next())
    {
        if (current is Text)
        {
            converted.Append(current.RawText.Replace("\r\n", "<br/>"));
            continue;
        }

        converted.Append(current.RawText);
    }

    return converted.ToString();
}
/// <summary>
/// Parses the content of the <c>stream</c> field with <c>SimpleHtmlParser</c>, saves the resulting
/// document to an in-memory buffer, and collects the <c>href</c> attribute of every <c>a</c>
/// element found when reading that buffer back with an <see cref="XmlReader"/>.
/// </summary>
/// <returns>The href values in document order; anchors without an href are skipped.</returns>
public List<string> ExtractLinksHtmlParserSharp()
{
    stream.Seek(0, SeekOrigin.Begin);

    var links = new List<string>();
    var simpleHtmlparser = new SimpleHtmlParser();
    // The StreamReader is intentionally not disposed: disposing it would close `stream`,
    // which is rewound at the top of this method and so is presumably reused between calls.
    var document = simpleHtmlparser.Parse(new StreamReader(stream));

    // Both the buffer and the reader are owned by this method, so release them deterministically.
    using (var memoryStream = new MemoryStream())
    {
        document.Save(memoryStream);
        memoryStream.Seek(0, SeekOrigin.Begin);

        using (var reader = XmlReader.Create(memoryStream, new XmlReaderSettings { DtdProcessing = DtdProcessing.Parse }))
        {
            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                if (reader.Name != "a")
                {
                    continue;
                }

                var hrefAttributeValue = reader.GetAttribute("href");
                if (hrefAttributeValue == null)
                {
                    continue;
                }

                links.Add(hrefAttributeValue);
            }
        }
    }

    return links;
}
/// <summary>
/// Determines whether the div tags in <paramref name="html"/> are unbalanced: either the number of
/// begin and end tags differs, or an end tag appears before the begin tag it would close
/// (e.g. <c>&lt;/div&gt;&lt;div&gt;</c>, which a pure count would wrongly report as balanced).
/// </summary>
/// <param name="html">HTML to inspect.</param>
/// <returns>true when the div tags do not pair up; otherwise false.</returns>
public static bool ContainsUnbalancedDivs(string html)
{
    int depth = 0;
    SimpleHtmlParser p = new SimpleHtmlParser(html);
    for (Element e; (e = p.Next()) != null;)
    {
        Tag tag = e as Tag;
        if (tag == null || !tag.NameEquals("div"))
        {
            continue;
        }

        if (tag is BeginTag)
        {
            ++depth;
        }
        else if (--depth < 0)
        {
            // A closing div with no open div to match: unbalanced no matter what follows.
            return true;
        }
    }
    return depth != 0;
}
/// <summary>
/// Re-serializes <paramref name="html"/>, marking every begin tag whose name does not require an
/// end tag (per <c>ElementFilters.RequiresEndTag</c>) as complete before it is written.
/// </summary>
/// <param name="html">HTML to rewrite.</param>
/// <returns>The re-serialized HTML.</returns>
private static string BalanceHtml(string html)
{
    StringBuilder rewritten = new StringBuilder(html.Length + 10);
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

    for (Element current = htmlParser.Next(); current != null; current = htmlParser.Next())
    {
        BeginTag opening = current as BeginTag;
        if (opening != null && !ElementFilters.RequiresEndTag(opening.Name))
        {
            opening.Complete = true;
        }

        rewritten.Append(current.ToString());
    }

    return rewritten.ToString();
}
/// <summary>
/// When <c>args.Html</c> contains the text "table", rewrites it so that every <c>td</c> begin tag
/// that is immediately followed by a <c>td</c> end tag gets a single filler character between the
/// two. All other markup is copied through verbatim.
/// </summary>
/// <param name="args">Carries the HTML to fix up; its <c>Html</c> property is overwritten.</param>
private static void EditorContext_PerformTemporaryFixupsToEditedHtml(TemporaryFixupArgs args)
{
    string source = args.Html;
    if (!source.Contains("table"))
    {
        return;
    }

    StringBuilder fixedUp = new StringBuilder(source.Length);
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(source);

    Element current;
    while ((current = htmlParser.Next()) != null)
    {
        fixedUp.Append(source, current.Offset, current.Length);

        BeginTag opening = current as BeginTag;
        if (opening == null || !opening.NameEquals("td"))
        {
            continue;
        }

        // Empty cell: the end tag follows straight after the begin tag.
        EndTag closing = htmlParser.Peek(0) as EndTag;
        if (closing != null && closing.NameEquals("td"))
        {
            fixedUp.Append(" ");
        }
    }

    args.Html = fixedUp.ToString();
}
/// <summary>
/// Replaces the content and attributes of <paramref name="element"/> with the nodes obtained by
/// parsing <paramref name="html"/> as a fragment.
/// </summary>
/// <param name="element">Element whose content is replaced.</param>
/// <param name="html">HTML fragment to parse.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="element"/> or <paramref name="html"/> is null.
/// </exception>
public static void SetInnerHtml(this XElement element, string html)
{
    if (element == null)
    {
        throw new ArgumentNullException("element");
    }

    if (html == null)
    {
        throw new ArgumentNullException("html");
    }

    element.RemoveAll();

    var fragmentParser = new SimpleHtmlParser();
    foreach (var child in fragmentParser.ParseFragment(new StringReader(html), String.Empty))
    {
        element.Add(child);
    }
}
/// <summary>
/// Creates a factory over the given HTML text.
/// </summary>
/// <param name="html">HTML handed to the underlying <c>SimpleHtmlParser</c>.</param>
public FormFactory(string html)
{
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);
    parser = htmlParser;
}
/// <summary>
/// Balances the HTML and safely truncates it, using a custom algorithm
/// to determine how much each character/string counts against maxCost.
/// Style elements, script elements, comments and markup directives are dropped from the output,
/// and every tag still open when processing stops is closed, innermost first.
/// </summary>
/// <param name="html">HTML to balance and truncate.</param>
/// <param name="maxCost">Upper bound on the accumulated cost, as measured by <paramref name="costFilter"/>.</param>
/// <param name="costFilter">Computes element costs and truncates text; a <c>DefaultCostFilter</c> is used when null.</param>
/// <param name="ellipsis">When true, "..." is appended if a text element had to be truncated.</param>
/// <returns>The balanced, possibly truncated HTML.</returns>
public static string Balance(string html, int maxCost, HTMLBalancerCostFilter costFilter, bool ellipsis)
{
    bool appendEllipsis = false;
    SimpleHtmlParser parser = new SimpleHtmlParser(html);

    // Names of the tags currently open, in the order RegisterTag leaves them.
    ArrayList openTags = new ArrayList();
    StringBuilder output = new StringBuilder();
    long balance = 0; // long to make sure that int32.MaxValue does not cause overflow

    if (costFilter == null)
    {
        costFilter = new DefaultCostFilter();
    }

    Element el;
    while (null != (el = parser.Next()))
    {
        if (el is StyleElement || el is ScriptElement || el is Comment || el is MarkupDirective)
        {
            continue;
        }

        // Budget still available after reserving what it takes to close the currently open tags.
        long lenLeft = Math.Max(0, maxCost - balance - LengthToClose(costFilter, openTags));

        if (el is Tag)
        {
            if (el is BeginTag && ((BeginTag)el).Unterminated)
            {
                continue; // skip corrupted tags
            }

            if (TagCost(costFilter, openTags, (Tag)el) > lenLeft)
            {
                break; // don't use this tag; we're done
            }
            else
            {
                RegisterTag(openTags, (Tag)el);
                output.Append(el.ToString());
                balance += costFilter.ElementCost(el);
            }
        }
        else if (el is Text)
        {
            if (costFilter.ElementCost(el) > lenLeft)
            {
                // shrink down the text to fit, then stop: nothing after a truncation is emitted
                output.Append(costFilter.TruncateText((Text)el, (int)lenLeft));
                appendEllipsis = true;
                break;
            }
            else
            {
                // plenty of room
                output.Append(el.ToString());
                balance += costFilter.ElementCost(el);
            }
        }
        else
        {
            // Any other element kind is kept whole or not at all.
            if (costFilter.ElementCost(el) > lenLeft)
            {
                break;
            }
            else
            {
                output.Append(el.ToString());
                balance += costFilter.ElementCost(el);
            }
        }
    }

    // Append an ellipsis if we truncated text
    // We use "..." here rather than TextHelper.Ellipsis, because some mail clients don't understand "\u2026".
    if (ellipsis && appendEllipsis)
    {
        output.Append("...");
    }

    // Close whatever is still open, walking the list from its end to its start.
    for (int i = openTags.Count - 1; i >= 0; i--)
    {
        output.Append(MakeEndTag((string)openTags[i]));
    }

    return(output.ToString());
}
/// <summary>
/// Walks the elements of <c>_html</c> and raises the matching <c>On…</c> callback for each one,
/// bracketed by <c>OnDocumentBegin</c> and <c>OnDocumentEnd</c>. An element of an unrecognized
/// type triggers a debug failure and is otherwise skipped.
/// </summary>
public void Parse()
{
    SimpleHtmlParser htmlParser = new SimpleHtmlParser(_html);
    OnDocumentBegin();

    for (;;)
    {
        Element element = htmlParser.Next();

        // The parser signals the end of input with null.
        if (element == null)
        {
            OnDocumentEnd();
            return;
        }

        // The order of these checks is significant and mirrors the dispatch order callers rely on.
        if (element is BeginTag)
        {
            OnBeginTag((BeginTag)element);
        }
        else if (element is EndTag)
        {
            OnEndTag((EndTag)element);
        }
        else if (element is ScriptLiteral)
        {
            OnScriptLiteral((ScriptLiteral)element);
        }
        else if (element is Comment)
        {
            OnComment((Comment)element);
        }
        else if (element is MarkupDirective)
        {
            OnMarkupDirective((MarkupDirective)element);
        }
        else if (element is ScriptText)
        {
            OnScriptText((ScriptText)element);
        }
        else if (element is ScriptComment)
        {
            OnScriptComment((ScriptComment)element);
        }
        else if (element is StyleText)
        {
            OnStyleText((StyleText)element);
        }
        else if (element is StyleUrl)
        {
            OnStyleUrl((StyleUrl)element);
        }
        else if (element is StyleImport)
        {
            OnStyleImport((StyleImport)element);
        }
        else if (element is StyleComment)
        {
            OnStyleComment((StyleComment)element);
        }
        else if (element is StyleLiteral)
        {
            OnStyleLiteral((StyleLiteral)element);
        }
        else if (element is Text)
        {
            OnText((Text)element);
        }
        else
        {
            Debug.Fail("Unrecognized element in LightWeightHTMLDocumentIterator");
        }
    }
}
/// <summary>
/// Rewrites <paramref name="html"/> keeping only the tags present in the active tag table
/// (optionally substituting them), re-escaping text, and routing whitespace through a
/// <c>WhitespaceBuffer</c>.
/// </summary>
/// <param name="html">HTML to thin.</param>
/// <param name="preserveImages">When false, tags that resolve to TAG_IMG are dropped.</param>
/// <param name="strict">When true, <c>_tagSpecsStrict</c> is used as the tag table instead of <c>_tagSpecs</c>.</param>
/// <param name="modifyReplacements">
/// Optional callbacks that may alter the tag table; when supplied they operate on a clone, so the
/// shared tables are not modified.
/// </param>
/// <returns>The thinned HTML.</returns>
private string ThinInternal(string html, bool preserveImages, bool strict, params ModifyReplacement[] modifyReplacements)
{
    Hashtable replacements = _tagSpecs;
    if (strict)
    {
        replacements = _tagSpecsStrict;
    }

    if (modifyReplacements != null)
    {
        // Work on a copy so the callbacks cannot change the shared table.
        replacements = (Hashtable)replacements.Clone();
        foreach (ModifyReplacement modifyReplacement in modifyReplacements)
        {
            modifyReplacement(replacements);
        }
    }

    // Will hold the results of the leading whitespace buffer.
    // This buffer may or may not make it into the final result,
    // depending on whether any block-level tags are present.
    StringBuilder leadingOutput = new StringBuilder(10);

    // Will hold the results of everything else.
    StringBuilder mainOutput = new StringBuilder(html.Length);

    // references whichever output buffer is current.
    StringBuilder output = leadingOutput;

    SimpleHtmlParser parser = new SimpleHtmlParser(html);
    Element el;

    bool preserveWhitespace = false; // <pre> blocks should preserve whitespace
    WhitespaceBuffer whitespaceBuffer = new WhitespaceBuffer();
    whitespaceBuffer.Promote(WhitespaceClass.Paragraph); // Insert an implicit <p> unless the first non-whitespace element is a block
    bool hasBlock = false;

    while (null != (el = parser.Next()))
    {
        if (el is Tag)
        {
            Tag t = (Tag)el;
            string lowerName = t.Name.ToLower(CultureInfo.InvariantCulture);

            TagDesc desc = (TagDesc)replacements[lowerName];

            // if this tag is not in the table, drop it
            if (desc == null)
            {
                continue;
            }

            // Replace tag with substitute tag if necessary (e.g. <DIV> becomes <P>)
            string tagName = desc.Substitute;
            if (tagName == null)
            {
                tagName = lowerName;
            }

            // special case for images
            if (!preserveImages && tagName == TAG_IMG)
            {
                continue;
            }

            bool beginTag = el is BeginTag;

            ElementClass elClass = WhitespaceBuffer.ClassifyTag(tagName, desc.TagType);
            hasBlock |= (elClass == ElementClass.Block || elClass == ElementClass.Paragraph || elClass == ElementClass.Break);

            // Outside <pre>, the whitespace buffer gets first refusal; a true result means this
            // tag is not written.
            if (!preserveWhitespace && WhitespaceBuffer.ProcessElementClass(ref whitespaceBuffer, output, elClass, true))
            {
                continue;
            }

            // From the first element that is actually written, output goes to the main buffer.
            output = mainOutput;

            if (beginTag)
            {
                WriteBeginTag(desc, tagName, ((BeginTag)el).Attributes, output);
                if (tagName == TAG_PRE)
                {
                    preserveWhitespace = true;
                }
            }
            else if (el is EndTag)
            {
                // Implicit end tags and end tags of empty-type tags are not written.
                if (!((EndTag)el).Implicit && desc.TagType != TagType.Empty)
                {
                    output.Append(string.Format(CultureInfo.InvariantCulture, "</{0}>", tagName));
                }
                if (tagName == TAG_PRE)
                {
                    preserveWhitespace = false;
                }
            }
        }
        else if (el is Text)
        {
            string text = el.RawText;
            // Normalize entity escaping by unescaping and escaping again.
            text = HtmlUtils.EscapeEntities(HtmlUtils.UnEscapeEntities(text, HtmlUtils.UnEscapeMode.NonMarkupText));

            if (!preserveWhitespace && WhitespaceBuffer.ProcessElementClass(ref whitespaceBuffer, output, WhitespaceBuffer.ClassifyText(text), false))
            {
                continue;
            }

            output = mainOutput;

            output.Append(text);
        }
    }

    // The leading buffer is only prepended when a block-level element was seen and something
    // reached the main buffer.
    if (hasBlock && ReferenceEquals(mainOutput, output))
    {
        output.Insert(0, leadingOutput.ToString());
    }

    // The whitespace buffer may not be empty at this point. That's OK--we want to drop trailing whitespace

    return(output.ToString());
}
/// <summary>
/// Assigns a fresh <c>SimpleHtmlParser</c> to <c>_htmlParser</c>.
/// </summary>
public void Init()
{
    SimpleHtmlParser freshParser = new SimpleHtmlParser();
    _htmlParser = freshParser;
}
/// <summary>
/// Parses a self-closed <c>svg</c> element with an unquoted attribute. The method only needs the
/// parse call to return; nothing about the result is checked.
/// </summary>
public void SvgHang()
{
    SimpleHtmlParser htmlParser = new SimpleHtmlParser();
    StringReader markup = new StringReader("<svg x=y/>");
    htmlParser.Parse(markup);
}
/// <summary>
/// Creates a text source that uses the supplied parser.
/// </summary>
/// <param name="parser">Parser stored in <c>_parser</c>; it is not validated here.</param>
public HtmlTextSource(SimpleHtmlParser parser)
{
    _parser = parser;
}
/// <summary>
/// Walks the current contents to find smart content areas. When one is found, it calls the operation on the smart content. The operation has a chance
/// to return new content. If the content is non-null it will replace the current content.
/// </summary>
/// <param name="contents">the raw HTML string whose structured blocks will be replaced.</param>
/// <param name="operation">Delegate for generating replacement content.</param>
/// <param name="editMode">If true, then the element's stylename will be activated for editing</param>
/// <param name="sourceContext">Context used to look up the content source and the smart content item for each block.</param>
/// <param name="continueOnError">
/// true - if the plugin throws an exception, it keeps crawling the DOM
/// false - if a plugin throws an exception, it stops processing the DOM and return empty string
/// null - if a plugin throws an exception, this function will rethrow it
/// </param>
/// <returns>the contents with structured blocks replaced.</returns>
internal static string PerformOperation(string contents, SmartContentOperation operation, bool editMode, IContentSourceSidebarContext sourceContext, bool? continueOnError)
{
    //replace all structured content blocks with their editor HTML
    StringBuilder sb = new StringBuilder();
    SimpleHtmlParser parser = new SimpleHtmlParser(contents);
    for (Element e = parser.Next(); e != null; e = parser.Next())
    {
        if (e is BeginTag)
        {
            BeginTag beginTag = (BeginTag)e;
            string elementClassName = beginTag.GetAttributeValue("class");
            if (ContentSourceManager.IsSmartContentClass(elementClassName))
            {
                // Stays null unless the block resolves to a known smart content item.
                ISmartContent sContent = null;
                try
                {
                    string contentSourceId, contentItemId;
                    string blockId = beginTag.GetAttributeValue("id");
                    if (blockId != null)
                    {
                        ContentSourceManager.ParseContainingElementId(blockId, out contentSourceId, out contentItemId);

                        ContentSourceInfo contentSource = sourceContext.FindContentSource(contentSourceId);
                        if (contentSource != null && contentSource.Instance is SmartContentSource)
                        {
                            SmartContentSource sSource = (SmartContentSource)contentSource.Instance;
                            sContent = sourceContext.FindSmartContent(contentItemId);
                            if (sContent != null)
                            {
                                //write the div with the appropriate className
                                string newClassName = editMode ? ContentSourceManager.EDITABLE_SMART_CONTENT : ContentSourceManager.SMART_CONTENT;
                                beginTag.GetAttribute("class").Value = newClassName;

                                //replace the inner HTML of the div with the source's editor HTML
                                string content = parser.CollectHtmlUntil("div");

                                sb.Append(e.ToString());

                                operation(sourceContext, sSource, sContent, ref content);

                                sb.Append(content);

                                sb.Append("</div>");
                                // The whole block has been written; skip the generic append below.
                                continue;
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Trace.WriteLine(String.Format(CultureInfo.InvariantCulture, "Error loading smart content item\r\n{0}", ex));
                    sContent = null;

                    if (continueOnError == null)
                    {
                        throw;
                    }

                    if (!continueOnError.Value)
                    {
                        return(String.Empty);
                    }
                    // NOTE(review): when the exception comes from `operation`, the begin tag has
                    // already been appended and the inner HTML already consumed, so continuing
                    // appends the begin tag a second time below and drops the inner HTML — confirm
                    // whether that is intended.
                }

                if (sContent == null)
                {
                    //this element references an unknown smart content, so it should not be editable
                    Attr classAttr = beginTag.GetAttribute("class");
                    classAttr.Value = ContentSourceManager.SMART_CONTENT;
                }
            }
        }
        sb.Append(e.ToString());
    }

    return(sb.ToString());
}
/// <summary>
/// Downloads one page of the rentals list and converts each trip row into a <c>Rental</c>.
/// Up to four attempts are made; an attempt that needs authentication first logs in, and an
/// attempt that has no user id first fetches it.
/// </summary>
/// <param name="page">Page to fetch; a value greater than 0 is sent as the pageNumber query parameter.</param>
/// <returns>
/// The rentals on the page, or null when all attempts were used up or an unexpected exception
/// was logged.
/// </returns>
public async Task<Rental[]> GetRentals(int page)
{
    bool loginRequired = false;

    for (int attempt = 0; attempt < 4; attempt++)
    {
        try
        {
            if (loginRequired)
            {
                bool loggedIn = await LoginToHubway().ConfigureAwait(false);
                if (!loggedIn)
                {
                    continue;
                }
                loginRequired = false;
            }

            if (string.IsNullOrEmpty(credentials.UserId))
            {
                credentials.UserId = await GetHubwayUserId().ConfigureAwait(false);
                if (string.IsNullOrEmpty(credentials.UserId))
                {
                    // No user id could be obtained: log in on the next attempt.
                    loginRequired = true;
                    continue;
                }
            }

            var rentalsUrl = HubwayRentalsUrl + credentials.UserId;
            if (page > 0)
            {
                rentalsUrl += "?pageNumber=" + page;
            }

            var answer = await Client.GetStringAsync(rentalsUrl).ConfigureAwait(false);
            var doc = new SimpleHtmlParser().ParseString(answer);

            var profileSection = doc.GetElementsByTagName("section")
                .OfType<XmlElement>()
                .First(s => s.GetAttribute("class") == "ed-profile-page__content");
            var tripRows = profileSection.GetElementsByTagName("div")
                .OfType<XmlElement>()
                .Where(n => n.ContainsClass("ed-table__item_trip"));

            Func<XmlElement, Rental> toRental = row =>
            {
                var cells = row.GetElementsByTagName("div").OfType<XmlElement>().ToList();
                /* Layout of the <div> descendants of a row:
                 * 0 <div>
                 * 1   <div>time start
                 * 2   <div>station start
                 *   </div>
                 * 3 <div>
                 * 4   <div>time end
                 * 5   <div>station end
                 *   </div>
                 * 6 <div>duration
                 * 7 <div>billed
                 */
                var rental = new Rental();
                rental.FromStationName = cells[2].InnerText.Trim();
                rental.ToStationName = cells[5].InnerText.Trim();
                rental.Duration = ParseRentalDuration(cells[6].InnerText.Trim());
                rental.Price = ParseRentalPrice(cells[7].InnerText.Trim());
                rental.DepartureTime = DateTime.Parse(cells[1].InnerText, System.Globalization.CultureInfo.InvariantCulture);
                rental.ArrivalTime = DateTime.Parse(cells[4].InnerText, System.Globalization.CultureInfo.InvariantCulture);

                // Id: departure hash in the high 32 bits, arrival hash in the low 32 bits.
                rental.Id = ((long)rental.DepartureTime.GetHashCode()) << 32;
                rental.Id |= (uint)rental.ArrivalTime.GetHashCode();
                return rental;
            };

            return tripRows.Select(toRental).ToArray();
        }
        catch (HttpRequestException htmlException)
        {
            // Super hacky but oh well: a message mentioning 302 is taken as "needs login".
            if (!loginRequired)
            {
                loginRequired = htmlException.Message.Contains("302");
            }
            continue;
        }
        catch (Exception e)
        {
            AnalyticsHelper.LogException("RentalsGenericError", e);
            Log.Error("RentalsGenericError", e.ToString());
            break;
        }
    }

    return null;
}
/// <summary>
/// Converts tag names, attribute names, and style text to lowercase. Also drops the
/// contenteditable, atomicselection and unselectable attributes, removes tags that
/// <c>RemoveMeaninglessTags</c> reports as meaningless, and repeats the whole pass until a pass
/// removes no tag.
/// </summary>
/// <param name="html">HTML to clean up.</param>
/// <param name="xml">
/// When true, attribute values and text are escaped for XML; otherwise attribute values are
/// escaped with <c>HtmlUtils.EscapeEntities</c> and text is only passed through <c>HtmlUtils.TidyNbsps</c>.
/// </param>
/// <returns>The cleaned HTML.</returns>
private string CleanupHtml(string html, bool xml)
{
    bool removedTag;
    do
    {
        removedTag = false;
        StringBuilder cleaned = new StringBuilder(html.Length);
        SimpleHtmlParser htmlParser = new SimpleHtmlParser(html);

        Element current;
        while ((current = htmlParser.Next()) != null)
        {
            if (current is BeginTag)
            {
                BeginTag opening = (BeginTag)current;
                if (RemoveMeaninglessTags(htmlParser, opening))
                {
                    // Since we are removing a tag, we will want to clean up again, since that might mean
                    // there will be another tag to remove
                    removedTag = true;
                    continue;
                }

                cleaned.Append("<");
                cleaned.Append(opening.Name.ToLower(CultureInfo.InvariantCulture));

                foreach (Attr attr in opening.Attributes)
                {
                    bool dropped = attr.NameEquals("contenteditable")
                        || attr.NameEquals("atomicselection")
                        || attr.NameEquals("unselectable");
                    if (dropped)
                    {
                        continue;
                    }

                    cleaned.Append(" ");
                    cleaned.Append(attr.Name.ToLower(CultureInfo.InvariantCulture));
                    if (attr.Value == null)
                    {
                        continue;
                    }

                    string attrVal = attr.Value;
                    if (attr.NameEquals("style"))
                    {
                        attrVal = LowerCaseCss(attrVal);
                    }
                    else if (attr.Name == attr.Value)
                    {
                        // Value repeats the attribute name exactly, so lowercase it alongside the name.
                        attrVal = attrVal.ToLower(CultureInfo.InvariantCulture);
                    }

                    string escapedVal = xml
                        ? HtmlUtils.EscapeEntitiesForXml(attrVal, true)
                        : HtmlUtils.EscapeEntities(attrVal);
                    cleaned.AppendFormat("=\"{0}\"", escapedVal);
                }

                if (opening.HasResidue)
                {
                    if (opening.Attributes.Length == 0)
                    {
                        cleaned.Append(" ");
                    }
                    cleaned.Append(opening.Residue);
                }

                if (opening.Complete)
                {
                    cleaned.Append(" /");
                }
                cleaned.Append(">");
            }
            else if (current is EndTag)
            {
                cleaned.AppendFormat("</{0}>", ((EndTag)current).Name.ToLower(CultureInfo.InvariantCulture));
            }
            else if (current is Text)
            {
                string textHtml = HtmlUtils.TidyNbsps(current.RawText);
                if (xml)
                {
                    string unescaped = HtmlUtils.UnEscapeEntities(textHtml, HtmlUtils.UnEscapeMode.NonMarkupText);
                    textHtml = HtmlUtils.EscapeEntitiesForXml(unescaped, false);
                }
                cleaned.Append(textHtml);
            }
            else if (current is StyleText)
            {
                cleaned.Append(current.RawText.ToLower(CultureInfo.InvariantCulture));
            }
            else
            {
                cleaned.Append(current.RawText);
            }
        }

        html = cleaned.ToString();
    } while (removedTag);

    return html;
}