/// <summary>
/// Clears the document's &lt;body&gt;, lets the current unit write itself into it,
/// and saves the document to the file registered for the current unit name.
/// </summary>
public void Save()
{
    var body = doc.DocumentNode.SelectSingleNode("//body");
    body.InnerHtml = "";
    currentUnit.Write(body);

    // Dispose the writer so buffered output is flushed and the file handle is released.
    using (var stream = File.CreateText(fileList[currentUnitName]))
    {
        doc.Save(stream);
    }
}
/// <summary>
/// Parses <paramref name="htmlText"/>, appends the CSS marker class to every element node,
/// adds "lineNumber" and "column" attributes taken from the parser's position info,
/// and returns the re-serialized markup.
/// </summary>
/// <param name="htmlText">HTML snippet to annotate.</param>
/// <returns>The annotated HTML.</returns>
public static string Inject(string htmlText)
{
    // Technique taken from:
    // https://stackoverflow.com/questions/9520932/how-do-i-use-html-agility-pack-to-edit-an-html-snippet
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(htmlText);

    foreach (var element in document.DocumentNode.Descendants())
    {
        if (element.NodeType != HtmlAgilityPack.HtmlNodeType.Element)
        {
            continue;
        }

        var existingClass = element.Attributes["class"];
        if (existingClass == null)
        {
            element.SetAttributeValue("class", cssClassMarker);
        }
        else
        {
            existingClass.Value = $"{existingClass.Value} {cssClassMarker}";
        }

        element.Attributes.Add("lineNumber", element.Line.ToString());
        element.Attributes.Add("column", element.LinePosition.ToString());
    }

    using (var output = new StringWriter())
    {
        document.Save(output);
        return output.ToString();
    }
}
/// <summary>
/// Rewrites the "src" attribute of every img, iframe, video and audio element in
/// <paramref name="content"/> through <c>GetAbsoluteUrl</c>, using the configured
/// public site base URL, and returns the resulting HTML.
/// </summary>
/// <param name="content">Article HTML.</param>
/// <returns>The HTML with rewritten media sources.</returns>
public string FormatArticleContent(string content)
{
    var siteBase = new Uri(_appSettings.Value.PublicSiteBaseUrl);
    var mediaTags = new[] { "img", "iframe", "video", "audio" };

    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(content);

    foreach (var element in document.DocumentNode.Descendants())
    {
        if (Array.IndexOf(mediaTags, element.Name) < 0 || !element.HasAttributes)
        {
            continue;
        }

        var source = element.Attributes["src"];
        if (source == null)
        {
            continue;
        }

        source.Value = GetAbsoluteUrl(siteBase, source.Value);
    }

    using (var output = new StringWriter())
    {
        document.Save(output);
        return output.ToString();
    }
}
/// <summary>
/// Logs the page title and the gender/age_min/age_max/start query values of the crawled URL,
/// then dumps the parsed HTML document into the "HtmlDump" folder under a name built from
/// the movie title and those query values.
/// </summary>
/// <param name="crawler">The crawler that produced the page (not used here).</param>
/// <param name="propertyBag">Crawl result; its "HtmlDoc" entry must hold the parsed document.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    // Cast first and bail out on null: this covers both a missing value and a value of an
    // unexpected type, which previously caused a NullReferenceException at Save.
    var doc = propertyBag["HtmlDoc"].Value as HtmlAgilityPack.HtmlDocument;
    if (doc == null)
    {
        return;
    }

    string pageUrl = propertyBag.ResponseUri.AbsoluteUri;
    string gender = "gender=" + CrawlUtil.getQueryValueFromUrl(pageUrl, "gender");
    string age_min = "age_min=" + CrawlUtil.getQueryValueFromUrl(pageUrl, "age_min");
    string age_max = "age_max=" + CrawlUtil.getQueryValueFromUrl(pageUrl, "age_max");
    string start = "start=" + CrawlUtil.getQueryValueFromUrl(pageUrl, "start");

    ReviewCrawler.MainForm.appendLineToLog(propertyBag.Title);
    ReviewCrawler.MainForm.appendLineToLog(gender);
    ReviewCrawler.MainForm.appendLineToLog(age_min);
    ReviewCrawler.MainForm.appendLineToLog(age_max);
    ReviewCrawler.MainForm.appendLineToLog(start);

    string movieName = CrawlUtil.SanitiseFileName(
        CrawlUtil.getMovieNameFromTitle(HttpUtility.HtmlDecode(propertyBag.Title)));

    doc.Save("HtmlDump/" + movieName + "#" + gender + "#" + age_min + "#" + age_max + "#" + start + ".html");
}
/// <summary>
/// Resolves the "href" of every link and a element and the "src" of every img element in
/// <paramref name="html"/> against <paramref name="siteUrl"/> and returns the rewritten HTML.
/// Elements that do not carry the attribute (for example named anchors) are left untouched.
/// </summary>
/// <param name="html">HTML to process.</param>
/// <param name="siteUrl">Absolute base URL used to resolve relative references.</param>
/// <returns>The HTML with absolute URLs.</returns>
public static string RelativeToAbsoluteUrls(string html, string siteUrl)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    // Element name / URL attribute pairs, processed in the same order as before.
    var targets = new[]
    {
        new[] { "link", "href" },
        new[] { "img", "src" },
        new[] { "a", "href" },
    };

    // Built lazily so an invalid siteUrl only throws when there is something to rewrite.
    Uri baseUri = null;

    foreach (var target in targets)
    {
        foreach (var element in doc.DocumentNode.Descendants(target[0]))
        {
            var attribute = element.Attributes[target[1]];
            if (attribute == null)
            {
                // Previously a NullReferenceException, e.g. for <a name="..."> anchors.
                continue;
            }

            if (baseUri == null)
            {
                baseUri = new Uri(siteUrl);
            }

            attribute.Value = new Uri(baseUri, attribute.Value).AbsoluteUri;
        }
    }

    using (StringWriter writer = new StringWriter())
    {
        doc.Save(writer);
        return writer.ToString();
    }
}
/// <summary>
/// Loads the chart SVG template, fills in the axis labels, resizes the "basic" and "good"
/// bars relative to the reference bar "bar_current", saves the result to a new temp file
/// and shows that file in the embedded browser.
/// </summary>
private void frmSettings_Load(object sender, EventArgs e)
{
    Thread.CurrentThread.CurrentUICulture = new CultureInfo("en");

    var svgPath = @"C:\Users\Pc\Documents\TBI Chart.svg";
    HtmlAgilityPack.HtmlDocument svg = new HtmlAgilityPack.HtmlDocument();
    svg.Load(svgPath);

    // Materialize once; Descendants() is lazy and was re-enumerated for every ElementAt call.
    var text = svg.DocumentNode.Descendants("text").ToList();

    var max = 66.2;
    var basic = 24.2;
    var good = 45.2;

    var currentBar = svg.GetElementbyId("bar_current");
    double height = currentBar.GetAttributeValue("height", 0);
    double top = currentBar.GetAttributeValue("y", 0);

    // The first/last and second/second-to-last labels mirror each other.
    var maxLabel = $"{max}".ToNumber();
    text[0].InnerHtml = maxLabel;
    text[6].InnerHtml = maxLabel;

    var halfLabel = $"{max/2}".ToNumber();
    text[1].InnerHtml = halfLabel;
    text[5].InnerHtml = halfLabel;

    var basicBar = svg.GetElementbyId("bar_basic_as");
    basicBar.SetAttributeValue("height", $"{basic * height / max}".ToNumber());
    basicBar.SetAttributeValue("y", $"{(top + height) - (basic * height / max)}".ToNumber());

    var goodBar = svg.GetElementbyId("bar_good_as");
    goodBar.SetAttributeValue("height", $"{good * height / max}".ToNumber());
    goodBar.SetAttributeValue("y", $"{(top + height) - (good * height / max)}".ToNumber());

    var savePath = $@"C:\Users\Pc\Documents\temp\{Guid.NewGuid()}.svg";
    svg.Save(savePath);

    // Show the file that was just generated; navigating to svgPath displayed the
    // unmodified template and made the edits above invisible.
    webBrowser1.Navigate(savePath);
}
/// <summary>
/// See interface docs.
/// </summary>
/// <param name="html">The HTML text to process.</param>
/// <param name="encoding">The encoding used for the document and for the save/read round trip.</param>
/// <returns>
/// The re-serialized HTML when the document overload reports a change; otherwise the
/// original <paramref name="html"/>.
/// </returns>
public string Html(string html, Encoding encoding)
{
    var result = html;

    var document = new HtmlAgilityPack.HtmlDocument() {
        OptionCheckSyntax = false,
        OptionDefaultStreamEncoding = encoding,
    };
    document.LoadHtml(html);

    if (Html(document))
    {
        using (var stream = new MemoryStream())
        {
            // Write with the same encoding that is used to read the bytes back. Save(stream)
            // alone uses the document's own output encoding, which can differ from
            // `encoding` and would then garble the text.
            document.Save(stream, encoding);
            stream.Position = 0;
            using (var streamReader = new StreamReader(stream, encoding, true))
            {
                result = streamReader.ReadToEnd();
            }
        }
    }

    return result;
}
/// <summary>
/// HTML-decodes <paramref name="contents"/>, parses it, and replaces the "src" of every
/// img element with the value returned by <c>GetAndSaveImage</c> for that source and
/// <paramref name="dirPath"/>.
/// </summary>
/// <param name="contents">HTML-encoded markup.</param>
/// <param name="dirPath">Directory passed through to <c>GetAndSaveImage</c>.</param>
/// <returns>The re-serialized HTML.</returns>
private static String ParseSaveAndFixImages(string contents, string dirPath)
{
    contents = System.Web.HttpUtility.HtmlDecode(contents);

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    using (StringReader reader = new StringReader(contents))
    {
        doc.Load(reader);
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlAgilityPack.HtmlNodeCollection imgTags = doc.DocumentNode.SelectNodes("//img");
    if (imgTags != null)
    {
        foreach (HtmlAgilityPack.HtmlNode tag in imgTags)
        {
            HtmlAgilityPack.HtmlAttribute src = tag.Attributes["src"];
            if (src != null)
            {
                src.Value = GetAndSaveImage(src.Value, dirPath);
            }
        }
    }

    using (StringWriter sw = new StringWriter())
    {
        doc.Save(sw);
        return sw.ToString();
    }
}
/// <summary>
/// Downloads the listing page for the requested channel, converts it to XML, extracts the
/// shows via <c>FillShows</c>, and assigns channel and end time to each show. Any exception
/// is logged and the shows collected so far are returned.
/// </summary>
/// <param name="p">Grab parameters; must be a <c>GrabParameters</c> instance.</param>
/// <returns>The shows found (possibly empty).</returns>
List<Show> Grab(GrabParametersBase p)
{
    var shows = new List<Show>();
    try
    {
        var param = (GrabParameters)p;
        var wr = WebRequest.Create(string.Format(urlFormat, (int)param.ChannelId));
        _logger.WriteEntry(string.Format("Grabbing Channel {0} ...", param.ChannelId), LogType.Info);

        var doc = new HtmlAgilityPack.HtmlDocument();

        // Dispose the response and its stream so the underlying connection is released.
        using (var res = (HttpWebResponse)wr.GetResponse())
        using (var responseStream = res.GetResponseStream())
        {
            doc.Load(responseStream);
        }

        doc.OptionOutputAsXml = true;

        XDocument xml;
        using (var writer = new StringWriter())
        {
            doc.Save(writer);
            using (var reader = new StringReader(writer.ToString()))
            {
                xml = XDocument.Load(reader);
            }
        }

        FillShows(xml, shows);

        // Each show ends when the next one starts; the last one gets a fixed 12 hour slot.
        for (int i = shows.Count - 1; i >= 0; i--)
        {
            var show = shows[i];
            show.Channel = param.ChannelId.ToString();
            if (i == shows.Count - 1)
            {
                show.EndTime = show.StartTime.AddHours(12); // usually 3-4 days from now, not that important
            }
            else
            {
                show.EndTime = shows[i + 1].StartTime;
            }
        }
    }
    catch (Exception ex)
    {
        _logger.WriteEntry(ex.Message, LogType.Error);
    }

    _logger.WriteEntry(string.Format("Found {0} Shows", shows.Count), LogType.Info);
    return shows;
}
/// <summary>
/// Rewrites the "src" of every &lt;img&gt; whose source does not start with "http": the
/// original source is UTF-8/Base64 encoded and appended to
/// <paramref name="localGetImgRelativeURI"/>.
/// </summary>
/// <param name="htmlBody">HTML to process.</param>
/// <param name="localGetImgRelativeURI">Prefix of the local image endpoint.</param>
/// <returns>The re-serialized HTML.</returns>
private async Task<string> SetImgUrls(string htmlBody, string localGetImgRelativeURI)
{
    await Task.CompletedTask;

    var htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(htmlBody);

    var imgItems = htmlDoc.DocumentNode.SelectNodes("//img")?.ToList();
    if (imgItems != null)
    {
        foreach (var imgItem in imgItems)
        {
            // An <img> without a src attribute used to throw a NullReferenceException here.
            var srcAttribute = imgItem.Attributes["src"];
            string urlSrc = srcAttribute?.Value;
            if (urlSrc == null)
            {
                continue;
            }

            if (!urlSrc.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                srcAttribute.Value = localGetImgRelativeURI + Convert.ToBase64String(Encoding.UTF8.GetBytes(urlSrc));
            }
        }
    }

    using (var sw = new StringWriter())
    {
        htmlDoc.Save(sw);
        return sw.ToString();
    }
}
/// <summary>
/// Parses HTML read from <paramref name="stream"/>, reports parse errors through
/// <c>CheckHtmlErrors</c>, serializes the result as XML and loads that XML via
/// <c>loadXMLInternal</c>.
/// </summary>
/// <returns>The result of <c>loadXMLInternal</c>.</returns>
private bool loadHTML(Context ctx, TextReader stream, string filename, int options = 0)
{
    // Element names are left lower-cased (OptionOutputOriginalCase stays off) because the
    // XPath queries rely on that. OptionUseIdAttribute is only needed when the XPath
    // navigator is used on the HTML document itself.
    var parser = new HtmlAgilityPack.HtmlDocument
    {
        OptionOutputAsXml = true,
        OptionCheckSyntax = false,
        OptionUseIdAttribute = false,
        OptionWriteEmptyNodes = true,
    };

    parser.Load(stream);
    CheckHtmlErrors(ctx, parser, filename);

    // Round-trip through an XML string.
    using (var xmlText = new StringWriter())
    {
        parser.Save(xmlText);
        var xml = xmlText.ToString();
        return loadXMLInternal(ctx, xml, 0, true);
    }
}
/// <summary>
/// Rewrites "xref:" anchors, xref elements and all src/href references in
/// <paramref name="transformed"/>, then writes the document to <paramref name="outputWriter"/>.
/// </summary>
/// <exception cref="AggregateException">
/// Thrown after the document has been written when one or more cross references could not be resolved.
/// </exception>
private static void TranformHtml(IDocumentBuildContext context, string transformed, string relativeModelPath, StreamWriter outputWriter)
{
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(transformed);
    var root = document.DocumentNode;

    // Step 1: anchors whose href uses the "xref:" scheme.
    var xrefAnchors = root.SelectNodes("//a[starts-with(@href, 'xref:')]");
    if (xrefAnchors != null)
    {
        foreach (var anchor in xrefAnchors)
        {
            TransformXrefLink(anchor, context);
        }
    }

    // Step 2: xref elements; unresolved references are collected instead of aborting.
    var unresolved = new List<CrossReferenceNotResolvedException>();
    var xrefElements = root.SelectNodes("//xref/@href");
    if (xrefElements != null)
    {
        foreach (var xrefElement in xrefElements)
        {
            try
            {
                UpdateXref(xrefElement, context, Constants.DefaultLanguage);
            }
            catch (CrossReferenceNotResolvedException notResolved)
            {
                unresolved.Add(notResolved);
            }
        }
    }

    // Step 3: src attributes.
    var withSrc = root.SelectNodes("//*/@src");
    if (withSrc != null)
    {
        foreach (var node in withSrc)
        {
            UpdateHref(node, "src", context, relativeModelPath);
        }
    }

    // Step 4: href attributes.
    var withHref = root.SelectNodes("//*/@href");
    if (withHref != null)
    {
        foreach (var node in withHref)
        {
            UpdateHref(node, "href", context, relativeModelPath);
        }
    }

    document.Save(outputWriter);

    if (unresolved.Count > 0)
    {
        throw new AggregateException(unresolved);
    }
}
/// <summary>
/// Serializes <paramref name="source"/> to a string.
/// </summary>
/// <param name="source">The document to serialize.</param>
/// <returns>The text written by <c>HtmlDocument.Save</c>.</returns>
public static string GetOutputHTML(this HtmlAgilityPack.HtmlDocument source)
{
    using (var output = new StringWriter())
    {
        source.Save(output);
        return output.ToString();
    }
}
/// <summary>
/// Parses HTML read from <paramref name="stream"/>, reports parse errors through
/// <c>CheckHtmlErrors</c>, serializes the result as XML and loads that XML via
/// <c>loadXMLInternal</c>.
/// </summary>
/// <returns>The result of <c>loadXMLInternal</c>.</returns>
private object loadHTML(TextReader stream, string filename)
{
    // Element names are left lower-cased (OptionOutputOriginalCase stays off) because the
    // XPath queries rely on that.
    var parser = new HtmlAgilityPack.HtmlDocument
    {
        OptionOutputAsXml = true,
        OptionCheckSyntax = false,
        OptionWriteEmptyNodes = true,
    };

    parser.Load(stream);
    CheckHtmlErrors(parser, filename);

    // Round-trip through an XML string.
    using (var xmlText = new StringWriter())
    {
        parser.Save(xmlText);
        var xml = xmlText.ToString();
        return this.loadXMLInternal(xml, 0, true);
    }
}
/// <summary>
/// See interface docs.
/// </summary>
/// <param name="environment">The OWIN environment of the current request.</param>
/// <param name="textContent">The text response; its Content is replaced when anything was injected.</param>
public void ManipulateTextResponse(IDictionary<string, object> environment, TextContent textContent)
{
    var context = OwinContext.Create(environment);
    var pathAndFile = context.RequestPathFlattened;

    // Injectors that name an element, have a content callback, and either apply to every
    // path or match the requested path (case-insensitively).
    var allInjectors = _HtmlContentInjectors;
    var injectors = allInjectors.Where(r =>
        !String.IsNullOrEmpty(r.Element) &&
        r.Content != null &&
        (r.PathAndFile == null || r.PathAndFile.Equals(pathAndFile, StringComparison.OrdinalIgnoreCase))
    ).ToArray();

    if (injectors.Length == 0)
    {
        return;
    }

    var document = new HtmlAgilityPack.HtmlDocument() {
        OptionCheckSyntax = false,
        OptionDefaultStreamEncoding = textContent.Encoding,
    };
    document.LoadHtml(textContent.Content);

    var modified = false;
    foreach (var injector in injectors.OrderByDescending(r => r.Priority))
    {
        var elements = document.DocumentNode.Descendants(injector.Element);
        var element = injector.AtStart ? elements.FirstOrDefault() : elements.LastOrDefault();
        if (element == null)
        {
            continue;
        }

        // Invoke the callback exactly once and reuse the result; it used to be called a
        // second time when loading the sub-document.
        var content = injector.Content();
        if (String.IsNullOrEmpty(content))
        {
            continue;
        }

        var subDocument = new HtmlAgilityPack.HtmlDocument() {
            OptionCheckSyntax = false,
        };
        subDocument.LoadHtml(content);

        if (injector.AtStart)
        {
            element.PrependChild(subDocument.DocumentNode);
        }
        else
        {
            element.AppendChild(subDocument.DocumentNode);
        }
        modified = true;
    }

    if (modified)
    {
        using (var stream = new MemoryStream())
        {
            // Save with the same encoding that is used to read the bytes back.
            document.Save(stream, textContent.Encoding);
            stream.Position = 0;
            using (var streamReader = new StreamReader(stream, textContent.Encoding, true))
            {
                textContent.Content = streamReader.ReadToEnd();
            }
        }
    }
}
/// <summary>
/// Serializes <paramref name="Document"/> to a string.
/// </summary>
/// <remarks>
/// The document is written straight to a <see cref="StringWriter"/>. Going through a byte
/// stream and decoding it again could leave a byte-order-mark character at the start of
/// the string and drop characters the document encoding cannot represent.
/// </remarks>
/// <param name="Document">The document to serialize.</param>
/// <returns>The document markup.</returns>
public static string ToHTML(this HtmlAgilityPack.HtmlDocument Document)
{
    using (var writer = new StringWriter())
    {
        Document.Save(writer);
        return writer.ToString();
    }
}
/// <summary>
/// Rewrites xref elements and all src/href references in <paramref name="transformed"/>,
/// then saves the document as UTF-8 to <paramref name="outputPath"/>, creating the target
/// directory when necessary.
/// </summary>
/// <exception cref="AggregateException">
/// Thrown after the file has been saved when one or more cross references could not be resolved.
/// </exception>
private static void TranformHtml(IDocumentBuildContext context, string transformed, string relativeModelPath, string outputPath)
{
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(transformed);
    var root = document.DocumentNode;

    // xref elements: unresolved references are collected instead of aborting.
    var unresolved = new List<CrossReferenceNotResolvedException>();
    var xrefElements = root.SelectNodes("//xref/@href");
    if (xrefElements != null)
    {
        foreach (var xrefElement in xrefElements)
        {
            try
            {
                UpdateXref(xrefElement, context, Language);
            }
            catch (CrossReferenceNotResolvedException notResolved)
            {
                unresolved.Add(notResolved);
            }
        }
    }

    // src attributes first, then href attributes.
    var withSrc = root.SelectNodes("//*/@src");
    if (withSrc != null)
    {
        foreach (var node in withSrc)
        {
            UpdateHref(node, "src", context, relativeModelPath);
        }
    }

    var withHref = root.SelectNodes("//*/@href");
    if (withHref != null)
    {
        foreach (var node in withHref)
        {
            UpdateHref(node, "href", context, relativeModelPath);
        }
    }

    // Make sure the destination folder exists before saving.
    var targetDirectory = Path.GetDirectoryName(outputPath);
    if (!string.IsNullOrEmpty(targetDirectory))
    {
        if (!Directory.Exists(targetDirectory))
        {
            Directory.CreateDirectory(targetDirectory);
        }
    }

    document.Save(outputPath, Encoding.UTF8);

    if (unresolved.Count > 0)
    {
        throw new AggregateException(unresolved);
    }
}
/// <summary>
/// Converts a saved HSBC statement HTML page into a <c>FineAntsCore.Statement</c>.
/// The HTML is first rewritten as XML into a temporary "&lt;file&gt;.fixed.xml" file, which is
/// then queried with XPath for the closing balance, the statement dates and the transactions.
/// </summary>
/// <param name="fileInfo">The HTML statement file.</param>
/// <returns>The parsed statement.</returns>
static FineAntsCore.Statement ConvertHSBCHTMLFileToFineAnts(FileInfo fileInfo)
{
    // Path to the statement container shared by every query below.
    const string contentPath =
        "/span/d:html/d:body/d:div[@id='top']/d:div[@id='innerPage']/d:div[@id='wrapper']/d:div[@id='main']/d:div[@id='content']/d:div[@class='containerMain']/d:div[@class='hsbcMainContent hsbcCol']/d:div[@class='extContentHighlightPib hsbcCol']";
    const string tableRows = contentPath + "/d:table/d:tbody";

    HtmlAgilityPack.HtmlDocument brokenDocument = new HtmlAgilityPack.HtmlDocument();
    brokenDocument.Load(fileInfo.FullName);
    brokenDocument.OptionOutputAsXml = true;

    string fixedXmlFileName = fileInfo.FullName + ".fixed.xml";
    try
    {
        brokenDocument.Save(fixedXmlFileName);

        XmlDocument document = new XmlDocument();
        document.Load(fixedXmlFileName);

        XmlNamespaceManager namespaceManager = new XmlNamespaceManager(document.NameTable);
        namespaceManager.AddNamespace("d", "http://www.w3.org/1999/xhtml");

        // Closing balance: last table row, column 6 holds the amount, column 7 the sign.
        XmlNode closingBalanceNode = document.SelectSingleNode(tableRows + "/d:tr[last()]/d:td[6]/d:p", namespaceManager);
        XmlNode closingBalanceSignNode = document.SelectSingleNode(tableRows + "/d:tr[last()]/d:td[7]/d:p", namespaceManager);

        int closingBalance = moneyInPenceFromString(closingBalanceNode.InnerText.Trim());
        if (closingBalanceSignNode.InnerText.Trim() == "D")
        {
            closingBalance = -closingBalance;
        }

        // Statement end date from the header row.
        XmlNode endDateNode = document.SelectSingleNode(contentPath + "/d:div[@class='extPibRow hsbcRow']/d:div[@class='hsbcPadding']/d:div[@class='hsbcTextRight']", namespaceManager);
        string endDateString = HtmlAgilityPack.HtmlEntity.DeEntitize(endDateNode.InnerText).Trim();
        System.Globalization.CultureInfo provider = System.Globalization.CultureInfo.InvariantCulture;
        DateTime endDate = DateTime.ParseExact(endDateString, "dd MMM yyyy", provider);

        // Start date: the first row's date, fixed up against the end date, plus one day.
        XmlNode startDateNode = document.SelectSingleNode(tableRows + "/d:tr[1]/d:td[1]/d:p", namespaceManager);
        string startDateString = HtmlAgilityPack.HtmlEntity.DeEntitize(startDateNode.InnerText).Trim();
        DateTime startDate = dateFromDateStringFixedUsingUpperBoundDate(startDateString, endDate.AddDays(-1)).AddDays(1);

        // Transactions: every row except the first and the last.
        List<FineAntsCore.Transaction> transactions = new List<FineAntsCore.Transaction>();
        XmlNodeList transactionNodes = document.SelectNodes(tableRows + "/d:tr[position()>1 and position()<last()]", namespaceManager);

        foreach (XmlNode node in transactionNodes)
        {
            XmlNode dateNode = node.SelectSingleNode("d:td[1]/d:p", namespaceManager);
            XmlNode nameNode = node.SelectSingleNode("d:td[3]/d:p", namespaceManager);
            XmlNode moneyOutNode = node.SelectSingleNode("d:td[4]/d:p", namespaceManager);
            XmlNode moneyInNode = node.SelectSingleNode("d:td[5]/d:p", namespaceManager);

            string date = HtmlAgilityPack.HtmlEntity.DeEntitize(dateNode.InnerText).Trim();
            string name = HtmlAgilityPack.HtmlEntity.DeEntitize(getInnerTextIgnoringLinks(nameNode));
            string moneyIn = HtmlAgilityPack.HtmlEntity.DeEntitize(moneyInNode.InnerText).Trim();
            string moneyOut = HtmlAgilityPack.HtmlEntity.DeEntitize(moneyOutNode.InnerText).Trim();

            // An empty "money in" cell means the row is a debit.
            int money = moneyIn == "" ? -moneyInPenceFromString(moneyOut) : moneyInPenceFromString(moneyIn);

            transactions.Add(new FineAntsCore.Transaction(money, dateFromDateStringFixedUsingUpperBoundDate(date, endDate), name, ""));
        }

        return new FineAntsCore.Statement(transactions, startDate, endDate, closingBalance);
    }
    finally
    {
        // Remove the temporary fixed file even when parsing fails part-way through.
        System.IO.File.Delete(fixedXmlFileName);
    }
}
/// <summary>
/// Adds a "title" attribute of the form "name [EXTENSION, size]" to every img element
/// whose src matches <c>this.regex</c> and refers to an existing item in the context database.
/// </summary>
/// <param name="value">The HTML field value.</param>
/// <returns>
/// The re-serialized HTML when at least one title was set; otherwise
/// <paramref name="value"/> unchanged.
/// </returns>
private string SetTitles(string value)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(value);

    HtmlAgilityPack.HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//img[@src]");
    if (nodes == null || nodes.Count == 0)
    {
        return value;
    }

    // Set only when a title is actually written. The flag used to flip as soon as the
    // regex matched, so markup was re-serialized even when no item was found.
    bool modified = false;

    foreach (HtmlAgilityPack.HtmlNode node in nodes)
    {
        string src = node.GetAttributeValue("src", String.Empty);
        if (src == null)
        {
            continue;
        }

        Match match = this.regex.Match(src);
        if (!match.Success)
        {
            continue;
        }

        Sitecore.Data.ID guid = Sitecore.Data.ID.Parse(match.Groups[1].Value);
        Sitecore.Data.Items.Item item = Sitecore.Context.Database.GetItem(guid);
        if (item == null)
        {
            continue;
        }

        string title = String.Format(
            "{0} [{1}, {2}]",
            item.Name,
            item["extension"].ToUpper(),
            this.FormatBytes(Int32.Parse(item["size"])));
        node.SetAttributeValue("title", title);
        modified = true;
    }

    if (!modified)
    {
        return value;
    }

    using (StringWriter sw = new StringWriter())
    {
        doc.Save(sw);
        return sw.ToString();
    }
}
/// <summary>
/// Converts the response body to XHTML, verifies that it contains a form, and collects the
/// name/value pairs of all hidden input elements.
/// </summary>
/// <param name="response">The response whose body is parsed.</param>
/// <returns>A flat array: name1, value1, name2, value2, ...</returns>
/// <exception cref="ArgumentException">The document contains no form element.</exception>
string[] ExtractFormAndHiddenControls(Response response)
{
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(response.Body);
    htmlDoc.OptionOutputAsXml = true;

    var doc = new XmlDocument();
    using (var xmlText = new StringWriter())
    {
        htmlDoc.Save(xmlText);
        doc.LoadXml(xmlText.ToString());
    }

    XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
    nsmgr.AddNamespace("html", HTML_NAMESPACE);

    XmlNode formNode = doc.SelectSingleNode("//html:form", nsmgr);
    if (formNode == null)
    {
        throw new ArgumentException("Form was not found in document: " + response.Body);
    }

    // The form's action and method are not part of the result. They were previously read
    // into unused locals, which threw when the form had no action attribute.

    var data = new List<string>();
    foreach (XmlNode inputNode in doc.SelectNodes("//html:input[@type='hidden']", nsmgr))
    {
        // Skip inputs with a missing or empty name instead of dereferencing a null attribute.
        XmlAttribute nameAttr = inputNode.Attributes["name"];
        if (nameAttr == null || String.IsNullOrEmpty(nameAttr.Value))
        {
            continue;
        }

        XmlAttribute valueAttr = inputNode.Attributes["value"];
        data.Add(nameAttr.Value);
        data.Add(valueAttr != null ? valueAttr.Value : String.Empty);
    }

    return data.ToArray();
}
/// <summary>
/// Converts the response body to XHTML, locates the form, copies its action URL and
/// method to the base request, and registers its input controls in <c>Controls</c>.
/// </summary>
/// <param name="response">The response whose body is parsed.</param>
/// <param name="formId">Form name; only used for the lookup when USE_CORRECT_FORMID is defined.</param>
/// <exception cref="ArgumentException">No matching form was found.</exception>
private void ExtractFormAndHiddenControls (Response response, string formId)
{
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument ();
    htmlDoc.LoadHtml (response.Body);
    htmlDoc.OptionOutputAsXml = true;

    XmlDocument doc = new XmlDocument ();
    using (StringWriter tsw = new StringWriter ()) {
        htmlDoc.Save (tsw);
        doc.LoadXml (tsw.ToString ());
    }

    const string HTML_NAMESPACE = "http://www.w3.org/1999/xhtml";

    XmlNamespaceManager nsmgr = new XmlNamespaceManager (doc.NameTable);
    nsmgr.AddNamespace ("html", HTML_NAMESPACE);

#if USE_CORRECT_FORMID
    XmlNode formNode = doc.SelectSingleNode ("//html:form[@name='" + formId + "']", nsmgr);
#else
    XmlNode formNode = doc.SelectSingleNode ("//html:form", nsmgr);
#endif

    if (formNode == null)
        throw new ArgumentException ("Form with id='" + formId + "' was not found in document: " + response.Body);

    // A form without an action attribute keeps the current URL instead of throwing.
    XmlNode action = formNode.Attributes["action"];
    string actionUrl = action != null ? action.Value : null;
    if (actionUrl != null && actionUrl != string.Empty)
        base.Url = actionUrl;

    XmlNode method = formNode.Attributes["method"];
    base.IsPost = method != null && "POST" == method.Value.ToUpper (CultureInfo.InvariantCulture);

#if USE_CORRECT_FORMID
    // ".//" keeps the search inside this form; a leading "//" searches the whole document.
    foreach (XmlNode inputNode in formNode.SelectNodes (".//html:input", nsmgr))
#else
    foreach (XmlNode inputNode in doc.SelectNodes ("//html:input[@type='hidden']", nsmgr))
#endif
    {
        // Inputs without a name cannot be registered; skip them rather than dereferencing null.
        XmlNode nameAttr = inputNode.Attributes["name"];
        if (nameAttr == null || nameAttr.Value == null || nameAttr.Value == string.Empty)
            continue;

        BaseControl bc = new BaseControl ();
        bc.Name = nameAttr.Value;

        XmlNode valueAttr = inputNode.Attributes["value"];
        bc.Value = valueAttr != null ? valueAttr.Value : "";

        Controls[bc.Name] = bc;
    }
}
/// <summary>
/// Parses <paramref name="html"/>, serializes it as XML and loads the result as an
/// <see cref="XElement"/>.
/// </summary>
/// <param name="html">HTML markup.</param>
/// <returns>The root element of the XML produced from the HTML.</returns>
public static XElement HtmlToXElement(string html)
{
    var parser = new HtmlAgilityPack.HtmlDocument();
    parser.OptionOutputAsXml = true;
    parser.LoadHtml(html);

    string xml;
    using (var xmlText = new StringWriter())
    {
        parser.Save(xmlText);
        xml = xmlText.ToString();
    }

    using (var xmlReader = new StringReader(xml))
    {
        return XElement.Load(xmlReader);
    }
}
/// <summary>
/// Parses <paramref name="html"/>, applies <c>TransformHtmlCore</c> to update href and xref
/// references, and writes the document to <paramref name="outputWriter"/>.
/// </summary>
/// <exception cref="AggregateException">
/// Thrown after the document has been written when <c>TransformHtmlCore</c> reported exceptions.
/// </exception>
private void TransformHtml(IDocumentBuildContext context, string html, string sourceFilePath, string destFilePath, StreamWriter outputWriter)
{
    var parsed = new HtmlAgilityPack.HtmlDocument();
    parsed.LoadHtml(html);

    var unresolved = TransformHtmlCore(context, sourceFilePath, destFilePath, parsed);

    // The output is written before any failure is reported.
    parsed.Save(outputWriter);

    if (unresolved.Count <= 0)
    {
        return;
    }

    throw new AggregateException(unresolved);
}
/// <summary>
/// Converts <paramref name="html"/> to XML and normalizes every "style" attribute by
/// fixing each name/value pair and sorting the pairs.
/// </summary>
/// <returns>
/// The normalized XML, or "&lt;TestCaseAborted&gt;&lt;/TestCaseAborted&gt;" when conversion fails.
/// </returns>
private string HtmltoXml(string html)
{
    HtmlAgilityPack.HtmlDocument parser = new HtmlAgilityPack.HtmlDocument();
    parser.LoadHtml(html);

    StringBuilder normalized = new StringBuilder();
    StringWriter normalizedWriter = new StringWriter(normalized);

    try
    {
        StringBuilder rawXml = new StringBuilder();
        StringWriter rawWriter = new StringWriter(rawXml);
        parser.OptionOutputAsXml = true;
        parser.Save(rawWriter);

        // Style name/value pairs come in a different order in .NET and GH,
        // so the values of each style attribute are sorted.
        XmlDocument xml = new XmlDocument();
        xml.LoadXml(rawXml.ToString());

        foreach (XmlNode element in xml.SelectNodes("//*"))
        {
            if (element.Attributes["style"] == null)
            {
                continue;
            }

            string raw = element.Attributes["style"].Value;
            string[] pairs = raw.Trim(new char[] { ' ', ';' }).Split(';');
            int index = 0;
            while (index < pairs.Length)
            {
                pairs[index] = FixStyleNameValue(pairs[index]);
                index++;
            }
            Array.Sort(pairs);
            element.Attributes["style"].Value = string.Join(";", pairs);
        }

        xml.Save(normalizedWriter);
    }
    catch (Exception)
    {
        Console.WriteLine("Error parsing html response...");
        Console.WriteLine("Test case aborted");
        return "<TestCaseAborted></TestCaseAborted>";
    }

    return normalized.ToString();
}
/// <summary>
/// When overridden in a derived class, this validates both the request and response.
/// The base implementation marks the outcome as not executed and parses the HTTP response
/// body into <c>_document</c> as XML.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">Validation arguments; <c>WebTestItem</c> must be a <c>WebTestRequest</c>.</param>
public virtual void Validate(Object sender, ValidationEventArgs e)
{
    _context.Outcome = WebTestOutcome.NotExecuted;

    var request = (WebTestRequest)e.WebTestItem;

    var parser = new HtmlAgilityPack.HtmlDocument();
    parser.LoadHtml(request.HttpResponseBody);
    parser.OptionOutputAsXml = true;

    using (var xmlText = new StringWriter())
    {
        parser.Save(xmlText);
        var xml = xmlText.ToString();
        _document = XDocument.Parse(xml);
    }
}
/// <summary>
/// Returns xhtml string from html document
/// </summary>
/// <param name="html">The HTML markup to convert.</param>
/// <returns>The markup serialized as XML.</returns>
public static string HtmlToXml(string html)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

    // Syntax checking and nested-tag fixing are applied while parsing, so they must be set
    // before LoadHtml. Setting them afterwards, as was done before, has no effect.
    doc.OptionCheckSyntax = true;
    doc.OptionFixNestedTags = true;
    doc.OptionOutputAsXml = true;
    doc.LoadHtml(html);

    using (StringWriter stringwriter = new StringWriter())
    {
        doc.Save(stringwriter);
        return stringwriter.ToString();
    }
}
/// <summary>
/// Looks up the OneNote page titled "Microsoft Graph API example" in the given document and
/// returns it when present. Otherwise builds the sample page HTML with an extra DIV.
/// </summary>
/// <remarks>
/// NOTE(review): the call that would actually add the page is disabled, so the
/// "not found" path returns the lookup result unchanged (presumably null) — confirm.
/// </remarks>
/// <param name="document">The document whose pages are searched.</param>
/// <returns>The existing page, or the unchanged lookup result when none exists.</returns>
private static Page OpenOrCreatePage(Document document)
{
    Console.WriteLine("Loading pages ...");

    string title = "Microsoft Graph API example";
    var pageFactory = new GraphPageFactory();
    var page = pageFactory.GetAllItems(document.Id).FirstOrDefault(p => p.Title.Equals(title));
    if (page != null)
    {
        return pageFactory.GetItem(page.Id);
    }

    // Build the sample HTML page and append a dynamically created field.
    var sampleDoc = new HtmlAgilityPack.HtmlDocument();
    sampleDoc.LoadHtml(Resources.SamplePageHtml);

    var dynamicDiv = sampleDoc.CreateElement("div");
    dynamicDiv.SetAttributeValue("id", "dynamicElement");
    dynamicDiv.InnerHtml = "Dynamically added DIV element #";
    sampleDoc.DocumentNode.SelectSingleNode("//body").AppendChild(dynamicDiv);

    // Serialize the HTML document to a UTF-8 string.
    string htmlContent;
    using (var htmlStream = new MemoryStream())
    {
        sampleDoc.Save(htmlStream, Encoding.UTF8);
        htmlContent = Encoding.UTF8.GetString(htmlStream.ToArray());
    }

    // Page creation is currently disabled:
    //MemoryStream stream = ReadImage();
    //page = pageFactory.AddItem(new Page { Content = htmlContent, StreamContent = stream, Title = title }, section.Id);

    Console.WriteLine("Created page 'Microsoft Graph API example'...");
    return page;
}
/// <summary>
/// Loads and saves markup nested 2000 levels deep (1000 div/span pairs) inside
/// <c>CrashTest</c>.
/// </summary>
public void Test_Save_Does_Not_Stackoverflow_Even_If_The_Dom_Is_Deep()
{
    const int depth = 1000;
    var openingTags = String.Concat(Enumerable.Repeat("<div><span>", depth));
    var closingTags = String.Concat(Enumerable.Repeat("</span></div>", depth));
    var deep = openingTags + closingTags;

    CrashTest(() =>
    {
        var document = new HtmlAgilityPack.HtmlDocument();
        var output = new StringWriter();
        document.LoadHtml(deep);
        document.Save(output);
    });
}
/// <summary>
/// Creates a new document from the empty-test template, sets its title to
/// <paramref name="name"/>, registers a new file name for it, and saves it to that file.
/// </summary>
/// <param name="name">Title of the new document and key under which its file is registered.</param>
public void Add(string name)
{
    doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(Properties.Resources.EmptyTest);

    var title = doc.DocumentNode.SelectSingleNode("//head/title");
    title.InnerHtml = name;

    string filename = CreateFileName();
    fileList.Add(name, filename);
    themes.Add(name);

    // Dispose the writer so buffered output is flushed and the file handle is released.
    using (var stream = File.CreateText(filename))
    {
        doc.Save(stream);
    }
}
/// <summary>
/// Rewrites src and href/xref references in <paramref name="transformed"/> relative to
/// <paramref name="relativeModelPath"/> and saves the document as UTF-8 to
/// <paramref name="outputPath"/>, creating the target directory when necessary.
/// </summary>
private static void TranformHtml(DocumentBuildContext context, string transformed, string relativeModelPath, string outputPath)
{
    var internalSpecs = context.XRefSpecMap;
    var externalSpecs = context.ExternalXRefSpec;

    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(transformed);
    var root = document.DocumentNode;

    // src references
    var withSrc = root.SelectNodes("//*/@src");
    if (withSrc != null)
    {
        foreach (var node in withSrc)
        {
            UpdateSrc(node, context.FileMap, s => UpdateFilePath(s, relativeModelPath));
        }
    }

    // href references; xref is generated by docfx, and is lower-cased
    var withHref = root.SelectNodes("//*/@href");
    if (withHref != null)
    {
        foreach (var node in withHref)
        {
            if (node.Name != "xref")
            {
                UpdateHref(node, context.FileMap, s => UpdateFilePath(s, relativeModelPath));
                continue;
            }

            UpdateXref(node, internalSpecs, externalSpecs, s => UpdateFilePath(s, relativeModelPath), Language);
        }
    }

    // Make sure the destination folder exists before saving.
    var targetDirectory = Path.GetDirectoryName(outputPath);
    if (!string.IsNullOrEmpty(targetDirectory))
    {
        if (!Directory.Exists(targetDirectory))
        {
            Directory.CreateDirectory(targetDirectory);
        }
    }

    document.Save(outputPath, Encoding.UTF8);
}
/// <summary>
/// Parses the local HTML file of a problem, collects the content URLs found by <c>DFS</c>,
/// saves the file back, and builds the list of files that need downloading.
/// </summary>
/// <param name="pnum">Problem number.</param>
/// <param name="replace">True to download a file even if a local copy exists.</param>
/// <returns>
/// The download tasks; an empty list when the HTML file is missing or an error occurs
/// (errors are logged).
/// </returns>
public static List<DownloadTask> ProcessHtmlContent(long pnum, bool replace)
{
    try
    {
        string external = string.Format("http://uva.onlinejudge.org/external/{0}/", pnum / 100);
        string htmlPath = LocalDirectory.GetProblemHtml(pnum);
        if (!File.Exists(htmlPath))
        {
            return new List<DownloadTask>();
        }

        var found = new List<string>();
        var downloads = new List<DownloadTask>();

        // DFS fills `found`; the document is saved back to the same file afterwards.
        var page = new HtmlAgilityPack.HtmlDocument();
        page.Load(htmlPath);
        DFS(page.DocumentNode, found);
        page.Save(htmlPath);

        foreach (string raw in found)
        {
            // Strip a leading "./" and any leading slashes to get a site-relative path.
            string url = raw;
            if (url.StartsWith("./"))
            {
                url = url.Remove(0, 2);
            }
            while (url.StartsWith("/"))
            {
                url = url.Remove(0, 1);
            }

            string relativeFile = url.Replace('/', Path.DirectorySeparatorChar);
            string localFile = LocalDirectory.GetProblemContent(pnum, relativeFile);

            // Download when forced, or when the local copy is smaller than 10.
            bool needed = replace || LocalDirectory.GetFileSize(localFile) < 10;
            if (needed)
            {
                downloads.Add(new DownloadTask(external + url, localFile, pnum));
            }
        }

        found.Clear();
        return downloads;
    }
    catch (Exception ex)
    {
        Logger.Add(ex.Message, "Internet");
        return new List<DownloadTask>();
    }
}
/// <summary>
/// Converts <paramref name="html"/> to XML and normalizes every "style" attribute by
/// fixing each name/value pair and sorting the pairs.
/// </summary>
/// <param name="html">HTML markup; leading and trailing CR, LF and spaces are trimmed before parsing.</param>
/// <returns>The normalized XML.</returns>
/// <exception cref="XmlException">The XML produced from the HTML could not be loaded.</exception>
public string HtmltoXml(string html)
{
    char[] lineNoise = new char[] { '\r', '\n', ' ' };
    char[] styleNoise = new char[] { ' ', ';' };

    HtmlAgilityPack.HtmlDocument parser = new HtmlAgilityPack.HtmlDocument();
    parser.LoadHtml(html.Trim(lineNoise)); // bug in HtmlAgilityPack

    StringBuilder normalized = new StringBuilder();
    StringWriter normalizedWriter = new StringWriter(normalized);
    StringBuilder rawXml = new StringBuilder();
    StringWriter rawWriter = new StringWriter(rawXml);

    parser.OptionOutputAsXml = true;
    parser.Save(rawWriter);

    // Style name/value pairs come in a different order in .NET and GH,
    // so the values of each style attribute are sorted.
    XmlDocument xml = new XmlDocument();
    xml.LoadXml(rawXml.ToString());

    foreach (XmlNode element in xml.SelectNodes("//*"))
    {
        if (element.Attributes["style"] == null)
        {
            continue;
        }

        string[] pairs = element.Attributes["style"].Value.Trim(styleNoise).Split(';');
        for (int i = 0; i != pairs.Length; ++i)
        {
            pairs[i] = FixStyleNameValue(pairs[i]);
        }
        Array.Sort(pairs);

        element.Attributes["style"].Value = string.Join(";", pairs);
    }

    xml.Save(normalizedWriter);
    return normalized.ToString();
}
/// <summary>
/// Writes the TCP port into the "value" attribute of the element with id="port" in the
/// given HTML file and saves the result as "&lt;name&gt;_fill.html" in the same directory.
/// </summary>
/// <param name="htmlDirPath">Directory containing the HTML file; concatenated directly with the file name when reading.</param>
/// <param name="htmlFileName">Name of the HTML file to read.</param>
/// <returns>Full path of the generated file.</returns>
public static string fillPortInHtml(string htmlDirPath, string htmlFileName)
{
    var htmlDoc = new HtmlAgilityPack.HtmlDocument();

    // Strip the extension. A name without any '.' is used as-is; Substring(0, -1) used
    // to throw ArgumentOutOfRangeException in that case.
    var extensionStart = htmlFileName.LastIndexOf('.');
    var fileName = extensionStart >= 0 ? htmlFileName.Substring(0, extensionStart) : htmlFileName;

    using (var htmlFileR = File.OpenRead(htmlDirPath + htmlFileName))
    {
        htmlDoc.Load(htmlFileR, Encoding.UTF8);
        var portInput = htmlDoc.DocumentNode.SelectSingleNode("//*[@id='port']");
        if (portInput != null)
        {
            portInput.SetAttributeValue("value", TCPHelper.port.ToString());
        }
    }

    var newFileFullPath = htmlDirPath + "/" + fileName + "_fill.html";

    // Delete first: OpenWrite does not truncate, so a shorter document would otherwise
    // leave trailing bytes of the previous file behind.
    File.Delete(newFileFullPath);
    using (var htmlFileW = File.OpenWrite(newFileFullPath))
    {
        htmlDoc.Save(htmlFileW);
    }

    return newFileFullPath;
}
/// <summary>
/// Parses the transformed HTML, rewrites its xref/src/href references and
/// saves the document to <paramref name="outputPath"/> as UTF-8.
/// </summary>
/// <param name="context">Build context supplying the xref maps and the file map.</param>
/// <param name="transformed">HTML markup to post-process.</param>
/// <param name="relativeModelPath">Path passed through to <c>UpdateHref</c>.</param>
/// <param name="outputPath">Destination file; its directory is created when missing.</param>
/// <exception cref="AggregateException">
/// Thrown after the file has been saved when one or more cross references
/// could not be resolved.
/// </exception>
private static void TranformHtml(DocumentBuildContext context, string transformed, string relativeModelPath, string outputPath)
{
    var internalSpecs = context.XRefSpecMap;
    var externalSpecs = context.ExternalXRefSpec;

    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(transformed);

    // Cross references: collect failures instead of aborting on the first one.
    var unresolved = new List<CrossReferenceNotResolvedException>();
    var xrefNodes = document.DocumentNode.SelectNodes("//xref/@href");
    if (xrefNodes != null)
    {
        foreach (var xrefNode in xrefNodes)
        {
            try
            {
                UpdateXref(xrefNode, internalSpecs, externalSpecs, Language);
            }
            catch (CrossReferenceNotResolvedException notResolved)
            {
                unresolved.Add(notResolved);
            }
        }
    }

    var srcNodes = document.DocumentNode.SelectNodes("//*/@src");
    if (srcNodes != null)
    {
        foreach (var srcNode in srcNodes)
        {
            UpdateHref(srcNode, "src", context.FileMap, relativeModelPath);
        }
    }

    var hrefNodes = document.DocumentNode.SelectNodes("//*/@href");
    if (hrefNodes != null)
    {
        foreach (var hrefNode in hrefNodes)
        {
            UpdateHref(hrefNode, "href", context.FileMap, relativeModelPath);
        }
    }

    // Make sure the target directory exists before writing.
    var targetDirectory = Path.GetDirectoryName(outputPath);
    if (!string.IsNullOrEmpty(targetDirectory))
    {
        if (!Directory.Exists(targetDirectory))
        {
            Directory.CreateDirectory(targetDirectory);
        }
    }

    document.Save(outputPath, Encoding.UTF8);

    // Report unresolved references only after the output has been written.
    if (unresolved.Count > 0)
    {
        throw new AggregateException(unresolved);
    }
}
/// <summary>
/// Turns HTML markup into XML text in which every <c>style</c> attribute has
/// its ';'-separated entries run through <c>FixStyleNameValue</c> and sorted.
/// </summary>
/// <param name="html">HTML markup to convert.</param>
/// <returns>The serialized, style-normalized XML document.</returns>
/// <remarks>May throw XmlException when the intermediate XML cannot be loaded.</remarks>
public string HtmltoXml (string html)
{
	char[] edgeChars = new char[] { '\r', '\n', ' ' };
	char[] styleEdgeChars = new char[] { ' ', ';' };

	HtmlAgilityPack.HtmlDocument source = new HtmlAgilityPack.HtmlDocument ();
	// Trimming works around a bug in HtmlAgilityPack (per the original note).
	source.LoadHtml (html.Trim (edgeChars));
	source.OptionOutputAsXml = true;

	StringBuilder intermediate = new StringBuilder ();
	source.Save (new StringWriter (intermediate));

	XmlDocument xml = new XmlDocument ();
	xml.LoadXml (intermediate.ToString ());

	// Style name-value pairs are emitted in a different order on .NET and GH;
	// sorting them gives a stable result.
	XmlNodeList elements = xml.SelectNodes ("//*");
	foreach (XmlNode current in elements) {
		XmlAttribute styleAttr = current.Attributes["style"];
		if (styleAttr != null) {
			string[] parts = styleAttr.Value.Trim (styleEdgeChars).Split (';');
			int index = 0;
			while (index < parts.Length) {
				parts[index] = FixStyleNameValue (parts[index]);
				index++;
			}
			Array.Sort (parts);
			styleAttr.Value = string.Join (";", parts);
		}
	}

	StringBuilder result = new StringBuilder ();
	xml.Save (new StringWriter (result));
	return result.ToString ();
}
/// <summary>
/// Parses HTML read from <paramref name="stream"/>, re-serializes it as XML
/// and hands that XML to <c>loadXML</c>.
/// </summary>
/// <param name="stream">Reader supplying the HTML markup.</param>
/// <returns>Whatever <c>loadXML</c> returns for the converted markup.</returns>
private object loadHTML(TextReader stream)
{
    // Parser setup. OptionOutputOriginalCase stays off because lower-cased
    // names are needed for XPath queries; OptionFixNestedTags is left at its default.
    var parser = new HtmlAgilityPack.HtmlDocument
    {
        OptionOutputAsXml = true,
        OptionCheckSyntax = false,
        OptionWriteEmptyNodes = true
    };

    parser.Load(stream);
    CheckHtmlErrors(parser);

    // Serialize the parsed HTML as XML text.
    string xmlText;
    using (var buffer = new StringWriter())
    {
        parser.Save(buffer);
        xmlText = buffer.ToString();
    }

    try
    {
        return loadXML(this, xmlText, 0);
    }
    finally
    {
        // Flag the document as HTML whether or not the XML load succeeded.
        this._isHtmlDocument = true;
    }
}
/// <summary>
/// Parses the transformed HTML, rewrites src/href/xref references relative to
/// <paramref name="relativeModelPath"/> and saves the result as UTF-8.
/// </summary>
/// <param name="context">Build context supplying the file map and xref maps.</param>
/// <param name="transformed">HTML markup to post-process.</param>
/// <param name="relativeModelPath">Path forwarded to <c>UpdateFilePath</c>.</param>
/// <param name="outputPath">Destination file; its directory is created when missing.</param>
private void TranformHtml(DocumentBuildContext context, string transformed, string relativeModelPath, string outputPath)
{
    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(transformed);

    var withSrc = document.DocumentNode.SelectNodes("//*/@src");
    if (withSrc != null)
    {
        foreach (var node in withSrc)
        {
            UpdateSrc(node, context.FileMap, s => UpdateFilePath(s, relativeModelPath));
        }
    }

    var withHref = document.DocumentNode.SelectNodes("//*/@href");
    if (withHref != null)
    {
        foreach (var node in withHref)
        {
            // xref is generated by docfx, and is lower-cased
            bool isXref = string.Equals(node.Name, "xref");
            if (!isXref)
            {
                UpdateHref(node, context.FileMap, s => UpdateFilePath(s, relativeModelPath));
                continue;
            }

            UpdateXref(node, context.XRefSpecMap, context.ExternalXRefSpec, s => UpdateFilePath(s, relativeModelPath), Language);
        }
    }

    // Create the output directory on demand, then write the file.
    var targetDirectory = Path.GetDirectoryName(outputPath);
    bool mustCreate = !string.IsNullOrEmpty(targetDirectory) && !Directory.Exists(targetDirectory);
    if (mustCreate)
    {
        Directory.CreateDirectory(targetDirectory);
    }

    document.Save(outputPath, Encoding.UTF8);
}
/// <summary>
/// Parses HTML read from <paramref name="stream"/>, re-serializes it as XML
/// and loads that XML through <c>loadXMLInternal</c>.
/// </summary>
/// <param name="stream">Reader supplying the HTML markup.</param>
/// <param name="filename">Name forwarded to <c>CheckHtmlErrors</c>.</param>
/// <returns>Whatever <c>loadXMLInternal</c> returns for the converted markup.</returns>
private object loadHTML(TextReader stream, string filename)
{
    // Parser setup. OptionOutputOriginalCase stays off because lower-cased
    // names are needed for XPath queries; OptionFixNestedTags is left at its default.
    var parser = new HtmlAgilityPack.HtmlDocument
    {
        OptionOutputAsXml = true,
        OptionCheckSyntax = false,
        // only needed when an XPath navigator is used on the HTML document
        OptionUseIdAttribute = false,
        OptionWriteEmptyNodes = true
    };

    parser.Load(stream);
    CheckHtmlErrors(parser, filename);

    // Serialize the parsed HTML as XML text, then load it as XML.
    string xmlText;
    using (var buffer = new StringWriter())
    {
        parser.Save(buffer);
        xmlText = buffer.ToString();
    }

    return this.loadXMLInternal(xmlText, 0, true);
}
/// <summary>
/// Fetches the full post from the service, blanks the vote/follow widgets in
/// its HTML, converts each comment to plain text and builds the detail page
/// markup in <c>Html</c>.
/// </summary>
/// <remarks>
/// <c>IsLoading</c> is reset in a <c>finally</c> block so it cannot stay set
/// when the request fails or a later step throws.
/// </remarks>
public async Task LoadPost()
{
    if (Status == NetworkStatus.NotReachable)
    {
        ShowErrorMessage(Settings.MSG_NETWORK_NOT_REACHABLE);
        return;
    }

    IsLoading = true;
    try
    {
        RequestPost req = new RequestPost();
        req.Id = Post.Id;

        ResponsePost detailPost;
        try
        {
            detailPost = await Service.GetPost(req);
            Post = detailPost.Post;
            if (Post.Kento_vote != null)
            {
                IsLikedThisPost = Post.Kento_vote.Vote_status == 1;
                Like_count = Post.Kento_vote.Vote_up_total;
            }
            Title = Post.FirstCategoryName; // category name - home post
        }
        catch (Exception e)
        {
            Debug.WriteLine(e);
            ShowErrorMessage(Settings.MSG_NETWORK_COMMON, e);
            return;
        }

        var doc = new HtmlAgilityPack.HtmlDocument();

        // Blank the like ("kento-vote") and follow/unfollow widgets. Only the
        // top-level nodes of the content are inspected.
        try
        {
            doc.LoadHtml(detailPost.Post.Content);
            foreach (var item in doc.DocumentNode.ChildNodes)
            {
                if (item.Id.StartsWith("kento-vote"))
                {
                    item.InnerHtml = string.Empty;
                    Debug.WriteLine("Empty ULike Info: " + item.OuterHtml);
                }

                if (item.Attributes["class"] != null && item.Attributes["class"].Value.StartsWith("wpw-fp-follow-post-wrapper"))
                {
                    item.InnerHtml = string.Empty;
                }
            }

            using (var writer = new System.IO.StringWriter())
            {
                doc.Save(writer);
                Post.Content = writer.ToString();
            }
        }
        catch (Exception e)
        {
            ShowErrorMessage(Settings.MSG_NETWORK_COMMON, e);
        }

        // Reduce each comment to plain text; failures here are only logged.
        try
        {
            Comment_count = Post.Comment_count;
            foreach (Comment comment in Post.Comments)
            {
                doc.LoadHtml(comment.Content);
                foreach (var item in doc.DocumentNode.ChildNodes)
                {
                    if (item.Id.StartsWith("wp-ulike-comment-"))
                    {
                        item.InnerHtml = string.Empty;
                        Debug.WriteLine(item.OuterHtml);
                    }
                }
                comment.Content = System.Net.WebUtility.HtmlDecode(doc.DocumentNode.InnerText);
                Comments.Add(comment);
            }
        }
        catch (Exception e)
        {
            Debug.WriteLine("[CommentPage-LoadComments] {0}", e);
        }

        string featured_img = Post.FullImage;
        string timeAgo = new TimeAgoValueConverter().Convert(Post.Date, null, null, null).ToString();
        Html = String.Format(NewsTemplates.DETAIL_TEMPLATE, Post.Title, timeAgo, featured_img, Post.Content, Post.FirstCategoryName);

        RaisePropertyChanged("Comment_count");
        RaisePropertyChanged("Comments");
    }
    finally
    {
        IsLoading = false;
    }
}
/// <summary>
/// Converts HTML markup to XML text with the entries of every <c>style</c>
/// attribute passed through <c>FixStyleNameValue</c> and sorted.
/// </summary>
/// <param name="html">HTML markup to convert.</param>
/// <returns>
/// The normalized XML, or <c>&lt;TestCaseAborted&gt;&lt;/TestCaseAborted&gt;</c>
/// when conversion fails after the HTML has been parsed.
/// </returns>
private string HtmltoXml(string html)
{
    var htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(html);

    try
    {
        htmlDocument.OptionOutputAsXml = true;
        var rawXml = new StringBuilder();
        htmlDocument.Save(new StringWriter(rawXml));

        var xmlDocument = new XmlDocument();
        xmlDocument.LoadXml(rawXml.ToString());

        // Style name-value pairs come in a different order on .NET and GH,
        // so they are sorted to make results comparable.
        foreach (XmlNode element in xmlDocument.SelectNodes("//*"))
        {
            XmlAttribute styleAttribute = element.Attributes["style"];
            if (styleAttribute == null)
            {
                continue;
            }

            string[] entries = styleAttribute.Value.Trim(new char[] { ' ', ';' }).Split(';');
            for (int i = 0; i < entries.Length; i++)
            {
                entries[i] = FixStyleNameValue(entries[i]);
            }

            Array.Sort(entries);
            styleAttribute.Value = string.Join(";", entries);
        }

        var normalizedXml = new StringBuilder();
        xmlDocument.Save(new StringWriter(normalizedXml));
        return normalizedXml.ToString();
    }
    catch (Exception)
    {
        // Any failure is reported on the console and mapped to a marker
        // document instead of propagating.
        Console.WriteLine("Error parsing html response...");
        Console.WriteLine("Test case aborted");
        return "<TestCaseAborted></TestCaseAborted>";
    }
}
/// <summary>
/// Parses <paramref name="html"/>, runs <c>TransformHtmlCore</c> over it to
/// update href and xref values, and writes the document to
/// <paramref name="outputWriter"/>.
/// </summary>
/// <param name="context">Build context forwarded to <c>TransformHtmlCore</c>.</param>
/// <param name="html">HTML markup to transform.</param>
/// <param name="relativeModelPath">Path forwarded to <c>TransformHtmlCore</c>.</param>
/// <param name="outputWriter">Writer receiving the transformed document.</param>
/// <exception cref="AggregateException">
/// Thrown after the document has been written when <c>TransformHtmlCore</c>
/// reported one or more exceptions.
/// </exception>
private static void TransformHtml(IDocumentBuildContext context, string html, string relativeModelPath, StreamWriter outputWriter)
{
    var parsed = new HtmlAgilityPack.HtmlDocument();
    parsed.LoadHtml(html);

    var failures = TransformHtmlCore(context, relativeModelPath, parsed);

    // The output is always written, even when some references failed.
    parsed.Save(outputWriter);

    if (failures.Count <= 0)
    {
        return;
    }

    throw new AggregateException(failures);
}
/// <summary>
/// Uploads every local image referenced by an <c>img</c> tag in
/// <paramref name="body"/>, rewrites each <c>src</c> to the uploaded URL and
/// then updates the post.
/// </summary>
/// <param name="postid">Identifier of the post to edit.</param>
/// <param name="title">New post title.</param>
/// <param name="body">New post body as HTML; image <c>src</c> values are opened as local file paths.</param>
public void editPost(string postid, string title, string body)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(body);
    HtmlAgilityPack.HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//img");

    if (nodes != null)
    {
        foreach (HtmlAgilityPack.HtmlNode node in nodes)
        {
            // Source path of the image as written in the HTML.
            HtmlAgilityPack.HtmlAttribute attr = node.Attributes["src"];
            if (attr == null || string.IsNullOrEmpty(attr.Value))
            {
                // Nothing to upload for an <img> without a usable src.
                continue;
            }

            JoeBlogs.MediaObject mediaObj = new JoeBlogs.MediaObject();
            mediaObj.Name = "test";
            mediaObj.Type = "image/jpeg";
            // ReadAllBytes opens the file read-only, reads it completely and
            // closes it even when reading fails.
            mediaObj.Bits = File.ReadAllBytes(attr.Value);

            JoeBlogs.MediaObjectInfo mediainfo = metablog.NewMediaObject(mediaObj);
            attr.Value = mediainfo.Url;
        }
    }

    JoeBlogs.Post post = new JoeBlogs.Post();
    post.Title = title;

    if (nodes != null)
    {
        // Images were found: send the re-serialized document.
        using (StringWriter writer = new StringWriter())
        {
            doc.Save(writer);
            post.Body = writer.ToString();
        }
    }
    else
    {
        // No images: send the body exactly as supplied.
        post.Body = body;
    }

    metablog.EditPost(postid, post, true);
}
/// <summary>
/// Downloads the page at the entered URL, saves a local HTML snapshot of it
/// under Content/Images/Screenshots and points the preview iframe at it.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Preview_Gen_Click(object sender, EventArgs e)
{
    string url = txtUrl.Text;
    uri = CreateUri(url);
    string domain = uri.Host;
    domain = Regex.Replace(domain, @"^(?:http(?:s)?://)?(?:www(?:[0-9]+)?\.)?", string.Empty, RegexOptions.IgnoreCase);

    // Take a single clock reading so all parts of the stamp belong to the
    // same instant (the format itself is unchanged).
    DateTime now = DateTime.Now;
    var date = now.Year.ToString() + now.Month.ToString() + now.Day.ToString() + "_" + now.Hour.ToString() + now.Minute.ToString() + now.Second.ToString();
    var filename = domain + "_" + date + ".html";

    domain = "http://" + domain;
    string webpage = GetHTML(url);

    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(webpage);

    // SelectSingleNode returns null when the page has no <head>; in that case
    // the extra tags are skipped instead of failing with a null reference.
    var head = doc.DocumentNode.SelectSingleNode("//head");
    if (head != null)
    {
        var baseTag = HtmlAgilityPack.HtmlNode.CreateNode("<base href=\"" + domain + "\">");
        var fontAwesomeRes = HtmlAgilityPack.HtmlNode.CreateNode("<link rel=\"stylesheet\" href=\"https://maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css\">");
        head.PrependChild(baseTag);
        head.AppendChild(fontAwesomeRes);
    }

    doc = AssignAsbsoluteUri(doc, "//img", "src");
    doc = AssignAsbsoluteUri(doc, "//link", "href");

    string path = Server.MapPath("Content/Images/Screenshots");
    var filepath = path + "\\" + filename;
    doc.Save(filepath);

    iframeLoader.Src = "Content/Images/Screenshots/" + filename;
    iframeLoader.Visible = true;
    iframeVisible = true;
}
/// <summary>
/// HTML-decodes <paramref name="contents"/>, replaces the <c>src</c> of every
/// <c>img</c> tag with the value returned by <c>GetAndSaveImage</c> and
/// returns the re-serialized markup.
/// </summary>
/// <param name="contents">HTML-encoded markup to process.</param>
/// <param name="dirPath">Directory forwarded to <c>GetAndSaveImage</c>.</param>
/// <returns>The markup with rewritten image sources.</returns>
private static String ParseSaveAndFixImages(string contents, string dirPath)
{
    contents = System.Web.HttpUtility.HtmlDecode(contents);

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    // LoadHtml wraps the text in a reader and disposes it itself.
    doc.LoadHtml(contents);

    HtmlAgilityPack.HtmlNodeCollection imgTags = doc.DocumentNode.SelectNodes("//img");
    if (imgTags != null)
    {
        foreach (HtmlAgilityPack.HtmlNode tag in imgTags)
        {
            HtmlAgilityPack.HtmlAttribute src = tag.Attributes["src"];
            if (src != null)
            {
                src.Value = GetAndSaveImage(src.Value, dirPath);
            }
        }
    }

    using (StringWriter sw = new StringWriter())
    {
        doc.Save(sw);
        return sw.ToString();
    }
}
/// <summary>
/// Converts the response body to XML and collects the name and value of every
/// hidden <c>input</c> element.
/// </summary>
/// <param name="response">Response whose <c>Body</c> holds the HTML page.</param>
/// <returns>
/// A flat array of alternating names and values; a missing <c>value</c>
/// attribute yields an empty string. Inputs without a non-empty <c>name</c>
/// are skipped.
/// </returns>
/// <exception cref="ArgumentException">The document contains no form element.</exception>
string[] ExtractFormAndHiddenControls (Response response)
{
	HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument ();
	htmlDoc.LoadHtml (response.Body);
	htmlDoc.OptionOutputAsXml = true;

	string xml;
	using (var tsw = new StringWriter ()) {
		htmlDoc.Save (tsw);
		xml = tsw.ToString ();
	}

	var doc = new XmlDocument ();
	doc.LoadXml (xml);

	XmlNamespaceManager nsmgr = new XmlNamespaceManager (doc.NameTable);
	nsmgr.AddNamespace ("html", HTML_NAMESPACE);

	XmlNode formNode = doc.SelectSingleNode ("//html:form", nsmgr);
	if (formNode == null)
		throw new ArgumentException ("Form was not found in document: " + response.Body);

	var data = new List <string> ();
	foreach (XmlNode inputNode in doc.SelectNodes ("//html:input[@type='hidden']", nsmgr)) {
		// A hidden input may have no name attribute at all; treat that the
		// same as an empty name instead of dereferencing null.
		XmlAttribute nameAttr = inputNode.Attributes["name"];
		if (nameAttr == null || String.IsNullOrEmpty (nameAttr.Value))
			continue;

		XmlAttribute valueAttr = inputNode.Attributes["value"];
		data.Add (nameAttr.Value);
		data.Add (valueAttr != null ? valueAttr.Value : String.Empty);
	}

	return data.ToArray ();
}
/// <summary>
/// Reads an account-history HTML page from <paramref name="stream"/> and
/// extracts the transactions whose account is listed in <c>accountsToFilter</c>.
/// </summary>
/// <param name="stream">Stream containing the HTML page.</param>
/// <returns>The recognized transactions, in document order.</returns>
public Transaction[] GetTransactionsFromAccountHistory(Stream stream)
{
    // The reader is not disposed here: disposing a StreamReader also closes
    // the underlying stream, which belongs to the caller.
    StreamReader reader = new StreamReader(stream);

    // The HTML is round-tripped through HtmlAgilityPack as XML so that it can
    // be loaded by XDocument. See:
    // http://stackoverflow.com/questions/12822680/xmldocument-failed-to-load-xhtml-string-because-of-error-reference-to-undeclare
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(reader.ReadToEnd());
    doc.OptionOutputAsXml = true;

    XDocument root;
    using (StringWriter writer = new StringWriter())
    {
        doc.Save(writer);
        using (StringReader xmlReader = new StringReader(writer.ToString()))
        {
            root = XDocument.Load(xmlReader);
        }
    }

    XPathNavigator navigator = root.CreateNavigator();
    List<Transaction> resultTransactions = new List<Transaction>();

    // The queries below use plain, unprefixed element names.
    var transactionTable = navigator.SelectSingleNode("span/body/table[1]/tr[4]/td/table[2]");
    var transactions = transactionTable.Select("tr");

    bool first = true;
    foreach (XPathNavigator t in transactions)
    {
        // The first row is skipped.
        if (first)
        {
            first = false;
            continue;
        }

        // Current is read once before the first MoveNext, so cells[0] is
        // whatever the iterator exposes at its initial position (presumably
        // the row itself - TODO confirm) and the td values start at index 1.
        // The indices used below depend on this offset.
        var cellSelect = t.Select("td");
        var cells = new List<string>();
        cells.Add(cellSelect.Current.Value.Trim());
        while (cellSelect.MoveNext())
        {
            cells.Add(cellSelect.Current.Value.Trim());
        }

        if (cells.Count != 8 || !accountsToFilter.Contains(cells[4]))
        {
            continue;
        }

        TransactionType tType;
        if (cells[3].Contains("Short-Term Earnings") || cells[3] == "Dividend" || cells[3] == "Earnings")
        {
            tType = TransactionType.ReinvestDividend;
        }
        else if (cells[3] == "Contributions")
        {
            tType = TransactionType.Buy;
        }
        else if (cells[3] == "ADP/ACP Refunds")
        {
            tType = TransactionType.Sell;
        }
        else
        {
            // Unrecognized transaction kinds are ignored.
            continue;
        }

        resultTransactions.Add(new Transaction()
        {
            FundName = cells[2],
            Price = double.Parse(cells[5], System.Globalization.NumberStyles.Any),
            Amount = double.Parse(cells[6]),
            Total = double.Parse(cells[7], System.Globalization.NumberStyles.Any),
            Date = DateTime.Parse(cells[1]),
            Type = tType
        });
    }

    return resultTransactions.ToArray();
}
/// <summary>
/// Parses the stored HTML of a problem, collects the local files it
/// references and returns the downloads needed to fetch them.
/// </summary>
/// <param name="pnum">Problem number.</param>
/// <param name="replace">True to download files even when a local copy exists.</param>
/// <returns>
/// The download tasks; an empty list when the HTML file does not exist or
/// when any exception occurs (the message is logged).
/// </returns>
public static List<DownloadTask> ProcessHtmlContent(long pnum, bool replace)
{
    try
    {
        string htmlPath = LocalDirectory.GetProblemHtml(pnum);
        if (!File.Exists(htmlPath))
        {
            return new List<DownloadTask>();
        }

        string remoteBase = string.Format("http://uva.onlinejudge.org/external/{0}/", pnum / 100);

        // Collect the referenced files, then write the document back to disk.
        var document = new HtmlAgilityPack.HtmlDocument();
        document.Load(htmlPath);
        var references = new List<string>();
        GetAllImageFiles(document.DocumentNode, references);
        document.Save(htmlPath);

        var pending = new List<DownloadTask>();
        foreach (string reference in references)
        {
            // Strip a leading "./" and any leading slashes.
            string relative = reference;
            if (reference.StartsWith("./"))
            {
                relative = reference.Remove(0, 2);
            }
            while (relative.StartsWith("/"))
            {
                relative = relative.Remove(0, 1);
            }

            string localFile = LocalDirectory.GetProblemContent(
                pnum, relative.Replace('/', Path.DirectorySeparatorChar));

            // Download when forced, or when the local copy is under 10 bytes.
            bool needed = replace || LocalDirectory.GetFileSize(localFile) < 10;
            if (needed)
            {
                pending.Add(new DownloadTask(remoteBase + relative, localFile, pnum));
            }
        }

        references.Clear();
        return pending;
    }
    catch (Exception ex)
    {
        Logger.Add(ex.Message, "Internet");
        return new List<DownloadTask>();
    }
}
//---------------------------
/// <summary>
/// Authenticates against the site (unless already authenticated), loads the
/// start page, then walks the result pages and registers the addresses and
/// stickers found on each page for the given job.
/// </summary>
/// <param name="jobID">Identifier of the job the collected addresses are stored under.</param>
/// <returns>
/// Status code, as assigned in the body:
/// 0 - initial value, returned when no later step assigned a status;
/// 1 - page loop was entered and no later status replaced it;
/// 2 - the hour budget (_timeInHours) was exceeded;
/// 3 - valids was null, or both addresses and stickers were null;
/// 4 - start page response check failed, or a result page could not be loaded;
/// 5 - the next page address equals the last processed address;
/// 6 - pre-authentication request failed;
/// 7 - authentication POST failed;
/// 8 - response check after reading the authentication reply failed;
/// 9 - page count was not positive;
/// 10 - start page request failed.
/// </returns>
/// <remarks>
/// NOTE(review): in the three authentication failure branches
/// httpWebResponse.StatusCode is read even though the preceding condition is
/// also false when httpWebResponse is null - that case would throw.
/// NOTE(review): the local _directoryInfo declaration is commented out, so
/// the name presumably refers to a member declared elsewhere - confirm.
/// </remarks>
public int NStartProcessingAuthentification(int jobID)
{
    int status = 0;
    // Write the last page to a file.
    // A directory is created for checking the page that was read.
    /*DirectoryInfo _directoryInfo = new DirectoryInfo(Path.GetDirectoryName(Application.ExecutablePath) + "\\OUT");
    if (!_directoryInfo.Exists)
    {
        _directoryInfo.Create();
    }*/
    HTTPClient client = new HTTPClient(_dbConnectionString, "MySession", _ipProxy, _portProxy, _streamNumber); ;
    bool nextPage = true;
    string sHTML = "";
    string address = "";
    string message = "";
    HtmlDocument document = new HtmlDocument();
    if (!_webAuthentification) // Authentication
    {
        HttpWebResponse httpWebResponse = client.Request(_webPreAuthentificationAddress); // e.g. ("http://yarsk24.ru");
        if (httpWebResponse != null && httpWebResponse.StatusCode == HttpStatusCode.OK)
        {
            httpWebResponse.Close();
            httpWebResponse = client.Request_Post(_webAuthentificationAddress, _webConnectionString); // e.g. ("http://yarsk24.ru/index/sub/", "user=...&password=...&rem=1&a=2&ajax=1&rnd=038&_tp_=xml") - credentials redacted
            if (httpWebResponse != null && httpWebResponse.StatusCode == HttpStatusCode.OK)
            {
                Stream stream = httpWebResponse.GetResponseStream();
                using (StreamReader reader = new StreamReader(stream, this.Encoding)) // alternatives tried: System.Text.Encoding.GetEncoding(1251) / System.Text.Encoding.UTF8
                {
                    sHTML = reader.ReadToEnd();
                }
                // Same condition as above, re-checked after the body was read.
                if (httpWebResponse != null && httpWebResponse.StatusCode == HttpStatusCode.OK)
                {
                    httpWebResponse.Close();
                    _webAuthentification = true;
                }
                else
                {
                    address = _webAuthentificationAddress;
                    message = httpWebResponse.StatusCode.ToString();
                    status = 8;
                }
            }
            else
            {
                address = _webAuthentificationAddress;
                message = httpWebResponse.StatusCode.ToString();
                status = 7;
            }
        }
        else
        {
            address = _webPreAuthentificationAddress;
            message = httpWebResponse.StatusCode.ToString();
            status = 6;
        }
    }
    // Any non-zero status at this point came from the authentication step; log it.
    if (status != 0)
    {
        DataDBService ddbs = new DataDBService();
        ddbs.WriteErrorMessage(_dbConnectionString, _streamNumber, !string.IsNullOrEmpty(address) ? address : _webAuthentificationAddress, "Ошибка при прохождении аутентификации: " + message);
    }
    if (_webAuthentification) // Request the page that carries the number of listings
    {
        HttpWebResponse httpWebResponse = client.Request(_startAddress);
        if (httpWebResponse != null && httpWebResponse.StatusCode == HttpStatusCode.OK)
        {
            Stream stream = httpWebResponse.GetResponseStream();
            using (StreamReader reader = new StreamReader(stream, this.Encoding))
            {
                sHTML = reader.ReadToEnd();
                document = new HtmlDocument();
                document.LoadHtml(sHTML);
                if (httpWebResponse != null && httpWebResponse.StatusCode == HttpStatusCode.OK)
                {
                    httpWebResponse.Close();
                    // Keep a copy of the last loaded page on disk.
                    document.Save(_directoryInfo + "\\LastPage" + _streamNumber.ToString() + ".html");
                }
                else
                {
                    status = 4;
                    nextPage = false;
                }
            }
        }
        else
        {
            status = 10;
            nextPage = false;
        }
    }
    if (_webAuthentification && nextPage) // Loop over the pages
    {
        int countPagesTotal = GetPageCount(document, _startAddress);
        if (countPagesTotal > 0)
        {
            for (int i = 0; i < countPagesTotal; i++)
            {
                status = 1;
                // Pages before _firstPage (1-based) are skipped.
                if (i + 1 >= _firstPage)
                {
                    List<bool> valids;
                    List<string> addresses;
                    List<string> stickers;
                    // Get the page to process
                    string currentAddress = GetLoadedPage(_startAddress, i);
                    if (_lastAddress != currentAddress)
                    {
                        _lastAddress = currentAddress;
                        // Request the page that carries the listings
                        document = null;
                        HttpWebResponse httpWebResponse = client.Request(_lastAddress);
                        if (httpWebResponse != null && httpWebResponse.StatusCode == HttpStatusCode.OK)
                        {
                            Stream stream = httpWebResponse.GetResponseStream();
                            using (StreamReader reader = new StreamReader(stream, this.Encoding))
                            {
                                sHTML = reader.ReadToEnd();
                                document = new HtmlDocument();
                                document.LoadHtml(sHTML);
                                if (httpWebResponse != null && httpWebResponse.StatusCode == HttpStatusCode.OK)
                                    httpWebResponse.Close();
                                else
                                {
                                }
                            }
                        }
                        if (document != null)
                        {
                            document.Save(_directoryInfo + "\\LastPage" + _streamNumber.ToString() + ".html");
                            // Get the list of listings of interest
                            valids = GetValidObjects(document);
                            // Get the list of links on the page
                            addresses = GetAddressList(document, valids);
                            // Get the list of stickers on the page
                            stickers = GetStickers(document, valids);
                            //------------------------------------
                            if (valids != null && (addresses != null || stickers != null))
                            {
                                // Random 1-3 second pause before storing the results.
                                System.Threading.Thread.Sleep(new Random().Next(1000, 3000));
                                string hostName = Dns.GetHostName();
                                if (InsertIntoAddressesUpload(addresses, stickers, jobID, hostName))
                                {
                                    // Retry IUDAdressesMask until it succeeds, pausing 100-300 ms between attempts.
                                    bool bb = false;
                                    while (!bb)
                                    {
                                        if (IUDAdressesMask(hostName, jobID))
                                        {
                                            nextPage = NSetJobState(jobID, i + 1, countPagesTotal, addresses.Count, jobID, document);
                                            if (!nextPage)
                                            {
                                                DataDBExtention ddbe = new DataDBExtention();
                                                ddbe.NSetStatistics(jobID, _dbConnectionString, hostName, _streamNumber);
                                            }
                                            //if (valids == null || addresses == null || stickers == null)
                                            //{
                                            //    status = 3;
                                            //    break;
                                            //}
                                            bb = true;
                                        }
                                        else
                                            System.Threading.Thread.Sleep(new Random().Next(100, 300));
                                        //else
                                        //{
                                        //    nextPage = NSetJobState(jobID, i + 1, countPagesTotal, 0, jobID, document);
                                        //    // if something goes wrong, carry on anyway
                                        //    //// maybe put a break here, e.g. if the timeout expired and the request failed
                                        //    //// or leave it as is, and later runs will pick up the undelivered links themselves
                                        //}
                                    }
                                }
                                // TODO (from the original): report here what was parsed and how many links
                            }
                            else
                            {
                                status = 3;
                                break;
                            }
                            // Early exit from the page loop:
                            // the requested number of pages has been reached
                            if (i + 1 >= _firstPage - 1 + _pageCount || !nextPage)
                            {
                                break;
                            }
                            // The time budget (in hours) is exhausted
                            DateTime dateTime = DateTime.Now;
                            if ((dateTime.Year - _dateTimeBegin.Year) * 365 * 24 + (dateTime.DayOfYear - _dateTimeBegin.DayOfYear) * 24 + dateTime.Hour - _dateTimeBegin.Hour > _timeInHours)
                            {
                                status = 2;
                                break;
                            }
                            //----------------------------------------
                            // // Process each link and pick out the required information
                            // List<DirtyApartments> apartments = CardProcessingAuthentification(addresses, stickers, client);
                            // if (apartments != null)
                            // {
                            //     // Delegate
                            //     if (collectedApartmentsDel != null)
                            //         nextPage = collectedApartmentsDel(apartments, i + 1, countPagesTotal, _directoryInfo + "\\LastPage" + _streamNumber.ToString() + ".html");
                            //     // Early exit from the page loop
                            //     // The requested number of pages has been reached
                            //     if (i + 1 >= _firstPage - 1 + _pageCount || !nextPage)
                            //     {
                            //         break;
                            //     }
                            //     // The time budget (in hours) is exhausted
                            //     DateTime dateTime = DateTime.Now;
                            //     if ((dateTime.Year - _dateTimeBegin.Year) * 365 * 24
                            //         + (dateTime.DayOfYear - _dateTimeBegin.DayOfYear) * 24
                            //         + dateTime.Hour - _dateTimeBegin.Hour
                            //         > _timeInHours)
                            //     {
                            //         status = 2;
                            //         break;
                            //     }
                            //     if (valids == null || addresses == null || stickers == null)
                            //     {
                            //         status = 3;
                            //         break;
                            //     }
                            // }
                            // else
                            // {
                            //     status = 16;
                            //     break;
                            // }
                        }
                        else
                        {
                            status = 4;
                            break;
                        }
                    }
                    else
                    {
                        status = 5;
                        break;
                    }
                }
            }
        }
        else
            status = 9;
    }
    else
    {
    }
    return status;
}