/// <summary>
/// Appends one element to <paramref name="head"/> for every entry in <c>snippet.References</c>:
/// CSS references become <c>link rel="stylesheet"</c> elements and JavaScript references become
/// <c>script</c> elements.
/// </summary>
/// <param name="snippet">Snippet whose references are enumerated.</param>
/// <param name="doc">Document used as the factory for the new elements.</param>
/// <param name="head">Node that receives the created elements.</param>
/// <exception cref="Exception">A reference is neither CSS nor JavaScript.</exception>
private static void AppendSnippetReferences(SnippetDocumentControl snippet, HtmlAgilityPack.HtmlDocument doc, HtmlNode head)
{
    foreach (var reference in snippet.References)
    {
        bool isStylesheet = reference.Type == ReferenceTypes.Css;
        if (!isStylesheet && !(reference.Type == ReferenceTypes.Javascript))
        {
            throw new Exception($"Unsupported snippet reference type: {reference.Type}");
        }

        var element = doc.CreateElement(isStylesheet ? "link" : "script");
        if (isStylesheet)
        {
            element.SetAttributeValue("rel", "stylesheet");
            element.SetAttributeValue("href", reference.Url);
        }
        else
        {
            element.SetAttributeValue("src", reference.Url);
            element.SetAttributeValue("type", "text/javascript");
        }

        head.AppendChild(element);
    }
}
/// <summary>
/// Rebuilds the attribute checklist when the combo box selection changes: clears the dependent
/// controls, reloads the temp HTML file, creates a node for the selected element name and lists
/// that element's attributes followed by the "Global" and "events" attribute sets.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void comboBox1_SelectedIndexChanged(object sender, EventArgs e)
{
    // Nothing selected: leave every control untouched.
    if (comboBox1.SelectedItem == null)
    {
        return;
    }

    // Reset the controls that depend on the selection.
    checkedListBox1.Items.Clear();
    checkedListBox1.Controls.Clear();
    innerHtmlTextBox.Clear();
    pathTextBox.Clear();

    document.Load(Application.StartupPath + "/tempHtml.txt");

    // Split the selected text on '<' and '>' and use the first non-empty piece as the element name.
    string[] pieces = comboBox1.SelectedItem.ToString().Split("<>".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    string elementName = pieces[0];
    node = document.CreateElement(elementName);

    // Uncheck anything that is still checked.
    while (checkedListBox1.CheckedIndices.Count > 0)
    {
        int firstChecked = checkedListBox1.CheckedIndices[0];
        checkedListBox1.SetItemChecked(firstChecked, false);
    }

    // Element-specific attributes first (when known), then the shared sets.
    if (attr.AttrDict.ContainsKey(elementName))
    {
        string[] elementAttributes = attr.AttrDict[elementName];
        checkedListBox1.Items.AddRange(elementAttributes);
    }

    checkedListBox1.Items.AddRange(attr.AttrDict["Global"]);
    checkedListBox1.Items.AddRange(attr.AttrDict["events"]);
}
/// <summary>
/// Converts an HTML fragment to plain text: paragraph tags become blank lines, comment, style and
/// script nodes are dropped, remaining markup is stripped and entities are decoded.
/// </summary>
/// <param name="htmltext">HTML fragment to convert.</param>
/// <returns>
/// The plain text; the input (after paragraph replacement) when it contains none of
/// <c>&lt;</c>, <c>&gt;</c>, <c>&amp;</c>; or an empty string when parsing fails.
/// </returns>
public static string DecodeHtml(string htmltext)
{
    htmltext = htmltext.Replace("<p>", "").Replace("</p>", "\r\n\r\n");

    bool looksLikeMarkup = htmltext.IndexOf('<') > -1 || htmltext.IndexOf('>') > -1 || htmltext.IndexOf('&') > -1;
    if (!looksLikeMarkup)
    {
        return htmltext;
    }

    string result = String.Empty;
    try
    {
        HtmlDocument parser = new HtmlDocument();
        var container = parser.CreateElement("div");

        // Put a space between sentence-ending punctuation and a following tag so words do not
        // run together once the tags are stripped.
        htmltext = htmltext.Replace(".<", ". <").Replace("?<", "? <").Replace("!<", "! <").Replace("'", "'");
        container.InnerHtml = htmltext;

        // Walk a snapshot backwards so removing a node never disturbs the nodes still to visit.
        var snapshot = container.Descendants().ToArray();
        int index = snapshot.Length;
        while (index-- > 0)
        {
            var current = snapshot[index];
            bool unwanted = current.NodeType == HtmlNodeType.Comment
                || current.Name.EqualNoCase("style")
                || current.Name.EqualNoCase("script");
            if (unwanted)
            {
                current.Remove();
            }
        }

        result = WebUtility.HtmlDecode(container.InnerText);
    }
    catch
    {
        // Best effort: any failure leaves the result as an empty string.
    }

    return result;
}
/// <summary>
/// Builds an ordered list (<c>ol</c>) with one item per external reference. Each item holds a
/// "^" back-link to the in-text reference, a space, and a link to the referenced URL.
/// </summary>
/// <param name="document">Document used as the factory for the new nodes.</param>
/// <param name="externalReferences">References to list, in output order.</param>
/// <returns>The detached <c>ol</c> node; the caller decides where to insert it.</returns>
private static HtmlNode GetExternalReferencesListNode(
    HtmlDocument document,
    IEnumerable<ExternalReference> externalReferences)
{
    var list = document.CreateElement("ol");

    foreach (var reference in externalReferences)
    {
        var item = document.CreateElement("li");
        string referenceId = GetExternalReferenceId(reference.Index);

        var backLink = document.CreateElement("a");
        backLink.InnerHtml = "<strong>^</strong>";
        backLink.SetAttributeValue("href", "#" + referenceId + BackLinkReferenceIdSuffix);

        var externalLink = document.CreateElement("a");
        // The URL is shown as link text, so it must be HTML-encoded: a raw '&' or '<' in the URL
        // would otherwise be parsed as markup when assigned to InnerHtml.
        externalLink.InnerHtml = HtmlDocument.HtmlEncode(reference.Url);
        externalLink.SetAttributeValue("href", reference.Url);
        externalLink.SetAttributeValue("name", referenceId);

        item.AppendChild(backLink);
        item.AppendChild(document.CreateTextNode(" "));
        item.AppendChild(externalLink);
        list.AppendChild(item);
    }

    return list;
}
/// <summary>
/// Strips the markup from an HTML fragment and decodes its entities.
/// </summary>
/// <param name="htmlText">The HTML fragment to convert.</param>
/// <returns>
/// The text without HTML tags; the input (after paragraph replacement) when it contains none of
/// <c>&lt;</c>, <c>&gt;</c>, <c>&amp;</c>; or an empty string when parsing fails.
/// </returns>
public static string DecodeHtml(string htmlText)
{
    htmlText = htmlText.Replace("<p>", "").Replace("</p>", "\r\n\r\n");

    bool looksLikeMarkup = htmlText.IndexOf('<') > -1 || htmlText.IndexOf('>') > -1 || htmlText.IndexOf('&') > -1;
    if (!looksLikeMarkup)
    {
        return htmlText;
    }

    string result = String.Empty;
    try
    {
        HtmlDocument parser = new HtmlDocument();
        var container = parser.CreateElement("div");

        // Put a space between sentence-ending punctuation and a following tag so words do not
        // run together once the tags are stripped.
        htmlText = htmlText.Replace(".<", ". <").Replace("?<", "? <").Replace("!<", "! <").Replace("'", "'");
        container.InnerHtml = htmlText;

        string plain = WebUtility.HtmlDecode(container.InnerText);

        // Remove anything that still looks like an HTML comment after decoding.
        result = plain;
        result = Regex.Replace(result, "<!--.*?-->", string.Empty, RegexOptions.Singleline);
    }
    catch
    {
        // Best effort: a failure keeps whatever was assigned to the result so far.
    }

    return result;
}
/// <summary>
/// Reads XAML text into an object graph rooted at a <c>Page</c>, generates HTML for it into
/// "test.htm" and the matching styles into "XamlCore.css".
/// </summary>
/// <param name="input">XAML markup to parse.</param>
/// <param name="args">Not used by this method.</param>
public void Parse(string input, string[] args = null)
{
    // Pump every XAML node from the reader into the object writer to materialise the graph.
    var xamlReader = new XamlXmlReader(new StringReader(input), new XamlSchemaContext());
    var objectWriter = new XamlObjectWriter(xamlReader.SchemaContext);
    while (xamlReader.Read())
    {
        objectWriter.WriteNode(xamlReader);
    }

    var page = (Page)objectWriter.Result;

    // Register which generator block handles which XAML type.
    var generator = new Generator();
    generator.Map<Page, PageGeneratorBlock>();
    generator.Map<Button, ButtonGeneratorBlock>();
    generator.Map<StackPanel, StackPanelGeneratorBlock>();

    var htmlDocument = new HtmlDocument();
    var htmlRoot = htmlDocument.CreateElement("html");
    generator.Generate(htmlRoot, page);

    // The HTML5 doctype is emitted through a comment node, followed by the generated root.
    var doctype = htmlDocument.CreateComment("<!DOCTYPE html>");
    htmlDocument.DocumentNode.AppendChild(doctype);
    htmlDocument.DocumentNode.AppendChild(htmlRoot);
    htmlDocument.Save("test.htm");

    File.WriteAllText("XamlCore.css", generator.GenerateStyles(page));
}
/// <summary>
/// Fills the element with id "bodyContent" with sample content: an <c>h1</c> heading and a
/// five-column table with a header row and ten body rows of filler text.
/// </summary>
/// <param name="doc">Document that contains the "bodyContent" element.</param>
private void WriteContent(HtmlDocument doc)
{
    const string text = "Fi fa fo fum fi fa fo fum fi fa fo fum fi fa fo fum";
    string[] headerTitles = { "First", "Second", "Third", "Fourth", "Fifth" };
    string[] cellLabels = { " first ", " second ", " third ", " fourth ", " fifth " };

    HtmlNode bodyContent = doc.GetElementbyId("bodyContent");

    var heading = doc.CreateElement("h1");
    heading.InnerHtml = "Heading";
    bodyContent.AppendChild(heading);

    HtmlNode table = bodyContent.CreateElement("table");
    table.CreateAttributeWithValue("class", "table table-striped table-bordered");

    // Header: one cell per column title.
    var tableHead = table.CreateElement("thead");
    var headerRow = tableHead.CreateElement("tr");
    foreach (string title in headerTitles)
    {
        headerRow.CreateElementWithHtml("td", title);
    }

    // Body: ten rows, each cell prefixed with the row number and its column label.
    HtmlNode tableBody = table.CreateElement("tbody");
    for (int row = 0; row < 10; row++)
    {
        HtmlNode bodyRow = tableBody.CreateElement("tr");
        foreach (string label in cellLabels)
        {
            bodyRow.CreateElementWithHtml("td", row + label + text);
        }
    }
}
/// <summary>
/// Extracts the main article from a document: scores every parent of a paragraph by its
/// class/id hints and paragraph content, picks the highest-scoring parent, cleans it and
/// appends it to the article content.
/// </summary>
/// <param name="doc">Parsed page; its markup is rewritten in place (double line breaks become paragraph breaks).</param>
/// <returns>
/// The article with its title (null when the page has no <c>title</c>) and, when a candidate
/// was found, the cleaned winning node appended to its content.
/// </returns>
static Article GrabArticle(HtmlDocument doc)
{
    // Unanchored for "negative" hints, word/whole-value anchored for "positive" hints.
    const string negativeHint = @"(comment|meta|footer|footnote)";
    const string positiveClassHint = @"(^|\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\s|$)";
    const string positiveIdHint = @"^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$";

    Article article = new Article();

    // Replace double br tags with paragraph breaks BEFORE selecting paragraphs: assigning
    // InnerHtml re-parses the markup and replaces the child nodes, so nodes selected earlier
    // would belong to the discarded tree.
    doc.DocumentNode.InnerHtml = Regex.Replace(doc.DocumentNode.InnerHtml, @"<br/?>[ \r\n\s]*<br/?>", @"</p><p>");

    // NOTE(review): SelectNodes yields null when the page has no <p>; this relies on
    // ParagraphCollection accepting null - confirm.
    ParagraphParentCollection paragraphParents = new ParagraphParentCollection(new ParagraphCollection(doc.DocumentNode.SelectNodes("//p")));

    // TODO handle title.
    HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//title");
    article.Title = titleNode == null ? null : titleNode.InnerText;

    foreach (ParagraphParent parent in paragraphParents)
    {
        // The hints live in the attribute VALUE; only the first class attribute is considered.
        foreach (HtmlAttribute att in parent.Node.Attributes.AttributesWithName("class"))
        {
            if (Regex.IsMatch(att.Value, negativeHint))
            {
                parent.Score -= 50;
            }
            else if (Regex.IsMatch(att.Value, positiveClassHint))
            {
                parent.Score += 25;
            }

            break;
        }

        foreach (HtmlAttribute att in parent.Node.Attributes.AttributesWithName("id"))
        {
            if (Regex.IsMatch(att.Value, negativeHint))
            {
                parent.Score -= 50;
            }
            else if (Regex.IsMatch(att.Value, positiveIdHint))
            {
                parent.Score += 25;
            }
        }

        foreach (Paragraph paragraph in parent.Paragraphs)
        {
            if (paragraph.Node.InnerText.Length > 10)
            {
                parent.Score++;
            }

            parent.Score += GetCharCount(paragraph.Node);
        }
    }

    ParagraphParent winner = paragraphParents.OrderByDescending(a => a.Score).FirstOrDefault();
    if (winner == null)
    {
        // No candidate at all: return the article with whatever title was found.
        return article;
    }

    // TODO cleanup.
    winner.Clean("style");
    winner.KillDivs();
    winner.KillBreaks();
    winner.Clean("form");
    winner.Clean("object");
    winner.Clean("table", 250);
    winner.Clean("h1");
    winner.Clean("h2");
    winner.Clean("iframe");
    winner.Clean("script");

    article.Content.DocumentNode.AppendChild(winner.Node);
    return article;
}
/// <summary>
/// Creates a new element in the underlying document, adds it to <paramref name="container"/>
/// at <paramref name="index"/> through <c>AddNode</c> and returns it as an <c>IHtmlElement</c>.
/// </summary>
/// <param name="container">Container that receives the element.</param>
/// <param name="index">Position passed through to <c>AddNode</c>.</param>
/// <param name="name">Tag name of the element to create.</param>
/// <returns>The created element.</returns>
public IHtmlElement AddElement(IHtmlContainer container, int index, string name)
{
    var created = _document.CreateElement(name);

    AddNode(container, index, created);

    return created.AsElement();
}
/// <summary>
/// Verifies that <c>GetRssFeedUrl</c> returns null for a document that holds only an empty
/// <c>html</c> element and therefore no feed link.
/// </summary>
public void when_html_document_doesnt_contain_rss_feed()
{
    var document = new HtmlDocument();

    // CreateElement only creates a detached node; attach it so the document under test
    // really contains the html element instead of being completely empty.
    document.DocumentNode.AppendChild(document.CreateElement("html"));

    var parser = new Scraper(document);

    var actual = parser.GetRssFeedUrl();

    Assert.IsNull(actual);
}
/// <summary>
/// Appends the configured disclaimer to the message body in the body's own format (plain text,
/// RTF or HTML) when this handler applies, then runs the child handlers.
/// </summary>
/// <param name="emailItem">Message being processed.</param>
/// <param name="lastExitCode">Exit code of the previous handler, passed on to <c>AppliesTo</c> and the child handlers.</param>
public override void Execute(IEmailItem emailItem = null, int? lastExitCode = null)
{
    if (!AppliesTo(emailItem, lastExitCode))
    {
        return;
    }

    if (BodyFormat.Text == emailItem.Message.Body.BodyFormat)
    {
        // Existing body, one blank line, then the disclaimer text.
        StringBuilder sb = new StringBuilder();
        sb.Append(emailItem.Message.GetBody());
        sb.AppendLine();
        sb.AppendLine();
        sb.Append(Text);
        emailItem.Message.SetBody(sb.ToString());
    }

    if (BodyFormat.Rtf == emailItem.Message.Body.BodyFormat)
    {
        var messageRtf = new RtfDocument(emailItem.Message.GetBody());
        var mergeRtf = new RtfDocument(Rtf);

        // The body is only replaced when the merge reports success.
        if (messageRtf.Merge(mergeRtf))
        {
            emailItem.Message.SetBody(messageRtf.Content);
        }
    }

    if (BodyFormat.Html == emailItem.Message.Body.BodyFormat)
    {
        var messageDoc = new HtmlDocument();
        messageDoc.LoadHtml(emailItem.Message.GetBody());

        var disclaimerDoc = new HtmlDocument();
        disclaimerDoc.LoadHtml(Html);

        // HTML fragments without a <body> element are valid input; fall back to the document
        // root instead of dereferencing a null node.
        var messageBodyNode = messageDoc.DocumentNode.SelectSingleNode("//body") ?? messageDoc.DocumentNode;
        var disclaimerBodyNode = disclaimerDoc.DocumentNode.SelectSingleNode("//body") ?? disclaimerDoc.DocumentNode;

        // Line break, then the disclaimer content, at the end of the message body.
        var brNode = messageDoc.CreateElement("br");
        messageBodyNode.AppendChild(brNode);
        messageBodyNode.AppendChildren(disclaimerBodyNode.ChildNodes);

        emailItem.Message.SetBody(messageDoc.DocumentNode.InnerHtml);
    }

    if (null != Handlers && Handlers.Count > 0)
    {
        foreach (IHandler handler in Handlers)
        {
            handler.Execute(emailItem, lastExitCode);
        }
    }
}
/// <summary>
/// Writes the snippet to <paramref name="filePath"/> as a standalone HTML page: the snippet
/// markup goes into the body, its CSS into a <c>style</c> element in the head, its JavaScript
/// into a <c>script</c> element at the end of the body, followed by its external references.
/// </summary>
/// <param name="snippet">Snippet to save.</param>
/// <param name="filePath">Destination file.</param>
public static void Save(SnippetDocumentControl snippet, string filePath)
{
    var document = new HtmlAgilityPack.HtmlDocument();

    // Build the page skeleton line by line so the saved file is formatted across lines.
    var skeleton = new System.Text.StringBuilder();
    skeleton.AppendLine("<html>");
    skeleton.AppendLine("<head></head>");
    skeleton.AppendLine($"<body>{snippet.Html}</body>");
    skeleton.AppendLine("</html>");

    var root = HtmlAgilityPack.HtmlNode.CreateNode(skeleton.ToString());
    document.DocumentNode.AppendChild(root);

    var head = document.DocumentNode.SelectSingleNode("//head");
    var body = document.DocumentNode.SelectSingleNode("//body");

    // Inline stylesheet, only when the snippet has CSS.
    bool hasCss = !string.IsNullOrWhiteSpace(snippet.CSS);
    if (hasCss)
    {
        var styleElement = document.CreateElement("style");
        styleElement.SetAttributeValue("type", "text/css");
        styleElement.AppendChild(document.CreateTextNode(snippet.CSS));
        head.AppendChild(styleElement);
    }

    // Inline script, only when the snippet has JavaScript.
    bool hasScript = !string.IsNullOrWhiteSpace(snippet.Javascript);
    if (hasScript)
    {
        var scriptElement = document.CreateElement("script");
        scriptElement.SetAttributeValue("type", "text/javascript");
        scriptElement.AppendChild(document.CreateTextNode(snippet.Javascript));
        body.AppendChild(scriptElement);
    }

    AppendSnippetReferences(snippet, document, head);

    document.Save(filename: filePath);
}
/// <summary>
/// Wraps every slimmage-enabled <c>img</c> (src contains "slimmage=true" or class contains
/// "slimmage") in a <c>noscript data-slimmage="true"</c> element that also carries a copy of
/// the image attributes as <c>data-img-*</c> attributes.
/// </summary>
/// <param name="content">HTML to transform.</param>
/// <returns>
/// The rewritten HTML when at least one image was wrapped; otherwise, and on any failure,
/// the original <paramref name="content"/> unchanged.
/// </returns>
public string TransformImgToPicture(string content)
{
    try
    {
        HtmlDocument doc = new HtmlDocument();

        // "Do no harm" mode. The options must be set BEFORE parsing to influence how the
        // markup is read; setting them after LoadHtml only affects later operations.
        doc.OptionFixNestedTags = false;
        doc.OptionAutoCloseOnEnd = false;
        doc.OptionCheckSyntax = false;
        doc.LoadHtml(content);

        HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//img[@src]");
        if (collection == null)
        {
            return content;
        }

        var changed = false;
        foreach (HtmlNode img in collection)
        {
            HtmlAttribute src = img.Attributes["src"];
            HtmlAttribute cls = img.Attributes["class"];

            bool optedIn =
                (src != null && src.Value.IndexOf("slimmage=true", StringComparison.OrdinalIgnoreCase) > -1)
                || (cls != null && cls.Value.IndexOf("slimmage", StringComparison.OrdinalIgnoreCase) > -1);
            if (!optedIn)
            {
                continue;
            }

            // Fallback container for clients without JavaScript.
            HtmlNode container = doc.CreateElement("noscript");
            container.SetAttributeValue("data-slimmage", "true");

            // Copy attributes for IE6/7/8 support.
            foreach (var a in img.Attributes)
            {
                container.SetAttributeValue("data-img-" + a.Name, a.Value);
            }

            // Place 'img' inside 'noscript'.
            img.ParentNode.InsertBefore(container, img);
            img.Remove();
            container.AppendChild(img);
            changed = true;
        }

        // Don't re-serialise the DOM unless the HTML was actually edited.
        return changed ? doc.DocumentNode.OuterHtml : content;
    }
    catch (Exception ex)
    {
        // ex.ToString() already includes the stack trace.
        Trace.TraceWarning("SlimResponse failed to parse HTML: " + ex.ToString());

        // Serve the untouched page rather than the exception text.
        return content;
    }
}
/// <summary>
/// Replaces every <c>img</c> whose class equals <c>hdnImageClass.Value</c> with a <c>span</c>
/// of the same class whose content is the image's <c>value</c> attribute (a single space when
/// the attribute is absent).
/// </summary>
/// <param name="htmlContent">HTML to process.</param>
/// <returns>The serialised document after the replacement.</returns>
public string ReplaceDeletedImageBySpanCoupons(string htmlContent)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(htmlContent);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection images = doc.DocumentNode.SelectNodes("//img[@class='" + hdnImageClass.Value + "']");
    if (images != null)
    {
        foreach (HtmlNode img in images)
        {
            string value = img.Attributes.Contains("value") ? img.Attributes["value"].Value : " ";

            HtmlNode lbl = doc.CreateElement("span");
            lbl.Attributes.Add("class", hdnImageClass.Value);
            lbl.InnerHtml = value;

            img.ParentNode.ReplaceChild(lbl, img);
        }
    }

    return doc.DocumentNode.OuterHtml;
}
/// <summary>
/// Replaces every <c>iframe</c> in the HTML with an empty <c>span class="IF"</c> that has
/// height 187 and width 240.
/// </summary>
/// <param name="htmlContent">HTML to process.</param>
/// <returns>The serialised document after the replacement.</returns>
public string ReplaceDeletedIframeBySpan(string htmlContent)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(htmlContent);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection iframes = doc.DocumentNode.SelectNodes("//iframe");
    if (iframes != null)
    {
        foreach (HtmlNode iframe in iframes)
        {
            HtmlNode lbl = doc.CreateElement("span");
            lbl.Attributes.Add("class", "IF");
            lbl.Attributes.Add("height", "187");
            lbl.Attributes.Add("width", "240");

            iframe.ParentNode.ReplaceChild(lbl, iframe);
        }
    }

    return doc.DocumentNode.OuterHtml;
}
/// <summary>
/// Adds a stylesheet <c>link</c> pointing at <paramref name="Value"/> to the target master
/// page file and saves the file.
/// </summary>
/// <param name="Value">URL written to the link's <c>href</c> attribute.</param>
/// <returns>
/// Details with status <c>Completed</c> and <c>NewValue</c> set when the target node was found;
/// status <c>Cancelled</c> otherwise.
/// </returns>
public override TaskExecutionDetails Execute(string Value)
{
    var details = new TaskExecutionDetails();

    string masterPagePath = IO.IOHelper.MapPath(SystemDirectories.Masterpages) + "/" + TargetFile;
    var doc = new HtmlDocument();
    doc.Load(masterPagePath);

    // Default to the head element; a configured selector is lower-cased before use.
    string selector = string.IsNullOrEmpty(TargetSelector) ? "//head" : TargetSelector.ToLower();
    HtmlNode target = doc.DocumentNode.SelectSingleNode(selector);
    if (target == null)
    {
        details.TaskExecutionStatus = TaskExecutionStatus.Cancelled;
        return details;
    }

    HtmlNode link = doc.CreateElement("link");
    link.Attributes.Append("rel", "stylesheet");
    link.Attributes.Append("type", "text/css");
    link.Attributes.Append("href", Value);
    if (!string.IsNullOrEmpty(Media))
    {
        link.Attributes.Append("media", Media);
    }

    target.AppendChild(link);
    doc.Save(masterPagePath);

    details.TaskExecutionStatus = TaskExecutionStatus.Completed;
    details.NewValue = Value;
    return details;
}
/// <summary>
/// Creates an element, optionally sets its <c>class</c> attribute and optionally appends it
/// to a parent node.
/// </summary>
/// <param name="document">Document used as the factory for the element.</param>
/// <param name="name">Tag name of the element.</param>
/// <param name="className">Class attribute value; skipped when null, empty or whitespace.</param>
/// <param name="parentNode">Node to append the element to; skipped when null.</param>
/// <returns>The created element.</returns>
public static HtmlNode CreateNode(
    HtmlDocument document,
    string name,
    string className = null,
    HtmlNode parentNode = null)
{
    var created = document.CreateElement(name);

    bool hasClass = !string.IsNullOrWhiteSpace(className);
    if (hasClass)
    {
        created.SetAttributeValue("class", className);
    }

    if (parentNode == null)
    {
        return created;
    }

    parentNode.AppendChild(created);
    return created;
}
/// <summary>
/// Builds the HTML table of dinos whose name contains <paramref name="search"/> (all dinos
/// when the search is null or empty) and hands it to the browser by calling the page's
/// <c>FinishRefreshTable</c> script function with the Base64-encoded markup.
/// </summary>
/// <param name="search">Case-insensitive name filter; null or empty lists every dino.</param>
public void requestUpdatedDinoTable(string search)
{
    // The document only serves as a node factory; the table is serialised on its own.
    var doc = new HtmlAgilityPack.HtmlDocument();

    HtmlNode table = doc.CreateElement("table");
    table.Attributes.Add("class", "mdl-data-table mdl-js-data-table mdl-data-table--selectable mdl-shadow--2dp dino_table_inner");

    bool listAll = string.IsNullOrEmpty(search);
    foreach (ArkDino dino in StaticVars.dinos)
    {
        // Ordinal, case-insensitive containment: no per-item lower-casing and no
        // culture-dependent casing surprises.
        if (!listAll && dino.name.IndexOf(search, StringComparison.OrdinalIgnoreCase) < 0)
        {
            continue;
        }

        HtmlNode entry = doc.CreateElement("tr");
        entry.AddClass("dino_list_element");

        // Icon cell.
        var icon = doc.CreateElement("td");
        icon.AddClass("mdl-data-table__cell--non-numeric");
        var iconImg = doc.CreateElement("img");
        iconImg.Attributes.Add("src", dino.img);
        icon.AppendChild(iconImg);
        entry.AppendChild(icon);

        // Name cell.
        var name = doc.CreateElement("td");
        name.AddClass("mdl-data-table__cell--non-numeric");
        name.InnerHtml = dino.name;
        entry.AppendChild(name);

        // Button cell; the serialised dino is Base64-encoded so it can sit inside the
        // onclick attribute without escaping.
        // NOTE(review): ASCII encoding replaces non-ASCII characters with '?' - confirm the
        // serialised form is always ASCII.
        var btns = doc.CreateElement("td");
        btns.AddClass("mdl-data-table__cell--non-numeric");
        btns.AddClass("dino_list_element_btns");
        string dinoString = Convert.ToBase64String(Encoding.ASCII.GetBytes(RpTools.SerializeObject(dino)));
        btns.InnerHtml = "<button class=\"mdl-button mdl-js-button mdl-button--raised\" onclick=\"SpawnCharacter('" + dinoString + "');\"> Spawn </button>";
        entry.AppendChild(btns);

        table.AppendChild(entry);
    }

    string html = table.WriteTo();

    // Send the markup back as Base64 so it needs no JavaScript string escaping.
    var plainTextBytes = System.Text.Encoding.UTF8.GetBytes(html);
    StaticVars.browser.ExecuteScriptAsync("FinishRefreshTable('" + System.Convert.ToBase64String(plainTextBytes) + "');");
}
/// <summary>
/// Produces preview HTML: every <c>div</c> whose class equals "asccut" (compared
/// case-insensitively) is replaced with an inline <c>div</c> holding
/// <paramref name="replacmentHtml"/>, then custom tags are processed.
/// </summary>
/// <param name="html">Source HTML; matches of the <c>htmlTags</c> pattern are removed before parsing.</param>
/// <param name="replacmentHtml">Markup placed where a cut marker was; null is treated as empty.</param>
/// <param name="productID">Passed through to <c>ProcessCustomTags</c>.</param>
/// <returns>The processed HTML with <c>htmlTags</c> matches removed again.</returns>
public static string GetPreview(string html, string replacmentHtml, Guid productID)
{
    string wrapped = string.Format("<html>{0}</html>", htmlTags.Replace(html, string.Empty));

    var doc = new HtmlDocument();
    doc.LoadHtml(wrapped);

    var cutMarkers = doc.DocumentNode.SelectNodes("//div[translate(@class,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')='asccut']");
    if (cutMarkers != null)
    {
        foreach (var marker in cutMarkers)
        {
            var placeholder = doc.CreateElement("div");

            var inlineStyle = doc.CreateAttribute("style");
            inlineStyle.Value = "display:inline;";
            placeholder.Attributes.Append(inlineStyle);

            placeholder.InnerHtml = replacmentHtml ?? string.Empty;
            marker.ParentNode.ReplaceChild(placeholder, marker);
        }
    }

    ProcessCustomTags(doc, productID);

    string processed = doc.DocumentNode.InnerHtml;
    return htmlTags.Replace(processed, string.Empty);
}
/// <summary>
/// Downloads the resources referenced by the document (images, <c>link href</c>,
/// <c>script src</c>), declares UTF-8 in the document head and writes the document to
/// <paramref name="path"/> as UTF-8 with a byte-order mark.
/// </summary>
/// <param name="document">Document to save; it is modified by this call.</param>
/// <param name="path">Destination file.</param>
public void Download(HtmlDocument document, string path)
{
    Directory.CreateDirectory(ResourcePath);

    // Download all resources.
    _downloadImages(document);
    _downloadResources(document, "link", "href", true);
    _downloadResources(document, "script", "src");

    // Add a meta header declaring the UTF-8 encoding. Documents without a head element are
    // written as-is (the byte-order mark still identifies the encoding) instead of failing.
    HtmlNode head = document.DocumentNode.SelectSingleNode("//head");
    if (head != null)
    {
        HtmlNode meta = document.CreateElement("meta");
        meta.Attributes.Add("charset", "utf-8");
        head.AppendChild(meta);
    }

    File.WriteAllText(path, document.DocumentNode.WriteTo(), new UTF8Encoding(true));
}
/// <summary>
/// Downloads every image found in a product's description into
/// <c>tbImageFolder.Text\folderName</c>.
/// </summary>
/// <param name="productUrl">Product URL that carries the product id in an <c>id=</c> query parameter.</param>
/// <param name="folderName">Sub-folder of the configured image folder to save into.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="productUrl"/> has no <c>id=</c> parameter.</exception>
private void downloadProductImages(string productUrl, string folderName)
{
    Regex urlRegex = new Regex(@"(?:^|\?|&)id=(\d*)");
    Match m = urlRegex.Match(productUrl);
    if (!m.Success)
    {
        // Same exception type the previous substring arithmetic produced, with a usable message.
        throw new ArgumentOutOfRangeException("productUrl", productUrl, "The product URL does not contain an 'id=' parameter.");
    }

    // The capture group holds exactly the digits after "id=".
    string productId = m.Groups[1].Value;

    TmallClient client = new TmallClient();
    string description = client.getProductDesc(productId);
    if (string.IsNullOrEmpty(description))
    {
        // Nothing to scan for images.
        return;
    }

    // Parse the description inside a throw-away element so its images can be enumerated.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    HtmlNode productDesc = doc.CreateElement("description");
    productDesc.InnerHtml = description;

    string path = tbImageFolder.Text + "\\" + folderName;
    foreach (HtmlNode imageNode in productDesc.Descendants("img"))
    {
        string imageUrl = imageNode.GetAttributeValue("src", "");
        if (imageUrl.Length == 0)
        {
            // An img without a src has nothing to download.
            continue;
        }

        // File name is everything after the last '/'.
        string imageName = imageUrl.Substring(imageUrl.LastIndexOf("/") + 1);
        saveImage(path, imageName, imageUrl);
    }
}
/// <summary>
/// Downloads the resources referenced by the document (images, <c>link href</c>,
/// <c>script src</c>), declares UTF-8 in the document head and writes the document to
/// <paramref name="path"/> as UTF-8 with a byte-order mark.
/// </summary>
/// <param name="document">Document to save; it is modified by this call.</param>
/// <param name="path">Destination file.</param>
public void Download(HtmlDocument document, string path)
{
    Directory.CreateDirectory(ResourcePath);

    // Download all resources.
    _downloadImages(document);
    _downloadResources(document, "link", "href", true);
    _downloadResources(document, "script", "src");

    // Add a meta header declaring the UTF-8 encoding. Documents without a head element are
    // written as-is (the byte-order mark still identifies the encoding) instead of failing.
    HtmlNode head = document.DocumentNode.SelectSingleNode("//head");
    if (head != null)
    {
        HtmlNode meta = document.CreateElement("meta");
        meta.Attributes.Add("charset", "utf-8");
        head.AppendChild(meta);
    }

    File.WriteAllText(path, document.DocumentNode.WriteTo(), new UTF8Encoding(true));
}
/// <summary>
/// Builds a new <c>body</c> element that contains the head's linked stylesheets and scripts
/// followed by the content of the document's own body (or the whole document when it has no
/// body element).
/// </summary>
/// <param name="htmlDoc">Document to read from.</param>
/// <returns>The newly created body node.</returns>
protected virtual HtmlNode GetBodyNode(HtmlDocument htmlDoc)
{
    var bodyNode = htmlDoc.CreateElement("body");

    var headNode = htmlDoc.DocumentNode.SelectSingleNode("//head");
    if (headNode != null)
    {
        // ".//" keeps the search inside the head; a leading "//" searches the whole document
        // even when evaluated on the head node, which would also pick up body scripts.
        var linkNodes = headNode.SelectNodes(".//link[@href] | .//script[@src]");

        // SelectNodes returns null when nothing matches; AppendChildren rejects null.
        if (linkNodes != null)
        {
            bodyNode.AppendChildren(linkNodes);
        }
    }

    var rawBodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
    if (rawBodyNode == null)
    {
        // No body element: treat the whole document as body content.
        bodyNode.AppendChild(htmlDoc.DocumentNode);
    }
    else
    {
        bodyNode.AppendChildren(rawBodyNode.ChildNodes);
    }

    return bodyNode;
}
/// <summary>
/// Worker loop that drains <c>file_Paths</c>. Each queue entry has the form
/// "file path|^|web page base address". For every entry the Word document is opened, saved as
/// HTML (into the HTML sub-folder) and as PDF (into the PDF sub-folder), the generated HTML is
/// post-processed on a thread-pool thread to add "PDF" and "DOC" buttons, and the source
/// document is finally moved into the DOC sub-folder.
/// </summary>
static private void start_Operation()
{
    while (true)
    {
        if (file_Paths.Count == 0)
        {
            // Queue is empty: wait a second before checking again.
            System.Threading.Thread.Sleep(1000);
        }
        else
        {
            try
            {
                //System.Threading.Interlocked.Increment(ref word_Application_Count);
                while (file_Paths.Count > 0)
                {
                    string temp_String = "";
                    if (file_Paths.TryDequeue(out temp_String))
                    {
                        // Entry layout: [0] = document path, [1] = base address used in the button links.
                        string[] temp_Collection = temp_String.Split(new string[] { "|^|" }, StringSplitOptions.None);
                        string file_Path = temp_Collection[0];
                        string web_Page_Base_Address = temp_Collection[1];
                        string folder_Path = file_Path.Substring(0, file_Path.LastIndexOf("\\"));
                        string file_Name = file_Path.Substring(file_Path.LastIndexOf("\\") + 1);
                        if (!System.IO.File.Exists(file_Path))
                        {
                            // NOTE(review): this returns from the whole worker loop, so one missing
                            // file stops all further processing; the trailing comment suggests
                            // "continue" may have been intended - confirm.
                            return;// continue;
                        }
                        string file_Name_Htm = folder_Path + "\\HTML\\" + System.IO.Path.ChangeExtension(file_Name, "htm");
                        if (System.IO.File.Exists(file_Name_Htm))
                        {
                            // NOTE(review): same as above - an already converted file ends the worker.
                            return;// continue;
                        }
                        Microsoft.Office.Interop.Word._Document doc = app.Documents.Open(file_Path, Visible: false);
                        //Console.WriteLine(file_Name_Htm);
                        doc.SaveAs(FileName: file_Name_Htm, FileFormat: Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatHTML);
                        // Post-process the saved HTML on a thread-pool thread while this thread
                        // continues with the PDF export below.
                        System.Threading.ThreadPool.QueueUserWorkItem((state) =>
                        {
                            HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                            // There are various options, set as needed
                            htmlDoc.OptionFixNestedTags = true;
                            // filePath is a path to a file containing the html
                            //string filePath = @"C:\Documents and Settings\prabhakaran\Desktop\rv_3b72167e18a54fe28a330c5fbecc222f.htm";
                            string filePath = file_Name_Htm;
                            // Retry the load until it succeeds.
                            // NOTE(review): every exception is swallowed and there is no delay or
                            // retry limit, so a permanent failure spins this thread forever.
                            while (true)
                            {
                                try
                                {
                                    htmlDoc.Load(filePath);
                                    break;
                                }
                                catch (Exception excp)
                                {
                                }
                            }
                            // Use: htmlDoc.LoadXML(xmlString); to load from a string
                            // ParseErrors is an ArrayList containing any errors from the Load statement
                            if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Count() > 0)
                            {
                                // Handle any parse errors as required
                                // (currently nothing is done: a file with parse errors gets no buttons)
                            }
                            else
                            {
                                if (htmlDoc.DocumentNode != null)
                                {
                                    HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
                                    if (bodyNode != null)
                                    {
                                        // onload script sizes and positions the two buttons in the browser.
                                        bodyNode.SetAttributeValue("onload", "var temp_Element_Pdf = document.getElementById('PDF Link');temp_Element_Pdf.style.width=screen.width/2;temp_Element_Pdf.style.height='50';var temp_Element_Doc = document.getElementById('DOC Link');temp_Element_Doc.style.width=screen.width/2;temp_Element_Doc.style.height='50';temp_Element_Doc.style.position='absolute';");
                                        // "PDF" button: opens the exported PDF in a new window.
                                        HtmlNode pdf_Link = htmlDoc.CreateElement("button"); //new HtmlNode(HtmlNodeType.Element, htmlDoc, 0);
                                        pdf_Link.SetAttributeValue("value", "PDF");
                                        pdf_Link.SetAttributeValue("id", "PDF Link");
                                        string pdf_Path = web_Page_Base_Address + "Local_Database/PDF/" + System.IO.Path.ChangeExtension(file_Name, "pdf");
                                        pdf_Link.SetAttributeValue("onclick", "window.open('" + pdf_Path + "','_blank','location=no,menubar=no,status=no,toolbar=no');");
                                        pdf_Link.InnerHtml = "PDF";
                                        pdf_Link.SetAttributeValue("style", "background-color:#262626;color:#747474;border-style:groove;border-color:#121212;-moz-appearance:none;");
                                        pdf_Link.SetAttributeValue("onmouseover", "this.style.backgroundColor='#262626';this.style.color='#adbdde';this.style.fontWeight='bold';this.style.fontSize='normal';this.style.borderColor='#afafaf';"); //#afafaf
                                        pdf_Link.SetAttributeValue("onmouseout", "this.style.backgroundColor='#262626';this.style.color='#747474';this.style.fontWeight='normal';this.style.fontSize='normal';this.style.borderColor='#121212';");
                                        bodyNode.ChildNodes.Add(pdf_Link);
                                        // "DOC" button: opens the original document in a new window.
                                        HtmlNode doc_Link = htmlDoc.CreateElement("button"); //new HtmlNode(HtmlNodeType.Element, htmlDoc, 0);
                                        doc_Link.SetAttributeValue("value", "DOC");
                                        doc_Link.SetAttributeValue("id", "DOC Link");
                                        string doc_Path = web_Page_Base_Address + "Local_Database/DOC/" + System.IO.Path.ChangeExtension(file_Name, "doc");
                                        doc_Link.SetAttributeValue("onclick", "window.open('" + doc_Path + "','_blank','location=no,menubar=no,status=no,toolbar=no');");
                                        doc_Link.InnerHtml = "DOC";
                                        doc_Link.SetAttributeValue("style", "background-color:#262626;color:#747474;border-style:groove;border-color:#121212;-moz-appearance:none;");
                                        doc_Link.SetAttributeValue("onmouseover", "this.style.backgroundColor='#262626';this.style.color='#adbdde';this.style.fontWeight='bold';this.style.fontSize='normal';this.style.borderColor='#afafaf';"); //#afafaf
                                        doc_Link.SetAttributeValue("onmouseout", "this.style.backgroundColor='#262626';this.style.color='#747474';this.style.fontWeight='normal';this.style.fontSize='normal';this.style.borderColor='#121212';");
                                        bodyNode.ChildNodes.Add(doc_Link);
                                        //htmlDoc.Save(@"C:\Documents and Settings\prabhakaran\Desktop\new.html");
                                        // Overwrite the Word-generated HTML with the version that has the buttons.
                                        htmlDoc.Save(file_Name_Htm);
                                        // Do something with bodyNode
                                    }
                                }
                            }// ***
                        });
                        string file_Name_Pdf = folder_Path + "\\PDF\\" + System.IO.Path.ChangeExtension(file_Name, "pdf");
                        //Console.WriteLine(file_Name_Pdf);
                        doc.SaveAs(FileName: file_Name_Pdf, FileFormat: Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatPDF);
                        //Console.WriteLine(folder_Path + "\\DOC\\" + file_Name);
                        ((Microsoft.Office.Interop.Word._Document)doc).Close(SaveChanges: Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges);
                        // Move the source document into the DOC folder, retrying until it succeeds.
                        // NOTE(review): as with the load loop above, exceptions are swallowed and the
                        // retry has no delay or limit.
                        while (true)
                        {
                            try
                            {
                                System.IO.File.Move(file_Path, folder_Path + "\\DOC\\" + file_Name);
                                break;
                            }
                            catch (Exception excp)
                            {
                            }
                        }
                    }
                }
                //System.Threading.Interlocked.Decrement(ref word_Application_Count);
            }
            catch (Exception excp)
            {
                // Any failure while processing the queue is only reported on the console;
                // the outer loop then carries on.
                Console.WriteLine(excp.Message);
            }
        }
    }
}
/// <summary>
/// Fills post <paramref name="p"/> from the post table <paramref name="t"/>. When the
/// <c>alt1</c> cell has no <c>div</c> with an id, the cell text becomes the full post;
/// otherwise the div is normalised (new lines, colours, emoticons, links, images), nested
/// quotes are replaced by their processed text and the result becomes the post content.
/// </summary>
/// <param name="t">Node that contains the <c>td class="alt1"</c> post cell.</param>
/// <param name="p">Post to populate.</param>
/// <param name="doc">Document used as the factory for replacement nodes.</param>
public static void GetQuoteAndReply(HtmlNode t, Post p, HtmlDocument doc)
{
    HtmlNode postCell = t.SelectSingleNode(".//td[@class='alt1']");
    HtmlNode body = postCell.SelectSingleNode("./div[@id]");

    if (body == null)
    {
        // No message div: the cell text is the whole post.
        p.SetFullPost(postCell.InnerText.Trim());
        return;
    }

    // Replace <br> with new line.
    HAP.ReplaceNewLine(body);

    // If the post has colour, remove it.
    HAP.RemovePostColor(body);

    // Set emoticon links; fall back to text when emoticons are disabled in the settings.
    HAP.SetEmoLink(body, p);
    if (UserData.settings.GetValueOrDefault(AppSettings.keyEmo, true) == false)
    {
        HAP.ReplaceEmoWithText(body);
    }

    // Edit links, then replace images.
    HAP.EditLink(body, p);
    HAP.ReplaceImage(body, p);

    // Replace every direct quote block with a node that carries its processed, encoded text.
    HtmlNodeCollection quoteBlocks = body.SelectNodes("./div[@style='margin:20px; margin-top:5px; ']");
    if (quoteBlocks != null)
    {
        foreach (HtmlNode quoteBlock in quoteBlocks)
        {
            string processed = HAP.ProcessQuote(quoteBlock, doc);

            HtmlNode replacement = doc.CreateElement("title");
            replacement.InnerHtml = HtmlDocument.HtmlEncode(processed);
            quoteBlock.ParentNode.ReplaceChild(replacement, quoteBlock);
        }
    }

    p.content = HtmlEntity.DeEntitize(body.InnerText.Trim());
    p.ReplaceImage();
    p.ReplaceNewLine();
    p.ReplaceLink();
    p.SetFullPost(p.content);
}
/// <summary>
/// Converts a quote block into display text: top border, optional quoted user name, the
/// quote text (nested quotes are processed recursively) and bottom border.
/// </summary>
/// <param name="quote">Quote block node; it is modified by this call.</param>
/// <param name="doc">Document used as the factory for replacement nodes.</param>
/// <returns>
/// The formatted quote. When the block has no <c>td class="alt2"</c> cell, the top border
/// followed by the block's own text is returned.
/// </returns>
public static string ProcessQuote(HtmlNode quote, HtmlDocument doc)
{
    RemoveViewPost(quote);

    string quoteContent = Model.Post.GetQuoteBorderTop();

    // Either the table or the cell may be missing; both cases take the fallback below.
    HtmlNode table = quote.SelectSingleNode("./table");
    HtmlNode td = table == null ? null : table.SelectSingleNode(".//td[@class='alt2']");
    if (td == null)
    {
        // There is no cell to read from, so use the quote block's own text.
        quoteContent += HtmlEntity.DeEntitize(quote.InnerText.Trim()).Trim();
        return quoteContent;
    }

    // Replace nested quotes with nodes carrying their processed, encoded text.
    HtmlNodeCollection quotes = td.SelectNodes("./div[@style='margin:20px; margin-top:5px; ']");
    if (quotes != null)
    {
        foreach (HtmlNode q in quotes)
        {
            string s = ProcessQuote(q, doc);

            HtmlNode newNode = doc.CreateElement("title");
            newNode.InnerHtml = HtmlDocument.HtmlEncode(s);
            q.ParentNode.ReplaceChild(newNode, q);
        }
    }

    // Check whether the quote names the quoted user.
    HtmlNode strongUserName = td.SelectSingleNode(".//strong");
    if (strongUserName != null)
    {
        // NOTE(review): the opening literal is "******" while the closing one is "</b>" -
        // confirm the intended opening tag.
        string quoteUser = "******" + HtmlEntity.DeEntitize(strongUserName.InnerText.Trim()) + "</b>";
        quoteContent += quoteUser + "<br>";

        // Drop the first div child so its text is not repeated in the quote text below;
        // RemoveChild must not be called when there is no such child.
        HtmlNode firstDiv = td.Element("div");
        if (firstDiv != null)
        {
            td.RemoveChild(firstDiv);
        }
    }

    quoteContent += HtmlEntity.DeEntitize(td.InnerText.Trim());
    quoteContent += Model.Post.GetQuoteBorderBottom();
    return quoteContent;
}
/// <summary>
/// Wraps every <c>img</c> that has a non-empty <c>src</c> and <c>_zoom</c> attribute in an
/// <c>a class="fancyzoom"</c> link and sets the image border to 0. The link targets the
/// <c>_zoom</c> value unless that value matches <c>rxNumeric</c>, in which case it targets
/// the image's own <c>src</c>.
/// </summary>
/// <param name="doc">Document to modify in place.</param>
/// <param name="productID">Not used by this method.</param>
private static void ProcessZoomImages(HtmlDocument doc, Guid productID)
{
    HtmlNodeCollection zoomImages = doc.DocumentNode.SelectNodes("//img[@_zoom]");
    if (zoomImages == null)
    {
        return;
    }

    foreach (HtmlNode image in zoomImages)
    {
        HtmlAttribute source = image.Attributes["src"];
        if (source == null || string.IsNullOrEmpty(source.Value))
        {
            continue;
        }

        HtmlAttribute zoom = image.Attributes["_zoom"];
        if (zoom == null || string.IsNullOrEmpty(zoom.Value))
        {
            continue;
        }

        // Force border="0", creating the attribute when the image has none.
        HtmlAttribute border = image.Attributes["border"];
        if (border == null)
        {
            border = doc.CreateAttribute("border");
            image.Attributes.Append(border);
        }

        border.Value = "0";

        string linkTarget = rxNumeric.IsMatch(zoom.Value) ? source.Value : zoom.Value;

        if (image.ParentNode == null)
        {
            continue;
        }

        HtmlNode anchor = doc.CreateElement("a");

        HtmlAttribute href = doc.CreateAttribute("href");
        href.Value = linkTarget;
        anchor.Attributes.Append(href);

        HtmlAttribute cssClass = doc.CreateAttribute("class");
        cssClass.Value = "fancyzoom";
        anchor.Attributes.Append(cssClass);

        // Put the anchor where the image was, then move the image inside it.
        image.ParentNode.ReplaceChild(anchor, image);
        anchor.AppendChild(image);
    }
}
/// <summary>
/// Replaces the base href for some HTML supplied in a string.
/// </summary>
/// <param name="content">HTML content to have its base href replaced.</param>
/// <param name="baseUri">New URI for the base href.</param>
/// <returns>The content with a base element pointing at baseUri.</returns>
public static string SetBaseUri(string content, string baseUri)
{
    // Load the content into an HTML document.
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);

    // Find the head element: first /html/head, then a top-level /head.
    HtmlNode head = doc.DocumentNode.SelectSingleNode("/html/head");
    if (head == null)
    {
        head = doc.DocumentNode.SelectSingleNode("/head");
    }

    if (head == null)
    {
        // Still no head element - create one. Put it inside <html> when there is one, so it
        // does not end up in front of the doctype and the root element; otherwise (bare
        // fragment) put it at the very start of the document.
        head = doc.CreateElement("head");
        HtmlNode html = doc.DocumentNode.SelectSingleNode("/html");
        (html ?? doc.DocumentNode).PrependChild(head);
    }

    // Look for a base element in the head; create one at the start of the head when missing.
    HtmlNode baseElement = head.SelectSingleNode("base");
    if (baseElement == null)
    {
        baseElement = doc.CreateElement("base");
        head.PrependChild(baseElement);
    }

    // Set the base element's href attribute to the new baseUri.
    baseElement.SetAttributeValue("href", baseUri);

    // Convert the document back to a string and return.
    return doc.DocumentNode.OuterHtml;
}
/// <summary>
/// Verifies that <c>HtmlParser.InnerHTML</c> returns the inner HTML of a span node.
/// </summary>
public void ParseTagForInnerHTMLTest()
{
    // arrange
    const string expected = "Access Control";

    var doc = new HtmlDocument();
    doc.LoadHtml(this.htmlFileContents);
    doc.OptionUseIdAttribute = true;

    HtmlNode span = doc.CreateElement("span");
    span.InnerHtml = "Access Control";

    var target = new HtmlParser(string.Empty);

    // act
    string actual = target.InnerHTML(span);

    // assert
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Renders Markdown to HTML and post-processes it: resized images are wrapped in a link to
/// the original image, links for which <c>SpamChecker.CountValidLinks</c> reports zero valid
/// links get <c>rel="nofollow"</c>, inline <c>style</c> attributes are removed, and the
/// result is passed through <c>Utils.Sanitize</c>.
/// </summary>
/// <param name="html">Markdown/HTML source text.</param>
/// <returns>The sanitised markup.</returns>
public static HtmlString Sanitize(this string html)
{
    // Run it through Markdown first.
    html = new Markdown().Transform(html);

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var root = doc.DocumentNode;
    if (root == null)
    {
        return new HtmlString(Utils.Sanitize(html));
    }

    // Linkify images that are shown as resized versions ("rs/" in the source path).
    var images = root.SelectNodes("//img");
    if (images != null)
    {
        foreach (var image in images)
        {
            var shownSrc = image.GetAttributeValue("src", "");
            var originalSrc = shownSrc.Replace("rs/", "");

            bool isResized = shownSrc != originalSrc;
            if (isResized && image.ParentNode.Name != "a")
            {
                var wrapper = doc.CreateElement("a");
                wrapper.SetAttributeValue("href", originalSrc);
                wrapper.SetAttributeValue("target", "_blank");
                wrapper.AppendChild(image.Clone());
                image.ParentNode.ReplaceChild(wrapper, image);
            }
        }
    }

    // Links the spam checker does not count as valid are marked as nofollow.
    var links = root.SelectNodes("//a");
    if (links != null)
    {
        foreach (var link in links)
        {
            if (link.Attributes["href"] == null)
            {
                continue;
            }

            if (SpamChecker.CountValidLinks(link.Attributes["href"].Value, 0) != 0)
            {
                continue;
            }

            if (link.Attributes["rel"] != null)
            {
                link.Attributes.Remove("rel");
            }

            link.Attributes.Add("rel", "nofollow");
        }
    }

    // Remove styles from all elements.
    var styledElements = root.SelectNodes("//@style");
    if (styledElements != null)
    {
        foreach (var styled in styledElements)
        {
            styled.Attributes.Remove("style");
        }
    }

    using (var writer = new StringWriter())
    {
        doc.Save(writer);
        html = writer.ToString();
    }

    return new HtmlString(Utils.Sanitize(html));
}
/// <summary>
/// Handles modules being dropped onto the HTML editor.
///
/// The element a module is dropped on is a COM IHTMLElement, and mshtml strips the
/// custom module tag when content is loaded into an element. To work around that,
/// the target element is given an id (if it has none) so it can be located in an
/// HtmlAgilityPack document, the module preview is inserted after it there, and the
/// result is loaded back into the editor.
///
/// TODO: Could use hard refactoring, probably own customized html editor
/// </summary>
/// <param name="sender">Drag data; only a ListView selected-items collection is handled.</param>
/// <param name="e">Drag event arguments carrying the drop position in screen coordinates.</param>
void theSite_drop(DataObject sender, DragEventArgs e)
{
    CFormController.Instance.mainForm.setStatus("Module(s) Added");

    // Fetch the dragged data once; anything that is not a ListView selection is ignored.
    ListView.SelectedListViewItemCollection listViewItemModules =
        sender.GetData("System.Windows.Forms.ListView+SelectedListViewItemCollection", false)
            as ListView.SelectedListViewItemCollection;
    if (listViewItemModules == null)
    {
        return;
    }

    // Get module preview (multiple modules can be dragged)
    System.Text.StringBuilder previews = new System.Text.StringBuilder();
    foreach (ListViewItem listViewItemModule in listViewItemModules)
    {
        previews.Append(
            CXMLParser.Instance.GetPreviewFromProjectXML(
                CXMLParser.Instance.GetNodeFromModule(
                    CModuleReader.Instance.GetModuleInstanceFromName(listViewItemModule.Text)).OuterHtml));
    }
    String input = previews.ToString();

    // Get relative drop location
    Point htmlEditorCorner = htmlEditor1.PointToScreen(new Point(0, 0));
    int dropX = e.X - htmlEditorCorner.X;
    int dropY = e.Y - htmlEditorCorner.Y;

    // Get element on which module was dropped
    IHTMLElement hoverElem = htmlEditor1.HtmlDocument2.ElementFromPoint(dropX, dropY);
    IHTMLElement moduleElem = null;

    // If it gets dropped on module, pass its parent element instead
    if (CRestrictedEditDesigner.isModule(hoverElem, out moduleElem))
    {
        hoverElem = moduleElem;
    }

    // Nothing under the cursor - there is no element to attach the modules to.
    if (hoverElem == null)
    {
        return;
    }

    if (hoverElem.tagName.Equals("BODY"))
    {
        Debug.WriteLine("dropped on body");

        // A null innerHTML concatenates as an empty string, so an empty body
        // and a populated body are handled by the same expression.
        htmlEditor1.LoadDocument("<body>" + hoverElem.innerHTML + input + "</body>");
        return;
    }

    Debug.WriteLine("dropped on " + hoverElem.tagName);

    // Mshtml deletes <module> in element load, so the document has to be
    // converted to HtmlAgilityPack.HtmlDocument and then back.
    String guid = hoverElem.id;
    Boolean idChanged = guid == null;
    if (idChanged)
    {
        // Tag the element with a temporary id so it can be found after conversion.
        guid = Guid.NewGuid().ToString();
        hoverElem.id = guid;
    }

    // Get wanted element and modify its content
    HtmlAgilityPack.HtmlDocument htmlDoc = HTMLDocumentConverter.mshtmlDocToAgilityPackDoc(htmlEditor1.HtmlDocument2);
    HtmlAgilityPack.HtmlNode node = htmlDoc.GetElementbyId(guid);

    if (node == null)
    {
        // The element did not survive the conversion; append the modules at the
        // end of the document, as a drop on the body would.
        htmlEditor1.LoadDocument("<body>" + htmlDoc.DocumentNode.InnerHtml + input + "</body>");
        return;
    }

    // Dont remove id if it was there before
    if (idChanged)
    {
        node.Attributes.Remove("id");
    }

    // Need to create element, because HtmlNode dont have OuterHtml settable
    HtmlNode addedModulesNode = htmlDoc.CreateElement("div");
    addedModulesNode.InnerHtml = input;

    // InsertAfter must be called on the node that actually contains the reference
    // child. Calling it on DocumentNode throws whenever the target element is
    // nested deeper than the top level, which is why the insert used to fail.
    HtmlNode parent = node.ParentNode ?? htmlDoc.DocumentNode;
    try
    {
        parent.InsertAfter(addedModulesNode, node);
    }
    catch (Exception)
    {
        // Best-effort fallback: add the modules at the end of the parent element.
        parent.InnerHtml += input;
    }

    // And back to IHTMLDocument
    htmlEditor1.LoadDocument("<body>" + htmlDoc.DocumentNode.InnerHtml + "</body>");
}
/// <summary>
/// Extracts help-center items from the "MainHelpCenter GettingStarted" section of a
/// help page, rewriting relative links and image sources along the way.
/// </summary>
/// <param name="html">Raw HTML of the help page; null or empty yields an empty list.</param>
/// <param name="helpLinkBlock">Prefix for links that point at another block of the same page;
/// the zero-based index of the target block is appended to it.</param>
/// <returns>One item (title and content HTML) per "gs_content" block that has both an h2
/// and a "PortalHelp" div; empty when the expected structure is not found.</returns>
private static List<HelpCenterItem> ParseHelpCenterHtml(string html, string helpLinkBlock)
{
    var helpCenterItems = new List<HelpCenterItem>();

    if (string.IsNullOrEmpty(html)) return helpCenterItems;

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var urlHelp = CommonLinkUtility.GetHelpLink(false);
    var mainContent = doc.DocumentNode.SelectSingleNode("//div[@class='MainHelpCenter GettingStarted']");

    if (mainContent == null) return helpCenterItems;

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var contentBlocks = mainContent.SelectNodes(".//div[@class='gs_content']");
    if (contentBlocks == null) return helpCenterItems;

    // Ids of the blocks, in document order; in-page links are mapped to their index.
    var blocks = contentBlocks
        .Where(r => r.Attributes["id"] != null)
        .Select(x => x.Attributes["id"].Value)
        .ToList();

    foreach (var block in contentBlocks)
    {
        var anchors = block.SelectNodes(".//a[@href]");
        if (anchors != null)
        {
            foreach (var href in anchors)
            {
                var hrefAttribute = href.Attributes["href"];
                if (hrefAttribute == null) continue;

                var value = hrefAttribute.Value;

                // Leave empty, mail and absolute links untouched.
                if (string.IsNullOrEmpty(value)
                    || value.StartsWith("mailto:", StringComparison.Ordinal)
                    || value.StartsWith("http", StringComparison.Ordinal))
                {
                    continue;
                }

                if (!value.StartsWith("#", StringComparison.Ordinal) && value.Length > 1)
                {
                    // Relative link: re-root it on the help site and open it in a new window.
                    // The first character is dropped - presumably a leading '/'; confirm.
                    hrefAttribute.Value = urlHelp + value.Substring(1);
                    href.SetAttributeValue("target", "_blank");
                }
                else
                {
                    // In-page link: point it at the matching block, if there is one.
                    var blockIndex = blocks.IndexOf(value.Substring(1));
                    if (blockIndex < 0) continue;

                    hrefAttribute.Value = helpLinkBlock + blockIndex.ToString(CultureInfo.InvariantCulture);
                }
            }
        }

        var images = block.SelectNodes(".//img");
        if (images != null)
        {
            // Re-root every image source on the help site.
            foreach (var img in images.Where(img => img.Attributes["src"] != null))
            {
                img.Attributes["src"].Value = GetInternalLink(urlHelp + img.Attributes["src"].Value);
            }

            // Wrap each "screenphoto" thumbnail in a link to the "bigphoto_screen"
            // image whose id is named by the thumbnail's target attribute.
            foreach (var screenPhoto in images)
            {
                var classAttribute = screenPhoto.Attributes["class"];
                if (classAttribute == null
                    || !classAttribute.Value.Contains("screenphoto")
                    || screenPhoto.Attributes["target"] == null
                    || screenPhoto.ParentNode == null)
                {
                    continue;
                }

                var bigphotoScreenId = screenPhoto.Attributes["target"].Value;

                var bigphotoScreen = images.FirstOrDefault(img =>
                    img.Attributes["id"] != null && img.Attributes["id"].Value == bigphotoScreenId
                    && img.Attributes["class"] != null && img.Attributes["class"].Value.Contains("bigphoto_screen")
                    && img.Attributes["src"] != null);
                if (bigphotoScreen == null) continue;

                var hrefNode = doc.CreateElement("a");
                hrefNode.SetAttributeValue("href", bigphotoScreen.Attributes["src"].Value);
                hrefNode.SetAttributeValue("class", "screenzoom");
                hrefNode.SetAttributeValue("rel", "imageHelpCenter");

                // Put the link where the thumbnail was, then move the thumbnail inside it.
                screenPhoto.ParentNode.ReplaceChild(hrefNode, screenPhoto);
                hrefNode.AppendChild(screenPhoto);
            }
        }

        var titles = block.SelectSingleNode(".//h2");
        var contents = block.SelectSingleNode(".//div[@class='PortalHelp']");

        if (titles != null && contents != null)
        {
            helpCenterItems.Add(new HelpCenterItem { Title = titles.InnerText, Content = contents.InnerHtml });
        }
    }

    return helpCenterItems;
}
/// <summary>
/// Wraps every img element carrying a _zoom attribute in an
/// <c>&lt;a class="screenzoom"&gt;</c> link and sets the image's border to 0.
/// </summary>
/// <param name="doc">Document to modify in place.</param>
private static void ProcessZoomImages(HtmlDocument doc)
{
    var zoomImages = doc.DocumentNode.SelectNodes("//img[@_zoom]");
    if (zoomImages == null)
    {
        return;
    }

    foreach (var image in zoomImages)
    {
        // Images without a usable src or _zoom value are left alone.
        var src = image.Attributes["src"];
        if (src == null || string.IsNullOrEmpty(src.Value))
        {
            continue;
        }

        var zoom = image.Attributes["_zoom"];
        if (zoom == null || string.IsNullOrEmpty(zoom.Value))
        {
            continue;
        }

        image.SetAttributeValue("border", "0");

        // A _zoom value matching RxNumeric links to the image's own src;
        // any other value is used as the link target itself.
        var linkTarget = RxNumeric.IsMatch(zoom.Value) ? src.Value : zoom.Value;

        var parent = image.ParentNode;
        if (parent == null)
        {
            continue;
        }

        var anchor = doc.CreateElement("a");
        anchor.SetAttributeValue("href", linkTarget);
        anchor.SetAttributeValue("class", "screenzoom");

        // Swap the anchor in for the image, then re-attach the image inside it.
        parent.ReplaceChild(anchor, image);
        anchor.AppendChild(image);
    }
}
/// <summary>
/// Parses the HTML, ensures its head contains a base element whose href is
/// <paramref name="baseUri"/>, and returns the re-serialized document.
/// </summary>
/// <param name="content">HTML content to rebase.</param>
/// <param name="baseUri">URI to store in the base element's href attribute.</param>
/// <returns>The document's outer HTML after the base element was added or updated.</returns>
public static string SetBaseUri(string content, string baseUri)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);

    HtmlNode root = doc.DocumentNode;

    // Prefer /html/head, then accept a head sitting directly under the root.
    HtmlNode head = root.SelectSingleNode("/html/head");
    if (head == null)
    {
        head = root.SelectSingleNode("/head");
    }

    if (head == null)
    {
        // Create the missing head. When an <html> element exists the head belongs
        // inside it; prepending to the root would place it ahead of <html>.
        head = doc.CreateElement("head");
        HtmlNode container = root.SelectSingleNode("/html") ?? root;
        container.PrependChild(head);
    }

    // Reuse an existing base element or insert a new one at the top of the head.
    HtmlNode baseElement = head.SelectSingleNode("base");
    if (baseElement == null)
    {
        baseElement = doc.CreateElement("base");
        head.PrependChild(baseElement);
    }

    baseElement.SetAttributeValue("href", baseUri);

    return root.OuterHtml;
}
/// <summary>
/// Fills in the play.html file inside <paramref name="folder"/>: writes the clip name
/// into the title span and adds one jump-to link per timestamped agenda item.
/// </summary>
/// <param name="clip">Clip whose name and metadata are written into the page.</param>
/// <param name="folder">Folder containing play.html; the file is updated in place.</param>
private void CreatePlayer(ClipData clip, string folder)
{
    MetaDataData[] metadata = mediamanager.GetClipMetaData(clip.ID);

    // Only metadata with a positive timestamp and an "AgendaItem" payload becomes a
    // jump target. Entries without a payload are skipped instead of throwing.
    List<MetaDataData> indices = metadata
        .Where(x => x.TimeStamp > 0
                    && x.Payload != null
                    && x.Payload.GetType().Name == "AgendaItem")
        .OrderBy(x => x.TimeStamp)
        .ToList();

    string playerPath = System.IO.Path.Combine(folder, "play.html");

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.Load(playerPath);

    // SelectNodes returns null when nothing matches, so both lookups are guarded.
    var titleNodes = doc.DocumentNode.SelectNodes("//span[@id='title']");
    if (titleNodes != null)
    {
        foreach (HtmlNode titleNode in titleNodes)
        {
            titleNode.InnerHtml = clip.Name;
        }
    }

    var jumpLists = doc.DocumentNode.SelectNodes("//ul[@id='jumpto']");
    if (jumpLists != null)
    {
        foreach (HtmlNode jumpList in jumpLists)
        {
            foreach (MetaDataData index in indices)
            {
                // <li><a data-rel="{timestamp}" href="#">{name}</a></li>
                HtmlNode li = doc.CreateElement("li");
                jumpList.AppendChild(li);

                HtmlNode link = doc.CreateElement("a");
                link.Attributes.Add("data-rel", index.TimeStamp.ToString());
                link.Attributes.Add("href", "#");
                link.InnerHtml = index.Name;
                li.AppendChild(link);
            }
        }
    }

    doc.Save(playerPath);
}
/// <summary>
/// Transforms the text with Markdown, links resized images to their original
/// version, and returns the result of <c>Utills.Sanitize</c>.
/// </summary>
/// <param name="html">Markdown/HTML text to process.</param>
/// <returns>The sanitized markup.</returns>
public static string Sanitize(string html)
{
    // Markdown goes first; the image handling works on its HTML output.
    html = new Markdown().Transform(html);

    var document = new HtmlDocument();
    document.LoadHtml(html);

    var rootNode = document.DocumentNode;
    var imageNodes = rootNode != null ? rootNode.SelectNodes("//img") : null;

    if (imageNodes != null)
    {
        var wrappedAny = false;

        foreach (var imageNode in imageNodes)
        {
            var shownSrc = imageNode.GetAttributeValue("src", "");
            var originalSrc = shownSrc.Replace("rs/", "");

            // Only images whose src contains "rs/" and that are not already
            // directly inside an anchor get wrapped.
            if (shownSrc != originalSrc && imageNode.ParentNode.Name != "a")
            {
                var wrapper = document.CreateElement("a");
                wrapper.SetAttributeValue("href", originalSrc);
                wrapper.SetAttributeValue("target", "_blank");
                wrapper.AppendChild(imageNode.Clone());
                imageNode.ParentNode.ReplaceChild(wrapper, imageNode);
                wrappedAny = true;
            }
        }

        // Re-serialize only when the document was actually changed.
        if (wrappedAny)
        {
            html = rootNode.OuterHtml;
        }
    }

    return Utills.Sanitize(html);
}
/// <summary>
/// Wraps every img element carrying a _zoom attribute in an
/// <c>&lt;a class="screenzoom"&gt;</c> link, unless the image already sits directly
/// inside an anchor. The link copies the image's title (or, when there is no title
/// attribute, its alt text) and the image's border is set to 0.
/// </summary>
/// <param name="doc">Document to modify in place.</param>
private static void ProcessZoomImages(HtmlDocument doc)
{
    var nodes = doc.DocumentNode.SelectNodes("//img[@_zoom]");
    if (nodes == null) return;

    foreach (var node in nodes)
    {
        var parent = node.ParentNode;

        // Already linked: leave it alone. Ordinal ignore-case comparison avoids the
        // culture-sensitive ToLower() round trip.
        if (parent != null && string.Equals(parent.Name, "a", System.StringComparison.OrdinalIgnoreCase)) continue;

        var srcAttribute = node.Attributes["src"];
        if (srcAttribute == null || string.IsNullOrEmpty(srcAttribute.Value)) continue;

        var zoomAttribute = node.Attributes["_zoom"];
        if (zoomAttribute == null || string.IsNullOrEmpty(zoomAttribute.Value)) continue;

        node.SetAttributeValue("border", "0");

        // A _zoom value matching RxNumeric links to the image's own src;
        // any other value is used as the link target itself.
        var imgSrc = RxNumeric.IsMatch(zoomAttribute.Value) ? srcAttribute.Value : zoomAttribute.Value;

        // Detached images get the border but cannot be wrapped.
        if (parent == null) continue;

        var hrefNode = doc.CreateElement("a");
        hrefNode.SetAttributeValue("href", imgSrc);
        hrefNode.SetAttributeValue("class", "screenzoom");

        // Title for the link: the title attribute when present (even if empty),
        // otherwise the alt attribute.
        var titleAttribute = node.Attributes["title"] ?? node.Attributes["alt"];
        var title = titleAttribute != null ? titleAttribute.Value : null;
        if (!string.IsNullOrEmpty(title))
        {
            hrefNode.SetAttributeValue("title", title);
        }

        // Swap the link in for the image, then move the image inside the link.
        parent.ReplaceChild(hrefNode, node);
        hrefNode.AppendChild(node);
    }
}
/// <summary>
/// Rewrites an (X)HTML file in place: adds the built-in vertical style sheet when the
/// head links no CSS stylesheet, runs text replacement over every child of the body
/// and, when <c>isRemoveCss</c> is set, strips styles from them.
/// </summary>
/// <param name="path">File to read and overwrite.</param>
/// <param name="DoTransfer">Passed through to <c>RecursivelyReplaceText</c>.</param>
/// <param name="ToTraidional">Passed through to <c>RecursivelyReplaceText</c>.</param>
private void HtmlEdit(string path, bool DoTransfer, bool ToTraidional)
{
    // Read the whole file; using blocks release the handle even if reading fails,
    // which matters because the same path is reopened for writing below.
    string source;
    using (FileStream xhtmlloder = new FileStream(path, FileMode.Open, FileAccess.Read))
    using (StreamReader sr = new StreamReader(xhtmlloder))
    {
        source = sr.ReadToEnd();
    }

    // NOTE(review): this decodes entities across the entire document, so escaped
    // markup such as &lt; becomes live markup before parsing - confirm this is intended.
    source = HttpUtility.HtmlDecode(source);

    var doc = new HtmlAgilityPack.HtmlDocument();
    bool hasCSSStylesheet = false;
    doc.OptionWriteEmptyNodes = true;
    doc.LoadHtml(source);

    foreach (HtmlNode node in doc.DocumentNode.ChildNodes)
    {
        if (node.Name != "html")
        {
            continue;
        }

        foreach (HtmlNode bodynode in node.ChildNodes)
        {
            if (bodynode.Name == "head")
            {
                foreach (HtmlNode item in bodynode.ChildNodes)
                {
                    // GetAttributeValue tolerates <link> elements that lack rel or type.
                    if (item.Name == "link"
                        && item.GetAttributeValue("rel", "") == "stylesheet"
                        && item.GetAttributeValue("type", "") == "text/css")
                    {
                        hasCSSStylesheet = true;
                    }
                }

                // No linked stylesheet: embed the default vertical style instead.
                if (!hasCSSStylesheet)
                {
                    HtmlNode verticalstyle = doc.CreateElement("style");
                    verticalstyle.InnerHtml = ChoHoe.Properties.Resources.VerticalStyle;
                    bodynode.AppendChild(verticalstyle);
                }
            }

            if (bodynode.Name == "body")
            {
                foreach (HtmlNode body_child_Node in bodynode.ChildNodes)
                {
                    RecursivelyReplaceText(body_child_Node, ToTraidional, DoTransfer);
                    if (isRemoveCss)
                    {
                        RemoveHtmlStyle(body_child_Node);
                    }
                }
            }
        }
    }

    // Overwrite the original file with the modified document.
    using (FileStream sw = new FileStream(path, FileMode.Create))
    {
        doc.Save(sw, System.Text.Encoding.UTF8);
    }
}
/// <summary>
/// Downloads the HTML of every IcookMenu row not yet marked as downloaded, saves the
/// recipe's images into a per-recipe folder, converts the cleaned-up markup to a Word
/// document and marks the row as downloaded on success.
/// </summary>
public void DownLoadHtml()
{
    string sqlStr = "select Url from IcookMenu where IsDownload = 0";
    object[] menuUrlObj = sh.GetField(sqlStr);
    string title, fullFoldPath = string.Empty;
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

    // The result is not used; the request is kept in case it has side effects
    // (e.g. establishing a session) - TODO confirm and remove if not needed.
    hh.GetHtmlData(mainUrl, cookie);

    int htmlCount = 0;
    foreach (var menuUrl in menuUrlObj)
    {
        try
        {
            ArrayList menuResList = hh.GetHtmlData(menuUrl.ToString(), cookie);
            doc.LoadHtml(menuResList[1].ToString());

            // Title, also used as the folder name
            HtmlNode h1Node = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header-title']/h1[@class='title']");
            title = myUtils.StringConvert(h1Node.InnerText).Trim();
            title = myUtils.FilterPath(title);
            fullFoldPath = outPath + title + @"\";
            if (!Directory.Exists(fullFoldPath)) // create the target folder if it is missing
            {
                Directory.CreateDirectory(fullFoldPath);
            }

            HtmlNode headerNode = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']");
            HtmlNode headerChild = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']/div[@class='header-row center-row']");
            HtmlNode rightChild = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']/div[@class='header-row center-row']/div[@class='header-col right-col']");

            // Remove the right-hand column when the page has one.
            if (rightChild != null)
            {
                headerChild.RemoveChild(rightChild);
            }

            // Download the cover image and replace it with a "图片1" placeholder.
            HtmlNode headerImgParentNode = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-header recipe-details-block']/div[@class='header-row center-row']/div[@class='header-col left-col']/div[@class='recipe-cover']");
            HtmlNode headerImgChild = doc.DocumentNode.SelectSingleNode("//img[@class='main-pic']");
            string headerImgSrc = headerImgChild.GetAttributeValue("src", "");
            myUtils.DownLoadImage(headerImgSrc, fullFoldPath + @"图片1.jpg", cookie);
            headerImgParentNode.RemoveAllChildren();
            HtmlNode newheaderImgNode = doc.CreateElement("div");
            newheaderImgNode.InnerHtml = "图片1";
            headerImgParentNode.AppendChild(newheaderImgNode);
            string headerHtml = headerNode.InnerHtml; // header content

            HtmlNode mainNode = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-main']");
            HtmlNode mainChild = doc.DocumentNode.SelectSingleNode("//div[@class='recipe-details-main']/div[@class='recipe-ad-placeholder']");

            // The ad placeholder is optional; only remove it when present.
            if (mainChild != null)
            {
                mainNode.RemoveChild(mainChild);
            }

            // Download each step image and replace it with a numbered placeholder.
            // Step images are numbered from 2 because the cover image is number 1.
            HtmlNodeCollection imgParentNodeList = doc.DocumentNode.SelectNodes("//div[@class='step-cover']");
            int totalImgCount = 0, index = 0;
            if (imgParentNodeList != null)
            {
                totalImgCount = imgParentNodeList.Count;
                for (int i = 0; i < imgParentNodeList.Count; i++)
                {
                    try
                    {
                        int pictureNumber = i + 2;
                        HtmlNode imgParentNode = imgParentNodeList[i];
                        HtmlNode imgChildNode = imgParentNode.SelectSingleNode("a");
                        string imgUrl = imgChildNode.GetAttributeValue("href", "").Replace("medium_", "large_");
                        myUtils.DownLoadImage(imgUrl, fullFoldPath + $"图片{pictureNumber}.jpg", cookie);
                        imgParentNode.RemoveAllChildren();
                        HtmlNode newImgNode = doc.CreateElement("div");
                        newImgNode.InnerHtml = $"图片{pictureNumber}";
                        imgParentNode.AppendChild(newImgNode);
                        index++;
                    }
                    catch (Exception ex)
                    {
                        myUtils.WriteLog(ex);
                    }
                }
            }

            string mainStr = mainNode.InnerHtml; // main content
            string allStr = headerHtml + mainStr;

            // The title is scraped from the page and the URL comes from stored data;
            // double any single quotes so they cannot terminate the SQL string literal.
            // NOTE(review): parameterized queries would be the proper fix if the sh
            // helper supports them.
            string sqlTitle = (title ?? string.Empty).Replace("'", "''");
            string sqlUrl = menuUrl.ToString().Replace("'", "''");

            sqlStr = $"UPDATE IcookMenu SET Title = '{sqlTitle}' WHERE Url = '{sqlUrl}'";
            sh.RunSql(sqlStr);

            // Only convert to Word when every step image was downloaded.
            bool isOk = false;
            if (totalImgCount == index
                && myUtils.TransToWord(allStr, title, fullFoldPath)
                && myUtils.InsertPictureToWord(outPath, title))
            {
                sqlStr = $"UPDATE IcookMenu SET IsDownload = 1 WHERE Url = '{sqlUrl}'";
                sh.RunSql(sqlStr);
                htmlCount++;
                myUtils.UpdateLabel(label3, htmlCount);
                myUtils.UpdateListBox(listBox1, title);
                isOk = true;
            }

            // Incomplete result: discard the folder.
            if (!isOk && Directory.Exists(fullFoldPath))
            {
                Directory.Delete(fullFoldPath, true);
            }
        }
        catch (Exception e)
        {
            // Best effort: log the failure and continue with the next URL.
            myUtils.WriteLog(e);
        }
    }
}
/// <summary>
/// Downloads the HTML of every TeeprNewsUrl row not yet marked as downloaded, saves the
/// article's images into a per-article folder, converts the cleaned-up markup to a Word
/// document and marks the row as downloaded on success.
/// </summary>
public void DownLoadHtml()
{
    string sqlStr = "select Url from TeeprNewsUrl where IsDownload = 0";
    object[] newsUrlObj = sh.GetField(sqlStr);
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

    // NOTE(review): this first response is overwritten before use; the request is kept
    // in case it has side effects (e.g. establishing a session) - confirm.
    ArrayList httpList = hh.GetHtmlData(mainUrl, cookie);

    string title, htmlStr, fullFoldPath = string.Empty;
    int htmlCount = 0;
    foreach (var newsUrl in newsUrlObj)
    {
        try
        {
            httpList = hh.GetHtmlData(newsUrl.ToString(), cookie);
            doc.LoadHtml(httpList[1].ToString());

            // Title, also used as the folder name
            HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//h1[@class='title single-title']");
            title = titleNode.InnerText;
            title = myUtils.StringConvert(title).Trim();
            title = myUtils.FilterPath(title);
            fullFoldPath = outPath + title + @"\";
            if (!Directory.Exists(fullFoldPath))
            {
                Directory.CreateDirectory(fullFoldPath);
            }

            HtmlNode mianContentNode = doc.DocumentNode.SelectSingleNode("//div[@class='post-single-wrapper']");

            // Strip ad blocks. SelectNodes returns null when the article has none, which
            // previously aborted the whole article with a NullReferenceException.
            HtmlNodeCollection adNodeList = doc.DocumentNode.SelectNodes("//div[@class='post-single-wrapper']/div[@class='mid-post-ad-2']");
            if (adNodeList != null)
            {
                foreach (HtmlNode adNode in adNodeList)
                {
                    try
                    {
                        mianContentNode.RemoveChild(adNode);
                    }
                    catch (Exception ex)
                    {
                        myUtils.WriteLog(ex);
                    }
                }
            }

            // Download the first child of each wrapper div when it carries a usable src,
            // and replace it with a numbered placeholder.
            HtmlNodeCollection imgDivNodeList = doc.DocumentNode.SelectNodes("//div[@class='post-single-wrapper']/div");
            int imgCount = 0;
            if (imgDivNodeList != null)
            {
                for (int i = 0; i < imgDivNodeList.Count; i++)
                {
                    try
                    {
                        HtmlNode imgNode = imgDivNodeList[i].FirstChild;
                        if (imgNode == null)
                        {
                            continue;
                        }

                        string imgUrl = imgNode.GetAttributeValue("src", "");

                        // The original condition also OR-ed in checks for .jpg/.png/.JPEG,
                        // but those could never change the outcome once the URL is known to
                        // be non-empty, so the effective filter is: non-empty and containing
                        // neither "video" nor "width".
                        if (!string.IsNullOrEmpty(imgUrl) && !imgUrl.Contains("video") && !imgUrl.Contains("width"))
                        {
                            myUtils.DownLoadImage(imgUrl, fullFoldPath + $"图片{imgCount + 1}.jpg", cookie);
                            imgDivNodeList[i].RemoveChild(imgNode);
                            HtmlNode newImgNode = doc.CreateElement("div");
                            newImgNode.InnerHtml = $"图片{imgCount + 1}";
                            imgDivNodeList[i].AppendChild(newImgNode);
                            imgCount++;
                        }
                    }
                    catch (Exception e)
                    {
                        myUtils.WriteLog(e);
                    }
                }
            }

            htmlStr = mianContentNode.InnerHtml;

            // The title is scraped from the page and the URL comes from stored data;
            // double any single quotes so they cannot terminate the SQL string literal.
            // NOTE(review): parameterized queries would be the proper fix if the sh
            // helper supports them.
            string sqlTitle = (title ?? string.Empty).Replace("'", "''");
            string sqlUrl = newsUrl.ToString().Replace("'", "''");

            sqlStr = $"UPDATE TeeprNewsUrl SET Title = '{sqlTitle}' WHERE Url = '{sqlUrl}'";
            sh.RunSql(sqlStr);

            bool isOk = false;
            if (myUtils.TransToWord(htmlStr, title, fullFoldPath)
                && myUtils.InsertPictureToWord(outPath, title))
            {
                sqlStr = $"UPDATE TeeprNewsUrl SET IsDownload = 1 WHERE Url = '{sqlUrl}'";
                sh.RunSql(sqlStr);
                htmlCount++;
                myUtils.UpdateLabel(label3, htmlCount);
                myUtils.UpdateListBox(listBox1, title);
                isOk = true;
            }

            // Conversion failed: discard the folder.
            if (!isOk && Directory.Exists(fullFoldPath))
            {
                Directory.Delete(fullFoldPath, true);
            }
        }
        catch (Exception ew)
        {
            // Best effort: log the failure and continue with the next URL.
            myUtils.WriteLog(ew);
        }
    }
}
/// <summary>
/// Checks that <c>HtmlParser.ParseTagForAttributeValue</c> yields "NSACSSEA" when
/// asked for the value attribute of an input element carrying that value.
/// </summary>
public void ParseTagForAttributeValueTest_input()
{
    // arrange
    const string attributeName = "value";
    const string expected = "NSACSSEA";

    var document = new HtmlDocument();
    document.LoadHtml(this.htmlFileContents);
    document.OptionUseIdAttribute = true;

    HtmlNode input = document.CreateElement("input");
    input.SetAttributeValue("value", "NSACSSEA");

    var target = new HtmlParser(string.Empty);

    // act
    string actual = target.ParseTagForAttributeValue(input, attributeName);

    // assert
    Assert.AreEqual(expected, actual);
}