/// <summary>
/// Rewrites a link attribute on <paramref name="link"/> using the file path that
/// <paramref name="context"/> reports for the attribute's current value.
/// </summary>
/// <param name="link">The node whose attribute is rewritten in place.</param>
/// <param name="attribute">Name of the attribute holding the link (read and then overwritten).</param>
/// <param name="context">Build context queried through <c>GetFilePath</c>.</param>
/// <param name="relativePath">Path passed to <c>UpdateFilePath</c> and reported in the warning message.</param>
private static void UpdateHref(HtmlAgilityPack.HtmlNode link, string attribute, IDocumentBuildContext context, string relativePath)
{
    var rawValue = link.GetAttributeValue(attribute, null);

    // Nothing to do unless the helper recognises the value and yields a path for it.
    string fallbackPath;
    if (!RelativePath.TryGetPathWithoutWorkingFolderChar(rawValue, out fallbackPath))
    {
        return;
    }

    var resolvedFile = context.GetFilePath(rawValue);
    var fragment = link.GetAttributeValue("anchor", null);

    string newValue;
    if (resolvedFile == null)
    {
        Logger.LogWarning($"File {fallbackPath} is not found in {relativePath}.");
        // TODO: what to do if file path not exists?
        // CURRENT: fallback to the original one
        newValue = fallbackPath;
    }
    else
    {
        newValue = ((RelativePath)UpdateFilePath(resolvedFile, relativePath)).UrlEncode();
    }

    // A non-empty "anchor" attribute is folded into the link value and then dropped from the node.
    if (!string.IsNullOrEmpty(fragment))
    {
        newValue += fragment;
        link.Attributes.Remove("anchor");
    }

    link.SetAttributeValue(attribute, newValue);
}
/// <summary>
/// Rewrites a link attribute on <paramref name="link"/> when its value parses as a path
/// that <c>IsFromWorkingFolder()</c> accepts; other values are left untouched.
/// The "anchor" attribute is always removed from the node, and its value is appended
/// to the rewritten link when a rewrite happens.
/// </summary>
/// <param name="link">The node whose attribute is rewritten in place.</param>
/// <param name="attribute">Name of the attribute holding the link.</param>
/// <param name="context">Build context queried through <c>GetFilePath</c>.</param>
/// <param name="relativePath">Path subtracted from the target to form the new link; also reported in the warning.</param>
private static void UpdateHref(HtmlAgilityPack.HtmlNode link, string attribute, IDocumentBuildContext context, string relativePath)
{
    var rawHref = link.GetAttributeValue(attribute, null);
    var fragment = link.GetAttributeValue("anchor", null);
    link.Attributes.Remove("anchor");

    var parsed = TypeForwardedToRelativePath.TryParse(rawHref);
    if (parsed?.IsFromWorkingFolder() != true)
    {
        // Unparseable or not from the working folder: leave the attribute as it is.
        return;
    }

    string rewritten;
    var resolved = (TypeForwardedToRelativePath)context.GetFilePath(parsed.UrlDecode());
    if (resolved == null)
    {
        Logger.LogWarning($"File {parsed} is not found in {relativePath}.");
        // TODO: what to do if file path not exists?
        // CURRENT: fallback to the original one
        rewritten = (parsed.UrlDecode().RemoveWorkingFolder() - (TypeForwardedToRelativePath)relativePath).UrlEncode();
    }
    else
    {
        rewritten = (resolved.RemoveWorkingFolder() - (TypeForwardedToRelativePath)relativePath).UrlEncode();
    }

    link.SetAttributeValue(attribute, rewritten + fragment);
}
/// <summary>
/// Normalizes every table in the document in place: existing attributes on tables, rows and
/// cells are removed, each table receives a fixed set of presentation attributes, and each
/// cell of a row receives an equal (integer-percent) share of the row width.
/// </summary>
/// <param name="wordDoc">The parsed document whose tables are rewritten.</param>
private void PrepareTables(HtmlAgilityPack.HtmlDocument wordDoc)
{
    List<HtmlAgilityPack.HtmlNode> tables = wordDoc.DocumentNode.Descendants()
        .Where(node => node.OriginalName.Equals("table"))
        .ToList();

    foreach (HtmlAgilityPack.HtmlNode table in tables)
    {
        table.Attributes.RemoveAll();
        table.SetAttributeValue("border", "1");
        table.SetAttributeValue("cellspacing", "0");
        table.SetAttributeValue("cellpadding", "0");
        table.SetAttributeValue("style", "border-collapse:collapse;border:none;");
        table.SetAttributeValue("width", "100%");

        // Rows of nested tables are included here as well; they are simply processed again
        // when the nested table itself is visited, which yields the same result.
        List<HtmlAgilityPack.HtmlNode> rows = table.Descendants()
            .Where(node => node.OriginalName.Equals("tr"))
            .ToList();

        foreach (HtmlAgilityPack.HtmlNode row in rows)
        {
            row.Attributes.RemoveAll();

            // Only the row's own cells count as its columns. Using all descendants would also
            // count the cells of tables nested inside a cell and shrink the outer cells' width.
            List<HtmlAgilityPack.HtmlNode> cells = row.ChildNodes
                .Where(node => node.OriginalName.Equals("td"))
                .ToList();

            if (cells.Count == 0)
            {
                continue;
            }

            // Same for every cell of the row, so compute it once (integer division, as before).
            string width = string.Format("{0}%", 100 / cells.Count);

            foreach (HtmlAgilityPack.HtmlNode cell in cells)
            {
                cell.Attributes.RemoveAll();
                cell.SetAttributeValue("valign", "top");
                cell.SetAttributeValue("width", width);
            }
        }
    }
}
/// <summary>
/// Rewrites the <c>src</c> attribute of <paramref name="link"/> using <paramref name="map"/>.
/// When the current value is found in the map, the mapped value is passed through
/// <paramref name="updater"/> and written back; otherwise a warning is logged and the path
/// produced by <c>PathUtility.TryGetPathFromWorkingFolder</c> is written instead.
/// Values that helper rejects are left untouched.
/// </summary>
/// <param name="link">The node whose <c>src</c> attribute is rewritten in place.</param>
/// <param name="map">Lookup keyed by the original attribute value.</param>
/// <param name="updater">Transformation applied to the mapped value before it is written.</param>
private static void UpdateSrc(HtmlAgilityPack.HtmlNode link, Dictionary<string, string> map, Func<string, string> updater)
{
    const string SrcAttribute = "src";

    var original = link.GetAttributeValue(SrcAttribute, null);
    string fallbackPath;
    if (!PathUtility.TryGetPathFromWorkingFolder(original, out fallbackPath))
    {
        return;
    }

    string mapped;
    string newValue;
    if (map.TryGetValue(original, out mapped))
    {
        newValue = updater(mapped);
    }
    else
    {
        Logger.LogWarning($"File {fallbackPath} is not found.");
        // TODO: what to do if file path not exists?
        // CURRENT: fallback to the original one
        newValue = fallbackPath;
    }

    link.SetAttributeValue(SrcAttribute, newValue);
}
/// <summary>
/// Rewrites the <c>href</c> attribute of <paramref name="link"/> using <paramref name="map"/>.
/// A trailing <c>#fragment</c> is split off before the lookup and re-appended to the updated
/// value. Values starting with <c>#</c>, and values rejected by
/// <c>PathUtility.TryGetPathFromWorkingFolder</c>, are left untouched.
/// </summary>
/// <param name="link">The node whose <c>href</c> attribute is rewritten in place.</param>
/// <param name="map">Lookup keyed by the original attribute value without its fragment.</param>
/// <param name="updater">Transformation applied to the mapped value before it is written.</param>
private static void UpdateHref(HtmlAgilityPack.HtmlNode link, Dictionary<string, string> map, Func<string, string> updater)
{
    const string HrefAttribute = "href";

    var lookupKey = link.GetAttributeValue(HrefAttribute, null);
    string fallbackPath;
    if (!PathUtility.TryGetPathFromWorkingFolder(lookupKey, out fallbackPath))
    {
        return;
    }

    // For href, # may be appended, remove # before search file from map
    var hashPosition = lookupKey.IndexOf("#");
    if (hashPosition == 0)
    {
        return;
    }

    var fragment = string.Empty;
    if (hashPosition > 0)
    {
        fragment = lookupKey.Substring(hashPosition);
        lookupKey = lookupKey.Substring(0, hashPosition);
    }

    string mapped;
    if (!map.TryGetValue(lookupKey, out mapped))
    {
        Logger.LogWarning($"File {fallbackPath} is not found.");
        // TODO: what to do if file path not exists?
        // CURRENT: fallback to the original one
        link.SetAttributeValue(HrefAttribute, fallbackPath);
        return;
    }

    link.SetAttributeValue(HrefAttribute, updater(mapped) + fragment);
}
/// <summary>
/// Rewrites a link attribute on <paramref name="link"/> using the file path that
/// <paramref name="context"/> reports for the URL-decoded attribute value.
/// A trailing <c>#fragment</c> is split off before the lookup and re-appended to the
/// rewritten value. Values starting with <c>#</c>, and values rejected by
/// <c>PathUtility.TryGetPathFromWorkingFolder</c>, are left untouched.
/// </summary>
/// <param name="link">The node whose attribute is rewritten in place.</param>
/// <param name="attribute">Name of the attribute holding the link.</param>
/// <param name="context">Build context queried through <c>GetFilePath</c>.</param>
/// <param name="relativePath">Path passed to <c>UpdateFilePath</c> and reported in the warning message.</param>
private static void UpdateHref(HtmlAgilityPack.HtmlNode link, string attribute, IDocumentBuildContext context, string relativePath)
{
    var lookupKey = link.GetAttributeValue(attribute, null);
    string fallbackPath;
    if (!PathUtility.TryGetPathFromWorkingFolder(lookupKey, out fallbackPath))
    {
        return;
    }

    // For href, # may be appended, remove # before search file from map
    var hashPosition = lookupKey.IndexOf("#");
    if (hashPosition == 0)
    {
        return;
    }

    var fragment = string.Empty;
    if (hashPosition > 0)
    {
        fragment = lookupKey.Substring(hashPosition);
        lookupKey = lookupKey.Substring(0, hashPosition);
    }

    string resolved = context.GetFilePath(HttpUtility.UrlDecode(lookupKey));
    if (resolved == null)
    {
        Logger.LogWarning($"File {fallbackPath} is not found in {relativePath}.");
        // TODO: what to do if file path not exists?
        // CURRENT: fallback to the original one
        link.SetAttributeValue(attribute, fallbackPath);
        return;
    }

    string encoded = ((RelativePath)UpdateFilePath(resolved, relativePath)).UrlEncode();
    link.SetAttributeValue(attribute, encoded + fragment);
}
/// <summary>
/// Rewrites a link attribute on <paramref name="link"/>. The path part of the attribute value
/// is parsed as a relative path and described in a <c>FileLinkInfo</c>; the final href comes
/// from <c>_settings.HrefGenerator</c> when one is configured and returns a value, otherwise
/// from the computed <c>Href</c>. The original query string is re-appended, followed by the
/// node's "anchor" attribute value or, when that is absent, the original fragment.
/// The "anchor" attribute is always removed from the node. When the path part cannot be
/// parsed, the link attribute itself is left unchanged.
/// </summary>
/// <param name="link">The node whose attribute is rewritten in place.</param>
/// <param name="attribute">Name of the attribute holding the link.</param>
/// <param name="context">Build context queried through <c>GetFilePath</c>.</param>
/// <param name="sourceFilePath">Stored as <c>FromFileInSource</c> and used as the base for source-side links.</param>
/// <param name="destFilePath">Stored as <c>FromFileInDest</c> and used as the base for destination-side links.</param>
private void UpdateHref(HtmlAgilityPack.HtmlNode link, string attribute, IDocumentBuildContext context, string sourceFilePath, string destFilePath)
{
    var rawHref = link.GetAttributeValue(attribute, null);
    var anchorOverride = link.GetAttributeValue("anchor", null);
    link.Attributes.Remove("anchor");

    var pathPart = UriUtility.GetPath(rawHref);
    var parsedPath = RelativePath.TryParse(pathPart);
    if (parsedPath == null)
    {
        return;
    }

    var linkInfo = new FileLinkInfo
    {
        FromFileInSource = sourceFilePath,
        FromFileInDest = destFilePath,
    };

    if (!parsedPath.IsFromWorkingFolder())
    {
        // Not from the working folder: the href keeps its original path part.
        linkInfo.FileLinkInSource = parsedPath.UrlDecode();
        linkInfo.ToFileInSource = ((RelativePath)sourceFilePath + parsedPath).RemoveWorkingFolder();
        linkInfo.FileLinkInDest = linkInfo.FileLinkInSource;
        linkInfo.Href = pathPart;
    }
    else
    {
        var decodedTarget = parsedPath.UrlDecode();
        linkInfo.ToFileInSource = decodedTarget.RemoveWorkingFolder();
        linkInfo.ToFileInDest = RelativePath.GetPathWithoutWorkingFolderChar(context.GetFilePath(decodedTarget));
        linkInfo.FileLinkInSource = decodedTarget - (RelativePath)sourceFilePath;

        if (linkInfo.ToFileInDest == null)
        {
            // No destination file is known: build the href from the source-side paths.
            linkInfo.Href = (decodedTarget.RemoveWorkingFolder() - ((RelativePath)sourceFilePath).RemoveWorkingFolder()).UrlEncode();
        }
        else
        {
            var destinationLink = (RelativePath)linkInfo.ToFileInDest - (RelativePath)destFilePath;
            linkInfo.FileLinkInDest = destinationLink;
            linkInfo.Href = destinationLink.UrlEncode();
        }
    }

    var finalHref = _settings.HrefGenerator?.GenerateHref(linkInfo) ?? linkInfo.Href;
    var query = UriUtility.GetQueryString(rawHref);
    var fragment = anchorOverride ?? UriUtility.GetFragment(rawHref);
    link.SetAttributeValue(attribute, finalHref + query + fragment);
}
/// <summary>
/// Downloads every image referenced by an <c>img</c> element of the document into
/// <paramref name="folderName"/> and rewrites each <c>src</c> attribute to the saved file path.
/// Saved files are named after the image's position among the document's <c>img</c> elements,
/// keeping the extension of the original source.
/// </summary>
/// <param name="wordDoc">The parsed document whose images are localized in place.</param>
/// <param name="folderName">Folder that receives the downloaded files.</param>
private void PrepareImages(HtmlAgilityPack.HtmlDocument wordDoc, string folderName)
{
    List<HtmlAgilityPack.HtmlNode> images = wordDoc.DocumentNode.Descendants()
        .Where(node => node.OriginalName.Equals("img"))
        .ToList();

    if (images.Count == 0)
    {
        return;
    }

    // One client is enough for all downloads; the credentials are the same for every image.
    using (System.Net.WebClient client = new System.Net.WebClient())
    {
        client.Credentials = System.Net.CredentialCache.DefaultCredentials;

        for (int i = 0; i < images.Count; i++)
        {
            HtmlAgilityPack.HtmlNode image = images[i];
            string src = image.GetAttributeValue("src", string.Empty);

            // An <img> without a usable src has nothing to download; skip it instead of
            // letting the download call fail and abort the remaining images.
            if (string.IsNullOrWhiteSpace(src))
            {
                continue;
            }

            byte[] data = client.DownloadData(src);

            // Path.Combine uses the platform's directory separator instead of a hard-coded backslash.
            string imageSrc = Path.Combine(folderName, i + Path.GetExtension(src));
            File.WriteAllBytes(imageSrc, data);
            image.SetAttributeValue("src", imageSrc);
        }
    }
}
/// <summary>
/// Recursively copies <paramref name="input"/> into <paramref name="output"/>, keeping only
/// elements whose name is listed in <c>availableTags</c>. Text nodes and <c>img</c> elements
/// are copied as they are; other kept elements are copied without attributes except for
/// <c>href</c> and the bold / italic / underline declarations of <c>style</c>.
/// Elements that are not kept are dropped together with their whole subtree.
/// </summary>
/// <param name="input">The node to copy from.</param>
/// <param name="output">The node that receives the copied content as appended children.</param>
private void PrepareHtml(HtmlAgilityPack.HtmlNode input, HtmlAgilityPack.HtmlNode output)
{
    HtmlAgilityPack.HtmlNodeType kind = input.NodeType;

    if (kind == HtmlAgilityPack.HtmlNodeType.Text)
    {
        output.AppendChild(input.CloneNode(true));
        return;
    }

    // Children of a document node are appended directly to the output node.
    HtmlAgilityPack.HtmlNode target = output;

    if (kind == HtmlAgilityPack.HtmlNodeType.Element)
    {
        if (!availableTags.Contains(input.OriginalName))
        {
            return;
        }

        if (input.OriginalName.Equals("img"))
        {
            output.AppendChild(input.CloneNode(true));
            return;
        }

        target = output.AppendChild(input.CloneNode(false));

        string originalStyle = input.GetAttributeValue("style", string.Empty);
        string link = input.GetAttributeValue("href", string.Empty);
        target.Attributes.RemoveAll();

        string keptStyle = KeepEmphasisDeclarations(originalStyle);
        if (keptStyle != string.Empty)
        {
            target.SetAttributeValue("style", keptStyle);
        }

        if (link != string.Empty)
        {
            target.SetAttributeValue("href", link);
        }
    }
    else if (kind != HtmlAgilityPack.HtmlNodeType.Document)
    {
        // Any other node type is not copied.
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode child in input.ChildNodes)
    {
        PrepareHtml(child, target);
    }
}

/// <summary>
/// Returns the <c>;</c>-separated declarations of <paramref name="style"/> that mention
/// bold font weight, italic font style or underline text decoration, each followed by <c>;</c>.
/// </summary>
private static string KeepEmphasisDeclarations(string style)
{
    string kept = string.Empty;
    if (style == string.Empty)
    {
        return kept;
    }

    foreach (string declaration in style.Split(';'))
    {
        bool isBold = declaration.Contains("font-weight") && declaration.Contains("bold");
        bool isItalic = declaration.Contains("font-style") && declaration.Contains("italic");
        bool isUnderline = declaration.Contains("text-decoration") && declaration.Contains("underline");

        if (isBold || isItalic || isUnderline)
        {
            kept += declaration + ";";
        }
    }

    return kept;
}