/// <summary>
/// Converts Markdown to HTML and sanitizes the result.
/// </summary>
/// <remarks>
/// The Markdown is parsed to HTML, every tag whose name is not listed in
/// <c>HtmlTags</c> has its leading <c>&lt;</c> escaped so it is shown as text
/// (e.g. pasted e-mail addresses or XML), and the result is run through an
/// <c>HtmlSanitizer</c> from which form, button and input were removed.
/// </remarks>
/// <param name="markdown">Markdown source text.</param>
/// <returns>The sanitized HTML.</returns>
public string ConvertMarkdown(string markdown)
{
    var sanitizer = new HtmlSanitizer();
    sanitizer.RemovingAttribute += OnRemovingAttribute;

    // disallow form elements
    sanitizer.AllowedTags.Remove("form");
    sanitizer.AllowedTags.Remove("button");
    sanitizer.AllowedTags.Remove("input");

    // parse the markdown to HTML
    markdown = Westwind.AspNetCore.Markdown.Markdown.Parse(markdown);

    // encode any custom tags (pasting e-mails and other markup (XML))
    // From author of HtmlSanitizer
    // https://gist.github.com/mganss/00bec2c2245c0ef86d9c82d6211def7b
    markdown = HtmlRegex.Replace(markdown, m =>
    {
        var tagName = m.Groups[1].Value;
        if (!HtmlTags.Contains(tagName))
        {
            // Replace the first character of the match (assumed to be '<' -
            // TODO confirm against HtmlRegex) with its entity. Re-adding a
            // literal "<" here would leave the markup unchanged and let the
            // sanitizer drop the unknown tag instead of displaying it.
            return "&lt;" + m.Value.Substring(1);
        }

        return m.Value;
    });

    return sanitizer.Sanitize(markdown);
}
/// <summary>
/// Returns a new array holding every element of <paramref name="subStack"/>
/// followed by <paramref name="tag"/>. The input array is not modified.
/// </summary>
/// <param name="subStack">Existing tag stack.</param>
/// <param name="tag">Tag to place at the end of the new stack.</param>
/// <returns>A freshly allocated array that is one element longer.</returns>
public static HtmlTags[] PushTag(HtmlTags[] subStack, HtmlTags tag)
{
    int oldLength = subStack.Length;
    var grown = new HtmlTags[oldLength + 1];

    subStack.CopyTo(grown, 0);
    grown[oldLength] = tag;

    return grown;
}
/// <summary>
/// Reads the AJAX header from the request and splits it into key/value pairs.
/// </summary>
/// <param name="request">Request whose headers are inspected.</param>
/// <returns>
/// The parsed pairs; an empty collection when the header is absent.
/// </returns>
static NameValueCollection ExtractAJAXHeader(HttpRequest request)
{
    NameValueCollection pairs = new NameValueCollection();

    string headerValue = request.Headers[Consts.BackEndConenction.AJAX_Headers];
    if (headerValue == null)
    {
        return pairs;
    }

    headerValue = HtmlTags.RemoveBraketsFromStartEnd(headerValue.Trim());

    string[] entries = headerValue.Split(
        new[] { _AJAXHeader_ItemsSeperator },
        StringSplitOptions.RemoveEmptyEntries);

    foreach (string entry in entries)
    {
        string[] keyAndValue = entry.Split(
            new[] { _AJAXHeader_ValuesSeperator },
            StringSplitOptions.RemoveEmptyEntries);

        // An entry needs at least a key and a value; anything shorter is skipped.
        if (keyAndValue.Length <= 1)
        {
            continue;
        }

        string key = HtmlTags.RemoveQuotesFromStartEnd(keyAndValue[0]);
        string value = HtmlTags.RemoveQuotesFromStartEnd(keyAndValue[1]);
        pairs.Add(key, value);
    }

    return pairs;
}
/// <summary>
/// Creates the template and assigns a new default-constructed
/// <c>HtmlTags</c> instance to each of its tag properties.
/// </summary>
public TemplateAdditionalInfo()
{
    this.ListAllowed = new HtmlTags();
    this.Detail = new HtmlTags();
    this.Back = new HtmlTags();
    this.ExactlyAirline = new HtmlTags();
    this.OnlyDirect = new HtmlTags();
}
/// <summary>
/// Builds the source of a JavaScript function <c>getBlankActionItem</c> that
/// returns <c>ko.mapping.fromJS</c> applied to the serialized blank item.
/// </summary>
/// <returns>The JavaScript function source.</returns>
public string GetJavascript()
{
    // The blank item carries a single empty Description field.
    var blankItem = new { Description = "" };
    var serialized = Serialize.Javascript(blankItem);

    return string.Format(
        "function getBlankActionItem(){{ return ko.mapping.fromJS({0}); }}",
        serialized);
}
/// <summary>
/// Copy constructor. Each tag descriptor of the new instance is a separate
/// <c>HtmlTags</c> object created through the <c>HtmlTags</c> copy constructor,
/// so replacing or editing a descriptor on one template does not affect the
/// other. Plain assignment would only copy the references and both templates
/// would keep sharing the same descriptor objects.
/// </summary>
/// <param name="searchEngine">The template to copy from.</param>
public TemplateSearchEngine(TemplateSearchEngine searchEngine)
{
    DeparturePoint = CopyTags(searchEngine.DeparturePoint);
    DepartureDate = CopyTags(searchEngine.DepartureDate);
    ArrivalPoint = CopyTags(searchEngine.ArrivalPoint);
    ArrivalDate = CopyTags(searchEngine.ArrivalDate);
    Roundtrip = CopyTags(searchEngine.Roundtrip);
    ConfirmationButton = CopyTags(searchEngine.ConfirmationButton);
}

/// <summary>
/// Copies one descriptor; a null descriptor stays null, as it did with the
/// former plain assignment.
/// </summary>
private static HtmlTags CopyTags(HtmlTags source)
{
    return source == null ? null : new HtmlTags(source);
}
/// <summary>
/// Creates the template and assigns a new default-constructed
/// <c>HtmlTags</c> instance to each of its tag properties.
/// </summary>
public TemplateSearchEngine()
{
    this.DeparturePoint = new HtmlTags();
    this.ArrivalPoint = new HtmlTags();
    this.DepartureDate = new HtmlTags();
    this.ArrivalDate = new HtmlTags();
    this.Roundtrip = new HtmlTags();
    this.ConfirmationButton = new HtmlTags();
}
/// <summary>
/// Builds the source of a JavaScript function <c>getBlankFeedback</c> that
/// returns <c>ko.mapping.fromJS</c> applied to the serialized blank feedback.
/// </summary>
/// <returns>The JavaScript function source.</returns>
public string GetJavascript()
{
    // The blank feedback carries empty Notes and Rating fields.
    var blankFeedback = new { Notes = "", Rating = "" };
    var serialized = Serialize.Javascript(blankFeedback);

    return string.Format(
        "function getBlankFeedback(){{ return ko.mapping.fromJS({0}); }}",
        serialized);
}
/// <summary>
/// Appends <paramref name="content"/> to <c>Html</c> and records a tag span
/// covering exactly the appended text. When <c>Html</c> already has content,
/// a "[...]" separator line is inserted first and is not part of the span.
/// </summary>
/// <param name="content">Text to append.</param>
public void Append(string content)
{
    bool hasEarlierContent = Html.Length > 0;
    if (hasEarlierContent)
    {
        Html.Append("\r\n[...] ");
    }

    // Inclusive bounds of the text about to be appended, measured after the
    // optional separator.
    int firstIdx = Html.Length;
    int lastIdx = firstIdx + content.Length - 1;

    HtmlTags.Add(new HtmlTagSpan(new Span(firstIdx, lastIdx)));
    Html.Append(content);
}
/// <summary>
/// Decodes the response data to a string, collects the page metadata enabled
/// in the user options (frameset flag, title, doctype) and runs the main
/// processing overload on the HTML.
/// </summary>
/// <returns>The processed HTML.</returns>
public override string Execute()
{
    Encoding detectedEncoding;
    string html = StringStream.GetString(
        WebData.ResponseData,
        WebData.ResponseInfo.ContentType,
        _UserOptions.ForceEncoding,
        true,
        out detectedEncoding);

    ContentEncoding = detectedEncoding;

    if (_UserOptions.Frames)
    {
        IsFrameSet = HtmlTags.IsFramesetHtml(ref html);
    }

    if (_UserOptions.PageTitle)
    {
        PageTitle = HtmlParser.GetTagContent(ref html, "title");
    }

    if (_UserOptions.DocType)
    {
        DocType = HtmlTags.GetDocType(ref html);
    }

    // Full page url, e.g. http://Site.com/users/profile.aspx?uid=90
    string fullUrl = WebData.ResponseInfo.ResponseUrl;

    // Page root, used to resolve relative paths in the source html,
    // e.g. "http://Site.com/users/" for "http://Site.com/users/profile.aspx".
    string rootOfPage = UrlProvider.GetPagePath(fullUrl);

    // Page url without query, used to resolve relative query parameters,
    // e.g. "http://Site.com/profile.aspx" for "http://Site.com/profile.aspx?uid=90".
    string urlWithoutQuery = UrlProvider.GetPageAbsolutePath(fullUrl);

    Execute(
        ref html,
        fullUrl,
        urlWithoutQuery,
        rootOfPage,
        WebData.ResponseInfo.ResponseRootUrl);

    return html;
}
/// <summary>
/// Creates the template and assigns a new default-constructed
/// <c>HtmlTags</c> instance to each of its tag properties.
/// </summary>
public TemplateResultEngine()
{
    // Route
    this.DeparturePoint = new HtmlTags();
    this.ArrivalPoint = new HtmlTags();
    this.DepartureDate = new HtmlTags();
    this.ArrivalDate = new HtmlTags();

    // Flight
    this.AirlineName = new HtmlTags();
    this.AirlineNumber = new HtmlTags();
    this.DepartureTime = new HtmlTags();
    this.ArrivalTime = new HtmlTags();

    // Pricing
    this.Tariff = new HtmlTags();
    this.Tax = new HtmlTags();
    this.Fee = new HtmlTags();
    this.Price = new HtmlTags();
}
/// <summary>
/// Classifies the HTML construct that starts at the current character of
/// <paramref name="line"/> and opens the matching HTML block: comment,
/// document type, CDATA, processing instruction, or a block started by a tag
/// name known to <c>HtmlTags</c>.
/// </summary>
/// <returns>
/// The state returned by <c>CreateHtmlBlock</c>, or <c>BlockState.None</c>
/// when nothing recognisable starts here.
/// </returns>
private BlockState TryParseTagType16(BlockProcessor state, StringSlice line, int startColumn, int startPosition)
{
    char ch = line.CurrentChar;

    if (ch == '?')
    {
        // group 3
        return CreateHtmlBlock(state, HtmlBlockType.ProcessingInstruction, startColumn, startPosition);
    }

    if (ch == '!')
    {
        ch = line.NextChar();

        if (ch == '-' && line.PeekChar() == '-')
        {
            // group 2
            return CreateHtmlBlock(state, HtmlBlockType.Comment, startColumn, startPosition);
        }

        if (ch.IsAlphaUpper())
        {
            // group 4
            return CreateHtmlBlock(state, HtmlBlockType.DocumentType, startColumn, startPosition);
        }

        if (ch == '[' && line.Match("CDATA[", 1))
        {
            // group 5
            return CreateHtmlBlock(state, HtmlBlockType.CData, startColumn, startPosition);
        }

        return BlockState.None;
    }

    bool isClosingTag = ch == '/';
    if (isClosingTag)
    {
        ch = line.NextChar();
    }

    // Collect the lower-cased tag name (at most 10 characters).
    Span<char> nameBuffer = stackalloc char[10];
    int nameLength = 0;
    while (nameLength < nameBuffer.Length && ch.IsAlphaNumeric())
    {
        nameBuffer[nameLength] = char.ToLowerInvariant(ch);
        nameLength++;
        ch = line.NextChar();
    }

    // The name must be followed by '>', a self-closing "/>" (opening tags
    // only), whitespace, or the end of the slice.
    bool endsProperly =
        ch == '>'
        || (!isClosingTag && ch == '/' && line.PeekChar() == '>')
        || ch.IsWhitespace()
        || ch == '\0';

    if (!endsProperly || nameLength == 0)
    {
        return BlockState.None;
    }

    if (!HtmlTags.TryMatchExact(nameBuffer.Slice(0, nameLength), out var match))
    {
        return BlockState.None;
    }

    int tagIndex = match.Value;
    switch (tagIndex)
    {
        // Cannot start with </script </pre or </style or </textArea
        // NOTE(review): 49/50/53/56 are presumably the positions of those
        // four names in the HtmlTags table - confirm against that table.
        case 49:
        case 50:
        case 53:
        case 56:
            if (ch == '/' || isClosingTag)
            {
                return BlockState.None;
            }

            return CreateHtmlBlock(state, HtmlBlockType.ScriptPreOrStyle, startColumn, startPosition);

        default:
            return CreateHtmlBlock(state, HtmlBlockType.InterruptingBlock, startColumn, startPosition);
    }
}
/// <summary>
/// Returns the <see cref="System.Xml.Linq.XName"/> for the element name that
/// <c>GetHtmlTagName</c> yields for <paramref name="tag"/>.
/// </summary>
/// <param name="tag">Tag to translate.</param>
/// <returns>The XName built from the tag's element name.</returns>
public static System.Xml.Linq.XName GetHtmlXTagName(HtmlTags tag)
{
    string elementName = GetHtmlTagName(tag);
    return System.Xml.Linq.XName.Get(elementName);
}
/// <summary>
/// Builds the closing tag text for <paramref name="tag"/>, using the tag's
/// <c>ToString()</c> value as the element name.
/// </summary>
/// <param name="tag">Tag to close.</param>
/// <returns>Text of the form <c>&lt;/name&gt;</c>.</returns>
public string CloseHtmlTag(HtmlTags tag)
{
    string elementName = tag.ToString();
    return $"</{elementName}>";
}
/// <summary>
/// Builds the opening tag text for <paramref name="tag"/>: the tag's
/// <c>ToString()</c> value followed by the class and style attribute text,
/// each separated by a single space.
/// </summary>
/// <param name="tag">Tag to open.</param>
/// <param name="cssClass">Value passed to <c>GetClassNameAttribute</c>.</param>
/// <param name="style">Value passed to <c>GetStyleAttribute</c>.</param>
/// <returns>The opening tag text.</returns>
public string OpenHtmlTag(HtmlTags tag, string cssClass, string style)
{
    var elementName = tag.ToString();
    var classAttribute = GetClassNameAttribute(cssClass);
    var styleAttribute = GetStyleAttribute(style);

    return $"<{elementName} {classAttribute} {styleAttribute}>";
}
/// <summary>
/// Walks through the url values located by <c>CSSParser</c> in
/// <paramref name="htmlCode"/> and replaces each one with
/// <paramref name="newUrl"/> formatted with the resolved, encoded original url.
/// </summary>
/// <param name="htmlCode">Text that is scanned and rewritten in place.</param>
/// <param name="currentUrlWithoutParameters">Passed to <c>UrlProvider.JoinUrl</c> to resolve relative urls.</param>
/// <param name="newUrl">Format string; <c>{0}</c> receives the encoded url.</param>
/// <param name="replacmentBasePath">Passed to <c>UrlProvider.JoinUrl</c>.</param>
/// <param name="siteAddress">Passed to <c>UrlProvider.JoinUrl</c>.</param>
/// <param name="encodeUrl">True to use <c>UrlProvider.EncodeUrl</c>, false to only apply <c>UrlProvider.EscapeUrlQuery</c>.</param>
/// <param name="forImportRule">True to locate import-rule urls instead of style url values.</param>
public static void ReplaceCSSClassStyleUrl(ref string htmlCode, string currentUrlWithoutParameters, string newUrl, string replacmentBasePath, string siteAddress, bool encodeUrl, bool forImportRule)
{
    int searchFrom = 0;
    string pendingBookmark = "";

    do
    {
        // Per the original note, import rules that use the url option are
        // left to other code (since v4.0).
        TextRange range = forImportRule
            ? CSSParser.FindImportRuleUrlPosition(ref htmlCode, searchFrom, false, false)
            : CSSParser.FindCSSClassStyleUrlValuePosition(ref htmlCode, searchFrom);

        if (range.Start == -1)
        {
            break;
        }

        if (range.End == -1)
        {
            // Value start found but no end: resume at the next space.
            searchFrom = StringCompare.IndexOfMatchCase(ref htmlCode, ' ', range.Start);
            continue;
        }

        // The next search resumes after this value.
        searchFrom = range.End;

        // Correct the value position according to quote existence.
        range = HtmlTags.CorrectValueIfQuoteExists(ref htmlCode, range);

        string url = htmlCode.Substring(range.Start, range.End - range.Start).Trim();

        // When a url attribute wrapper was removed, the replacement is quoted.
        bool wrapInQuotes = HtmlTags.RemoveUrlAttribCssLocation(url, out url);
        url = HtmlTags.RemoveQuotesFromTagAttributeValue(url);

        // A pure bookmark link is left untouched.
        if (url.StartsWith("#"))
        {
            continue;
        }

        // Convert virtual url to absolute, then drop characters such as tab
        // and line feed.
        url = UrlProvider.JoinUrl(url, currentUrlWithoutParameters, replacmentBasePath, siteAddress);
        url = UrlProvider.IgnoreInvalidUrlCharctersInHtml(url);

        // Detach a trailing bookmark so it can be re-attached after encoding.
        if (StringCompare.IndexOfMatchCase(ref url, '#') != -1)
        {
            url = UrlBuilder.RemoveUrlBookmark(url, out pendingBookmark);
        }

        url = HttpUtility.HtmlDecode(url);
        url = encodeUrl
            ? UrlProvider.EncodeUrl(url)
            : UrlProvider.EscapeUrlQuery(url);

        string replacement = string.Format(newUrl, url);

        if (pendingBookmark.Length > 0)
        {
            replacement += pendingBookmark;
            pendingBookmark = "";
        }

        if (wrapInQuotes)
        {
            replacement = "\"" + replacement + "\"";
        }

        htmlCode = htmlCode
            .Remove(range.Start, range.End - range.Start)
            .Insert(range.Start, replacement);
    }
    while (searchFrom != -1);
}
/// <summary>
/// Creates a tag with the given name and sets <c>TagType</c> to the value
/// <c>HtmlTags.GetTagType</c> returns for that name.
/// </summary>
/// <param name="tagName">Name of the tag.</param>
private Tag(string tagName)
{
    this.TagName = tagName;
    this.TagType = HtmlTags.GetTagType(tagName);
}
/// <summary>
/// For every <paramref name="tagName"/> tag whose <paramref name="attr1"/>
/// equals <paramref name="attr1Value"/> (ignoring case and surrounding
/// whitespace), rewrites the url in <paramref name="attr2"/> to
/// <paramref name="newPageFormat"/> formatted with the resolved, encoded url.
/// </summary>
/// <param name="htmlCodes">Html that is scanned and rewritten in place.</param>
/// <param name="pageUrlNoQuery">Passed to <c>UrlProvider.JoinUrl</c> to resolve relative urls.</param>
/// <param name="newPageFormat">Format string; <c>{0}</c> receives the encoded url.</param>
/// <param name="pagePath">Passed to <c>UrlProvider.JoinUrl</c>.</param>
/// <param name="siteRootUrl">Passed to <c>UrlProvider.JoinUrl</c>.</param>
/// <param name="encodeUrl">True to use <c>UrlProvider.EncodeUrl</c>, false to only apply <c>UrlProvider.EscapeUrlQuery</c>.</param>
/// <param name="tagName">Tag to look for.</param>
/// <param name="attr1">Attribute that selects the tags to process.</param>
/// <param name="attr1Value">Required value of <paramref name="attr1"/>.</param>
/// <param name="attr2">Attribute whose value is rewritten.</param>
public static void ReplaceTwoAttributeTagsValue(ref string htmlCodes, string pageUrlNoQuery, string newPageFormat, string pagePath, string siteRootUrl, bool encodeUrl, string tagName, string attr1, string attr1Value, string attr2)
{
    TextRange attr1Result = new TextRange(-1, -1);
    TextRange attr2Result = new TextRange(-1, -1);
    int cursorPos = 0;
    string tmp, actionSrc = "";

    do
    {
        attr1Result = HtmlParser.GetTagAttributeValuePos(ref htmlCodes, tagName, attr1, cursorPos);
        if (attr1Result.Start > -1 && attr1Result.End > -1)
        {
            string tmpRelType = htmlCodes.Substring(attr1Result.Start, attr1Result.End - attr1Result.Start);

            // Ordinal ignore-case comparison: ToLower() is culture-sensitive
            // and makes e.g. "STYLESHEET" fail to match "stylesheet" under a
            // Turkish culture.
            if (!string.Equals(tmpRelType.Trim(), attr1Value.Trim(), System.StringComparison.OrdinalIgnoreCase))
            {
                cursorPos = attr1Result.Start;

                // 'continue' jumps to the loop condition, which tests
                // attr2Result.Start. Without this assignment a non-matching
                // tag seen before any matching one would end the loop while
                // attr2Result is still (-1, -1), and later tags would never
                // be processed.
                attr2Result.Start = attr1Result.Start;
                continue;
            }
        }
        else
        {
            break;
        }

        attr2Result = HtmlParser.GetTagAttributeValuePos(ref htmlCodes, tagName, attr2, cursorPos);
        if (attr2Result.Start == -1)
        {
            break;
        }

        if (attr2Result.Start > -1 && attr2Result.End > -1)
        {
            cursorPos = attr2Result.Start;

            // Correct value position according to quotes existence
            attr2Result = HtmlTags.CorrectValueIfQuoteExists(ref htmlCodes, attr2Result);

            // Get the value
            actionSrc = htmlCodes.Substring(attr2Result.Start, attr2Result.End - attr2Result.Start);

            // Convert virtual url to absolute
            actionSrc = UrlProvider.JoinUrl(actionSrc, pageUrlNoQuery, pagePath, siteRootUrl);

            // Delete invalid characters such as tab and line feed
            actionSrc = UrlProvider.IgnoreInvalidUrlCharctersInHtml(actionSrc);

            // A bookmark part, if any, is stripped and discarded
            if (actionSrc.IndexOf('#') != -1)
            {
                actionSrc = UrlBuilder.RemoveUrlBookmark(actionSrc, out tmp);
            }

            // Get clear url
            actionSrc = HttpUtility.HtmlDecode(actionSrc);

            if (encodeUrl)
            {
                actionSrc = UrlProvider.EncodeUrl(actionSrc);
            }
            else
            {
                // just url safe
                actionSrc = UrlProvider.EscapeUrlQuery(actionSrc);
            }

            // Add it to our url
            actionSrc = string.Format(newPageFormat, actionSrc);

            // Make it safe
            actionSrc = HttpUtility.HtmlEncode(actionSrc);

            // Replace the old url with the new one
            htmlCodes = htmlCodes.Remove(attr2Result.Start, attr2Result.End - attr2Result.Start);
            htmlCodes = htmlCodes.Insert(attr2Result.Start, actionSrc);
        }
        else
        {
            // Value start found but no end: resume after the end of the tag.
            cursorPos = attr2Result.Start;
            cursorPos = StringCompare.IndexOfMatchCase(ref htmlCodes, ">", cursorPos);
        }
    }
    while (attr2Result.Start != -1);
}
/// <summary>
/// Locates an attribute value inside a tag. Scanning begins one character
/// after <paramref name="valueStart"/>, skips leading whitespace, and stops at
/// the matching quote for quoted values, at the first whitespace for unquoted
/// values, or at <paramref name="tagEnd"/>.
/// </summary>
/// <param name="htmlCode">Html being scanned.</param>
/// <param name="tagEnd">Index at which scanning stops at the latest.</param>
/// <param name="valueStart">Index just before the position where scanning begins.</param>
/// <returns>
/// The located range after <c>HtmlTags.CorrectValueIfQuoteExists</c> was applied.
/// </returns>
internal static TextRange FindAttributeValuePosition(ref string htmlCode, int tagEnd, int valueStart)
{
    // Default start: the first scanned position.
    int rangeStart = valueStart + 1;
    int cursor = rangeStart;
    bool insideValue = false;
    ValueStartType quoting = ValueStartType.None;

    while (cursor < tagEnd)
    {
        char ch = htmlCode[cursor];
        bool isBlank = ch == ' ' || ch == '\r' || ch == '\t' || ch == '\n';

        if (!insideValue)
        {
            if (!isBlank)
            {
                // First non-blank character: the value starts here and its
                // first character decides how the value is terminated.
                rangeStart = cursor;
                insideValue = true;

                if (ch == '\'')
                {
                    quoting = ValueStartType.Quote;
                }
                else if (ch == '\"')
                {
                    quoting = ValueStartType.DblQuote;
                }
                else
                {
                    quoting = ValueStartType.None;
                }
            }

            cursor++;
            continue;
        }

        if (quoting == ValueStartType.None)
        {
            if (isBlank)
            {
                break;
            }
        }
        else if (quoting == ValueStartType.Quote && ch == '\'')
        {
            break;
        }
        else if (quoting == ValueStartType.DblQuote && ch == '\"')
        {
            break;
        }

        cursor++;
    }

    // Whether a terminator was hit or tagEnd was reached, the end is the
    // position the scan stopped at.
    TextRange result;
    result.End = cursor;
    result.Start = rangeStart;

    // Remove needless characters
    result = HtmlTags.CorrectValueIfQuoteExists(ref htmlCode, result);
    return result;
}
/// <summary>
/// Generates the script tags for the client-side encoder: an inline block
/// with the user configuration, request info, location object and applied
/// cookie names, followed by references to the Base64 and ASProxy encoder
/// script files.
/// </summary>
/// <param name="pageUrl">Url of the requested page.</param>
/// <param name="pageUrlNoQuery">Page url without query parameters.</param>
/// <param name="pagePath">Base path of the requested page.</param>
/// <param name="rootUrl">Root url of the requested site.</param>
/// <returns>Javascript codes</returns>
string GenerateJsEncoderCodes(
    string pageUrl,
    string pageUrlNoQuery,
    string pagePath,
    string rootUrl)
{
    // User options, each rendered as a lower-case string.
    string configScript = string.Format(
        Consts.ClientContent.JSEncoder_UserConfig,
        _UserOptions.EncodeUrl.ToString().ToLower(),
        _UserOptions.OrginalUrl.ToString().ToLower(),
        _UserOptions.Links.ToString().ToLower(),
        _UserOptions.Images.ToString().ToLower(),
        _UserOptions.SubmitForms.ToString().ToLower(),
        _UserOptions.Frames.ToString().ToLower(),
        _UserOptions.Cookies.ToString().ToLower(),
        _UserOptions.RemoveScripts.ToString().ToLower(),
        _UserOptions.RemoveObjects.ToString().ToLower(),
        _UserOptions.TempCookies.ToString().ToLower());

    // V5.5b4 BUGFIX: page url should be encoded, it may contain unsecure chars.
    string requestScript = string.Format(
        Consts.ClientContent.JSEncoder_RequestInfo,
        HtmlTags.EncodeJavascriptString(pageUrl, true),
        HtmlTags.EncodeJavascriptString(pageUrlNoQuery, true),
        HtmlTags.EncodeJavascriptString(pagePath, true),
        HtmlTags.EncodeJavascriptString(rootUrl, true),
        Systems.CookieManager.GetCookieName(pageUrl),
        Systems.CookieManager.GetCookieNameExt,
        UrlProvider.JoinUrl(UrlProvider.GetAppAbsolutePath(), Consts.FilesConsts.PageDefault_Dynamic),
        UrlProvider.GetAppAbsolutePath(),
        UrlProvider.GetAppAbsoluteBasePath(),
        Consts.FilesConsts.PageDefault_Dynamic,
        Consts.Query.Base64Unknowner);

    // Cookie names as a comma separated list of single-quoted strings.
    StringCollection appliedCookies = Systems.CookieManager.GetAppliedCookieNamesList(pageUrl);
    StringBuilder quotedNames = new StringBuilder();
    for (int i = 0; i < appliedCookies.Count; i++)
    {
        quotedNames.Append("'").Append(appliedCookies[i]).Append("'");
        if (i != appliedCookies.Count - 1)
        {
            quotedNames.Append(',');
        }
    }

    string cookieScript = string.Format(
        Consts.ClientContent.JSEncoder_AppliedCookieNames,
        quotedNames.ToString());

    // Location object built from the parts of the page uri.
    Uri pageUri = new Uri(pageUrl);
    string locationScript = string.Format(
        Consts.ClientContent.JSEncoder_RequestLocation,
        pageUri.Fragment,      // Hash
        pageUri.Authority,     // Host
        pageUri.Host,          // Hostname
        pageUri.AbsolutePath,  // Pathname
        pageUri.Query,         // Search
        pageUri.Port,          // Port
        pageUri.Scheme);       // Protocol

    StringBuilder script = new StringBuilder();
    try
    {
        // ASProxy encoder variables
        script.Append(Resources.ASProxyJavaScriptTag(configScript + requestScript + locationScript + cookieScript, ""));

        // Base64 encoder
        script.Append(Resources.ASProxyJavaScriptTag("", Consts.FilesConsts.JSBase64));

        // ASProxy encoder
        script.Append(Resources.ASProxyJavaScriptTag("", Consts.FilesConsts.JSASProxyEncoder));

        // The AJAX wrapper core script is no longer emitted (since v5.5b4).
        return script.ToString();
    }
    finally
    {
        // Empty the builder once the result string has been produced.
        script.Length = 0;
    }
}
/// <summary>
/// Copy constructor: creates a separate <c>HtmlTags</c> object whose
/// <c>Tag</c>, <c>Attr</c> and <c>Name</c> are taken from
/// <paramref name="tags"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the three members are assigned directly; if any of them is
/// a mutable reference type it is still shared with the source - confirm.
/// </remarks>
/// <param name="tags">Object to copy from.</param>
public HtmlTags(HtmlTags tags)
{
    this.Tag = tags.Tag;
    this.Attr = tags.Attr;
    this.Name = tags.Name;
}
/// <summary>
/// Rewrites the <paramref name="attributeName"/> url of every
/// <paramref name="tagName"/> tag to <paramref name="newPageFormat"/>
/// formatted with the resolved, encoded url, and optionally inserts an extra
/// attribute built from <paramref name="extraAttributeFormat"/> before the
/// end of the tag.
/// </summary>
/// <param name="htmlCodes">Html that is scanned and rewritten in place.</param>
/// <param name="pageUrlNoQuery">Passed to <c>UrlProvider.JoinUrl</c> to resolve relative urls.</param>
/// <param name="tagName">Tag to look for.</param>
/// <param name="attributeName">Attribute whose value is rewritten.</param>
/// <param name="pagePath">Passed to <c>UrlProvider.JoinUrl</c>.</param>
/// <param name="newPageFormat">Format string; <c>{0}</c> receives the encoded url.</param>
/// <param name="siteRootUrl">Passed to <c>UrlProvider.JoinUrl</c>.</param>
/// <param name="encodeUrl">True to use <c>UrlProvider.EncodeUrl</c>, false to only apply <c>UrlProvider.EscapeUrlQuery</c>.</param>
/// <param name="extraAttributeFormat">Optional format; <c>{0}</c> is the resolved original url, <c>{1}</c> the html-encoded new value.</param>
/// <param name="canEncloseWithTags">Not consulted by the current implementation (the enclosing-script check is disabled).</param>
private static void ReplaceTagSrcAttribute(ref string htmlCodes, string pageUrlNoQuery, string tagName, string attributeName, string pagePath, string newPageFormat, string siteRootUrl, bool encodeUrl, string extraAttributeFormat, bool canEncloseWithTags)
{
    bool hasExtraAttribute = !string.IsNullOrEmpty(extraAttributeFormat);

    // In the first run, the search index must be zero.
    int searchFrom = 0;
    string resolvedOriginal = "";
    string pendingBookmark = "";

    do
    {
        bool appendExtraAttribute = hasExtraAttribute;

        TextRange range = HtmlParser.GetTagAttributeValuePos(ref htmlCodes, tagName, attributeName, searchFrom);
        if (range.Start == -1)
        {
            break;
        }

        // The check that the tag is not enclosed by <script> was disabled in
        // the original code because it "causes heavy pressure".

        // Whether or not the value is usable, the next search resumes at the
        // end of this tag.
        searchFrom = StringCompare.IndexOfMatchCase(ref htmlCodes, '>', range.Start);

        bool hasUsableValue = range.End != -1 && range.End > range.Start;
        if (!hasUsableValue)
        {
            continue;
        }

        int valueLength = range.End - range.Start;

        string url = htmlCodes.Substring(range.Start, valueLength);
        url = HtmlTags.RemoveEscapeQuotesFromTagAttributeValue(url);
        url = HtmlTags.RemoveQuotesFromTagAttributeValue(url);

        // A pure bookmark link is left untouched.
        if (url.StartsWith("#"))
        {
            continue;
        }

        string replacement;
        if (UrlProvider.IsClientSitdeUrl(url))
        {
            // Client-side urls are kept as they are and get no extra attribute.
            replacement = url;
            appendExtraAttribute = false;
        }
        else
        {
            // Convert virtual url to absolute, then drop characters such as
            // tab and line feed.
            url = UrlProvider.JoinUrl(url, pageUrlNoQuery, pagePath, siteRootUrl);
            url = UrlProvider.IgnoreInvalidUrlCharctersInHtml(url);

            // Remember the resolved url for the extra attribute.
            resolvedOriginal = url;

            // Detach a bookmark so it can be re-attached after encoding.
            if (url.IndexOf('#') != -1)
            {
                url = UrlBuilder.RemoveUrlBookmark(url, out pendingBookmark);
            }

            url = HttpUtility.HtmlDecode(url);
            url = encodeUrl
                ? UrlProvider.EncodeUrl(url)
                : UrlProvider.EscapeUrlQuery(url);

            replacement = string.Format(newPageFormat, url);

            if (pendingBookmark.Length > 0)
            {
                replacement += pendingBookmark;
                pendingBookmark = "";
            }
        }

        // Make it safe, then swap it in for the old value.
        replacement = HttpUtility.HtmlEncode(replacement);
        htmlCodes = htmlCodes
            .Remove(range.Start, valueLength)
            .Insert(range.Start, replacement);

        if (appendExtraAttribute)
        {
            string extraAttribute = string.Format(extraAttributeFormat, resolvedOriginal, replacement);

            // Locate the end of the tag again (the html just changed) and
            // step back over the '/' of a self-closing tag.
            searchFrom = StringCompare.IndexOfMatchCase(ref htmlCodes, '>', range.Start);
            if (htmlCodes[searchFrom - 1] == '/')
            {
                searchFrom--;
            }

            htmlCodes = htmlCodes.Insert(searchFrom, extraAttribute);
        }
    }
    while (searchFrom != -1);
}
/// <summary>
/// Looks up the href value of the first base tag, returns its directory part
/// through <paramref name="hrefPath"/> and optionally removes the href value
/// from the html.
/// </summary>
/// <param name="pageHtml">Html to inspect; modified only when the value is removed.</param>
/// <param name="removeHREFAttribute">True to remove the located href value from <paramref name="pageHtml"/>.</param>
/// <param name="hrefPath">Base path ending with '/', or an empty string when the method returns false.</param>
/// <returns>
/// True when a usable base href was found; false when there is none, when it
/// is empty or a bookmark, or when it contains no '/'.
/// </returns>
public static bool ReplaceBaseSources(ref string pageHtml, bool removeHREFAttribute, out string hrefPath)
{
    hrefPath = "";

    // Find the href value of the BASE tag
    TextRange hrefRange = HtmlParser.GetTagAttributeValuePos(ref pageHtml, "<base", "href", 0);
    if (hrefRange.Start == -1 || hrefRange.End == -1)
    {
        return false;
    }

    int hrefLength = hrefRange.End - hrefRange.Start;

    // Get the attribute value and remove unwanted characters
    string baseHref = pageHtml.Substring(hrefRange.Start, hrefLength);
    baseHref = HtmlTags.RemoveEscapeQuotesFromTagAttributeValue(baseHref);
    baseHref = HtmlTags.RemoveQuotesFromTagAttributeValue(baseHref);

    // An empty value or a bookmark is not a usable base
    if (baseHref.Length == 0 || baseHref.StartsWith("#"))
    {
        return false;
    }

    // The base url should end with a slash; otherwise cut it back to its
    // last directory separator.
    if (baseHref[baseHref.Length - 1] != '/')
    {
        int lastSlash = baseHref.LastIndexOf('/');
        if (lastSlash == -1)
        {
            return false;
        }

        baseHref = baseHref.Substring(0, lastSlash + 1);
    }

    hrefPath = baseHref;

    if (removeHREFAttribute)
    {
        pageHtml = pageHtml.Remove(hrefRange.Start, hrefLength);
    }

    return true;
}
/// <summary>
/// Appends the text of <paramref name="span"/> from page
/// <paramref name="pageIdx"/> to <paramref name="str"/> and registers one
/// styled tag span per overlapping text object, plus one tag span for every
/// overlap reported by <c>OverlapsWithExtract</c>.
/// </summary>
/// <param name="pageIdx">Index of the page in <c>Document.Pages</c>.</param>
/// <param name="span">Character range of the page text to append.</param>
/// <param name="str">Builder receiving the text; tag positions are relative to its content.</param>
private void Append(int pageIdx, Span span, StringBuilder str)
{
    PdfPage pdfPage = Document.Pages[pageIdx];
    PdfText pageText = pdfPage.Text;

    // Pairs a text object with the character index found at the position of
    // its first character.
    TextObject ToTextObject(PdfTextObject source)
    {
        var firstCharRect = source.GetCharRect(0);
        var charIdx = pageText.GetCharIndexAtPos(firstCharRect.left, firstCharRect.top, 1, 1);
        return new TextObject(source, charIdx);
    }

    // All text objects of the page, ordered by start index
    var ordered = pdfPage.PageObjects
        .Where(o => o.ObjectType == PageObjectTypes.PDFPAGE_TEXT)
        .Select(o => ToTextObject((PdfTextObject)o))
        .OrderBy(t => t.StartIndex)
        .ToList();

    // Some PDF documents are improperly formatted and miss PdfTextObjects -- fill the gaps
    int coveredUpTo = ordered.FirstOrDefault()?.EndIndex ?? 0;
    for (int i = 1; i < ordered.Count; i++)
    {
        var current = ordered[i];

        // A start at or before coveredUpTo + 1 means no gap (an overlap
        // should not happen but is tolerated).
        bool leavesGap = current.StartIndex > coveredUpTo + 1;
        if (leavesGap)
        {
            var filler = new TextObject(ordered[i - 1], coveredUpTo + 1, current.StartIndex - coveredUpTo - 1);
            ordered.Insert(i, filler);

            // Step over the filler so the loop increment moves past 'current'.
            i++;
        }

        coveredUpTo = current.EndIndex;
    }

    // Build the HTML tags
    int baseOffset = str.Length;
    foreach (var textObj in ordered)
    {
        var objRange = new Span(textObj.StartIndex, textObj.StartIndex + textObj.Length - 1);
        if (objRange.Overlaps(span, out var shared) is false)
        {
            continue;
        }

        // Look behind for a line return and extend the tag to include it --
        // unlike PdfTextObjects, GetText includes \r\n
        int lookBehindIdx = textObj.StartIndex - 2;
        bool precededByLineReturn =
            lookBehindIdx >= span.StartIdx
            && pageText.GetText(lookBehindIdx, 2) is "\r\n";
        int extendBehind = precededByLineReturn ? -2 : 0;

        // Text object tag
        int tagStart = baseOffset + shared.StartIdx - span.StartIdx;
        var tag = new HtmlTagSpan(new Span(tagStart + extendBehind, tagStart + shared.Length - 1))
            .WithStyle(s => SetTextStyle(s, textObj));
        HtmlTags.Add(tag);

        // Extract tags
        if (!OverlapsWithExtract(pageIdx, shared, out var extractOverlaps))
        {
            continue;
        }

        foreach (var extractOverlap in extractOverlaps)
        {
            int extractStart = baseOffset + extractOverlap.StartIdx - span.StartIdx;
            var extractRange = new Span(extractStart, extractStart + extractOverlap.Length - 1);

            var extractTag = new HtmlTagSpan(extractRange, 100);
            extractTag.WithStyle(s => s.WithBackgroundColorColor(extractOverlap.Object));
            HtmlTags.Add(extractTag);
        }
    }

    str.Append(pageText.GetText(span.StartIdx, span.Length));
}
/// <summary>
/// Builds a complete element: opening tag with class, style and any other
/// attributes, the value as content, and the closing tag. The element name
/// is the tag's <c>ToString()</c> value.
/// </summary>
/// <param name="tag">Tag to render.</param>
/// <param name="value">Content placed between the opening and closing tag, inserted as-is.</param>
/// <param name="cssClass">Value passed to <c>GetClassNameAttribute</c>.</param>
/// <param name="style">Value passed to <c>GetStyleAttribute</c>.</param>
/// <param name="otherAttributes">Additional attribute text placed inside the opening tag.</param>
/// <returns>The element markup.</returns>
public string GetHtmlTag(HtmlTags tag, object value, string cssClass, string style, string otherAttributes = "")
{
    var openingName = tag.ToString();
    var classAttribute = GetClassNameAttribute(cssClass);
    var styleAttribute = GetStyleAttribute(style);
    var closingName = tag.ToString();

    return $"<{openingName} {classAttribute} {styleAttribute} {otherAttributes}>{value}</{closingName}>";
}
/// <summary>
/// For every <paramref name="tagName"/> tag whose <paramref name="attr1"/>
/// equals <paramref name="attr1Value"/> (ignoring case and surrounding
/// whitespace), rewrites the value of <paramref name="attr2"/> to
/// <paramref name="newValueFormat"/> formatted with the encoded value.
/// Unlike the overload that takes page paths, the value is not resolved
/// against a base url.
/// </summary>
/// <param name="htmlCodes">Html that is scanned and rewritten in place.</param>
/// <param name="newValueFormat">Format string; <c>{0}</c> receives the encoded value.</param>
/// <param name="encodeUrl">True to use <c>UrlProvider.EncodeUrl</c>, false to only apply <c>UrlProvider.EscapeUrlQuery</c>.</param>
/// <param name="tagName">Tag to look for.</param>
/// <param name="attr1">Attribute that selects the tags to process.</param>
/// <param name="attr1Value">Required value of <paramref name="attr1"/>.</param>
/// <param name="attr2">Attribute whose value is rewritten.</param>
public static void ReplaceTwoAttributeTagsValue(ref string htmlCodes, string newValueFormat, bool encodeUrl, string tagName, string attr1, string attr1Value, string attr2)
{
    TextRange attr1Result = new TextRange(-1, -1);
    TextRange attr2Result = new TextRange(-1, -1);
    int cursorPos = 0;
    string actionSrc = "";

    do
    {
        attr1Result = HtmlParser.GetTagAttributeValuePos(ref htmlCodes, tagName, attr1, cursorPos);
        if (attr1Result.Start > -1 && attr1Result.End > -1)
        {
            string tmpRelType = htmlCodes.Substring(attr1Result.Start, attr1Result.End - attr1Result.Start);

            // Ordinal ignore-case comparison: ToLower() is culture-sensitive
            // and makes e.g. "STYLESHEET" fail to match "stylesheet" under a
            // Turkish culture.
            if (!string.Equals(tmpRelType.Trim(), attr1Value.Trim(), System.StringComparison.OrdinalIgnoreCase))
            {
                cursorPos = attr1Result.Start;

                // Keeps the loop condition true so the scan moves on to the
                // next tag instead of stopping at the first mismatch.
                attr2Result.Start = attr1Result.Start;
                continue;
            }
        }
        else
        {
            break;
        }

        attr2Result = HtmlParser.GetTagAttributeValuePos(ref htmlCodes, tagName, attr2, cursorPos);
        if (attr2Result.Start == -1)
        {
            break;
        }

        if (attr2Result.Start > -1 && attr2Result.End > -1)
        {
            cursorPos = attr2Result.Start;

            // Correct value position according to quotes existence
            attr2Result = HtmlTags.CorrectValueIfQuoteExists(ref htmlCodes, attr2Result);

            // Get the value
            actionSrc = htmlCodes.Substring(attr2Result.Start, attr2Result.End - attr2Result.Start);

            // Get clear url
            actionSrc = HttpUtility.HtmlDecode(actionSrc);

            if (encodeUrl)
            {
                actionSrc = UrlProvider.EncodeUrl(actionSrc);
            }
            else
            {
                // just url safe
                actionSrc = UrlProvider.EscapeUrlQuery(actionSrc);
            }

            // Add it to our url
            actionSrc = string.Format(newValueFormat, actionSrc);

            // Make it safe
            actionSrc = HttpUtility.HtmlEncode(actionSrc);

            // Replace the old value with the new one
            htmlCodes = htmlCodes.Remove(attr2Result.Start, attr2Result.End - attr2Result.Start);
            htmlCodes = htmlCodes.Insert(attr2Result.Start, actionSrc);
        }
        else
        {
            // Value start found but no end: resume after the end of the tag.
            cursorPos = attr2Result.Start;
            cursorPos = StringCompare.IndexOfMatchCase(ref htmlCodes, ">", cursorPos);
        }
    }
    while (attr2Result.Start != -1);
}
/// <summary>
/// Maps a <c>HtmlTags</c> value to its element name.
/// </summary>
/// <param name="tag">Tag to translate.</param>
/// <returns>The element name, e.g. "td" for <c>tableDataCell</c> and "img" for <c>image</c>.</returns>
/// <exception cref="XhtmlException">The tag has no mapping.</exception>
public static string GetHtmlTagName(HtmlTags tag)
{
    string elementName;

    switch (tag)
    {
        // Table structure
        case HtmlTags.table:           elementName = "table";    break;
        case HtmlTags.colgroup:        elementName = "colgroup"; break;
        case HtmlTags.col:             elementName = "col";      break;
        case HtmlTags.tableRow:        elementName = "tr";       break;
        case HtmlTags.tableHeaderCell: elementName = "th";       break;
        case HtmlTags.tableDataCell:   elementName = "td";       break;

        // Headings
        case HtmlTags.h1: elementName = "h1"; break;
        case HtmlTags.h2: elementName = "h2"; break;
        case HtmlTags.h3: elementName = "h3"; break;
        case HtmlTags.h4: elementName = "h4"; break;
        case HtmlTags.h5: elementName = "h5"; break;
        case HtmlTags.h6: elementName = "h6"; break;

        // Lists
        case HtmlTags.ul: elementName = "ul"; break;
        case HtmlTags.ol: elementName = "ol"; break;
        case HtmlTags.li: elementName = "li"; break;

        // Text containers
        case HtmlTags.div:  elementName = "div";  break;
        case HtmlTags.p:    elementName = "p";    break;
        case HtmlTags.span: elementName = "span"; break;

        // Images
        case HtmlTags.imagemap: elementName = "imagemap"; break;
        case HtmlTags.image:    elementName = "img";      break;

        default:
            throw new XhtmlException("GetHtmlTagName: unknown html- Tag: " + tag.ToString());
    }

    return elementName;
}