/// <summary>
/// Extract the link from the HREF attribute of the given tag. Line feeds and
/// carriage returns are removed from the attribute value (when it is present)
/// before it is handed, together with the URL of the actual html page, to
/// <c>processor.Extract</c>.
/// </summary>
/// <param name="tag">Tag whose HREF attribute is read.</param>
/// <param name="url">URL of the html page that contains the tag.</param>
/// <returns>Whatever <c>processor.Extract</c> returns for the cleaned HREF value and <paramref name="url"/>.</returns>
/// <exception cref="ParserException">
/// Wraps any exception raised during extraction; the message carries the tag
/// text (or "null" when the tag itself is null) and the url.
/// </exception>
public virtual string ExtractLink(Tag tag, string url)
{
    try
    {
        System.Collections.Hashtable attributes = tag.Attributes;
        string href = (string)attributes["HREF"];

        if (href != null)
        {
            // Strip '\n' first, then '\r', from the raw attribute value.
            href = ParserUtils.RemoveChars(ParserUtils.RemoveChars(href, '\n'), '\r');
        }

        return processor.Extract(href, url);
    }
    catch (System.Exception e)
    {
        // A null tag is the one failure where the tag text cannot be read.
        string tagText = (tag != null) ? tag.Text : "null";

        throw new ParserException(
            "HTMLLinkScanner.ExtractLink() : Error while extracting link from tag " + tagText + ", url = " + url,
            e);
    }
}
/// <summary>
/// Checks that <c>ParserUtils.RemoveChars</c> drops every '\n' from the input
/// while leaving the remaining characters (including the tab) untouched.
/// </summary>
public void RemoveChars()
{
    string test = "hello\nworld\n\tqsdsds";

    // The scanner instance the original created here was never used by the
    // assertion, so it has been removed.
    string result = ParserUtils.RemoveChars(test, '\n');

    Assert.AreEqual("helloworld\tqsdsds", result, "Removing Chars");
}
/// <summary>
/// Extract the location of the image from the SRC attribute of the given tag,
/// using the url of the html page in which this tag exists. When the attribute
/// is missing or empty, the tag text is searched for "SRC" (ignoring case), an
/// "=" is inserted right after it, the attributes are re-parsed and SRC is
/// looked up again.
/// </summary>
/// <param name="tag">Tag to be parsed; its Text is rewritten when the repair step runs.</param>
/// <param name="url">URL of web page being parsed.</param>
/// <returns>
/// An empty string when no SRC value could be found; otherwise the result of
/// <c>processor.Extract</c> for the SRC value and <paramref name="url"/>.
/// </returns>
/// <exception cref="ParserException">
/// Wraps any exception raised during extraction; the message carries the
/// relative link read so far and the url.
/// </exception>
public virtual string ExtractImageLocn(Tag tag, string url)
{
    string relativeLink = null;
    try
    {
        table = tag.Attributes;
        relativeLink = (string)table["SRC"];
        if (relativeLink != null)
        {
            relativeLink = ParserUtils.RemoveChars(relativeLink, '\n');
            relativeLink = ParserUtils.RemoveChars(relativeLink, '\r');
        }

        if (string.IsNullOrEmpty(relativeLink))
        {
            // try fix
            string tagText = tag.Text;

            // Ordinal, case-insensitive search on the original text: the index is
            // used to splice that same text, so it must not come from a
            // culture-sensitive match (which may skip ignorable characters or
            // vary with the current culture).
            int indexSrc = tagText.IndexOf("SRC", System.StringComparison.OrdinalIgnoreCase);
            if (indexSrc != -1)
            {
                // There is a missing equals: put one directly after "SRC".
                tag.Text = tagText.Insert(indexSrc + 3, "=");
                table = tag.RedoParseAttributes();
                relativeLink = (string)table["SRC"];
            }
        }

        if (relativeLink == null)
        {
            return "";
        }

        return processor.Extract(relativeLink, url);
    }
    catch (System.Exception e)
    {
        throw new ParserException(
            "HTMLImageScanner.ExtractImageLocn() : Error in extracting image location, relativeLink = " + relativeLink + ", url = " + url,
            e);
    }
}
/// <summary>
/// Returns the string form of this instance as produced by
/// <c>ParserUtils.ToString</c>.
/// </summary>
/// <returns>The value returned by <c>ParserUtils.ToString(this)</c>.</returns>
public override string ToString()
{
    string text = ParserUtils.ToString(this);
    return text;
}