示例#1
0
 /// <summary> Reads the <c>HREF</c> attribute of the given tag, strips every line-feed and
 /// carriage-return character from it, and hands the result together with the page URL to
 /// <c>processor.Extract</c>.
 /// </summary>
 /// <param name="tag">Tag whose attributes are searched for <c>HREF</c>.
 /// </param>
 /// <param name="url">URL of the html page that contains the tag.
 /// </param>
 /// <returns>The value returned by <c>processor.Extract</c> for the cleaned link and url.
 /// </returns>
 /// <exception cref="ParserException">Wraps any exception raised during extraction; the message
 /// carries the tag text (or "null" when no tag was supplied) and the url.
 /// </exception>
 public virtual string ExtractLink(Tag tag, string url)
 {
     try
     {
         System.Collections.Hashtable attributes = tag.Attributes;
         string href = (string)attributes["HREF"];

         if (href != null)
         {
             // Drop line feeds first, then carriage returns.
             href = ParserUtils.RemoveChars(ParserUtils.RemoveChars(href, '\n'), '\r');
         }

         // A missing HREF is forwarded as null; the processor decides what to do with it.
         return processor.Extract(href, url);
     }
     catch (System.Exception e)
     {
         // A null tag ends up here as well (via the failed attribute access above),
         // so the message must not dereference it blindly.
         string msg = (tag != null) ? tag.Text : "null";

         throw new ParserException(
                   "HTMLLinkScanner.ExtractLink() : Error while extracting link from tag " + msg + ", url = " + url, e);
     }
 }
示例#2
0
        /// <summary> Checks that <c>ParserUtils.RemoveChars</c> removes every line feed from the
        /// input while leaving the remaining characters (including the tab) untouched.
        /// </summary>
        public void RemoveChars()
        {
            const string input    = "hello\nworld\n\tqsdsds";
            const string expected = "helloworld\tqsdsds";

            // The scanner is constructed here but not used by the assertion below.
            TagScanner scanner = new AnonymousClassTagScanner(this);

            string stripped = ParserUtils.RemoveChars(input, '\n');

            Assert.AreEqual(expected, stripped, "Removing Chars");
        }
示例#3
0
        /// <summary> Extract the location of the image from the tag's <c>SRC</c> attribute and pass
        /// it, together with the url of the html page in which this tag exists, to
        /// <c>processor.Extract</c>. When the attribute is missing or empty but the tag text
        /// contains "SRC" (in any letter case), an "=" is inserted directly after it, on the
        /// assumption that the equals sign was omitted, and the attributes are parsed again.
        /// </summary>
        /// <param name="tag">Tag to be parsed; its <c>Text</c> is rewritten when the missing-equals
        /// repair is applied.
        /// </param>
        /// <param name="url">URL of web page being parsed
        /// </param>
        /// <returns>The value returned by <c>processor.Extract</c>, or an empty string when no
        /// <c>SRC</c> value could be found.
        /// </returns>
        /// <exception cref="ParserException">Wraps any exception raised while extracting; the
        /// message carries the link found so far and the url.
        /// </exception>
        public virtual string ExtractImageLocn(Tag tag, string url)
        {
            string relativeLink = null;

            try
            {
                // NOTE(review): 'table' is not declared in this method; presumably a field of
                // the enclosing class, so the assignment is kept in case other members read it.
                table        = tag.Attributes;
                relativeLink = StripLineBreaks((string)table["SRC"]);

                if (string.IsNullOrEmpty(relativeLink))
                {
                    // try fix
                    string tagText = tag.Text;

                    // Ordinal, case-insensitive search: no upper-cased copy of the text is
                    // needed and the result does not depend on the current culture.
                    int indexSrc = tagText.IndexOf("SRC", System.StringComparison.OrdinalIgnoreCase);
                    if (indexSrc != -1)
                    {
                        // There is a missing equals.
                        int afterSrc = indexSrc + 3;
                        tag.Text = tagText.Substring(0, afterSrc) + "=" + tagText.Substring(afterSrc);
                        table    = tag.RedoParseAttributes();

                        // Apply the same clean-up as for the first lookup so both paths
                        // hand the processor a link without embedded line breaks.
                        relativeLink = StripLineBreaks((string)table["SRC"]);
                    }
                }

                if (relativeLink == null)
                {
                    return "";
                }

                return processor.Extract(relativeLink, url);
            }
            catch (System.Exception e)
            {
                throw new ParserException(
                          "HTMLImageScanner.ExtractImageLocn() : Error in extracting image location, relativeLink = " +
                          relativeLink + ", url = " + url, e);
            }
        }

        /// <summary> Removes line-feed and carriage-return characters from a link value;
        /// a null link is returned unchanged.
        /// </summary>
        private static string StripLineBreaks(string link)
        {
            if (link == null)
            {
                return null;
            }

            link = ParserUtils.RemoveChars(link, '\n');
            return ParserUtils.RemoveChars(link, '\r');
        }
示例#4
0
 /// <summary> Returns the text that <c>ParserUtils.ToString</c> produces for this instance.
 /// </summary>
 public override string ToString()
 {
     string text = ParserUtils.ToString(this);
     return text;
 }