示例#1
0
        /// <summary>
        /// Returns a copy of <paramref name="strString"/> with every section that starts at
        /// <c>&lt;script</c> and ends at <c>&lt;/script&gt;</c> cut out.
        /// </summary>
        /// <remarks>
        /// Matching is delegated to <c>StringParser.extractToNoCase</c> and
        /// <c>StringParser.skipToEndOfNoCase</c>, which are defined elsewhere
        /// (presumably case-insensitive, going by their names - confirm against StringParser).
        /// </remarks>
        /// <param name="strString">Text to strip script sections from.</param>
        /// <returns>
        /// The text found outside script sections. If an opening <c>&lt;script</c> has no
        /// matching <c>&lt;/script&gt;</c>, the input is returned unchanged.
        /// </returns>
        public static string removeScripts(string strString)
        {
            // A builder avoids re-copying the accumulated text on every script section found.
            System.Text.StringBuilder kept = new System.Text.StringBuilder();
            string segment = "";
            StringParser parser = new StringParser(strString);

            while (parser.extractToNoCase("<script", ref segment))
            {
                kept.Append(segment);
                if (!parser.skipToEndOfNoCase("</script>"))
                {
                    // Unterminated script section: give up and hand back the original text.
                    return strString;
                }
            }

            // Whatever follows the last script section (or the whole text if none was found).
            parser.extractToEnd(ref segment);
            kept.Append(segment);
            return kept.ToString();
        }
示例#2
0
 /// <summary>
 /// Collects the values of all <c>href="..."</c> attributes into <paramref name="documents"/>
 /// and of all <c>src="..."</c> attributes into <paramref name="images"/>.
 /// </summary>
 /// <remarks>
 /// The text is first passed through <c>removeComments</c> and <c>removeScripts</c>, and single
 /// quotes are replaced by double quotes so both quoting styles are found. Values that do not
 /// start with <c>http://</c>, <c>https://</c> or <c>ftp://</c> are rewritten against
 /// <paramref name="strRootUrl"/>. Values already present in the target list are not added again.
 /// </remarks>
 /// <param name="strString">Markup to scan.</param>
 /// <param name="strRootUrl">Root URL used for non-absolute links; may be null or empty.</param>
 /// <param name="documents">List that receives <c>href</c> targets (those containing <c>mailto:</c> are skipped).</param>
 /// <param name="images">List that receives <c>src</c> targets.</param>
 public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
 {
     strString = removeComments(strString);
     strString = removeScripts(strString);
     StringParser parser = new StringParser(strString);

     // Normalize quoting so a single search pattern covers href='x' and href="x".
     parser.replaceEvery("'", "\"");

     string rootUrl = (strRootUrl == null) ? "" : strRootUrl.Trim();
     if ((rootUrl.Length > 0) && !rootUrl.EndsWith("/", StringComparison.Ordinal))
     {
         rootUrl = rootUrl + "/";
     }

     collectAttributeValues(parser, "href=\"", rootUrl, true, documents);
     collectAttributeValues(parser, "src=\"", rootUrl, false, images);
 }

 /// <summary>
 /// Scans the parser content from the start for <paramref name="strAttribute"/> and adds each
 /// non-empty, not yet listed value (made absolute if needed) to <paramref name="results"/>.
 /// </summary>
 private static void collectAttributeValues(StringParser parser, string strAttribute, string strRootUrl, bool skipMailto, ArrayList results)
 {
     string strExtract = "";

     parser.resetPosition();
     while (parser.skipToEndOfNoCase(strAttribute))
     {
         if (!parser.extractTo("\"", ref strExtract))
         {
             continue;
         }

         strExtract = strExtract.Trim();
         if (strExtract.Length == 0)
         {
             continue;
         }
         if (skipMailto && (strExtract.IndexOf("mailto:", StringComparison.OrdinalIgnoreCase) != -1))
         {
             continue;
         }

         if (!hasKnownScheme(strExtract))
         {
             strExtract = resolveAgainstRoot(strRootUrl, strExtract);
         }
         if (!results.Contains(strExtract))
         {
             results.Add(strExtract);
         }
     }
 }

 /// <summary>
 /// True when the link already carries an http, https or ftp scheme (compared ignoring case),
 /// i.e. it must not be rewritten against the root URL.
 /// </summary>
 private static bool hasKnownScheme(string strLink)
 {
     return strLink.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
         || strLink.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
         || strLink.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
 }

 /// <summary>
 /// Builds an absolute URL by using <paramref name="strLink"/> as the path of
 /// <paramref name="strRootUrl"/>; falls back to prefixing <c>http://</c> when that fails.
 /// </summary>
 private static string resolveAgainstRoot(string strRootUrl, string strLink)
 {
     try
     {
         // NOTE(review): assigning Path replaces any path the root URL already had, and the
         // link is treated purely as a path - confirm whether new Uri(baseUri, relative) is wanted.
         UriBuilder builder = new UriBuilder(strRootUrl);
         builder.Path = strLink;
         return builder.Uri.ToString();
     }
     catch (Exception)
     {
         // Deliberate best-effort fallback (e.g. empty or malformed root URL).
         return "http://" + strLink;
     }
 }
示例#3
0
 /// <summary>
 /// Returns a copy of <paramref name="strString"/> with every section that starts at
 /// <c>&lt;script</c> and ends at <c>&lt;/script&gt;</c> cut out.
 /// </summary>
 /// <remarks>
 /// Matching is delegated to <c>StringParser.extractToNoCase</c> and
 /// <c>StringParser.skipToEndOfNoCase</c>, which are defined elsewhere
 /// (presumably case-insensitive, going by their names - confirm against StringParser).
 /// </remarks>
 /// <param name="strString">Text to strip script sections from.</param>
 /// <returns>
 /// The text found outside script sections. If an opening <c>&lt;script</c> has no
 /// matching <c>&lt;/script&gt;</c>, the input is returned unchanged.
 /// </returns>
 public static string removeScripts(string strString)
 {
     // A builder avoids re-copying the accumulated text on every script section found.
     System.Text.StringBuilder kept = new System.Text.StringBuilder();
     string segment = "";
     StringParser parser = new StringParser(strString);

     while (parser.extractToNoCase("<script", ref segment))
     {
         kept.Append(segment);
         if (!parser.skipToEndOfNoCase("</script>"))
         {
             // Unterminated script section: give up and hand back the original text.
             return strString;
         }
     }

     // Whatever follows the last script section (or the whole text if none was found).
     parser.extractToEnd(ref segment);
     kept.Append(segment);
     return kept.ToString();
 }
示例#4
0
        /// <summary>
        /// Collects the values of all <c>href="..."</c> attributes into <paramref name="documents"/>
        /// and of all <c>src="..."</c> attributes into <paramref name="images"/>.
        /// </summary>
        /// <remarks>
        /// The text is first passed through <c>removeComments</c> and <c>removeScripts</c>, and single
        /// quotes are replaced by double quotes so both quoting styles are found. Values that do not
        /// start with <c>http://</c>, <c>https://</c> or <c>ftp://</c> are rewritten against
        /// <paramref name="strRootUrl"/>. Values already present in the target list are not added again.
        /// </remarks>
        /// <param name="strString">Markup to scan.</param>
        /// <param name="strRootUrl">Root URL used for non-absolute links; may be null or empty.</param>
        /// <param name="documents">List that receives <c>href</c> targets (those containing <c>mailto:</c> are skipped).</param>
        /// <param name="images">List that receives <c>src</c> targets.</param>
        public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
        {
            strString = removeComments(strString);
            strString = removeScripts(strString);
            StringParser parser = new StringParser(strString);

            // Normalize quoting so a single search pattern covers href='x' and href="x".
            parser.replaceEvery("'", "\"");

            string rootUrl = (strRootUrl == null) ? "" : strRootUrl.Trim();
            if ((rootUrl.Length > 0) && !rootUrl.EndsWith("/", StringComparison.Ordinal))
            {
                rootUrl = rootUrl + "/";
            }

            collectAttributeValues(parser, "href=\"", rootUrl, true, documents);
            collectAttributeValues(parser, "src=\"", rootUrl, false, images);
        }

        /// <summary>
        /// Scans the parser content from the start for <paramref name="strAttribute"/> and adds each
        /// non-empty, not yet listed value (made absolute if needed) to <paramref name="results"/>.
        /// </summary>
        private static void collectAttributeValues(StringParser parser, string strAttribute, string strRootUrl, bool skipMailto, ArrayList results)
        {
            string strExtract = "";

            parser.resetPosition();
            while (parser.skipToEndOfNoCase(strAttribute))
            {
                if (!parser.extractTo("\"", ref strExtract))
                {
                    continue;
                }

                strExtract = strExtract.Trim();
                if (strExtract.Length == 0)
                {
                    continue;
                }
                if (skipMailto && (strExtract.IndexOf("mailto:", StringComparison.OrdinalIgnoreCase) != -1))
                {
                    continue;
                }

                if (!hasKnownScheme(strExtract))
                {
                    strExtract = resolveAgainstRoot(strRootUrl, strExtract);
                }
                if (!results.Contains(strExtract))
                {
                    results.Add(strExtract);
                }
            }
        }

        /// <summary>
        /// True when the link already carries an http, https or ftp scheme (compared ignoring case),
        /// i.e. it must not be rewritten against the root URL.
        /// </summary>
        private static bool hasKnownScheme(string strLink)
        {
            return strLink.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || strLink.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                || strLink.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Builds an absolute URL by using <paramref name="strLink"/> as the path of
        /// <paramref name="strRootUrl"/>; falls back to prefixing <c>http://</c> when that fails.
        /// </summary>
        private static string resolveAgainstRoot(string strRootUrl, string strLink)
        {
            try
            {
                // NOTE(review): assigning Path replaces any path the root URL already had, and the
                // link is treated purely as a path - confirm whether new Uri(baseUri, relative) is wanted.
                UriBuilder builder = new UriBuilder(strRootUrl);
                builder.Path = strLink;
                return builder.Uri.ToString();
            }
            catch (Exception)
            {
                // Deliberate best-effort fallback (e.g. empty or malformed root URL).
                return "http://" + strLink;
            }
        }