예제 #1
0
        /// <summary>
        /// Returns a copy of <paramref name="strString"/> without its script sections.
        /// </summary>
        /// <remarks>
        /// The text in front of each "&lt;script" match is kept, and the parser then skips
        /// past the next "&lt;/script&gt;". Matching is delegated to the StringParser
        /// "NoCase" helpers (presumably case-insensitive; confirm against StringParser).
        /// If an opening tag has no closing tag, the input is returned completely unchanged.
        /// </remarks>
        /// <param name="strString">Text to strip script sections from.</param>
        /// <returns>The text without script sections, or the original input when a script section is unterminated.</returns>
        public static string removeScripts(string strString)
        {
            // A builder avoids re-copying the accumulated text for every script section found.
            System.Text.StringBuilder kept   = new System.Text.StringBuilder();
            string                    chunk  = "";
            StringParser              parser = new StringParser(strString);

            while (parser.extractToNoCase("<script", ref chunk))
            {
                kept.Append(chunk);
                if (!parser.skipToEndOfNoCase("</script>"))
                {
                    // Unterminated script section: give up and hand the input back untouched.
                    return(strString);
                }
            }

            // Whatever follows the last script section is kept as-is.
            parser.extractToEnd(ref chunk);
            kept.Append(chunk);
            return(kept.ToString());
        }
예제 #2
0
 /// <summary>
 /// Collects the values of every <c>href="..."</c> and <c>src="..."</c> attribute found in a piece of markup.
 /// </summary>
 /// <remarks>
 /// Comments and script sections are removed first (removeComments / removeScripts), and single
 /// quotes are rewritten to double quotes so one search pattern covers both quoting styles.
 /// Values starting with http://, https:// or ftp:// are stored unchanged. Any other value is
 /// turned into an absolute URL by replacing the path (and query/fragment, when present) of
 /// <paramref name="strRootUrl"/>; if the root URL is missing or unusable the value is
 /// prefixed with "http://" instead. Each list only receives values it does not already contain.
 /// </remarks>
 /// <param name="strString">Markup to scan.</param>
 /// <param name="strRootUrl">Root URL used for values without a scheme; may be null or empty.</param>
 /// <param name="documents">Receives href values; values containing "mailto:" are skipped. Created when null.</param>
 /// <param name="images">Receives src values. Created when null.</param>
 public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
 {
     // The lists are passed by ref, so a missing list can be supplied instead of failing on the first hit.
     if (documents == null)
     {
         documents = new ArrayList();
     }
     if (images == null)
     {
         images = new ArrayList();
     }

     strString = StringParser.removeComments(strString);
     strString = StringParser.removeScripts(strString);
     StringParser stringParser = new StringParser(strString);
     stringParser.replaceEvery("'", "\"");

     // Parse the root once instead of once per link.
     Uri baseUri = parseRootUrl(strRootUrl);

     collectAttributeUrls(stringParser, "href=\"", baseUri, true, documents);
     collectAttributeUrls(stringParser, "src=\"", baseUri, false, images);
 }

 // Scans the parser content from the start for one attribute prefix and adds each distinct value to target.
 private static void collectAttributeUrls(StringParser parser, string attributeStart, Uri baseUri, bool skipMailto, ArrayList target)
 {
     string value = "";
     parser.resetPosition();
     while (parser.skipToEndOfNoCase(attributeStart))
     {
         if (!parser.extractTo("\"", ref value))
         {
             continue;
         }
         value = value.Trim();
         if (value.Length == 0)
         {
             continue;
         }
         if (skipMailto && value.IndexOf("mailto:", StringComparison.OrdinalIgnoreCase) >= 0)
         {
             continue;
         }
         if (!hasKnownScheme(value))
         {
             value = toAbsoluteUrl(baseUri, value);
         }
         if (!target.Contains(value))
         {
             target.Add(value);
         }
     }
 }

 // True when the link already carries one of the schemes that are stored unchanged.
 private static bool hasKnownScheme(string link)
 {
     return link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
         || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
         || link.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
 }

 // Parses the trimmed, slash-terminated root URL; returns null when it is empty or not a usable URI.
 private static Uri parseRootUrl(string strRootUrl)
 {
     string root = (strRootUrl == null) ? "" : strRootUrl.Trim();
     if (root.Length == 0)
     {
         return null;
     }
     if (!root.EndsWith("/", StringComparison.Ordinal))
     {
         root += "/";
     }

     Uri parsed;
     if (Uri.TryCreate(root, UriKind.RelativeOrAbsolute, out parsed) && parsed.IsAbsoluteUri)
     {
         return parsed;
     }
     // A root without a scheme is treated as http, which is what UriBuilder(string) does as well.
     if (Uri.TryCreate("http://" + root, UriKind.Absolute, out parsed))
     {
         return parsed;
     }
     return null;
 }

 // Builds an absolute URL for a link without a recognised scheme; falls back to "http://" + link.
 private static string toAbsoluteUrl(Uri baseUri, string link)
 {
     if (baseUri == null)
     {
         return "http://" + link;
     }

     // UriBuilder.Path percent-encodes '?' and '#', so the query and fragment are split off
     // and assigned through their own properties.
     string path     = link;
     string query    = null;
     string fragment = null;

     int hashAt = path.IndexOf('#');
     if (hashAt >= 0)
     {
         fragment = path.Substring(hashAt + 1);
         path     = path.Substring(0, hashAt);
     }
     int queryAt = path.IndexOf('?');
     if (queryAt >= 0)
     {
         query = path.Substring(queryAt + 1);
         path  = path.Substring(0, queryAt);
     }

     try
     {
         UriBuilder builder = new UriBuilder(baseUri);
         builder.Path = path;
         if (query != null)
         {
             builder.Query = query;
         }
         if (fragment != null)
         {
             builder.Fragment = fragment;
         }
         return builder.Uri.ToString();
     }
     catch (UriFormatException)
     {
         return "http://" + link;
     }
 }
예제 #3
0
 /// <summary>
 /// Strips script sections out of <paramref name="strString"/> and returns the remaining text.
 /// </summary>
 /// <remarks>
 /// Text preceding each "&lt;script" match is kept; the parser is then advanced past the
 /// following "&lt;/script&gt;". The matching itself is done by the StringParser "NoCase"
 /// helpers (presumably case-insensitive; confirm against StringParser). When an opening
 /// tag is never closed, the original input is returned unmodified.
 /// </remarks>
 /// <param name="strString">Text to process.</param>
 /// <returns>The text with script sections removed, or the untouched input if a script section is not closed.</returns>
 public static string removeScripts(string strString)
 {
     // Accumulate in a builder so the kept text is not re-copied on every iteration.
     System.Text.StringBuilder result = new System.Text.StringBuilder();
     string piece = "";
     StringParser parser = new StringParser(strString);
     while (parser.extractToNoCase("<script", ref piece))
     {
         result.Append(piece);
         if (!parser.skipToEndOfNoCase("</script>"))
         {
             // No closing tag for this script section: return the input as it came in.
             return strString;
         }
     }
     // Keep the tail after the last script section.
     parser.extractToEnd(ref piece);
     result.Append(piece);
     return result.ToString();
 }
예제 #4
0
        /// <summary>
        /// Collects the values of every <c>href="..."</c> and <c>src="..."</c> attribute found in a piece of markup.
        /// </summary>
        /// <remarks>
        /// Comments and script sections are removed first (removeComments / removeScripts), and single
        /// quotes are rewritten to double quotes so one search pattern covers both quoting styles.
        /// Values starting with http://, https:// or ftp:// are stored unchanged. Any other value is
        /// turned into an absolute URL by replacing the path (and query/fragment, when present) of
        /// <paramref name="strRootUrl"/>; if the root URL is missing or unusable the value is
        /// prefixed with "http://" instead. Each list only receives values it does not already contain.
        /// </remarks>
        /// <param name="strString">Markup to scan.</param>
        /// <param name="strRootUrl">Root URL used for values without a scheme; may be null or empty.</param>
        /// <param name="documents">Receives href values; values containing "mailto:" are skipped. Created when null.</param>
        /// <param name="images">Receives src values. Created when null.</param>
        public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
        {
            // The lists are passed by ref, so a missing list can be supplied instead of failing on the first hit.
            if (documents == null)
            {
                documents = new ArrayList();
            }
            if (images == null)
            {
                images = new ArrayList();
            }

            strString = StringParser.removeComments(strString);
            strString = StringParser.removeScripts(strString);
            StringParser stringParser = new StringParser(strString);

            stringParser.replaceEvery("'", "\"");

            // Parse the root once instead of once per link.
            Uri baseUri = parseRootUrl(strRootUrl);

            collectAttributeUrls(stringParser, "href=\"", baseUri, true, documents);
            collectAttributeUrls(stringParser, "src=\"", baseUri, false, images);
        }

        // Scans the parser content from the start for one attribute prefix and adds each distinct value to target.
        private static void collectAttributeUrls(StringParser parser, string attributeStart, Uri baseUri, bool skipMailto, ArrayList target)
        {
            string value = "";

            parser.resetPosition();
            while (parser.skipToEndOfNoCase(attributeStart))
            {
                if (!parser.extractTo("\"", ref value))
                {
                    continue;
                }
                value = value.Trim();
                if (value.Length == 0)
                {
                    continue;
                }
                if (skipMailto && value.IndexOf("mailto:", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    continue;
                }
                if (!hasKnownScheme(value))
                {
                    value = toAbsoluteUrl(baseUri, value);
                }
                if (!target.Contains(value))
                {
                    target.Add(value);
                }
            }
        }

        // True when the link already carries one of the schemes that are stored unchanged.
        private static bool hasKnownScheme(string link)
        {
            return link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                || link.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase);
        }

        // Parses the trimmed, slash-terminated root URL; returns null when it is empty or not a usable URI.
        private static Uri parseRootUrl(string strRootUrl)
        {
            string root = (strRootUrl == null) ? "" : strRootUrl.Trim();

            if (root.Length == 0)
            {
                return null;
            }
            if (!root.EndsWith("/", StringComparison.Ordinal))
            {
                root += "/";
            }

            Uri parsed;

            if (Uri.TryCreate(root, UriKind.RelativeOrAbsolute, out parsed) && parsed.IsAbsoluteUri)
            {
                return parsed;
            }
            // A root without a scheme is treated as http, which is what UriBuilder(string) does as well.
            if (Uri.TryCreate("http://" + root, UriKind.Absolute, out parsed))
            {
                return parsed;
            }
            return null;
        }

        // Builds an absolute URL for a link without a recognised scheme; falls back to "http://" + link.
        private static string toAbsoluteUrl(Uri baseUri, string link)
        {
            if (baseUri == null)
            {
                return "http://" + link;
            }

            // UriBuilder.Path percent-encodes '?' and '#', so the query and fragment are split off
            // and assigned through their own properties.
            string path     = link;
            string query    = null;
            string fragment = null;

            int hashAt = path.IndexOf('#');
            if (hashAt >= 0)
            {
                fragment = path.Substring(hashAt + 1);
                path     = path.Substring(0, hashAt);
            }
            int queryAt = path.IndexOf('?');
            if (queryAt >= 0)
            {
                query = path.Substring(queryAt + 1);
                path  = path.Substring(0, queryAt);
            }

            try
            {
                UriBuilder builder = new UriBuilder(baseUri);
                builder.Path = path;
                if (query != null)
                {
                    builder.Query = query;
                }
                if (fragment != null)
                {
                    builder.Fragment = fragment;
                }
                return builder.Uri.ToString();
            }
            catch (UriFormatException)
            {
                return "http://" + link;
            }
        }