/// <summary>
/// Rebuilds <paramref name="strString"/> without the spans that run from an opening
/// script tag up to and including the matching closing script tag.
/// </summary>
/// <param name="strString">Markup text to filter.</param>
/// <returns>
/// The text collected outside script spans. If an opening script tag is found but the
/// closing tag search fails, the original <paramref name="strString"/> is returned unchanged.
/// </returns>
public static string removeScripts(string strString)
{
    StringParser scanner = new StringParser(strString);
    string kept = "";
    string chunk = "";

    while (true)
    {
        // Pull everything up to the next opening script tag (match is requested as case-insensitive).
        bool foundOpening = scanner.extractToNoCase("<script", ref chunk);
        if (!foundOpening)
        {
            break;
        }

        kept = kept + chunk;

        // Move past the closing tag; when there is none, give up and hand back the input as-is.
        bool foundClosing = scanner.skipToEndOfNoCase("</script>");
        if (!foundClosing)
        {
            // NOTE(review): the parser is local, so this assignment looks like it has no
            // observable effect - kept as in the original; confirm against StringParser.
            scanner.Content = kept;
            return strString;
        }
    }

    // No further opening tag: append whatever remains after the last script span.
    scanner.extractToEnd(ref chunk);
    return kept + chunk;
}
/// <summary>
/// Scans markup for <c>href="..."</c> and <c>src="..."</c> attribute values and appends each
/// distinct URL to <paramref name="documents"/> (href values) or <paramref name="images"/>
/// (src values).
/// </summary>
/// <param name="strString">Markup text to scan; it is first passed through removeComments and removeScripts.</param>
/// <param name="strRootUrl">Root URL used to build absolute URLs for non-absolute links; may be null.</param>
/// <param name="documents">List receiving href URLs that are not already present in it.</param>
/// <param name="images">List receiving src URLs that are not already present in it.</param>
/// <remarks>
/// Values starting with <c>http://</c>, <c>https://</c> or <c>ftp://</c> (any letter case) are
/// stored as-is. Every other value is set as the path of a <see cref="UriBuilder"/> created from
/// the root URL; if that fails, the value is stored with <c>http://</c> prepended.
/// href values containing <c>mailto:</c> are skipped.
/// </remarks>
public static void getLinks(string strString, string strRootUrl, ref ArrayList documents, ref ArrayList images)
{
    strString = removeComments(strString);
    strString = removeScripts(strString);

    StringParser parser = new StringParser(strString);
    // Presumably rewrites single-quoted attributes to double-quoted ones so that only the
    // double-quoted form has to be searched for below - confirm against StringParser.
    parser.replaceEvery("'", "\"");

    string uri = "";
    if (strRootUrl != null)
    {
        uri = strRootUrl.Trim();
    }
    if ((uri.Length > 0) && !uri.EndsWith("/", StringComparison.Ordinal))
    {
        uri = uri + "/";
    }

    parser.resetPosition();
    collectAttributeLinks(parser, "href=\"", uri, true, documents);

    parser.resetPosition();
    collectAttributeLinks(parser, "src=\"", uri, false, images);
}

/// <summary>
/// Collects the quoted values following every occurrence of <paramref name="strAttribute"/>
/// and adds the resulting URLs to <paramref name="links"/>, skipping duplicates.
/// </summary>
private static void collectAttributeLinks(StringParser parser, string strAttribute, string strBaseUrl, bool bSkipMailto, ArrayList links)
{
    string strExtract = "";
    while (parser.skipToEndOfNoCase(strAttribute))
    {
        if (!parser.extractTo("\"", ref strExtract))
        {
            continue;
        }

        strExtract = strExtract.Trim();
        if (strExtract.Length == 0)
        {
            continue;
        }
        if (bSkipMailto && (strExtract.IndexOf("mailto:", StringComparison.Ordinal) != -1))
        {
            continue;
        }

        strExtract = toAbsoluteLink(strExtract, strBaseUrl);
        if (!links.Contains(strExtract))
        {
            links.Add(strExtract);
        }
    }
}

/// <summary>
/// Returns <paramref name="strLink"/> unchanged when it already carries an http, https or ftp
/// scheme; otherwise builds a URL from <paramref name="strBaseUrl"/> with the link as its path.
/// </summary>
private static string toAbsoluteLink(string strLink, string strBaseUrl)
{
    // https and mixed-case schemes must count as absolute too; otherwise a complete URL
    // would be pushed into UriBuilder.Path and come out mangled under the root host.
    if (strLink.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || strLink.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
        || strLink.StartsWith("ftp://", StringComparison.OrdinalIgnoreCase))
    {
        return strLink;
    }

    try
    {
        UriBuilder builder = new UriBuilder(strBaseUrl);
        builder.Path = strLink;
        return builder.Uri.ToString();
    }
    catch (Exception)
    {
        // Deliberate best-effort fallback (e.g. empty or malformed root URL):
        // treat the link itself as a host-relative http address.
        return "http://" + strLink;
    }
}
/// <summary>
/// Produces a copy of <paramref name="strString"/> with every span from an opening script
/// tag through its closing script tag left out.
/// </summary>
/// <param name="strString">Markup text to process.</param>
/// <returns>
/// The concatenation of the text found outside script spans, or the untouched
/// <paramref name="strString"/> when an opening script tag has no closing tag after it.
/// </returns>
public static string removeScripts(string strString)
{
    string outsideScripts = "";
    string piece = "";
    StringParser reader = new StringParser(strString);

    for (;;)
    {
        // Stop looping once no further opening script tag can be located.
        if (!reader.extractToNoCase("<script", ref piece))
        {
            // Whatever is left after the last script span belongs to the result.
            reader.extractToEnd(ref piece);
            return outsideScripts + piece;
        }

        outsideScripts = outsideScripts + piece;

        if (reader.skipToEndOfNoCase("</script>"))
        {
            continue;
        }

        // Unterminated script span: the input is returned exactly as received.
        // NOTE(review): assigning Content on this local parser appears to have no visible
        // effect - preserved from the original; confirm against StringParser.
        reader.Content = outsideScripts;
        return strString;
    }
}