/// <summary>
/// Creates a fresh <see cref="HtmlCompressor"/> and forwards this instance's
/// current settings to it through the corresponding setters.
/// </summary>
/// <remarks>
/// The nested compressors and the preserve patterns are handed to the setters
/// as-is; whether they end up shared or copied depends on those setters.
/// The YUI-specific options (JS no-munge, preserve-all-semicolons,
/// disable-optimizations, JS/CSS line break, error reporter) are NOT carried
/// over: the calls that would copy them were disabled in the original code.
/// </remarks>
/// <returns>The newly configured compressor.</returns>
private HtmlCompressor createCompressorClone()
{
    HtmlCompressor copy = new HtmlCompressor();

    // Nested compressors.
    copy.setJavaScriptCompressor(javaScriptCompressor);
    copy.setCssCompressor(cssCompressor);

    // Whitespace, comment and quote handling.
    copy.setRemoveComments(_removeComments);
    copy.setRemoveMultiSpaces(_removeMultiSpaces);
    copy.setRemoveIntertagSpaces(_removeIntertagSpaces);
    copy.setRemoveQuotes(_removeQuotes);

    // Inline script/style compression and doctype.
    copy.setCompressJavaScript(_compressJavaScript);
    copy.setCompressCss(_compressCss);
    copy.setSimpleDoctype(_simpleDoctype);

    // Attribute simplification.
    copy.setRemoveScriptAttributes(_removeScriptAttributes);
    copy.setRemoveStyleAttributes(_removeStyleAttributes);
    copy.setRemoveLinkAttributes(_removeLinkAttributes);
    copy.setRemoveFormAttributes(_removeFormAttributes);
    copy.setRemoveInputAttributes(_removeInputAttributes);
    copy.setSimpleBooleanAttributes(_simpleBooleanAttributes);

    // Protocol prefixes.
    copy.setRemoveJavaScriptProtocol(_removeJavaScriptProtocol);
    copy.setRemoveHttpProtocol(_removeHttpProtocol);
    copy.setRemoveHttpsProtocol(_removeHttpsProtocol);

    // User-supplied preserve patterns.
    copy.setPreservePatterns(preservePatterns);

    return copy;
}
/// <summary>
/// Downloads every page listed in the search-results file that belongs to
/// <paramref name="query"/>, compresses the HTML and stores one file per row
/// under the query's <c>pages</c> sub-directory.
/// </summary>
/// <remarks>
/// The working directory is the full path of <paramref name="query"/> with
/// every ':' replaced by '-'. The results file is
/// <c>__googleSearchResults.txt</c> inside that directory. Each row is split
/// on any character contained in <c>delimiter</c>; the first three fields are
/// used as an identifier (also the output file name), a title and a URL.
/// Blank rows are skipped silently; rows with fewer than three fields are
/// reported and skipped instead of aborting the whole run.
/// A failure while fetching, compressing or writing a single page is reported
/// through <c>outputLabelText</c> and does not stop the remaining downloads.
/// </remarks>
/// <param name="query">Search query whose result list should be downloaded.</param>
static void downloadPages(string query)
{
    outputLabelText(" DOWNLOADING URLS:", "");

    // Loop-invariant values: resolve the directory and the separators once.
    string baseDir = System.IO.Path.GetFullPath(query.Replace(":", "-"));
    string resultsFile = baseDir + "\\__googleSearchResults.txt";
    char[] separators = delimiter.ToCharArray();

    // 'using' guarantees the results file is closed even if a row blows up.
    using (StreamReader file = new StreamReader(resultsFile))
    {
        string row;
        while ((row = file.ReadLine()) != null)
        {
            if (row.Trim().Length == 0)
            {
                continue;
            }

            string[] split = row.Split(separators);
            if (split.Length < 3)
            {
                // Previously this threw IndexOutOfRangeException and aborted the run.
                outputLabelText(" SKIPPED MALFORMED ROW:", row);
                continue;
            }

            string g = split[0];
            string title = split[1];
            string url = split[2];

            try
            {
                string page;
                using (WebClient client = new WebClient())
                {
                    // Add a user agent header in case the
                    // requested URI contains a query.
                    client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

                    using (Stream data = client.OpenRead(url))
                    using (StreamReader reader = new StreamReader(data))
                    {
                        page = reader.ReadToEnd();
                    }
                }

                // Collapse every whitespace run to a single space before compressing.
                page = System.Text.RegularExpressions.Regex.Replace(page, @"\s+", " ");

                var comp = new HtmlCompressor();
                var output = comp.compress(page);

                System.IO.File.WriteAllText(baseDir + "\\pages\\" + g + ".html", output);
                outputLabelText(" DOWNLOAD SUCCESS [" + g + "]:", title);
            }
            catch (Exception)
            {
                // Deliberately best-effort: one bad page must not stop the batch.
                outputLabelText(" DOWNLOAD FAILED: [" + g + "]", title);
            }
        }
    }
}