/// <summary>
/// Console entry point. Prints the banner and the usage text, then compresses the
/// single HTML file named on the command line and writes the result to disk.
/// </summary>
/// <param name="args">Command-line arguments; exactly one is expected, the HTML input file path.</param>
private static void Main(string[] args)
{
    // The banner and the syntax help are printed on every run, before the arguments are checked.
    Console.WriteLine("Zeta Html Compressor Console. http://zeta.li");
    Console.WriteLine();
    Console.WriteLine("Syntax: hc.exe <HTML input file path>");
    Console.WriteLine();

    // Anything other than exactly one argument is reported on stderr and nothing else happens.
    if (args.Length != 1)
    {
        Console.Error.WriteLine(
            "No input file specified or invalid number of arguments ({0} found).",
            args.Length);
        return;
    }

    var sourcePath = args[0];
    var sourceHtml = File.ReadAllText(sourcePath);

    var htmlCompressor = new HtmlContentCompressor();
    var compressedHtml = htmlCompressor.Compress(sourceHtml);

    // With the "-output" suffix commented out, the target path is rebuilt from the
    // input's own directory, base name and extension.
    var targetDirectory = Path.GetDirectoryName(sourcePath);
    var targetFileName =
        Path.GetFileNameWithoutExtension(sourcePath) +
        // @"-output" +
        Path.GetExtension(sourcePath);
    var targetPath = Path.Combine(targetDirectory, targetFileName);

    // Remove any existing file at the target path before writing the result as UTF-8.
    if (File.Exists(targetPath))
    {
        File.Delete(targetPath);
    }

    File.WriteAllText(targetPath, compressedHtml, Encoding.UTF8);
    Console.WriteLine("Successfully written output file '{0}'.", targetPath);
}
/// <summary>
/// Console entry point. Always prints the banner and usage text; when exactly one
/// argument is given, compresses that HTML file and writes the result to disk.
/// </summary>
/// <param name="args">Command-line arguments; the single expected entry is the HTML input file path.</param>
private static void Main(string[] args)
{
    // Banner, blank line, syntax help, blank line.
    var bannerLines = new[]
    {
        "Zeta Html Compressor Console. http://zeta.li",
        "",
        "Syntax: hc.exe <HTML input file path>",
        ""
    };
    foreach (var bannerLine in bannerLines)
    {
        Console.WriteLine(bannerLine);
    }

    if (args.Length == 1)
    {
        var htmlPath = args[0];
        var html = File.ReadAllText(htmlPath);

        var htmlCompressor = new HtmlContentCompressor();
        var minified = htmlCompressor.Compress(html);

        // The "-output" suffix is commented out, so the result path is assembled from
        // the input's directory, base name and extension only.
        var folder = Path.GetDirectoryName(htmlPath);
        var fileName =
            Path.GetFileNameWithoutExtension(htmlPath) +
            // @"-output" +
            Path.GetExtension(htmlPath);
        var resultPath = Path.Combine(folder, fileName);

        // Delete a file already present at the result path, then write the result as UTF-8.
        if (File.Exists(resultPath))
        {
            File.Delete(resultPath);
        }

        File.WriteAllText(resultPath, minified, Encoding.UTF8);
        Console.WriteLine("Successfully written output file '{0}'.", resultPath);
    }
    else
    {
        // Wrong argument count: report it on stderr and do nothing else.
        Console.Error.WriteLine(
            "No input file specified or invalid number of arguments ({0} found).",
            args.Length);
    }
}
/// <summary>
/// Runs a single compression pass over <c>html</c> (declared outside this method)
/// with a fresh <c>HtmlContentCompressor</c> and reads the length of the result.
/// </summary>
public void BenchHtmlCompressor()
{
    var minified = new HtmlContentCompressor().Compress(html);

    // The length is read but not used any further inside this method.
    var minifiedLength = minified.Length;

    // Disabled: writing the compressed result next to the program assembly.
    //File.WriteAllText(
    //    Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "HTMLStandard.min3.htm"),
    //    result);
}
/// <summary>
/// Compresses the <c>Html02a</c> resource with a preserve pattern for
/// "PROTECTED" comments registered, then asserts that the output is not empty and
/// that its length, as a percentage of the input length rounded to two decimals,
/// is below 100.
/// </summary>
public void Test02()
{
    var compressor = new HtmlContentCompressor();

    // Register a pattern matching "<!-- PROTECTED: ... -->" comments (lazy, dot matches newlines, case-insensitive).
    var protectedComment = new Regex(
        @"<!-- PROTECTED: .*? -->",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);
    compressor.AddPreservePattern(protectedComment);

    var original = Resources.Html02a;
    var compressed = compressor.Compress(original);

    Assert.IsNotEmpty(compressed);

    // Output size as a percentage of the input size, rounded to two decimals.
    double fraction = (double)compressed.Length / original.Length;
    var percentOfOriginal = Math.Round(fraction * 100.0, 2);
    Assert.IsTrue(percentOfOriginal < 100.0);
}
/// <summary>
/// Compresses the HTML file at <paramref name="filepath"/> in place: the file is
/// read as text, passed through <c>HtmlContentCompressor.Compress</c>, and the
/// result is written back to the same path.
/// </summary>
/// <param name="filepath">Path of the HTML file to read and overwrite.</param>
public void MinifyHtml(string filepath)
{
    var compressor = new HtmlContentCompressor();

    // Load the markup, compress it, and overwrite the file it came from.
    string originalMarkup = File.ReadAllText(filepath);
    string minifiedMarkup = compressor.Compress(originalMarkup);
    File.WriteAllText(filepath, minifiedMarkup);
}
/// <summary>
/// Checks that compressing the <c>Html02a</c> resource, with "PROTECTED" comments
/// registered as a preserve pattern, yields non-empty output whose size relative
/// to the input (in percent, rounded to two decimals) is below 100.
/// </summary>
public void Test02()
{
    var htmlCompressor = new HtmlContentCompressor();
    htmlCompressor.AddPreservePattern(
        new Regex(
            @"<!-- PROTECTED: .*? -->",
            RegexOptions.Singleline | RegexOptions.IgnoreCase));

    var source = Resources.Html02a;
    var result = htmlCompressor.Compress(source);

    Assert.IsNotEmpty(result);

    // result length / source length, expressed as a percentage with two decimals.
    double sizeFraction = (double)result.Length / source.Length;
    double sizePercent = Math.Round(sizeFraction * 100.0, 2);
    Assert.IsTrue(sizePercent < 100.0);
}
/// <summary>
/// Passes <paramref name="html"/> through a new <c>HtmlContentCompressor</c> and
/// returns the compressed markup.
/// </summary>
/// <param name="html">The HTML text to compress.</param>
/// <returns>The value returned by <c>HtmlContentCompressor.Compress</c> for the input.</returns>
public static string MinifyHTML(string html)
{
    return new HtmlContentCompressor().Compress(html);
}
// Link texts (first child of the anchor's <code> element) whose target pages are still
// processed recursively but do not get an Anki CSV entry of their own.
private static readonly HashSet<string> KeywordsWithoutAnkiEntry = new HashSet<string>
{
    "func ", "var ", "case ", "static var ", "static let ", "static func ", "class func ", "init", "subscript"
};

/// <summary>
/// Converts one previously downloaded documentation page into a trimmed local HTML file and,
/// when an entry list is supplied, into one tab-separated Anki CSV entry.
/// The page is stripped down to its main element, its images are copied to
/// <c>anki_media/</c> and re-linked, every page linked from its topics section is processed
/// recursively, and site-relative <c>/documentation/</c> links are made absolute.
/// </summary>
/// <param name="url">URL of the page; mapped to the downloaded source file and to the local destination file.</param>
/// <param name="ankiCsvEntries">
/// List receiving the CSV entry (heading, url, title-only HTML, full HTML, separated by tabs),
/// or <c>null</c> to process the page without producing an entry.
/// </param>
/// <param name="visitedURLs">URLs already processed; a URL found here is not processed again.</param>
/// <returns>The destination HTML file for <paramref name="url"/>.</returns>
/// <exception cref="Exception">
/// An image's <c>srcset</c> does not start with its <c>src</c> (or either is missing),
/// or the saved document contains a tab character.
/// </exception>
static System.IO.FileInfo ProcessPage(string url, List <string> ankiCsvEntries, HashSet <string> visitedURLs)
{
    var srcHtmlFile = new System.IO.FileInfo(UrlToTempDownloadPath(url) + ".html");
    var dstHtmlFile = new System.IO.FileInfo(UrlToLocalPath(url) + ".html");
    dstHtmlFile.Directory.Create();

    // Add returns false when the URL is already in the set: nothing more to do for this page.
    if (!visitedURLs.Add(url))
    {
        return(dstHtmlFile);
    }

    var doc = new HtmlDocument();
    doc.Load(srcHtmlFile.FullName);

    // Strip the page down: drop <head>, everything in <body> except <div id="app">,
    // and everything next to <main> inside the app container.
    doc.DocumentNode.SelectSingleNode("html/head").Remove();
    var appSiblings = doc.DocumentNode.SelectSingleNode("html/body").ChildNodes.Where(node => !(node.Name == "div" && node.Id == "app")).ToArray();
    foreach (var appSibling in appSiblings)
    {
        appSibling.Remove();
    }
    var mainSiblings = doc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div").ChildNodes.Where(node => node.Name != "main").ToArray();
    foreach (var mainSibling in mainSiblings)
    {
        mainSibling.Remove();
    }

    // Copy every image into anki_media/ under a flat name and point src/srcset at that name.
    var images = doc.DocumentNode.SelectNodes("//img");
    if (images != null)
    {
        foreach (var image in images)
        {
            string imgSrc = image.GetAttributeValue("src", null);
            string imgSrcSet = image.GetAttributeValue("srcset", null);

            // An ordinal prefix test also covers a missing attribute and a srcset shorter than src,
            // which previously surfaced as NullReferenceException / ArgumentOutOfRangeException.
            if (imgSrc == null || imgSrcSet == null || !imgSrcSet.StartsWith(imgSrc, StringComparison.Ordinal))
            {
                throw new Exception("Expected srcset to be the same link as src");
            }

            FileInfo srcImgFile = new FileInfo(UrlToLocalPath(imgSrc));
            string ankiFriendlyName = "metal_" + new Uri(imgSrc).AbsolutePath.Substring(1).Replace('/', '_');
            FileInfo dstImgFile = new FileInfo("anki_media/" + ankiFriendlyName);
            dstImgFile.Directory.Create();
            File.Copy(srcImgFile.FullName, dstImgFile.FullName, true);

            image.SetAttributeValue("src", ankiFriendlyName);
            image.SetAttributeValue("srcset", imgSrcSet.Replace(imgSrc, ankiFriendlyName));
        }
    }

    // reserve an index so it properly shows up in pre-order traversal
    int ankiCsvEntryIndex = -1;
    if (ankiCsvEntries != null)
    {
        ankiCsvEntryIndex = ankiCsvEntries.Count;
        ankiCsvEntries.Add("");
    }

    // Recurse into every page linked from the topics section.
    // SelectNodes yields null when nothing matches, so pages without topics are simply skipped here.
    var sections = doc.DocumentNode.SelectNodes("html/body/div[@id='app']/div/main/section[@id='topics']/div/div/section");
    if (sections != null)
    {
        foreach (var section in sections)
        {
            var topics = section.SelectNodes("div/div[@class='contenttable-section-content column large-9 medium-9 small-12']/div[@class='task-topics']/div");
            if (topics == null)
            {
                continue;
            }

            foreach (var topic in topics)
            {
                var anchor = topic.SelectSingleNode("div/a");
                var href = anchor.GetAttributeValue("href", null);

                // Links whose code text is one of the listed keywords are processed without a CSV entry.
                var entriesToPass = ankiCsvEntries;
                var codeNode = anchor.SelectSingleNode("code");
                if (codeNode != null && KeywordsWithoutAnkiEntry.Contains(codeNode.FirstChild?.InnerText))
                {
                    entriesToPass = null;
                }

                ProcessPage("https://developer.apple.com" + href, entriesToPass, visitedURLs);
                anchor.SetAttributeValue("href", "https://developer.apple.com" + href);
            }
        }
    }

    // Remove page parts that are not wanted in the output; each one may be absent.
    doc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div/main/div[@class='topic-title']/span[@class='eyebrow']")?.Remove();
    doc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div/main/div[@class='topic-container section-content row']/div[@class='topic-summary column large-3 medium-3 small-12']")?.Remove();
    doc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div/main/section[@id='see-also']")?.Remove();
    doc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div/main/div[@class='betainfo section-alt row']")?.Remove();

    // Turn remaining site-relative documentation links into absolute ones.
    var anchors = doc.DocumentNode.SelectNodes("//a");
    if (anchors != null)
    {
        foreach (var anchor in anchors)
        {
            string href = anchor.GetAttributeValue("href", null);

            // Anchors without an href attribute are left untouched.
            if (href != null && href.StartsWith("/documentation/", StringComparison.Ordinal))
            {
                anchor.SetAttributeValue("href", "https://developer.apple.com" + href);
            }
        }
    }

    doc.Save(dstHtmlFile.FullName);

    if (ankiCsvEntryIndex != -1)
    {
        // The entry is tab-separated, so the page content itself must not contain tabs.
        string docAsString = File.ReadAllText(dstHtmlFile.FullName);
        if (docAsString.Contains('\t'))
        {
            throw new Exception("tab is not a good enough CSV separator!");
        }

        // Compress, then replace the remaining line breaks so the entry stays on one line.
        docAsString = new HtmlContentCompressor().Compress(docAsString);
        docAsString = System.Text.RegularExpressions.Regex.Replace(docAsString, "\r?\n", "<br>");

        var topicHeading = doc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div/main/div[@class='topic-title']/h1[@class='topic-heading']").InnerText;

        // Second copy of the page with the topic content and the topics section removed.
        var titleOnlyDoc = new HtmlDocument();
        titleOnlyDoc.LoadHtml(docAsString);
        titleOnlyDoc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div/main/div[@class='topic-container section-content row']/div[@id='topic-content']")?.Remove();
        titleOnlyDoc.DocumentNode.SelectSingleNode("html/body/div[@id='app']/div/main/section[@id='topics']")?.Remove();

        string titleOnlyString;
        using (var writer = new StringWriter())
        {
            titleOnlyDoc.Save(writer);
            titleOnlyString = writer.ToString();
        }

        // Fill the slot reserved before the recursion: heading, url, title-only HTML, full HTML.
        ankiCsvEntries[ankiCsvEntryIndex] = topicHeading + "\t" + url + "\t" + titleOnlyString + "\t" + docAsString;
    }

    return(dstHtmlFile);
}