/// <summary>
/// Appends a downloaded external binary file to our MhtBuilder using Base64 encoding
/// </summary>
/// <param name="ef">
/// the downloaded file; its bytes are read from <c>DownloadedBytes</c> when it is stored in
/// memory, otherwise from the file at <c>DownloadPath</c>
/// </param>
private void AppendMhtBinaryFile(WebFile ef)
{
    AppendMhtBoundary();
    AppendMhtLine("Content-Type: " + ef.ContentType);
    AppendMhtLine("Content-Transfer-Encoding: base64");
    AppendMhtLine("Content-Location: " + ef.Url);
    AppendMhtLine(String.Empty);

    //-- 57 raw bytes expand to exactly 76 Base64 characters, i.e. one full output line.
    //-- Because 57 is a multiple of 3, only the very last line can carry '=' padding,
    //-- provided every line except the last encodes a FULL chunk.
    const int ChunkSize = 57;

    if (ef.Storage == FileStorage.Memory)
    {
        byte[] data = ef.DownloadedBytes;
        int len = data.Length;
        int offset = 0;
        //-- do/while so that an empty payload still emits a single (empty) line
        do
        {
            int count = Math.Min(ChunkSize, len - offset);
            AppendMhtLine(Convert.ToBase64String(data, offset, count));
            offset += count;
        } while (offset < len);
    }
    else
    {
        byte[] buffer = new byte[ChunkSize];
        using (FileStream fs = new FileStream(ef.DownloadPath, FileMode.Open, FileAccess.Read))
        {
            while (true)
            {
                //-- Stream.Read may return fewer bytes than requested before end of file;
                //-- keep reading until the chunk is full so no padding lands mid-body
                int filled = 0;
                while (filled < ChunkSize)
                {
                    int read = fs.Read(buffer, filled, ChunkSize - filled);
                    if (read <= 0)
                    {
                        break;
                    }
                    filled += read;
                }

                if (filled == 0)
                {
                    break;
                }

                AppendMhtLine(Convert.ToBase64String(buffer, 0, filled));

                //-- a short chunk means we hit end of file
                if (filled < ChunkSize)
                {
                    break;
                }
            }
        }
    }
}
/// <summary>
/// Saves URL to disk as a single file Mht archive
/// if a folder is provided instead of a filename, the TITLE tag is used to name the file
/// </summary>
/// <param name="outputFilePath">path to generate to, or filename to generate</param>
/// <param name="st">type of storage to use when generating the Mht archive</param>
/// <param name="url">
/// fully qualified URL you wish to save as Mht; only the first element is used, and
/// String.Empty is used when none is supplied
/// </param>
/// <returns>the complete path of the Mht archive file that was generated</returns>
public string SavePageArchive(String outputFilePath, FileStorage st, params String[] url)
{
    //-- a params array can still arrive as null when the caller passes null explicitly
    String URL = String.Empty;
    if (url != null && url.Length > 0)
    {
        URL = url[0];
    }

    ValidateFilename(outputFilePath, ".mht");

    //-- URL is only null here when the caller passed a null first element;
    //-- in that case the download step is skipped
    if (URL != null)
    {
        DownloadHtmlFile(URL);
    }

    _HtmlFile.DownloadPath = outputFilePath;
    _HtmlFile.UseHtmlTitleAsFilename = true;

    //-- if set to permanent disk storage, make a local copy of the HTML
    if (st == FileStorage.DiskPermanent)
    {
        _HtmlFile.SaveToFile(Path.ChangeExtension(_HtmlFile.DownloadPath, ".htm"));
    }

    //-- download all references
    _HtmlFile.DownloadExternalFiles(st, _AllowRecursion);

    //-- build the Mht
    AppendMhtHeader(_HtmlFile);
    AppendMhtFiles();
    string mhtPath = Path.ChangeExtension(_HtmlFile.DownloadPath, ".mht");
    FinalizeMht(mhtPath);

    //-- possibly destroy temporary resources
    if (st == FileStorage.DiskTemporary)
    {
        foreach (DictionaryEntry de in WebFiles)
        {
            WebFile ef = (WebFile)de.Value;

            //-- File.Delete throws if the containing directory is gone, so check first
            if (ef.Storage == FileStorage.DiskTemporary && File.Exists(ef.DownloadPath))
            {
                File.Delete(ef.DownloadPath);
            }

            //-- if the temp folder is empty, kill that too; the folder may already have
            //-- been removed while processing an earlier entry that shared it
            if (Directory.Exists(ef.DownloadFolder)
                && Directory.GetFileSystemEntries(ef.DownloadFolder).Length == 0)
            {
                Directory.Delete(ef.DownloadFolder);
            }
        }
    }

    WebFiles.Clear();
    return mhtPath;
}
/// <summary>
/// walks every entry in WebFiles, hands each one to AppendMhtFile,
/// and then writes one more Mht boundary after the last entry
/// </summary>
private void AppendMhtFiles()
{
    foreach (DictionaryEntry entry in WebFiles)
    {
        AppendMhtFile((WebFile)entry.Value);
    }

    AppendMhtBoundary();
}
/// <summary>
/// Writes one text part to our MhtBuilder: a boundary, the part headers
/// (content type, charset, transfer encoding, location), a blank separator line,
/// and finally the file's text in Quoted-Printable form
/// </summary>
/// <param name="ef">the downloaded text file to append</param>
private void AppendMhtTextFile(WebFile ef)
{
    AppendMhtBoundary();

    //-- part headers; the charset is a tab-indented continuation of Content-Type
    string contentTypeHeader = "Content-Type: " + ef.ContentType + ";";
    AppendMhtLine(contentTypeHeader);

    string charsetContinuation = '\t' + "charset=\"" + ef.TextEncoding.WebName + "\"";
    AppendMhtLine(charsetContinuation);

    AppendMhtLine("Content-Transfer-Encoding: quoted-printable");

    string locationHeader = "Content-Location: " + ef.Url;
    AppendMhtLine(locationHeader);

    //-- blank line separates headers from body
    AppendMhtLine(String.Empty);

    string encodedBody = QuotedPrintableEncode(ef.ToString(), ef.TextEncoding);
    AppendMhtLine(encodedBody);
}
/// <summary>
/// Download a single externally referenced file (if we haven't already downloaded it)
/// </summary>
/// <param name="url">URL of the referenced file; also used as the lookup key in the builder's WebFiles</param>
/// <param name="st">storage type passed to a newly created WebFile</param>
/// <param name="targetFolder">folder to download into; created if missing when a disk storage type is used</param>
/// <param name="recursive">optional; when the first element is true, HTML/CSS files have their own references downloaded too</param>
private void DownloadExternalFile(string url, Builder.FileStorage st, string targetFolder, params bool[] recursive)
{
    //-- a params array can still arrive as null when the caller passes null explicitly
    bool recrsv = recursive != null && recursive.Length != 0 && recursive[0];

    WebFile wf;
    bool isNew;

    //-- have we already downloaded (or attempted to) this file?
    if (_Builder.WebFiles.Contains(url) || _Builder.Url == url)
    {
        wf = (WebFile)_Builder.WebFiles[url];
        isNew = false;
    }
    else
    {
        wf = new WebFile(_Builder, url, st);
        isNew = true;
    }

    //-- the url matched the builder's own Url but has no entry in WebFiles:
    //-- there is no WebFile to work with, so bail out instead of dereferencing null
    if (wf == null)
    {
        return;
    }

    //-- if we're planning to store this file on disk, make sure we can
    if (st == Builder.FileStorage.DiskPermanent || st == Builder.FileStorage.DiskTemporary)
    {
        if (!Directory.Exists(targetFolder))
        {
            Directory.CreateDirectory(targetFolder);
        }
        wf.DownloadFolder = targetFolder;
    }

    wf.Download();

    if (isNew)
    {
        //-- add this (possibly) downloaded file to our shared collection
        //-- NOTE(review): the lookup above uses url but the key added here is UrlUnmodified;
        //-- presumably these are equal for a freshly constructed WebFile - confirm
        _Builder.WebFiles.Add(wf.UrlUnmodified, wf);

        //-- if this is an HTML or CSS file, it has dependencies of its own;
        //-- download them as well when recursion was requested
        if ((wf.IsHtml || wf.IsCss) && recrsv)
        {
            wf.DownloadExternalFiles(st, recrsv);
        }
    }
}
/// <summary>
/// Adds one file to our MhtBuilder, choosing the binary or the text writer based on IsBinary.
/// Nothing is written unless the file was downloaded and has not been appended before;
/// in every case the file is flagged as appended afterwards.
/// </summary>
/// <param name="ef">the file to append</param>
private void AppendMhtFile(WebFile ef)
{
    bool shouldWrite = ef.WasDownloaded & !ef.WasAppended;

    if (shouldWrite)
    {
        if (!ef.IsBinary)
        {
            AppendMhtTextFile(ef);
        }
        else
        {
            AppendMhtBinaryFile(ef);
        }
    }

    //-- flagged unconditionally, even when nothing was written
    ef.WasAppended = true;
}
/// <summary>
/// Builds a single file Mht archive from an HTML string instead of a downloaded URL.
/// The string is handed to the root HTML file via setDownLoadedBytes; the references
/// are then downloaded and the archive is written exactly as for a downloaded page.
/// </summary>
/// <param name="HTMLString">the HTML to convert</param>
/// <param name="outputFilePath">path to generate to, or filename to generate</param>
/// <param name="st">type of storage to use when generating the Mht archive</param>
/// <returns>the complete path of the Mht archive file that was generated</returns>
public string ConvertHTMLToMHTML(String HTMLString, String outputFilePath, FileStorage st)
{
    _HtmlFile.setDownLoadedBytes(HTMLString);
    ValidateFilename(outputFilePath, ".mht");

    _HtmlFile.DownloadPath = outputFilePath;
    _HtmlFile.UseHtmlTitleAsFilename = true;

    //-- if set to permanent disk storage, make a local copy of the HTML
    if (st == FileStorage.DiskPermanent)
    {
        _HtmlFile.SaveToFile(Path.ChangeExtension(_HtmlFile.DownloadPath, ".htm"));
    }

    //-- download all references
    _HtmlFile.DownloadExternalFiles(st, _AllowRecursion);

    //-- build the Mht
    AppendMhtHeader(_HtmlFile);
    AppendMhtFiles();
    string mhtPath = Path.ChangeExtension(_HtmlFile.DownloadPath, ".mht");
    FinalizeMht(mhtPath);

    //-- possibly destroy temporary resources
    if (st == FileStorage.DiskTemporary)
    {
        foreach (DictionaryEntry de in WebFiles)
        {
            WebFile ef = (WebFile)de.Value;

            //-- File.Delete throws if the containing directory is gone, so check first
            if (ef.Storage == FileStorage.DiskTemporary && File.Exists(ef.DownloadPath))
            {
                File.Delete(ef.DownloadPath);
            }

            //-- if the temp folder is empty, kill that too; the folder may already have
            //-- been removed while processing an earlier entry that shared it
            if (Directory.Exists(ef.DownloadFolder)
                && Directory.GetFileSystemEntries(ef.DownloadFolder).Length == 0)
            {
                Directory.Delete(ef.DownloadFolder);
            }
        }
    }

    WebFiles.Clear();
    return mhtPath;
}
/// <summary>
/// converts all external Html files (gif, jpg, css, etc) to local refs
/// external ref:
///    &lt;img src="http://mywebsite/myfolder/myimage.gif"&gt;
/// into local refs:
///    &lt;img src="mypage_files/myimage.gif"&gt;
/// </summary>
/// <exception cref="CustomException.ExternalFileHTMLException">
/// thrown when this file is neither HTML nor CSS
/// </exception>
public void ConvertReferencesToLocal()
{
    //-- NOTE(review): trace output on stdout, kept as-is; consider routing through a logger
    Console.WriteLine("ConvertReferencesToLocal()");

    if (!IsHtml && !IsCss)
    {
        throw new CustomException.ExternalFileHTMLException(
            "Converting references only makes sense for HTML or CSS files; this file is of type '" + ContentType + "'");
    }

    //-- get a list of all external references
    string html = ToString();
    NameValueCollection FileCollection = ExternalHtmlFiles();

    //-- no external refs? nothing to do
    if (FileCollection.Count == 0)
    {
        return;
    }

    foreach (string DelimitedFileUrl in FileCollection.AllKeys)
    {
        //-- a null/empty key cannot be matched or replaced in the text
        if (String.IsNullOrEmpty(DelimitedFileUrl))
        {
            continue;
        }

        string FileUrl = FileCollection[DelimitedFileUrl];

        //-- only references that were actually collected in WebFiles can be localised
        if (FileUrl == null || !_Builder.WebFiles.Contains(FileUrl))
        {
            continue;
        }

        WebFile wf = (WebFile)_Builder.WebFiles[FileUrl];
        string NewPath = ExternalFilesFolder + "/" + wf.DownloadFilename;

        //-- NewPath becomes part of a regex REPLACEMENT pattern, where '$' introduces a
        //-- substitution; double it so a '$' in a folder or file name is taken literally
        string escapedNewPath = NewPath.Replace("$", "$$");

        //-- keep whatever delimiters (quotes / parentheses) surrounded the original Url
        string DelimitedReplacement = Regex.Replace(
            DelimitedFileUrl,
            "^(?<StartDelim>\"|'|\\()*(?<Value>[^'\")]*)(?<EndDelim>\"|'|\\))*$",
            "${StartDelim}" + escapedNewPath + "${EndDelim}");

        //-- correct original Url references in Html so they point to our local files
        html = html.Replace(DelimitedFileUrl, DelimitedReplacement);
    }

    _DownloadedBytes = _TextEncoding.GetBytes(html);
}
/// <summary>
/// appends the Mht header, which includes the root HTML
/// </summary>
/// <param name="ef">the root HTML file; its title becomes the Subject and it is appended as the first part</param>
private void AppendMhtHeader(WebFile ef)
{
    //-- clear the stringbuilder contents
    _MhtBuilder = new StringBuilder();

    //-- The Date header follows the mail-message date format: English day/month
    //-- abbreviations and a numeric zone written as +hhmm. Format with the invariant
    //-- culture so the names are not localised, and drop the ':' that "zzz" emits.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string zone = now.ToString("zzz", invariant).Replace(":", String.Empty);
    string date = now.ToString("ddd, dd MMM yyyy HH:mm:ss", invariant) + " " + zone;

    AppendMhtLine("From: <Saved by " + Environment.UserName + " on " + Environment.MachineName + ">");
    AppendMhtLine("Subject: " + ef.HtmlTitle);
    AppendMhtLine("Date: " + date);
    AppendMhtLine("MIME-Version: 1.0");
    AppendMhtLine("Content-Type: multipart/related;");
    //-- tab-indented lines are continuations of the Content-Type header
    AppendMhtLine(Convert.ToChar(9) + "type=\"text/html\";");
    AppendMhtLine(Convert.ToChar(9) + "boundary=\"" + _MimeBoundaryTag + "\"");
    AppendMhtLine("X-MimeOLE: Produced by " + GetType() + " " + Assembly.GetExecutingAssembly().GetName().Version);
    AppendMhtLine(string.Empty);
    AppendMhtLine("This is a multi-part message in MIME format.");

    //-- the root HTML is always the first part of the archive
    AppendMhtFile(ef);
}
/// <summary>
/// Writes a page to disk as several files: one HTML file whose references have been
/// rewritten to local paths, plus the externally referenced files it depends on.
/// When a folder is given rather than a filename, the TITLE tag names the file.
/// </summary>
/// <param name="outputFilePath">path to generate to, or filename to generate</param>
/// <param name="url">fully qualified URL you wish to save; only the first element is used</param>
/// <returns>the complete path of the HTML file that was saved to disk</returns>
public string SavePageComplete(string outputFilePath, params String[] url)
{
    String URL = (url.Length > 0) ? url[0] : String.Empty;

    ValidateFilename(outputFilePath, ".htm;.html");

    if (URL != null)
    {
        DownloadHtmlFile(URL);
    }

    //-- fetch everything the page refers to, always onto permanent disk storage
    _HtmlFile.DownloadPath = outputFilePath;
    _HtmlFile.UseHtmlTitleAsFilename = true;
    _HtmlFile.DownloadExternalFiles(FileStorage.DiskPermanent, _AllowRecursion);

    //-- rewrite references inside the external HTML/CSS files and save them
    foreach (DictionaryEntry entry in WebFiles)
    {
        WebFile external = (WebFile)entry.Value;
        if (!(external.IsHtml | external.IsCss))
        {
            continue;
        }

        external.ConvertReferencesToLocal();
        external.SaveToFile();
    }

    //-- finally rewrite and save the main HTML file itself
    _HtmlFile.ConvertReferencesToLocal();
    _HtmlFile.SaveToFile();

    return _HtmlFile.DownloadPath;
}
/// <summary>
/// Creates a Builder and its root HTML file, a WebFile bound to this
/// builder and created with in-memory storage
/// </summary>
public Builder()
{
    this._HtmlFile = new WebFile(this, FileStorage.Memory);
}