Пример #1
0
        public string Capture(int timeoutMs)
        {
            // flag indicating whether we should continue with the capture
            bool continueCapture = true;

            // request the page
            HttpWebResponse response = RequestPage(TargetUrl, timeoutMs);

            OnHeadersReceived(response.Headers, ref continueCapture);
            if (!continueCapture)
            {
                throw new OperationCancelledException();
            }

            // transfer it to a stream
            MemoryStream pageStream = new MemoryStream();

            using (Stream responseStream = response.GetResponseStream())
                StreamHelper.Transfer(responseStream, pageStream);
            pageStream.Seek(0, SeekOrigin.Begin);

            // allow filter on content
            OnContentReceived(new StreamReader(pageStream).ReadToEnd(), ref continueCapture);
            if (!continueCapture)
            {
                throw new OperationCancelledException();
            }
            pageStream.Seek(0, SeekOrigin.Begin);

            // Read the stream into a lightweight HTML doc. We use from LightWeightHTMLDocument.FromIHTMLDocument2
            // instead of LightWeightHTMLDocument.FromStream because from stream improperly shoves a saveFrom declaration
            // above the docType (bug 289357)
            IHTMLDocument2          doc  = HTMLDocumentHelper.StreamToHTMLDoc(pageStream, TargetUrl, false);
            LightWeightHTMLDocument ldoc = LightWeightHTMLDocument.FromIHTMLDocument2(doc, TargetUrl, true);

            // download references
            FileBasedSiteStorage       siteStorage = new FileBasedSiteStorage(DestinationPath, "index.htm");
            PageToDownload             page        = new PageToDownload(ldoc, TargetUrl, siteStorage.RootFile);
            PageAndReferenceDownloader downloader  = new PageAndReferenceDownloader(new PageToDownload[] { page }, siteStorage);

            downloader.Download(new TimeoutProgressHost(timeoutMs));

            // return path to captured page
            return(Path.Combine(DestinationPath, siteStorage.RootFile));
        }
Пример #2
0
        /// <summary>
        /// Used to actually commit the HTML to disk
        /// </summary>
        /// <param name="pageInfo">The PageToDownload to write</param>
        /// <param name="downloadedReferences">A hashtable of download references</param>
        /// <param name="storage">The storage to write the file into</param>
        private void WriteHtmlToDisk(PageToDownload pageInfo, FileBasedSiteStorage storage)
        {
            // Set the character set for this document
            pageInfo.LightWeightHTMLDocument.MetaData.Charset = Encoding.UTF8.WebName;
            string html = string.Empty;

            // Replace references to any URL that we downloaded!
            foreach (PageToDownload pageToDownload in _pagesToDownload)
            {
                if (!pageToDownload.IsRootPage)
                {
                    pageInfo.LightWeightHTMLDocument.AddUrlToEscape(new UrlToReplace(pageToDownload.UrlToReplace, pageInfo.GetRelativeUrlForReferencedPage(pageToDownload)));
                }
            }

            foreach (ReferenceToDownload referenceToDownload in _referencesToDownload.Values)
            {
                ReferenceToDownload downloadedReference = ((ReferenceToDownload)_referencesToDownload[referenceToDownload.AbsoluteUrl]);

                // Since we consolidated references, replace the UrToReplace from the original reference
                // with the relativePath to the reference that actually got downloaded
                string path = downloadedReference.GetRelativeUrlForReference(pageInfo);
                pageInfo.LightWeightHTMLDocument.AddUrlToEscape(new UrlToReplace(referenceToDownload.AbsoluteUrl, path));
            }


            html = pageInfo.LightWeightHTMLDocument.GenerateHtml();

            // finally, write the html out to disk
            string destination = Path.Combine(_siteStorage.BasePath, pageInfo.RelativePath);
            Stream htmlStream  = _siteStorage.Open(destination, AccessMode.Write);

            using (StreamWriter writer = new StreamWriter(htmlStream, Encoding.UTF8))
            {
                writer.Write(html);
            }

            // if this is the entry page, write the path token and root file name
            if (pageInfo.IsRootPage)
            {
                this._pathToken       = pageInfo.ReferencedFileRelativePath;
                _siteStorage.RootFile = pageInfo.FileName;
            }
        }
Пример #3
0
        public string Capture(int timeoutMs)
        {
            // flag indicating whether we should continue with the capture
            bool continueCapture = true ;

            // request the page
            HttpWebResponse response = RequestPage(TargetUrl, timeoutMs);
            OnHeadersReceived(response.Headers, ref continueCapture) ;
            if ( !continueCapture )
                throw new OperationCancelledException() ;

            // transfer it to a stream
            MemoryStream pageStream = new MemoryStream();
            using ( Stream responseStream = response.GetResponseStream() )
                StreamHelper.Transfer(responseStream, pageStream);
            pageStream.Seek(0, SeekOrigin.Begin) ;

            // allow filter on content
            OnContentReceived( new StreamReader(pageStream).ReadToEnd(), ref continueCapture ) ;
            if ( !continueCapture )
                throw new OperationCancelledException() ;
            pageStream.Seek(0, SeekOrigin.Begin) ;

            // Read the stream into a lightweight HTML doc. We use from LightWeightHTMLDocument.FromIHTMLDocument2
            // instead of LightWeightHTMLDocument.FromStream because from stream improperly shoves a saveFrom declaration
            // above the docType (bug 289357)
            IHTMLDocument2 doc = HTMLDocumentHelper.StreamToHTMLDoc(pageStream, TargetUrl, false);
            LightWeightHTMLDocument ldoc = LightWeightHTMLDocument.FromIHTMLDocument2(doc, TargetUrl, true);

            // download references
            FileBasedSiteStorage siteStorage = new FileBasedSiteStorage(DestinationPath, "index.htm");
            PageToDownload page = new PageToDownload(ldoc, TargetUrl, siteStorage.RootFile);
            PageAndReferenceDownloader downloader = new PageAndReferenceDownloader(new PageToDownload[]{page}, siteStorage) ;
            downloader.Download(new TimeoutProgressHost(timeoutMs)) ;

            // return path to captured page
            return Path.Combine(DestinationPath, siteStorage.RootFile) ;
        }
 /// <summary>
 /// Constructs a new asynchrous page download.
 /// </summary>
 /// <param name="pagesToDownload">The array of pagesToDownload</param>
 /// <param name="siteStorage">The File based site storage into which to place the page and references</param>
 /// <param name="target">The ISynchronizeInvoke that will be used to call back</param>
 /// <param name="throwOnFailure">Indicates whether downloader should throw on failure, or just
 /// log the failure and continue</param>
 public PageAndReferenceDownloader(PageToDownload[] pagesToDownload, FileBasedSiteStorage siteStorage, bool throwOnFailure)
 {
     _siteStorage = siteStorage;
     _pagesToDownload = pagesToDownload;
     _throwOnFailure = throwOnFailure;
 }
 /// <summary>
 /// Constructs a new asynchrous page download.
 /// </summary>
 /// <param name="pagesToDownload">The array of pagesToDownload</param>
 /// <param name="siteStorage">The File based site storage into which to place the page and references</param>
 /// <param name="target">The ISynchronizeInvoke that will be used to call back</param>
 public PageAndReferenceDownloader(PageToDownload[] pagesToDownload, FileBasedSiteStorage siteStorage)
     : this(pagesToDownload, siteStorage, true)
 {
 }
 public DownloadWorkItem(ReferenceToDownload reference, FileBasedSiteStorage siteStorage, IProgressHost progressHost)
 {
     this.reference = reference;
     this.siteStorage = siteStorage;
     this.progressHost = progressHost;
 }
        /// <summary>
        /// Used to actually commit the HTML to disk
        /// </summary>
        /// <param name="pageInfo">The PageToDownload to write</param>
        /// <param name="downloadedReferences">A hashtable of download references</param>
        /// <param name="storage">The storage to write the file into</param>
        private void WriteHtmlToDisk(PageToDownload pageInfo, FileBasedSiteStorage storage)
        {
            // Set the character set for this document
            pageInfo.LightWeightHTMLDocument.MetaData.Charset = Encoding.UTF8.WebName;
            string html = string.Empty;

            // Replace references to any URL that we downloaded!
            foreach (PageToDownload pageToDownload in _pagesToDownload)
                if (!pageToDownload.IsRootPage)
                    pageInfo.LightWeightHTMLDocument.AddUrlToEscape(new UrlToReplace(pageToDownload.UrlToReplace, pageInfo.GetRelativeUrlForReferencedPage(pageToDownload)));

            foreach (ReferenceToDownload referenceToDownload in _referencesToDownload.Values)
            {
                ReferenceToDownload downloadedReference = ((ReferenceToDownload)_referencesToDownload[referenceToDownload.AbsoluteUrl]);

                // Since we consolidated references, replace the UrToReplace from the original reference
                // with the relativePath to the reference that actually got downloaded
                string path = downloadedReference.GetRelativeUrlForReference(pageInfo);
                pageInfo.LightWeightHTMLDocument.AddUrlToEscape(new UrlToReplace(referenceToDownload.AbsoluteUrl, path));
            }

            html = pageInfo.LightWeightHTMLDocument.GenerateHtml();

            // finally, write the html out to disk
            string destination = Path.Combine(_siteStorage.BasePath, pageInfo.RelativePath);
            Stream htmlStream = _siteStorage.Open(destination, AccessMode.Write);
            using (StreamWriter writer = new StreamWriter(htmlStream, Encoding.UTF8))
            {
                writer.Write(html);
            }

            // if this is the entry page, write the path token and root file name
            if (pageInfo.IsRootPage)
            {
                this._pathToken = pageInfo.ReferencedFileRelativePath;
                _siteStorage.RootFile = pageInfo.FileName;
            }
        }
        /// <summary>
        /// Download a reference, providing progress
        /// </summary>
        /// <param name="reference">The reference to download</param>
        /// <param name="fileStorage">The storage to download the refernce into</param>
        /// <param name="progressHost">The progressHost to provide feedback to</param>
        private void DownloadReference(ReferenceToDownload reference, FileBasedSiteStorage fileStorage, IProgressHost progressHost)
        {
            UrlDownloadToFile downloader;
            string fullPath;

            downloader = new UrlDownloadToFile();
            downloader.TimeoutMs = 30000;

            if (progressHost.CancelRequested)
                throw new OperationCancelledException();

            // make sure that the directory exists
            fullPath = Path.Combine(fileStorage.BasePath, reference.RelativePath);

            string directory = Path.GetDirectoryName(fullPath);
            if (!Directory.Exists(directory))
                Directory.CreateDirectory(directory);

            // Make sure there aren't any conflicts
            lock (this)
            {
                string newFileName = Path.GetFileName(fullPath);
                do
                {
                    fullPath = PathHelper.GetNonConflictingPath(fullPath);
                    newFileName = Path.GetFileName(fullPath);

                    reference.SetFileName(ref newFileName);

                } while (newFileName != Path.GetFileName(fullPath));

                using (File.Open(fullPath, FileMode.Create, FileAccess.ReadWrite, FileShare.None))
                {
                }
            }

            string downloadUrl = reference.AbsoluteUrl;
            if (UrlHelper.IsFileUrl(downloadUrl))
                downloadUrl = HttpUtility.UrlDecode(downloadUrl);

            downloader.Url = downloadUrl.Trim();
            downloader.FilePath = fullPath;
            downloader.ShowSecurityUI = true;
            if (CredentialsContext != null)
                downloader.CredentialsContext = CredentialsContext;

            try
            {
                downloader.Download(progressHost);
            }
            catch (COMException e)
            {
                // If the file couldn't be downloaded, this doesn't matter.  But log it
                Trace.WriteLine("Didn't download file: " + downloader.Url + " " + e.ToString());
            }

            // Fix the filename of the downloaded entity to have the correct extension
            string contentType = downloader.ContentType;
            if (contentType != null && File.Exists(fullPath))
            {
                string suggestedExtension = MimeHelper.GetExtensionFromContentType(contentType);
                if (suggestedExtension == null)
                    suggestedExtension = UrlHelper.GetExtensionForUrl(downloader.FinalUrl);

                if (Path.GetExtension(fullPath) != suggestedExtension)
                {
                    string newFilePath = Path.ChangeExtension(fullPath, suggestedExtension);
                    string newFileName = Path.GetFileName(newFilePath);

                    // Try to reset the name until we can both agree
                    while (true)
                    {
                        newFilePath = PathHelper.GetNonConflictingPath(newFilePath);
                        newFileName = Path.GetFileName(newFilePath);

                        FileHelper.Rename(fullPath, newFilePath);
                        reference.SetFileName(ref newFileName);
                        if (newFileName != Path.GetFileName(newFilePath))
                        {
                            try
                            {
                                File.Delete(newFilePath);
                            }
                            catch (Exception e) { Debug.Fail("Unable to delete failed temp file: " + e.ToString()); }
                        }
                        else
                        {
                            break;
                        }
                    }

                    fullPath = newFilePath;
                }
            }

            // Handle anything special we need to do for stylesheet and js file references
            if (Path.GetExtension(fullPath) == ".css" && File.Exists(fullPath))
            {
                string fileContents = string.Empty;
                using (StreamReader reader = new StreamReader(fullPath))
                    fileContents = reader.ReadToEnd();

                if (fileContents != string.Empty)
                {
                    LightWeightCSSReplacer cssReplacer = new LightWeightCSSReplacer(fileContents);
                    // fix up references
                    foreach (ReferenceToDownload referenceInfo in _referencesToDownload.Values)
                        cssReplacer.AddUrlToReplace(new UrlToReplace(referenceInfo.UrlToReplace, referenceInfo.FileName));

                    string newCss = cssReplacer.DoReplace();
                    using (StreamWriter writer = new StreamWriter(fullPath, false))
                        writer.Write(newCss);
                }
            }
        }
 /// <summary>
 /// Constructs a new asynchronous page download.
 /// </summary>
 /// <param name="pagesToDownload">The array of pagesToDownload</param>
 /// <param name="siteStorage">The File based site storage into which to place the page and references</param>
 /// <param name="target">The ISynchronizeInvoke that will be used to call back</param>
 /// <param name="throwOnFailure">Indicates whether downloader should throw on failure, or just
 /// log the failure and continue</param>
 public PageAndReferenceDownloader(PageToDownload[] pagesToDownload, FileBasedSiteStorage siteStorage, bool throwOnFailure)
 {
     _siteStorage     = siteStorage;
     _pagesToDownload = pagesToDownload;
     _throwOnFailure  = throwOnFailure;
 }
 /// <summary>
 /// Constructs a new asynchronous page download.
 /// </summary>
 /// <param name="pagesToDownload">The array of pagesToDownload</param>
 /// <param name="siteStorage">The File based site storage into which to place the page and references</param>
 /// <param name="target">The ISynchronizeInvoke that will be used to call back</param>
 public PageAndReferenceDownloader(PageToDownload[] pagesToDownload, FileBasedSiteStorage siteStorage) : this(pagesToDownload, siteStorage, true)
 {
 }
        /// <summary>
        /// Download a reference, providing progress
        /// </summary>
        /// <param name="reference">The reference to download</param>
        /// <param name="fileStorage">The storage to download the reference into</param>
        /// <param name="progressHost">The progressHost to provide feedback to</param>
        private void DownloadReference(ReferenceToDownload reference, FileBasedSiteStorage fileStorage, IProgressHost progressHost)
        {
            if (IsBase64EmbededImage(reference.AbsoluteUrl))
            {
                return;
            }
            UrlDownloadToFile downloader;
            string            fullPath;

            downloader           = new UrlDownloadToFile();
            downloader.TimeoutMs = 30000;

            if (progressHost.CancelRequested)
            {
                throw new OperationCancelledException();
            }

            // make sure that the directory exists
            fullPath = Path.Combine(fileStorage.BasePath, reference.RelativePath);

            string directory = Path.GetDirectoryName(fullPath);

            if (!Directory.Exists(directory))
            {
                Directory.CreateDirectory(directory);
            }

            // Make sure there aren't any conflicts
            lock (this)
            {
                string newFileName = Path.GetFileName(fullPath);
                do
                {
                    fullPath    = PathHelper.GetNonConflictingPath(fullPath);
                    newFileName = Path.GetFileName(fullPath);

                    reference.SetFileName(ref newFileName);
                } while (newFileName != Path.GetFileName(fullPath));

                using (File.Open(fullPath, FileMode.Create, FileAccess.ReadWrite, FileShare.None))
                {
                }
            }

            string downloadUrl = reference.AbsoluteUrl;

            if (UrlHelper.IsFileUrl(downloadUrl))
            {
                downloadUrl = HttpUtility.UrlDecode(downloadUrl);
            }

            downloader.Url            = downloadUrl.Trim();
            downloader.FilePath       = fullPath;
            downloader.ShowSecurityUI = true;
            if (CredentialsContext != null)
            {
                downloader.CredentialsContext = CredentialsContext;
            }

            try
            {
                downloader.Download(progressHost);
            }
            catch (COMException e)
            {
                // If the file couldn't be downloaded, this doesn't matter.  But log it
                Trace.WriteLine("Didn't download file: " + downloader.Url + " " + e.ToString());
            }

            // Fix the filename of the downloaded entity to have the correct extension
            string contentType = downloader.ContentType;

            if (contentType != null && File.Exists(fullPath))
            {
                string suggestedExtension = MimeHelper.GetExtensionFromContentType(contentType);
                if (suggestedExtension == null)
                {
                    suggestedExtension = UrlHelper.GetExtensionForUrl(downloader.FinalUrl);
                }

                if (Path.GetExtension(fullPath) != suggestedExtension)
                {
                    string newFilePath = Path.ChangeExtension(fullPath, suggestedExtension);
                    string newFileName = Path.GetFileName(newFilePath);

                    // Try to reset the name until we can both agree
                    while (true)
                    {
                        newFilePath = PathHelper.GetNonConflictingPath(newFilePath);
                        newFileName = Path.GetFileName(newFilePath);

                        FileHelper.Rename(fullPath, newFilePath);
                        reference.SetFileName(ref newFileName);
                        if (newFileName != Path.GetFileName(newFilePath))
                        {
                            try
                            {
                                File.Delete(newFilePath);
                            }
                            catch (Exception e) { Debug.Fail("Unable to delete failed temp file: " + e.ToString()); }
                        }
                        else
                        {
                            break;
                        }
                    }

                    fullPath = newFilePath;
                }
            }

            // Handle anything special we need to do for stylesheet and js file references
            if (Path.GetExtension(fullPath) == ".css" && File.Exists(fullPath))
            {
                string fileContents = string.Empty;
                using (StreamReader reader = new StreamReader(fullPath))
                    fileContents = reader.ReadToEnd();

                if (fileContents != string.Empty)
                {
                    LightWeightCSSReplacer cssReplacer = new LightWeightCSSReplacer(fileContents);
                    // fix up references
                    foreach (ReferenceToDownload referenceInfo in _referencesToDownload.Values)
                    {
                        cssReplacer.AddUrlToReplace(new UrlToReplace(referenceInfo.UrlToReplace, referenceInfo.FileName));
                    }

                    string newCss = cssReplacer.DoReplace();
                    using (StreamWriter writer = new StreamWriter(fullPath, false))
                        writer.Write(newCss);
                }
            }
        }
 public DownloadWorkItem(ReferenceToDownload reference, FileBasedSiteStorage siteStorage, IProgressHost progressHost)
 {
     this.reference    = reference;
     this.siteStorage  = siteStorage;
     this.progressHost = progressHost;
 }
 /// <summary>
 /// Fixes up the downloaded supporting files that were in the template.
 /// </summary>
 /// <param name="blogTemplateFile"></param>
 /// <param name="storage"></param>
 /// <param name="supportingFilesDir"></param>
 protected internal virtual void FixupDownloadedFiles(string blogTemplateFile, FileBasedSiteStorage storage, string supportingFilesDir)
 {
     templateStrategy.FixupDownloadedFiles(blogTemplateFile, storage, supportingFilesDir);
 }
        private string DownloadTemplateFiles(string templateContents, string templateUrl, IProgressHost progress)
        {
            progress.UpdateProgress(Res.Get(StringId.ProgressDownloadingSupportingFiles));
            FileBasedSiteStorage files = new FileBasedSiteStorage(_blogTemplateDir);

            // convert the string to a stream
            MemoryStream templateStream = new MemoryStream();
            StreamWriter writer = new StreamWriter(templateStream, Encoding.UTF8);
            writer.Write(templateContents);
            writer.Flush();
            templateStream.Seek(0, SeekOrigin.Begin);

            //read the stream into a lightweight HTML.  Note that we use from LightWeightHTMLDocument.FromIHTMLDocument2
            //instead of LightWeightHTMLDocument.FromStream because from stream improperly shoves a saveFrom declaration
            //above the docType (bug 289357)
            IHTMLDocument2 doc = HTMLDocumentHelper.StreamToHTMLDoc(templateStream, templateUrl, true);
            LightWeightHTMLDocument ldoc = LightWeightHTMLDocument.FromIHTMLDocument2(doc, templateUrl, true, false);

            PageDownloadContext downloadContext = new PageDownloadContext(0);
            ApplyCredentials(downloadContext, templateUrl);
            using (PageToDownloadFactory downloadFactory = new PageToDownloadFactory(ldoc, downloadContext, _parentControl))
            {
                //calculate the dependent styles and resources
                ProgressTick tick = new ProgressTick(progress, 50, 100);
                downloadFactory.CreatePagesToDownload(tick);
                tick.UpdateProgress(100, 100);

                //download the dependent styles and resources
                tick = new ProgressTick(progress, 50, 100);
                PageAndReferenceDownloader downloader = new PageAndReferenceDownloader(downloadFactory.PagesToDownload, files);
                this.ApplyCredentials(downloader, templateUrl);
                downloader.Download(tick);
                tick.UpdateProgress(100, 100);

                //Expand out the relative paths in the downloaded HTML file with absolute paths.
                //Note: this is necessary so that template resources are not improperly resolved relative
                //      to the location of the file the editor is editing.
                string blogTemplateFile = Path.Combine(_blogTemplateDir, files.RootFile);
                string origFile = blogTemplateFile + ".token";
                File.Move(blogTemplateFile, origFile);
                string absPath = String.Format(CultureInfo.InvariantCulture, "file:///{0}/{1}", _blogTemplateDir.Replace('\\', '/'), downloader.PathToken);
                TextHelper.ReplaceInFile(origFile, downloader.PathToken, blogTemplateFile, absPath);
                File.Delete(origFile);

                //fix up the files
                FixupDownloadedFiles(blogTemplateFile, files, downloader.PathToken);

                //complete the progress.
                progress.UpdateProgress(100, 100);

                File.WriteAllText(blogTemplateFile + ".path", absPath);
                return blogTemplateFile;
            }
        }
        /// <summary>
        /// Fixes up the CSS styles of in the template to make them editor safe.
        /// </summary>
        /// <param name="blogTemplateFile"></param>
        /// <param name="storage"></param>
        /// <param name="supportingFilesDir"></param>
        protected internal override void FixupDownloadedFiles(string blogTemplateFile, FileBasedSiteStorage storage, string supportingFilesDir)
        {
            //Fix up the downloaded files to remove position:relative CSS settings (fixes bugs 244951, 287589, 287556)
            string supportingFilesPath = Path.Combine(storage.BasePath, supportingFilesDir);
            if (Directory.Exists(supportingFilesPath))
            {
                string[] files = Directory.GetFiles(supportingFilesPath);
                foreach (string file in files)
                {
                    if (file.EndsWith(".css", StringComparison.OrdinalIgnoreCase))
                    {
                        string cssPath = Path.Combine(supportingFilesPath, file);
                        string newCssContent = "";
                        using (StreamReader sr = new StreamReader(cssPath))
                        {
                            string cssContent = sr.ReadToEnd();
                            newCssContent = new LightWeightCSSReplacer(cssContent).DoReplace();
                        }

                        using (StreamWriter sw = new StreamWriter(cssPath, false, Encoding.UTF8))
                        {
                            sw.Write(newCssContent);
                        }
                    }
                }
            }

            //fix up the styles in the HTML document
            string templateFilePath = Path.Combine(supportingFilesPath, blogTemplateFile);
            string newTemplateContent = "";
            using (StreamReader sr = new StreamReader(templateFilePath))
            {
                string docContent = sr.ReadToEnd();
                newTemplateContent = new LightWeightDocumentCssReplacer(docContent).DoReplace();
            }

            using (StreamWriter sw = new StreamWriter(templateFilePath, false, Encoding.UTF8))
            {
                sw.Write(newTemplateContent);
            }
        }