private PageToDownload[] GetSubPagesToDownload(IProgressHost progress, ArrayList downloadedPagesToScan, PageToDownload parentPage)
        {
            // Works out which sub page urls to follow and downloads each of them via DownloadPages,
            // returning every page those downloads produced.

            // An explicit url selection on the context takes precedence; otherwise the urls are
            // derived from the pages that have been downloaded so far.
            string[] candidateUrls;
            if (_context.SelectedUrlsToDownload.Count >= 1)
            {
                candidateUrls = (string[])_context.SelectedUrlsToDownload.ToArray(typeof(string));
            }
            else
            {
                PageToDownload[] pagesToScan = (PageToDownload[])downloadedPagesToScan.ToArray(typeof(PageToDownload));
                candidateUrls = GetSubPagesToDownload(pagesToScan, parentPage);
            }

            // Download each candidate, re-checking the depth limit before every url
            ArrayList collectedPages = new ArrayList();
            for (int urlIndex = 0; urlIndex < candidateUrls.Length; urlIndex++)
            {
                if (!_context.ShouldContinue(_currentDepth))
                    continue;

                // Each url receives an equal share of the supplied progress range
                ProgressTick urlProgress = new ProgressTick(progress, 1, candidateUrls.Length);
                PageToDownload[] pagesForUrl = DownloadPages(urlProgress, candidateUrls[urlIndex], null, parentPage);
                collectedPages.AddRange(pagesForUrl);
            }

            return (PageToDownload[])collectedPages.ToArray(typeof(PageToDownload));
        }
        /// <summary>
        /// Saves the queued pages and every resource they reference into the site storage,
        /// reporting status through the supplied progress host.
        /// </summary>
        /// <param name="progressHost">Receives progress messages; also polled for cancellation while pages are saved</param>
        /// <returns>This instance</returns>
        /// <exception cref="OperationCancelledException">Cancellation was requested while the pages were being written</exception>
        public object Download(IProgressHost progressHost)
        {
            // Phase 1: reserve a file name for every page and gather the distinct references
            progressHost.UpdateProgress(Res.Get(StringId.ProgressPreparingListOfFiles));
            for (int pageIndex = 0; pageIndex < _pagesToDownload.Length; pageIndex++)
            {
                PageToDownload page = _pagesToDownload[pageIndex];

                try
                {
                    // Pick a non-conflicting name and claim it with an empty placeholder file
                    string placeholderPath = PathHelper.GetNonConflictingPath(Path.Combine(_siteStorage.BasePath, page.RelativePath));
                    page.FileName = Path.GetFileName(placeholderPath);

                    using (Stream placeholder = _siteStorage.Open(placeholderPath, AccessMode.Write))
                    {
                    }
                }
                catch (Exception ex)
                {
                    HandleException(ex);
                }

                foreach (ReferenceToDownload pageReference in page.References)
                {
                    // The first reference seen for an absolute url wins; repeats are skipped
                    if (_referencesToDownload.ContainsKey(pageReference.AbsoluteUrl))
                        continue;
                    _referencesToDownload.Add(pageReference.AbsoluteUrl, pageReference);
                }
            }

            // Phase 2: queue one work item per reference, each with its own progress host.
            // References share 8000 of the 10000 total ticks.
            progressHost.UpdateProgress(Res.Get(StringId.ProgressStartingDownloadOfReferences));
            JointProgressHosts jointHosts = new JointProgressHosts(progressHost, _referencesToDownload.Count, 8000, 10000);
            IProgressHost[] referenceHosts = jointHosts.ProgressHosts;
            int nextHost = 0;
            foreach (ReferenceToDownload queuedReference in _referencesToDownload.Values)
            {
                IProgressHost referenceHost = referenceHosts[nextHost];
                nextHost++;
                workQueue.Enqueue(new DownloadWorkItem(queuedReference, _siteStorage, referenceHost));
            }

            // Phase 3: drain the queue using WorkerThreadStart
            // (the second argument is presumably the number of worker threads - confirm against ParallelExecution)
            new ParallelExecution(new ThreadStart(WorkerThreadStart), 2).Execute();

            // Phase 4: write the HTML of every page to disk (the remaining 2000 ticks)
            ProgressTick savingProgress = new ProgressTick(progressHost, 2000, 10000);
            int savedCount = 0;
            foreach (PageToDownload page in _pagesToDownload)
            {
                try
                {
                    string savingMessage = string.Format(CultureInfo.CurrentCulture, Res.Get(StringId.ProgressSaving), page.FileName);
                    savingProgress.UpdateProgress(savedCount, _pagesToDownload.Length, savingMessage);
                    WriteHtmlToDisk(page, _siteStorage);
                }
                catch (Exception ex)
                {
                    HandleException(ex);
                }

                // Cancellation is checked outside the try so that it is not routed to HandleException
                if (savingProgress.CancelRequested)
                    throw new OperationCancelledException();

                savedCount++;
            }

            // Everything has been written
            progressHost.UpdateProgress(1, 1, Res.Get(StringId.ProgressDownloadFinished));

            return this;
        }
        /// <summary>
        /// Downloads the page at <paramref name="url"/> (or adopts the supplied, already loaded
        /// document), then collects its frame pages and, while the depth configuration allows,
        /// the pages it links to. Urls that are not downloadable pages are instead attached to
        /// the parent page as references.
        /// </summary>
        /// <param name="progress">Progress host for status updates; also polled for cancellation</param>
        /// <param name="url">The url of the page or resource to process</param>
        /// <param name="lightWeightDocument">An already loaded document for <paramref name="url"/>, or null to download it</param>
        /// <param name="parentPageToDownload">The page that references <paramref name="url"/>; non-page resources are added to its reference list</param>
        /// <returns>The pages produced by this call, including frame pages and sub pages</returns>
        /// <exception cref="OperationCancelledException">Cancellation was requested, either before this page was started or while drilling into its sub pages</exception>
        private PageToDownload[] DownloadPages(IProgressHost progress, string url, LightWeightHTMLDocument lightWeightDocument, PageToDownload parentPageToDownload)
        {
            // Check for cancel
            if (progress.CancelRequested)
                throw new OperationCancelledException();

            ArrayList downloadedPages = new ArrayList();

            // The depth counter is shared by every recursive call, so it has to be restored even
            // when an exception escapes; otherwise later ShouldContinue checks see a wrong depth.
            _currentDepth++;
            try
            {
                // Set up our progress: at the final depth this page takes the whole range,
                // otherwise part of it is kept back for the sub pages
                int thisPageTicks = FIRSTPAGETICKS;
                if (_context.Depth == _currentDepth)
                    thisPageTicks = TOTALTICKS;
                ProgressTick firstPagedownloadProgress = new ProgressTick(progress, thisPageTicks, TOTALTICKS);

                string safeUrl = UrlHelper.GetUrlWithoutAnchorIdentifier(url);

                // Look up the content type of this pageToDownload (cached per anchor-less url)
                UrlContentTypeInfo headerInfo = null;
                if (_headerInfo.ContainsKey(safeUrl))
                {
                    headerInfo = (UrlContentTypeInfo)_headerInfo[safeUrl];
                }
                else
                {
                    if (lightWeightDocument != null)
                    {
                        // A supplied document is by definition HTML
                        headerInfo = new UrlContentTypeInfo("text/html", url);
                    }
                    else if (!_context.IsTimedOutUrl(url) && _context.ShouldDownloadThisUrl(url))
                    {
                        progress.UpdateProgress(string.Format(CultureInfo.CurrentCulture, Res.Get(StringId.ProgressDeterminingType), url));
                        headerInfo = ContentTypeHelper.ExpensivelyGetUrlContentType(url, _context.TimeoutMS);
                    }

                    // Note: a null entry is cached when the url was skipped above
                    _headerInfo.Add(safeUrl, headerInfo);
                }

                // If this is a web page and we should download it, do it!
                // (a supplied document bypasses the ShouldDownloadThisUrl filter)
                if (IsDownloadablePageResource(headerInfo) &&
                    (lightWeightDocument != null || _context.ShouldDownloadThisUrl(headerInfo)))
                {
                    bool downloadWorked = false;
                    int downloadAttempts = -1;
                    bool timedOut = true;

                    // Make sure we are retrying the correct number of times: only a timeout
                    // triggers another attempt, any other failure ends the loop
                    ProgressTick pageDownloadProgress = new ProgressTick(firstPagedownloadProgress, 80, 100);
                    while (!downloadWorked && downloadAttempts++ < _context.RetryCount && timedOut)
                    {
                        timedOut = false;

                        pageDownloadProgress.UpdateProgress(0, 1);
                        try
                        {
                            // If we haven't downloaded this page yet download it
                            PageToDownload thisPageToDownload = null;

                            if (!_context.UrlAlreadyDownloaded(safeUrl))
                            {
                                if (lightWeightDocument == null)
                                    thisPageToDownload = DownloadUrl(url, parentPageToDownload, pageDownloadProgress);
                                else
                                {
                                    LightWeightHTMLDocument htmlDoc = lightWeightDocument;

                                    // Only redownload if we absolutely need to
                                    if (htmlDoc.HasFramesOrStyles && (htmlDoc.Frames == null || htmlDoc.StyleResourcesUrls == null))
                                    {
                                        // Round-trip the document through a temp file so that the
                                        // downloader can fill in the missing frame/style data
                                        string html = htmlDoc.GenerateHtml();
                                        string tempFile = TempFileManager.Instance.CreateTempFile("temp.htm");
                                        using (StreamWriter writer = new StreamWriter(tempFile, false, Encoding.UTF8))
                                            writer.Write(html);
                                        using (HTMLDocumentDownloader downloader = new HTMLDocumentDownloader(_parentControl, UrlHelper.GetLocalFileUrl(tempFile), htmlDoc.Title, _context.CookieString, _context.TimeoutMS, false))
                                        {
                                            downloader.DownloadHTMLDocument(pageDownloadProgress);

                                            htmlDoc.UpdateBasedUponHTMLDocumentData(downloader.HtmlDocument, url);
                                        }
                                    }
                                    thisPageToDownload = new PageToDownload(htmlDoc, url, null, parentPageToDownload);
                                    if (htmlDoc.StyleResourcesUrls != null)
                                        foreach (HTMLDocumentHelper.ResourceUrlInfo styleUrl in htmlDoc.StyleResourcesUrls)
                                            thisPageToDownload.AddReference(new ReferenceToDownload(styleUrl.ResourceUrl, thisPageToDownload, styleUrl.ResourceAbsoluteUrl));
                                }
                                // Add this page to our lists
                                _context.AddPageToDownload(safeUrl, thisPageToDownload, true);
                                downloadedPages.Add(thisPageToDownload);
                            }
                            else
                                thisPageToDownload = (PageToDownload)_context.CreatedPageToDownloadTable[safeUrl];

                            // If we're downloading a site, add a second copy of the root page in the references subdir
                            // This way, if the root page gets renamed, links back to it will still work correctly
                            // This is a bit of a hack, but otherwise, we'll need to escape urls whenever we output
                            // the site and change the root file name
                            if (thisPageToDownload.IsRootPage && _context.Depth > 0)
                            {
                                PageToDownload copyOfThisPageToDownload = new PageToDownload(thisPageToDownload.LightWeightHTMLDocument.Clone(), thisPageToDownload.UrlToReplace, thisPageToDownload.FileName, thisPageToDownload);
                                downloadedPages.Add(copyOfThisPageToDownload);
                            }

                            // enumerate the frames of this page and add them to the list of pages
                            PageToDownload[] subFramesToDownload = GetFramePagesToDownload(thisPageToDownload);
                            downloadedPages.AddRange(subFramesToDownload);
                            foreach (PageToDownload pageToDownload in subFramesToDownload)
                                _context.AddPageToDownload(pageToDownload.AbsoluteUrl, pageToDownload, false);

                            // Now drill down based upon the depth configuration
                            if (_context.ShouldContinue(_currentDepth))
                            {
                                ProgressTick otherPagesdownloadProgress = new ProgressTick(progress, TOTALTICKS - thisPageTicks, TOTALTICKS);
                                downloadedPages.AddRange(GetSubPagesToDownload(otherPagesdownloadProgress, downloadedPages, thisPageToDownload));
                            }
                            downloadWorked = true;
                            firstPagedownloadProgress.UpdateProgress(1, 1);
                        }
                        catch (OperationTimedOutException)
                        {
                            timedOut = true;
                        }
                        catch (OperationCancelledException)
                        {
                            // A cancel raised while processing this page or its sub pages must reach
                            // the caller instead of being reported as a failed download
                            throw;
                        }
                        catch (WebPageDownloaderException htex)
                        {
                            HandleException(new Exception(htex.Message, htex));
                        }
                        catch (Exception ex)
                        {
                            // Report the url that actually failed, not the url this factory was created for
                            HandleException(new Exception(String.Format(CultureInfo.CurrentCulture, "{0} could not be downloaded", url), ex));
                        }
                    }

                    // If we never got the download to succeed, add it to the list of timed out Urls.
                    // The url is recorded in the same form that IsTimedOutUrl is queried with above.
                    if (!downloadWorked && timedOut)
                    {
                        _context.AddTimedOutUrl(url);
                        firstPagedownloadProgress.UpdateProgress(1, 1);
                    }
                }
                // If it isn't a page we'll just add the file to the reference list for the parent page
                // There is not an else, because we could be looking at a reference, but a reference that
                // should not be downloaded (in which case we just ignore it)
                // NOTE(review): parentPageToDownload is dereferenced here without a null check - confirm
                // that callers never pass a null parent together with a non-page url
                else if (headerInfo != null && _context.ShouldDownloadThisUrl(headerInfo))
                {
                    parentPageToDownload.AddReference(new ReferenceToDownload(url, parentPageToDownload));
                    progress.UpdateProgress(1, 1);
                }

                progress.UpdateProgress(1, 1);
            }
            finally
            {
                _currentDepth--;
            }

            return (PageToDownload[])downloadedPages.ToArray(typeof(PageToDownload));
        }
        /// <summary>
        /// Downloads the resources referenced by an editing template into the blog template
        /// directory and rewrites the saved template so that its path token points at that directory.
        /// </summary>
        /// <param name="templateContents">The HTML of the editing template</param>
        /// <param name="templateUrl">The url the template HTML is parsed against and whose credentials are applied</param>
        /// <param name="progress">Progress host that receives status updates</param>
        /// <returns>The full path of the saved template file</returns>
        private string DownloadTemplateFiles(string templateContents, string templateUrl, IProgressHost progress)
        {
            progress.UpdateProgress(Res.Get(StringId.ProgressDownloadingSupportingFiles));
            FileBasedSiteStorage templateStorage = new FileBasedSiteStorage(_blogTemplateDir);

            // Stage the template text in a UTF-8 memory stream and rewind it for reading
            MemoryStream contentStream = new MemoryStream();
            StreamWriter contentWriter = new StreamWriter(contentStream, Encoding.UTF8);
            contentWriter.Write(templateContents);
            contentWriter.Flush();
            contentStream.Position = 0;

            // Build the lightweight document by way of an IHTMLDocument2. FromIHTMLDocument2 is used
            // rather than LightWeightHTMLDocument.FromStream because FromStream improperly places a
            // saveFrom declaration above the docType (bug 289357).
            IHTMLDocument2 parsedDocument = HTMLDocumentHelper.StreamToHTMLDoc(contentStream, templateUrl, true);
            LightWeightHTMLDocument lightDocument = LightWeightHTMLDocument.FromIHTMLDocument2(parsedDocument, templateUrl, true, false);

            PageDownloadContext context = new PageDownloadContext(0);
            ApplyCredentials(context, templateUrl);

            string savedTemplatePath;
            using (PageToDownloadFactory pageFactory = new PageToDownloadFactory(lightDocument, context, _parentControl))
            {
                // First half of the progress range: work out the dependent styles and resources
                ProgressTick discoveryTick = new ProgressTick(progress, 50, 100);
                pageFactory.CreatePagesToDownload(discoveryTick);
                discoveryTick.UpdateProgress(100, 100);

                // Second half: download those styles and resources
                ProgressTick downloadTick = new ProgressTick(progress, 50, 100);
                PageAndReferenceDownloader referenceDownloader = new PageAndReferenceDownloader(pageFactory.PagesToDownload, templateStorage);
                ApplyCredentials(referenceDownloader, templateUrl);
                referenceDownloader.Download(downloadTick);
                downloadTick.UpdateProgress(100, 100);

                // Replace the downloader's path token with an absolute file url so that template
                // resources are not resolved relative to the file the editor is editing. The
                // template is moved aside to a ".token" file, rewritten back to its real name
                // (presumably by substituting the token - see TextHelper.ReplaceInFile), and the
                // ".token" copy is deleted.
                savedTemplatePath = Path.Combine(_blogTemplateDir, templateStorage.RootFile);
                string tokenizedPath = savedTemplatePath + ".token";
                File.Move(savedTemplatePath, tokenizedPath);
                string absoluteBase = String.Format(CultureInfo.InvariantCulture, "file:///{0}/{1}", _blogTemplateDir.Replace('\\', '/'), referenceDownloader.PathToken);
                TextHelper.ReplaceInFile(tokenizedPath, referenceDownloader.PathToken, savedTemplatePath, absoluteBase);
                File.Delete(tokenizedPath);

                // Fix up the remaining downloaded files
                FixupDownloadedFiles(savedTemplatePath, templateStorage, referenceDownloader.PathToken);

                // Complete the progress
                progress.UpdateProgress(100, 100);

                // Record the absolute base next to the template in a ".path" file
                File.WriteAllText(savedTemplatePath + ".path", absoluteBase);
            }

            return savedTemplatePath;
        }
        /// <summary>
        /// Builds one editing template file per requested template type, using the supplied
        /// region locator strategy to prepare (and afterwards clean up) the regions.
        /// </summary>
        /// <param name="progress">Progress host; 25% is given to preparing regions, 50% to the templates and 25% to cleanup</param>
        /// <param name="regionLocatorStrategy">Strategy whose regions are prepared before and cleaned up after the templates are built</param>
        /// <param name="templateStrategies">The template strategy to use for the template type at the same index</param>
        /// <param name="templateTypes">The template types to create files for</param>
        /// <returns>The template files that were created, or null when none could be created</returns>
        private BlogEditingTemplateFile[] GetBlogTemplateFiles(IProgressHost progress, BlogPostRegionLocatorStrategy regionLocatorStrategy, BlogEditingTemplateStrategy[] templateStrategies, BlogEditingTemplateType[] templateTypes)
        {
            try
            {
                regionLocatorStrategy.PrepareRegions(new ProgressTick(progress, 25, 100));

                ArrayList createdFiles = new ArrayList();
                ProgressTick allTemplatesTick = new ProgressTick(progress, 50, 100);

                int typeIndex = 0;
                while (typeIndex < templateTypes.Length)
                {
                    // Every template type gets an equal slice of the template progress range
                    ProgressTick templateTick = new ProgressTick(allTemplatesTick, 1, templateTypes.Length);
                    try
                    {
                        CheckCancelRequested(templateTick);
                        templateStrategy = templateStrategies[typeIndex];

                        // Turn the blog post HTML into an editing template. MarkupServices cannot
                        // parse the document from a non-UI thread, so the parsing part of the
                        // operation is executed on the UI thread (1/5 of this template's progress).
                        ProgressTick parsingTick = new ProgressTick(templateTick, 1, 5);
                        string templateHtml = ParseWebpageIntoEditingTemplate_OnUIThread(_parentControl, regionLocatorStrategy, parsingTick);

                        // check for cancel
                        CheckCancelRequested(templateTick);

                        string templateBaseUrl = HTMLDocumentHelper.GetBaseUrl(templateHtml, _blogHomepageUrl);

                        // Fetch the stylesheets and embedded resources so that the editing template
                        // loads faster and works offline (remaining 4/5 of this template's progress)
                        ProgressTick resourcesTick = new ProgressTick(templateTick, 4, 5);
                        string savedTemplate = DownloadTemplateFiles(templateHtml, templateBaseUrl, resourcesTick);
                        createdFiles.Add(new BlogEditingTemplateFile(templateTypes[typeIndex], savedTemplate));
                    }
                    catch (Exception ex)
                    {
                        // A failing template type is traced and skipped; the remaining types are still attempted.
                        // NOTE(review): this also traps whatever CheckCancelRequested throws, so a cancel
                        // is traced rather than propagated - confirm that this is intended.
                        Trace.WriteLine(String.Format(CultureInfo.CurrentCulture, "Failed to download template {0}: {1}", templateTypes[typeIndex].ToString(), ex.ToString()));
                    }

                    typeIndex++;
                }

                // Callers receive null rather than an empty array when nothing was created
                return createdFiles.Count > 0
                    ? (BlogEditingTemplateFile[])createdFiles.ToArray(typeof(BlogEditingTemplateFile))
                    : null;
            }
            finally
            {
                regionLocatorStrategy.CleanupRegions(new ProgressTick(progress, 25, 100));
            }
        }
        /// <summary>
        /// Downloads a webpage from a blog into memory.
        /// </summary>
        /// <param name="blogHomepageUrl">The url of the page to download</param>
        /// <param name="progress">Progress host; polled once for cancellation before the download starts</param>
        /// <returns>A stream containing the response body, positioned at its beginning</returns>
        /// <exception cref="OperationCancelledException">Cancellation was requested before the download started</exception>
        private MemoryStream DownloadBlogPage(string blogHomepageUrl, IProgressHost progress)
        {
            ProgressTick tick = new ProgressTick(progress, 50, 100);
            if (progress.CancelRequested)
                throw new OperationCancelledException();
            tick.UpdateProgress(0, 0, Res.Get(StringId.ProgressDownloadingWeblogEditingStyle));

            // Copy the whole response into memory. The response itself is disposed as well, so
            // the connection is released even when obtaining or reading the response stream fails.
            // (60000 is presumably the request timeout in milliseconds - confirm against _pageDownloader)
            MemoryStream memStream = new MemoryStream();
            using (HttpWebResponse resp = _pageDownloader(blogHomepageUrl, 60000))
            using (Stream respStream = resp.GetResponseStream())
            {
                StreamHelper.Transfer(respStream, memStream);
            }

            tick.UpdateProgress(100, 100);

            // Rewind so that the caller reads the page from the start
            memStream.Seek(0, SeekOrigin.Begin);
            return memStream;
        }
        /// <summary>
        /// Downloads a webpage from a blog, retrying until the page contains the temporary post
        /// title (TEMPORARY_POST_TITLE_GUID).
        /// </summary>
        /// <param name="blogHomepageUrl">The url of the page to download</param>
        /// <param name="progress">Progress host; polled for cancellation before every attempt and during the waits between attempts</param>
        /// <returns>A stream containing the downloaded page, positioned at its beginning</returns>
        /// <exception cref="OperationCancelledException">Cancellation was requested</exception>
        /// <exception cref="OperationTimedOutException">The temporary post title was not found in any of the 30 attempts</exception>
        private Stream DownloadBlogPage(string blogHomepageUrl, IProgressHost progress)
        {
            ProgressTick tick = new ProgressTick(progress, 50, 100);
            MemoryStream memStream = null;
            IHTMLDocument2 doc2 = null;
            // WinLive 221984: Theme detection timing out intermittently on WordPress.com
            // The temp post *often* takes more than a minute to show up on the blog home page.
            // The download progress dialog has a cancel button, we'll try a lot before giving up.
            for (int i = 0; i < 30 && doc2 == null; i++)
            {
                if (progress.CancelRequested)
                    throw new OperationCancelledException();
                tick.UpdateProgress(0, 0, Res.Get(StringId.ProgressDownloadingWeblogEditingStyle));

                // Sleep to give the post enough time to show up.
                // The first 10 attempts wait 1 second each, the remaining 20 attempts wait 10 seconds each,
                // i.e. 10s + 200s = 210s of waiting (plus the time spent downloading) before the operation
                // is considered timed out.
                // The wait is taken in one second slices so that the cancel button stays responsive.
                int delaySeconds = i < 10 ? 1 : 10;
                for (int elapsedSeconds = 0; elapsedSeconds < delaySeconds; elapsedSeconds++)
                {
                    Thread.Sleep(1000);
                    if (progress.CancelRequested)
                        throw new OperationCancelledException();
                }

                // Copy the response into a fresh memory stream; the response is disposed as well so
                // the connection is released even when reading it fails
                memStream = new MemoryStream();
                using (HttpWebResponse resp = _pageDownloader(blogHomepageUrl, 60000))
                using (Stream respStream = resp.GetResponseStream())
                {
                    StreamHelper.Transfer(respStream, memStream);
                }

                // Read in the HTML and check whether it contains the temporary post title;
                // if it does not, clear doc2 so that the loop tries again
                memStream.Seek(0, SeekOrigin.Begin);
                doc2 = HTMLDocumentHelper.GetHTMLDocumentFromStream(memStream, blogHomepageUrl);
                if (HTMLDocumentHelper.FindElementContainingText(doc2, TEMPORARY_POST_TITLE_GUID) == null)
                    doc2 = null;
            }
            if (doc2 == null)
            {
                throw new OperationTimedOutException();
            }
            tick.UpdateProgress(100, 100);

            // Return the stream, rewound so that the caller reads the page from the start
            memStream.Seek(0, SeekOrigin.Begin);
            return memStream;
        }