/// <summary>
/// Downloads the content at <c>_url</c> into a newly created temporary file and
/// records that file's path in <c>_filePath</c>.
/// </summary>
/// <remarks>
/// Reports an "Indexing ..." message before the transfer starts and ticks the
/// progress once the transfer has completed. The response is requested with
/// <c>CacheSettings.CHECKCACHE</c> and the configured <c>_timeout</c>.
/// <c>_filePath</c> is only assigned after the transfer finished without throwing.
/// </remarks>
public void Download()
{
    _tickableProgress.Message("Indexing " + _url);

    string filePath = TempFileManager.Instance.CreateTempFile();
    WebRequestWithCache request = new WebRequestWithCache(_url);

    // Acquire each stream inside its own using statement so the response stream
    // is released even when opening the temp file throws. (Previously the
    // FileStream was constructed before either stream was protected.)
    // NOTE(review): FileMode.Open requires the file to exist already; presumably
    // CreateTempFile() creates it on disk - confirm.
    using (Stream response = request.GetResponseStream(WebRequestWithCache.CacheSettings.CHECKCACHE, _timeout))
    using (FileStream fileStream = new FileStream(filePath, FileMode.Open))
    {
        StreamHelper.Transfer(response, fileStream);
    }

    _filePath = filePath;
    _tickableProgress.Tick();
}
/// <summary>
/// Downloads the page and every resource it references into site storage,
/// rewriting the page's HTML to point at the stored copies (runs synchronously).
/// </summary>
/// <remarks>
/// Cancellation is checked before each major step and after every processed
/// URL; when requested, the cancel is acknowledged and the method returns
/// without writing the root document.
/// </remarks>
protected override void DoWork()
{
    const string referencedFilesDir = "referencedFiles/";

    if (CancelRequested)
    {
        AcknowledgeCancel();
        return;
    }

    // Fetch the base document unless one has already been supplied.
    if (m_htmlDocument == null)
    {
        m_htmlDocument = HTMLDocumentHelper.GetHTMLDocFromURL(m_url);
    }

    if (CancelRequested)
    {
        AcknowledgeCancel();
        return;
    }

    // Collect the URLs referenced by the document (key: URL, value: URL type).
    Hashtable referencedUrls = HTMLDocumentHelper.GetResourceUrlsFromDocument(m_htmlDocument);

    if (CancelRequested)
    {
        AcknowledgeCancel();
        return;
    }

    // The document's HTML is the starting point; referenced paths inside it are
    // replaced one by one as the resources are stored.
    string finalHTML = HTMLDocumentHelper.HTMLDocToString(m_htmlDocument);

    foreach (DictionaryEntry entry in referencedUrls)
    {
        string url = (string)entry.Key;
        string urlType = (string)entry.Value;
        string fullUrl = HTMLDocumentHelper.EscapeRelativeURL(m_url, url);
        string fileName = FileHelper.GetValidFileName(Path.GetFileName(new Uri(fullUrl).AbsolutePath));

        if (fileName != string.Empty)
        {
            string relativePath;
            bool isFrame = urlType == HTMLTokens.Frame || urlType == HTMLTokens.IFrame;

            if (isFrame)
            {
                // Frames are pages themselves: store them as .htm and download
                // them with a nested page download, waiting for it to finish.
                fileName = Path.GetFileNameWithoutExtension(fileName) + ".htm";
                relativePath = referencedFilesDir + fileName;

                AsyncPageDownload frameDownload = new AsyncPageDownload(
                    fullUrl,
                    m_siteStorage,
                    fileName,
                    m_rootPath + referencedFilesDir,
                    this.Target);
                frameDownload.Start();
                frameDownload.WaitUntilDone();
            }
            else
            {
                relativePath = referencedFilesDir + fileName;
                WebRequestWithCache request = new WebRequestWithCache(fullUrl);

                // Copy the referenced resource into site storage when a
                // response stream is available.
                using (Stream requestStream = request.GetResponseStream())
                {
                    if (requestStream != null)
                    {
                        using (Stream fileStream = m_siteStorage.Open(m_rootPath + relativePath, AccessMode.Write))
                        {
                            StreamHelper.Transfer(requestStream, fileStream, 8192, true);
                        }
                    }
                }
            }

            // Plain string replacement is used on purpose: a regular expression
            // would be more flexible, but characters such as ? / & are
            // meaningful in a regex and would have to be escaped first.
            finalHTML = finalHTML.Replace(UrlHelper.CleanUpUrl(url), relativePath);
        }

        if (CancelRequested)
        {
            AcknowledgeCancel();
            return;
        }
    }

    // Escape any high ascii characters before writing the document out.
    finalHTML = HTMLDocumentHelper.EscapeHighAscii(finalHTML.ToCharArray());

    // Store the rewritten HTML as the root document (UTF-8).
    using (StreamWriter writer = new StreamWriter(m_siteStorage.Open(m_rootPath + m_rootFile, AccessMode.Write), Encoding.UTF8))
    {
        writer.Write(finalHTML);
    }

    m_siteStorage.RootFile = m_rootFile;
}
/// <summary>
/// Synchronously saves the page at <c>m_url</c>, together with the files it
/// references, into <c>m_siteStorage</c>.
/// </summary>
/// <remarks>
/// Each referenced URL is stored under "referencedFiles/" and the matching
/// path in the page's HTML is replaced with the stored relative path. The
/// method bails out (after acknowledging) whenever cancellation is requested.
/// </remarks>
protected override void DoWork()
{
    const string referencedFilesDir = "referencedFiles/";

    if (CancelRequested)
    {
        AcknowledgeCancel();
        return;
    }

    // Populate the base document on demand.
    if (m_htmlDocument == null)
    {
        m_htmlDocument = HTMLDocumentHelper.GetHTMLDocFromURL(m_url);
    }

    if (CancelRequested)
    {
        AcknowledgeCancel();
        return;
    }

    // Referenced URLs of the document, keyed by URL with the URL type as value.
    Hashtable referencedUrls = HTMLDocumentHelper.GetResourceUrlsFromDocument(m_htmlDocument);

    if (CancelRequested)
    {
        AcknowledgeCancel();
        return;
    }

    // Base HTML in which the referenced paths get replaced.
    string finalHTML = HTMLDocumentHelper.HTMLDocToString(m_htmlDocument);

    foreach (DictionaryEntry entry in referencedUrls)
    {
        string url = (string)entry.Key;
        string urlType = (string)entry.Value;
        string fullUrl = HTMLDocumentHelper.EscapeRelativeURL(m_url, url);
        string fileName = FileHelper.GetValidFileName(Path.GetFileName(new Uri(fullUrl).AbsolutePath));

        if (fileName != string.Empty)
        {
            string relativePath;
            bool isFrame = urlType == HTMLTokens.Frame || urlType == HTMLTokens.IFrame;

            if (isFrame)
            {
                // A frame is downloaded as a page of its own, saved with an
                // .htm extension; wait for that nested download to complete.
                fileName = Path.GetFileNameWithoutExtension(fileName) + ".htm";
                relativePath = referencedFilesDir + fileName;

                AsyncPageDownload frameDownload = new AsyncPageDownload(
                    fullUrl,
                    m_siteStorage,
                    fileName,
                    m_rootPath + referencedFilesDir,
                    this.Target);
                frameDownload.Start();
                frameDownload.WaitUntilDone();
            }
            else
            {
                relativePath = referencedFilesDir + fileName;
                WebRequestWithCache request = new WebRequestWithCache(fullUrl);

                // Write the referenced file into site storage if the request
                // produced a response stream.
                using (Stream requestStream = request.GetResponseStream())
                {
                    if (requestStream != null)
                    {
                        using (Stream fileStream = m_siteStorage.Open(m_rootPath + relativePath, AccessMode.Write))
                        {
                            StreamHelper.Transfer(requestStream, fileStream, 8192, true);
                        }
                    }
                }
            }

            // Literal replacement rather than a regular expression: regexes
            // would be more flexible, but ? / & and similar characters carry
            // meaning there and would need escaping.
            finalHTML = finalHTML.Replace(UrlHelper.CleanUpUrl(url), relativePath);
        }

        if (CancelRequested)
        {
            AcknowledgeCancel();
            return;
        }
    }

    // Escape any high ascii characters.
    finalHTML = HTMLDocumentHelper.EscapeHighAscii(finalHTML.ToCharArray());

    // Write the final HTML to site storage as the root file, encoded as UTF-8.
    using (StreamWriter writer = new StreamWriter(m_siteStorage.Open(m_rootPath + m_rootFile, AccessMode.Write), Encoding.UTF8))
    {
        writer.Write(finalHTML);
    }

    m_siteStorage.RootFile = m_rootFile;
}
/// <summary>
/// Fetches <c>_url</c> into a fresh temporary file and stores the file's path
/// in <c>_filePath</c>.
/// </summary>
/// <remarks>
/// Posts an "Indexing ..." progress message first and ticks the progress after
/// the content has been transferred. The request is made with
/// <c>CacheSettings.CHECKCACHE</c> and <c>_timeout</c>. If anything throws,
/// <c>_filePath</c> is left untouched and no tick is reported.
/// </remarks>
public void Download()
{
    _tickableProgress.Message("Indexing " + _url);

    string filePath = TempFileManager.Instance.CreateTempFile();
    WebRequestWithCache request = new WebRequestWithCache(_url);

    // Both streams are declared in using statements at the point of
    // acquisition. Before, the FileStream was constructed while the response
    // stream was not yet guarded, so a failure to open the file leaked the
    // response stream.
    // NOTE(review): FileMode.Open fails if the file does not exist; this
    // relies on CreateTempFile() having created it - confirm.
    using (Stream response = request.GetResponseStream(WebRequestWithCache.CacheSettings.CHECKCACHE, _timeout))
    using (FileStream fileStream = new FileStream(filePath, FileMode.Open))
    {
        StreamHelper.Transfer(response, fileStream);
    }

    _filePath = filePath;
    _tickableProgress.Tick();
}