/// <summary>
/// Check whether a file was already downloaded.
/// </summary>
/// <param name="uriInfo">The resource info to look up in the temporary list.</param>
/// <returns>
/// <c>false</c> if the temporary list holds no matching entry. Otherwise
/// <c>true</c> if the matching entry was added by the current process or
/// its <c>DateAdded</c> plus 10 hours is still in the future; failing both,
/// the entry's <c>FileExists</c> value.
/// </returns>
public bool HasDownloadedUri(DownloadedResourceInfo uriInfo)
{
    // Search whether it exists in the list.
    int foundPosition = _temporaryDownloadedResourceInfos.IndexOf(uriInfo);
    if (foundPosition < 0)
    {
        return false;
    }

    // Found. Check various attributes of the stored entry.
    DownloadedResourceInfo foundInfo = _temporaryDownloadedResourceInfos[foundPosition];

    // Process is disposable: read the ID and release the instance right away
    // instead of leaving it for the finalizer.
    int currentProcessId;
    using (Process currentProcess = Process.GetCurrentProcess())
    {
        currentProcessId = currentProcess.Id;
    }

    if (foundInfo.AddedByProcessID == currentProcessId)
    {
        return true;
    }

    if (foundInfo.DateAdded.AddHours(10) > DateTime.Now)
    {
        return true;
    }

    return foundInfo.FileExists;
}
/// <summary>
/// Stores a binary resource to the local file system.
/// </summary>
/// <param name="binaryContent">
/// The bytes to write. When <c>null</c> or empty, no file is written
/// (a previously existing file is still deleted).
/// </param>
/// <param name="uriInfo">The resource info the local file path is derived from.</param>
/// <returns>
/// Return the info about the stored data. It is returned even when writing
/// failed; exceptions are traced and ignored.
/// </returns>
public DownloadedResourceInfo StoreBinary(
    byte[] binaryContent,
    UriResourceInfo uriInfo)
{
    DownloadedResourceInfo result =
        new DownloadedResourceInfo(
            uriInfo,
            _settings.Options.DestinationFolderPath);

    try
    {
        if (result.LocalFilePath.Exists)
        {
            result.LocalFilePath.Delete();
        }

        if (binaryContent != null && binaryContent.Length > 0)
        {
            // Same as StoreHtml: make sure the target folder is there first.
            if (!result.LocalFilePath.Directory.Exists)
            {
                result.LocalFilePath.Directory.Create();
            }

            Trace.WriteLine($"Writing binary content to file '{result.LocalFilePath}'.");

            // Create and open the file in one step. A separate Create() call
            // hands back an open stream; leaving that stream undisposed keeps
            // the file locked, so reopening it for writing fails.
            using (FileStream s = new FileStream(
                result.LocalFilePath.FullName,
                FileMode.Create,
                FileAccess.Write))
            {
                s.Write(binaryContent, 0, binaryContent.Length);
            }
        }
    }
    catch (Exception x)
    {
        // Deliberate best effort: a failed store must not abort the caller.
        Trace.WriteLine($"Ignoring exception while storing binary file: '{ x.Message}'.");
    }

    return result;
}
/// <summary>
/// Add information about a downloaded resource to the temporary list.
/// </summary>
/// <param name="uriInfo">
/// The info to add. An equal entry already in the list is removed first,
/// so the new one ends up appended at the end.
/// </param>
public void AddDownloadedResourceInfo(DownloadedResourceInfo uriInfo)
{
    // Take out the first equal entry, if any, before appending.
    int existingIndex = _temporaryDownloadedResourceInfos.IndexOf(uriInfo);
    if (existingIndex >= 0)
    {
        _temporaryDownloadedResourceInfos.RemoveAt(existingIndex);
    }

    _temporaryDownloadedResourceInfos.Add(uriInfo);
}
/// <summary>
/// Adds an entry to the list of resources where parsing should continue
/// later, then calls <c>Persist()</c>.
/// </summary>
/// <param name="resourceInfo">
/// The info to add. An equal entry already in the list is removed first,
/// so the new one ends up appended at the end.
/// </param>
public void AddContinueDownloadedResourceInfos(DownloadedResourceInfo resourceInfo)
{
    // Take out the first equal entry, if any, before appending.
    int existingIndex = _continueDownloadedResourceInfos.IndexOf(resourceInfo);
    if (existingIndex >= 0)
    {
        _continueDownloadedResourceInfos.RemoveAt(existingIndex);
    }

    _continueDownloadedResourceInfos.Add(resourceInfo);

    Persist();
}
/// <summary>
/// Removes and returns the first entry of the continue list.
/// </summary>
/// <returns>
/// The first entry, or <c>null</c> if the list is empty.
/// <c>Persist()</c> is called only when an entry was actually removed.
/// </returns>
public DownloadedResourceInfo PopContinueDownloadedResourceInfos()
{
    if (_continueDownloadedResourceInfos.Count > 0)
    {
        DownloadedResourceInfo first = _continueDownloadedResourceInfos[0];
        _continueDownloadedResourceInfos.RemoveAt(0);

        Persist();

        return first;
    }

    return null;
}
/// <summary>
/// Persist information about a downloaded resource.
/// </summary>
/// <param name="uriInfo">
/// The URI info. When an equal entry exists in the temporary list, that
/// stored entry is the one persisted; otherwise the supplied instance is
/// persisted as-is.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="uriInfo"/> is <c>null</c>.
/// </exception>
public void PersistDownloadedResourceInfo(DownloadedResourceInfo uriInfo)
{
    if (uriInfo == null)
    {
        throw new ArgumentNullException(nameof(uriInfo));
    }

    int foundPosition = _temporaryDownloadedResourceInfos.IndexOf(uriInfo);

    // A negative position means "not in the temporary list"; indexing with
    // it would throw, so fall back to the instance the caller handed in.
    DownloadedResourceInfo foundInfo = foundPosition >= 0
        ? _temporaryDownloadedResourceInfos[foundPosition]
        : uriInfo;

    // Move: replace an equal persistent entry, then append.
    if (_persistentDownloadedResourceInfos.Contains(foundInfo))
    {
        _persistentDownloadedResourceInfos.Remove(foundInfo);
    }

    _persistentDownloadedResourceInfos.Add(foundInfo);

    Persist();
}
/// <summary>
/// Writes the text of an HTML resource to its local file.
/// Links inside the text are left untouched.
/// </summary>
/// <param name="textContent">The text to write.</param>
/// <param name="encoding">The encoding used for writing the text.</param>
/// <param name="uriInfo">The resource info the local file path is derived from.</param>
/// <returns>
/// Return the info about the stored data. It is returned even when writing
/// failed; exceptions are traced and ignored.
/// </returns>
public DownloadedResourceInfo StoreHtml(
    string textContent,
    Encoding encoding,
    UriResourceInfo uriInfo)
{
    DownloadedResourceInfo result =
        new DownloadedResourceInfo(uriInfo, _settings.Options.DestinationFolderPath);

    try
    {
        // Start from a clean slate: drop an older copy of the file.
        if (result.LocalFilePath.Exists)
        {
            result.LocalFilePath.Delete();
        }

        // Make sure the containing folder is present.
        var folder = result.LocalFilePath.Directory;
        if (!folder.Exists)
        {
            folder.Create();
        }

        Trace.WriteLine($"Writing text content to file '{result.LocalFilePath}'.");

        string targetPath = result.LocalFilePath.FullName;
        using (FileStream s = new FileStream(targetPath, FileMode.Create, FileAccess.Write))
        {
            using (StreamWriter w = new StreamWriter(s, encoding))
            {
                w.Write(textContent);
            }
        }
    }
    catch (Exception x)
    {
        Trace.WriteLine($"Ignoring IO exception while storing HTML file: '{x.Message}'.");
    }

    return result;
}
/// <summary>
/// Replace URIs inside a given HTML document that was previously
/// downloaded with the local URIs.
/// </summary>
/// <param name="textContent">The HTML text whose links are rewritten.</param>
/// <param name="uriInfo">The resource info of the document; passed to the link parser.</param>
/// <returns>Returns the content text with the replaced links.</returns>
/// <exception cref="ApplicationException">
/// A link whose original URL differs from the local file name could not be
/// found in the text, and its path was not already handled earlier.
/// </exception>
public string ReplaceLinks(
    string textContent,
    UriResourceInfo uriInfo)
{
    ResourceParser parser = new ResourceParser(_settings, uriInfo, textContent);
    List<UriResourceInfo> linkInfos = parser.ExtractLinks();

    // For remembering duplicates: paths of links that were already handled.
    HashSet<string> replacedLinks = new HashSet<string>();

    foreach (UriResourceInfo linkInfo in linkInfos)
    {
        if (!linkInfo.WantFollowUri && !linkInfo.IsResourceUri)
        {
            continue;
        }

        DownloadedResourceInfo dlInfo =
            new DownloadedResourceInfo(
                linkInfo,
                _settings.Options.DestinationFolderPath);

        if (string.IsNullOrEmpty(linkInfo.OriginalUrl))
        {
            continue;
        }

        string textContentBefore = textContent;
        string link = Regex.Escape(linkInfo.OriginalUrl);

        // Double-quoted links.
        textContent = ReplaceWithLiteralText(
            textContent,
            string.Format(@"""{0}""", link),
            string.Format(@"""{0}""", dlInfo.LocalFileName));

        // Single-quoted links.
        textContent = ReplaceWithLiteralText(
            textContent,
            string.Format(@"'{0}'", link),
            string.Format(@"'{0}'", dlInfo.LocalFileName));

        // For style-"url(...)"-links.
        textContent = ReplaceWithLiteralText(
            textContent,
            string.Format(@"\(\s*{0}\s*\)", link),
            string.Format(@"({0})", dlInfo.LocalFileName));

        // Some checking: a link that should have changed but left the text
        // untouched is an error, unless the same path was handled before.
        string linkPath = linkInfo.AbsoluteUri.AbsolutePath;

        if (linkInfo.OriginalUrl != dlInfo.LocalFileName.Name &&
            textContentBefore == textContent &&
            !replacedLinks.Contains(linkPath))
        {
            throw new ApplicationException($"Failed to replace URI '{linkInfo.OriginalUrl}' with URI '{dlInfo.LocalFileName}' in HTML text '{textContent}'.");
        }

        // Remember.
        replacedLinks.Add(linkPath);
    }

    return textContent;
}

/// <summary>
/// Replaces every case-insensitive match of <paramref name="pattern"/> with
/// <paramref name="replacement"/> taken literally.
/// </summary>
private static string ReplaceWithLiteralText(
    string input,
    string pattern,
    string replacement)
{
    // A MatchEvaluator result is inserted verbatim, so "$" sequences in a
    // file name are not interpreted as regex substitutions.
    return Regex.Replace(
        input,
        pattern,
        match => replacement,
        RegexOptions.IgnoreCase | RegexOptions.Multiline);
}