// ------------------------------------------------------------------

/// <summary>
/// Creates a new instance and stores the supplied values in the
/// corresponding instance fields.
/// </summary>
/// <param name="settings">The spider settings to keep.</param>
/// <param name="uriInfo">The URI information to keep.</param>
/// <param name="textContent">The text content to keep.</param>
public ResourceParser(
    SpiderSettings settings,
    UriResourceInformation uriInfo,
    string textContent)
{
    // The three assignments are independent of each other.
    this._textContent = textContent;
    this._uriInfo = uriInfo;
    this._settings = settings;
}
// ------------------------------------------------------------------

/// <summary>
/// Constructs the parser from its three inputs; each argument is
/// copied into the field of the same name.
/// </summary>
/// <param name="settings">Spider settings assigned to <c>_settings</c>.</param>
/// <param name="uriInfo">URI information assigned to <c>_uriInfo</c>.</param>
/// <param name="textContent">Text content assigned to <c>_textContent</c>.</param>
public ResourceParser( SpiderSettings settings, UriResourceInformation uriInfo, string textContent )
{
    this._settings = settings;
    this._uriInfo = uriInfo;
    this._textContent = textContent;
}
/// <summary>
/// Traces the download URI and destination folder of the given options,
/// restores the spider settings from that destination folder and then
/// attaches the options to the restored settings.
/// </summary>
/// <param name="options">The downloader options to apply.</param>
public void SetOptions(WebSiteDownloaderOptions options)
{
    string traceMessage = string.Format(
        @"Constructing WebSiteDownloader for URI '{0}', destination folder path '{1}'.",
        options.DownloadUri,
        options.DestinationFolderPath);
    Trace.WriteLine(traceMessage);

    // Restore first, then overwrite the options of the restored settings
    // with the caller-supplied ones.
    _settings = SpiderSettings.Restore(options.DestinationFolderPath);
    _settings.Options = options;
}
/// <summary>
/// Restore a previously stored setting value from the given
/// folder path.
/// </summary>
/// <param name="folderPath">
/// The folder that is searched for the "WebSiteDownloader.state" file.
/// </param>
/// <returns>
/// The deserialized settings, or an empty object if the state file is
/// not found or cannot be loaded. In every case
/// <c>Options.DestinationFolderPath</c> is set to
/// <paramref name="folderPath"/>.
/// </returns>
public static SpiderSettings Restore( DirectoryInfo folderPath )
{
    string filePath = Path.Combine( folderPath.FullName, @"WebSiteDownloader.state" );

    if ( File.Exists( filePath ) )
    {
        try
        {
            // NOTE(review): BinaryFormatter is not safe for untrusted input.
            // Confirm the state file can only be written by this application,
            // or migrate to a safer serializer.
            BinaryFormatter serializer = new BinaryFormatter();

            using ( FileStream reader = new FileStream( filePath, FileMode.Open, FileAccess.Read ) )
            {
                SpiderSettings settings = (SpiderSettings)serializer.Deserialize( reader );

                // The options are not taken from the file; start with fresh
                // options that point at the folder being restored from.
                settings.Options = new WebSiteDownloaderOptions();
                settings.Options.DestinationFolderPath = folderPath;

                // Any list that came back as null is replaced by an empty one.
                if ( settings._temporaryDownloadedResourceInfos == null )
                {
                    settings._temporaryDownloadedResourceInfos =
                        new List<DownloadedResourceInformation>();
                }
                if ( settings._persistentDownloadedResourceInfos == null )
                {
                    settings._persistentDownloadedResourceInfos =
                        new List<DownloadedResourceInformation>();
                }
                if ( settings._continueDownloadedResourceInfos == null )
                {
                    settings._continueDownloadedResourceInfos =
                        new List<DownloadedResourceInformation>();
                }

                // Move from persistent storage back to memory.
                settings._temporaryDownloadedResourceInfos.Clear();
                settings._temporaryDownloadedResourceInfos.AddRange(
                    settings._persistentDownloadedResourceInfos );

                // The first verbatim literal contains a line break; it is kept
                // unchanged so the trace output stays the same.
                Trace.WriteLine(
                    string.Format(
                        @"Successfully restored spider settings from file '{0}'. 
" +
                        @"{1} temporary downloaded resources, " +
                        @"{2} persistent downloaded resources, " +
                        @"{3} continue downloaded resources.",
                        filePath,
                        settings._temporaryDownloadedResourceInfos.Count,
                        settings._persistentDownloadedResourceInfos.Count,
                        settings._continueDownloadedResourceInfos.Count ) );

                return settings;
            }
        }
        catch ( SerializationException x )
        {
            Trace.WriteLine(
                string.Format(
                    @"Ignoring exception while deserializing spider settings: '{0}'.",
                    x.Message ) );
        }
        catch ( System.InvalidCastException x )
        {
            // The file deserialized to something other than SpiderSettings;
            // treat it like any other unreadable state file.
            Trace.WriteLine(
                string.Format(
                    @"Ignoring exception while deserializing spider settings: '{0}'.",
                    x.Message ) );
        }
        catch ( IOException x )
        {
            Trace.WriteLine(
                string.Format(
                    @"Ignoring IO exception while loading spider settings: '{0}'.",
                    x.Message ) );
        }
        catch ( UnauthorizedAccessException x )
        {
            Trace.WriteLine(
                string.Format(
                    @"Ignoring exception while loading spider settings: '{0}'.",
                    x.Message ) );
        }
    }

    // Reached when the state file does not exist or could not be loaded:
    // hand back empty settings bound to the requested folder.
    SpiderSettings emptySettings = new SpiderSettings();
    emptySettings.Options.DestinationFolderPath = folderPath;
    return emptySettings;
}
// ------------------------------------------------------------------

/// <summary>
/// Creates a new instance that keeps the supplied settings in its
/// <c>_settings</c> field.
/// </summary>
/// <param name="settings">The spider settings to keep.</param>
public ResourceRewriter(SpiderSettings settings)
{
    this._settings = settings;
}
// ------------------------------------------------------------------

/// <summary>
/// Constructs the storer and remembers the supplied settings in the
/// <c>_settings</c> field.
/// </summary>
/// <param name="settings">Spider settings assigned to <c>_settings</c>.</param>
public ResourceStorer( SpiderSettings settings )
{
    this._settings = settings;
}
/// <summary>
/// Restore a previously stored setting value from the given
/// folder path.
/// </summary>
/// <param name="folderPath">
/// The folder that is searched for the "WebSiteDownloader.state" file.
/// </param>
/// <returns>
/// The deserialized settings, or an empty object if the state file is
/// not found or cannot be loaded. In every case
/// <c>Options.DestinationFolderPath</c> is set to
/// <paramref name="folderPath"/>.
/// </returns>
public static SpiderSettings Restore(DirectoryInfo folderPath)
{
    string filePath = Path.Combine(
        folderPath.FullName,
        @"WebSiteDownloader.state");

    if (File.Exists(filePath))
    {
        try
        {
            // NOTE(review): BinaryFormatter is not safe for untrusted input.
            // Confirm the state file can only be written by this application,
            // or migrate to a safer serializer.
            BinaryFormatter serializer = new BinaryFormatter();

            using (FileStream reader = new FileStream(
                       filePath,
                       FileMode.Open,
                       FileAccess.Read))
            {
                SpiderSettings settings =
                    (SpiderSettings)serializer.Deserialize(reader);

                // The options are not taken from the file; start with fresh
                // options that point at the folder being restored from.
                settings.Options = new WebSiteDownloaderOptions();
                settings.Options.DestinationFolderPath = folderPath;

                // Any list that came back as null is replaced by an empty one.
                if (settings._temporaryDownloadedResourceInfos == null)
                {
                    settings._temporaryDownloadedResourceInfos =
                        new List<DownloadedResourceInformation>();
                }
                if (settings._persistentDownloadedResourceInfos == null)
                {
                    settings._persistentDownloadedResourceInfos =
                        new List<DownloadedResourceInformation>();
                }
                if (settings._continueDownloadedResourceInfos == null)
                {
                    settings._continueDownloadedResourceInfos =
                        new List<DownloadedResourceInformation>();
                }

                // Move from persistent storage back to memory.
                settings._temporaryDownloadedResourceInfos.Clear();
                settings._temporaryDownloadedResourceInfos.AddRange(
                    settings._persistentDownloadedResourceInfos);

                // The first verbatim literal contains a line break; it is kept
                // unchanged so the trace output stays the same.
                Trace.WriteLine(
                    string.Format(
                        @"Successfully restored spider settings from file '{0}'. 
" +
                        @"{1} temporary downloaded resources, " +
                        @"{2} persistent downloaded resources, " +
                        @"{3} continue downloaded resources.",
                        filePath,
                        settings._temporaryDownloadedResourceInfos.Count,
                        settings._persistentDownloadedResourceInfos.Count,
                        settings._continueDownloadedResourceInfos.Count));

                return settings;
            }
        }
        catch (SerializationException x)
        {
            Trace.WriteLine(
                string.Format(
                    @"Ignoring exception while deserializing spider settings: '{0}'.",
                    x.Message));
        }
        catch (System.InvalidCastException x)
        {
            // The file deserialized to something other than SpiderSettings;
            // treat it like any other unreadable state file.
            Trace.WriteLine(
                string.Format(
                    @"Ignoring exception while deserializing spider settings: '{0}'.",
                    x.Message));
        }
        catch (IOException x)
        {
            Trace.WriteLine(
                string.Format(
                    @"Ignoring IO exception while loading spider settings: '{0}'.",
                    x.Message));
        }
        catch (UnauthorizedAccessException x)
        {
            Trace.WriteLine(
                string.Format(
                    @"Ignoring exception while loading spider settings: '{0}'.",
                    x.Message));
        }
    }

    // Reached when the state file does not exist or could not be loaded:
    // hand back empty settings bound to the requested folder.
    SpiderSettings emptySettings = new SpiderSettings();
    emptySettings.Options.DestinationFolderPath = folderPath;
    return emptySettings;
}
// ------------------------------------------------------------------

/// <summary>
/// Creates a new instance that keeps the supplied settings in its
/// <c>_settings</c> field.
/// </summary>
/// <param name="settings">The spider settings to keep.</param>
public ResourceStorer(SpiderSettings settings)
{
    this._settings = settings;
}