Ejemplo n.º 1
0
 // ------------------------------------------------------------------
 /// <summary>
 /// Initializes a new <see cref="ResourceParser"/> by storing the given
 /// settings, URI information and text content in the instance fields.
 /// </summary>
 /// <param name="settings">The spider settings to keep.</param>
 /// <param name="uriInfo">The URI information to keep.</param>
 /// <param name="textContent">The text content to keep.</param>
 public ResourceParser(SpiderSettings settings, UriResourceInformation uriInfo, string textContent)
 {
     // Plain field capture: the arguments are stored as given, without
     // validation or copying.
     this._textContent = textContent;
     this._uriInfo = uriInfo;
     this._settings = settings;
 }
Ejemplo n.º 2
0
        // ------------------------------------------------------------------
        /// <summary>
        /// Initializes a new instance of the <see cref="WebSiteDownloader"/>
        /// class. Writes a diagnostic line to the console, restores the
        /// spider settings from the destination folder and file named in
        /// the options, and then attaches the options to those settings.
        /// </summary>
        /// <param name="options">The download options (URI, destination
        /// folder path and destination file name are read here).</param>
        public WebSiteDownloader(WebSiteDownloaderOptions options)
        {
            // Build the diagnostic message first, then emit it in one call.
            string announcement = string.Format(
                @"Constructing WebSiteDownloader for URI '{0}', destination folder path '{1}'.",
                options.DownloadUri,
                options.DestinationFolderPath);
            Console.WriteLine(announcement);

            // Restore whatever was stored earlier, then attach the
            // caller's options to the restored settings.
            _settings = SpiderSettings.Restore(
                options.DestinationFolderPath,
                options.DestinationFileName);
            _settings.Options = options;
        }
Ejemplo n.º 3
0
 // ------------------------------------------------------------------
 /// <summary>
 /// Initializes a new <see cref="ResourceRewriter"/> that keeps a
 /// reference to the given spider settings.
 /// </summary>
 /// <param name="settings">The spider settings to keep.</param>
 public ResourceRewriter(SpiderSettings settings)
 {
     // Stored as given; no validation or copying is performed.
     this._settings = settings;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Restores previously stored spider settings from the file
        /// <paramref name="fileName"/> inside <paramref name="folderPath"/>.
        /// </summary>
        /// <remarks>
        /// Loading is best effort: when the file does not exist, cannot be
        /// opened, or cannot be deserialized into a
        /// <see cref="SpiderSettings"/>, the problem is written to the
        /// console (except for the "file not found" case) and a fresh
        /// settings object pointing at <paramref name="folderPath"/> is
        /// returned instead.
        /// </remarks>
        /// <param name="folderPath">Folder that holds the settings file.
        /// It also becomes the destination folder path of the returned
        /// settings' options.</param>
        /// <param name="fileName">Name of the settings file inside
        /// <paramref name="folderPath"/>.</param>
        /// <returns>The restored settings, or an empty object if the file
        /// was not found or could not be loaded.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="folderPath"/> is <c>null</c>.</exception>
        public static SpiderSettings Restore(
            DirectoryInfo folderPath, String fileName)
        {
            if (folderPath == null)
            {
                throw new ArgumentNullException("folderPath");
            }

            string filePath = Path.Combine(
                folderPath.FullName,
                fileName);

            if (!File.Exists(filePath))
            {
                return CreateEmptySettings(folderPath);
            }

            try
            {
                // NOTE(review): BinaryFormatter is unsafe on data that may
                // have been tampered with and has been removed from recent
                // .NET versions. The stored file format depends on it, so
                // it is flagged here rather than replaced.
                BinaryFormatter serializer =
                    new BinaryFormatter();
                using (FileStream reader = new FileStream(
                    filePath,
                    FileMode.Open,
                    FileAccess.Read))
                {
                    SpiderSettings settings =
                        (SpiderSettings)serializer.Deserialize(reader);

                    // The restored object starts with fresh options that
                    // only carry the destination folder.
                    settings.Options = new WebSiteDownloaderOptions();
                    settings.Options.DestinationFolderPath = folderPath;

                    // Any list that came back as null from the stored file
                    // is replaced by an empty list so the code below (and
                    // callers) can use it without null checks.

                    // Temporary resources (downloaded, but not processed).
                    if (settings._temporaryDownloadedResourceInfos == null)
                    {
                        settings._temporaryDownloadedResourceInfos =
                            new List<DownloadedResourceInformation>();
                    }

                    // Persisted resources (downloaded and processed).
                    if (settings._persistentDownloadedResourceInfos == null)
                    {
                        settings._persistentDownloadedResourceInfos =
                            new List<DownloadedResourceInformation>();
                    }

                    // Persisted collector entries.
                    if (settings._persistentCollectorResourceInfos == null)
                    {
                        settings._persistentCollectorResourceInfos =
                            new List<iCollector>();
                    }

                    // Not sure yet what counts as a continue download.
                    if (settings._continueDownloadedResourceInfos == null)
                    {
                        settings._continueDownloadedResourceInfos =
                            new List<DownloadedResourceInformation>();
                    }

                    // Move from persistent storage back to memory: the
                    // temporary list becomes a copy of the persistent one.
                    settings._temporaryDownloadedResourceInfos.Clear();
                    settings._temporaryDownloadedResourceInfos.AddRange(
                        settings._persistentDownloadedResourceInfos);

                    Console.WriteLine(
                        string.Format(
                            @"Successfully restored spider settings from file '{0}'. " +
                                @"{1} temporary downloaded resources, " +
                                    @"{2} persistent downloaded resources, " +
                                        @"{3} continue downloaded resources.",
                            filePath,
                            settings._temporaryDownloadedResourceInfos.Count,
                            settings._persistentDownloadedResourceInfos.Count,
                            settings._continueDownloadedResourceInfos.Count
                            ));

                    return settings;
                }
            }
            catch (SerializationException x)
            {
                Console.WriteLine(
                    string.Format(
                    @"Ignoring exception while deserializing spider settings: '{0}'.",
                    x.Message));

                return CreateEmptySettings(folderPath);
            }
            catch (InvalidCastException x)
            {
                // The file deserialized fine but does not contain a
                // SpiderSettings object; treat it like any other
                // unreadable settings file.
                Console.WriteLine(
                    string.Format(
                    @"Ignoring exception while deserializing spider settings: '{0}'.",
                    x.Message));

                return CreateEmptySettings(folderPath);
            }
            catch (IOException x)
            {
                Console.WriteLine(
                    string.Format(
                    @"Ignoring IO exception while loading spider settings: '{0}'.",
                    x.Message));

                return CreateEmptySettings(folderPath);
            }
            catch (UnauthorizedAccessException x)
            {
                Console.WriteLine(
                    string.Format(
                    @"Ignoring exception while loading spider settings: '{0}'.",
                    x.Message));

                return CreateEmptySettings(folderPath);
            }
        }

        /// <summary>
        /// Creates a fresh settings object whose options point at the
        /// given destination folder. Used whenever nothing could be
        /// restored from disk.
        /// </summary>
        /// <param name="folderPath">The destination folder path to store
        /// in the new settings' options.</param>
        /// <returns>The newly created settings.</returns>
        private static SpiderSettings CreateEmptySettings(
            DirectoryInfo folderPath)
        {
            SpiderSettings settings = new SpiderSettings();
            settings.Options.DestinationFolderPath = folderPath;

            return settings;
        }
Ejemplo n.º 5
0
        // ------------------------------------------------------------------
        /// <summary>
        /// Initializes a new <see cref="ResourceStorer"/> that keeps a
        /// reference to the given spider settings.
        /// </summary>
        /// <param name="settings">The spider settings to keep.</param>
        public ResourceStorer(SpiderSettings settings)
        {
            // Stored as given; no validation or copying is performed.
            this._settings = settings;
        }