/// <summary>
/// Stores a binary resource to the local file system.
/// </summary>
/// <param name="binaryContent">The raw bytes to write. Nothing is
/// written when this is null or empty (the file is still deleted if it
/// already existed).</param>
/// <param name="uriInfo">The origin URI of the resource; determines the
/// local target path.</param>
/// <returns>Return the info about the stored data.</returns>
public DownloadedResourceInformation StoreBinary(
    byte[] binaryContent,
    UriResourceInformation uriInfo )
{
    DownloadedResourceInformation result =
        new DownloadedResourceInformation(
        uriInfo,
        _settings.Options.DestinationFolderPath );

    try
    {
        // Always start from a clean slate; OpenWrite() alone would not
        // truncate a longer pre-existing file.
        if ( result.LocalFilePath.Exists )
        {
            result.LocalFilePath.Delete();
        }

        // FIX: ensure the destination folder exists, exactly as
        // StoreHtml() does. Without this, OpenWrite() throws
        // DirectoryNotFoundException (an IOException) which is swallowed
        // below, and the resource is silently lost.
        if ( !result.LocalFilePath.Directory.Exists )
        {
            result.LocalFilePath.Directory.Create();
        }

        if ( binaryContent != null && binaryContent.Length > 0 )
        {
            Trace.WriteLine(
                string.Format(
                @"Writing binary content to file '{0}'.",
                result.LocalFilePath ) );

            using ( FileStream s = result.LocalFilePath.OpenWrite() )
            {
                s.Write( binaryContent, 0, binaryContent.Length );
            }
        }
    }
    catch ( IOException x )
    {
        // Deliberate best-effort: storage failures are logged, not fatal.
        Trace.WriteLine(
            string.Format(
            @"Ignoring IO exception while storing binary file: '{0}'.",
            x.Message ) );
    }
    catch ( UnauthorizedAccessException x )
    {
        Trace.WriteLine(
            string.Format(
            @"Ignoring exception while storing binary file: '{0}'.",
            x.Message ) );
    }

    return result;
}
/// <summary>
/// Replace URIs inside a given HTML document that was previously
/// downloaded with the local URIs.
/// </summary>
/// <param name="textContent">The downloaded HTML text to rewrite.</param>
/// <param name="uriInfo">The URI the HTML text was downloaded from;
/// used as the base for resolving the extracted links.</param>
/// <returns>Returns the content text with the replaced links.</returns>
public string ReplaceLinks(
    string textContent,
    UriResourceInformation uriInfo)
{
    // Re-parse the document to discover every link that may need rewriting.
    ResourceParser parser = new ResourceParser(
        _settings,
        uriInfo,
        textContent);
    List<UriResourceInformation> linkInfos = parser.ExtractLinks();

    // For remembering duplicates.
    // (Used as a set: key == value == the link's absolute path. A link
    // seen a second time is allowed to produce zero replacements without
    // tripping the consistency check below.)
    Dictionary<string, string> replacedLinks = new Dictionary<string, string>();

    foreach (UriResourceInformation linkInfo in linkInfos)
    {
        // Only rewrite links the spider actually mirrors locally.
        if (linkInfo.WantFollowUri || linkInfo.IsResourceUri)
        {
            // Maps the remote link to its local file name.
            DownloadedResourceInformation dlInfo =
                new DownloadedResourceInformation(
                linkInfo,
                _settings.Options.DestinationFolderPath);

            if (!string.IsNullOrEmpty(linkInfo.OriginalUrl))
            {
                // Snapshot so we can detect whether any replacement happened.
                string textContentBefore = textContent;

                // Escape the URL so regex metacharacters in it match literally.
                string link = Regex.Escape(linkInfo.OriginalUrl);

                // Replace the three quoting forms separately so the
                // surrounding delimiters are preserved:
                // 1. double-quoted attribute values: href="...".
                textContent = Regex.Replace(
                    textContent,
                    string.Format(@"""{0}""", link),
                    string.Format(@"""{0}""", dlInfo.LocalFileName),
                    RegexOptions.IgnoreCase | RegexOptions.Multiline);
                // 2. single-quoted attribute values: href='...'.
                textContent = Regex.Replace(
                    textContent,
                    string.Format(@"'{0}'", link),
                    string.Format(@"'{0}'", dlInfo.LocalFileName),
                    RegexOptions.IgnoreCase | RegexOptions.Multiline);
                // For style-"url(...)"-links.
                textContent = Regex.Replace(
                    textContent,
                    string.Format(@"\(\s*{0}\s*\)", link),
                    string.Format(@"({0})", dlInfo.LocalFileName),
                    RegexOptions.IgnoreCase | RegexOptions.Multiline);

                // Some checking.
                // 2007-07-27, Uwe Keim.
                // If the local name differs from the original URL, at least
                // one of the replacements above must have changed the text —
                // unless this link was already replaced in an earlier
                // iteration. Otherwise the rewrite silently failed.
                if (linkInfo.OriginalUrl != dlInfo.LocalFileName.Name &&
                    textContentBefore == textContent &&
                    !replacedLinks.ContainsKey(linkInfo.AbsoluteUri.AbsolutePath))
                {
                    throw new ApplicationException(
                        string.Format(
                        @"Failed to replace URI '{0}' with URI '{1}' in HTML text '{2}'.",
                        linkInfo.OriginalUrl,
                        dlInfo.LocalFileName,
                        textContent));
                }
                else
                {
                    // Remember.
                    replacedLinks[linkInfo.AbsoluteUri.AbsolutePath] =
                        linkInfo.AbsoluteUri.AbsolutePath;
                }
            }
            // */
        }
    }

    // --
    return textContent;
}
/// <summary>
/// Registers a URL as a restart point for when parsing had to stop
/// because the recursion got too deep. Re-adding an already known
/// resource moves it to the end of the list.
/// </summary>
/// <param name="resourceInfo">The resource to continue from later.</param>
public void AddContinueDownloadedResourceInfos(
    DownloadedResourceInformation resourceInfo )
{
    // List<T>.Remove is a harmless no-op when the item is absent,
    // so no Contains() pre-check is needed.
    _continueDownloadedResourceInfos.Remove( resourceInfo );
    _continueDownloadedResourceInfos.Add( resourceInfo );

    // Write the updated state to the persistent store.
    Persist();
}
/// <summary>
/// Persist information about a downloaded resource by moving it from
/// the temporary list into the persistent list and saving.
/// </summary>
/// <param name="uriInfo">The URI info.</param>
public void PersistDownloadedResourceInfo(
    DownloadedResourceInformation uriInfo )
{
    int foundPosition =
        _temporaryDownloadedResourceInfos.IndexOf( uriInfo );

    // FIX: IndexOf returns -1 when the resource was never added to the
    // temporary list; indexing with -1 would throw
    // ArgumentOutOfRangeException. Nothing to persist in that case.
    if ( foundPosition < 0 )
    {
        Trace.WriteLine(
            string.Format(
            @"Resource '{0}' not found in temporary list. Nothing to persist.",
            uriInfo ) );
        return;
    }

    // Use the stored instance (it carries the added-by/added-at metadata),
    // not the caller's argument.
    DownloadedResourceInformation foundInfo =
        _temporaryDownloadedResourceInfos[foundPosition];

    // --
    // Move over.
    if ( _persistentDownloadedResourceInfos.Contains( foundInfo ) )
    {
        _persistentDownloadedResourceInfos.Remove( foundInfo );
    }
    _persistentDownloadedResourceInfos.Add( foundInfo );

    // And store.
    Persist();
}
/// <summary>
/// Records that a resource has been downloaded in the temporary
/// (not yet persisted) list. A duplicate entry is replaced, which
/// moves it to the tail of the list.
/// </summary>
/// <param name="info">The info.</param>
public void AddDownloadedResourceInfo(
    DownloadedResourceInformation info )
{
    // Remove-then-add keeps at most one entry per resource;
    // Remove is a no-op when the item is not present.
    _temporaryDownloadedResourceInfos.Remove( info );
    _temporaryDownloadedResourceInfos.Add( info );
}
/// <summary>
/// Check whether a file was already downloaded.
/// </summary>
/// <param name="uriInfo">The URI info.</param>
/// <returns>
/// <c>true</c> if [has downloaded URI] [the specified URI info];
/// otherwise, <c>false</c>.
/// </returns>
public bool HasDownloadedUri(
    DownloadedResourceInformation uriInfo )
{
    // Look the resource up in the temporary list.
    int position = _temporaryDownloadedResourceInfos.IndexOf( uriInfo );
    if ( position < 0 )
    {
        // Never seen before.
        return false;
    }

    DownloadedResourceInformation known =
        _temporaryDownloadedResourceInfos[position];

    // Downloaded by this very process run - definitely fresh.
    if ( known.AddedByProcessID == Process.GetCurrentProcess().Id )
    {
        return true;
    }

    // Downloaded by an earlier run within the last 10 hours -
    // still considered fresh.
    if ( known.DateAdded.AddHours( 10 ) > DateTime.Now )
    {
        return true;
    }

    // Otherwise only trust the record if the file is still on disk.
    return known.FileExists;
}
/// <summary>
/// Stores a HTML resource to the local file system.
/// Does no hyperlink replacement.
/// </summary>
/// <param name="textContent">The HTML text to write.</param>
/// <param name="encoding">The text encoding used when writing.</param>
/// <param name="uriInfo">The origin URI; determines the local path.</param>
/// <returns>Return the info about the stored data.</returns>
public DownloadedResourceInformation StoreHtml(
    string textContent,
    Encoding encoding,
    UriResourceInformation uriInfo )
{
    DownloadedResourceInformation result =
        new DownloadedResourceInformation(
        uriInfo,
        _settings.Options.DestinationFolderPath );

    try
    {
        var destination = result.LocalFilePath;

        // Start clean: drop any previous version of the file.
        if ( destination.Exists )
        {
            destination.Delete();
        }

        // Make sure the target folder hierarchy is in place.
        if ( !destination.Directory.Exists )
        {
            destination.Directory.Create();
        }

        Trace.WriteLine(
            string.Format(
            @"Writing text content to file '{0}'.",
            result.LocalFilePath ) );

        // The writer owns (and disposes) the underlying stream.
        using ( StreamWriter writer = new StreamWriter(
            new FileStream(
                destination.FullName,
                FileMode.Create,
                FileAccess.Write ),
            encoding ) )
        {
            writer.Write( textContent );
        }
    }
    catch ( IOException x )
    {
        // Best-effort storage: log and carry on.
        Trace.WriteLine(
            string.Format(
            @"Ignoring IO exception while storing HTML file: '{0}'.",
            x.Message ) );
    }
    catch ( UnauthorizedAccessException x )
    {
        Trace.WriteLine(
            string.Format(
            @"Ignoring exception while storing HTML file: '{0}'.",
            x.Message ) );
    }

    return result;
}
// ------------------------------------------------------------------
#endregion

#region Private methods.
// ------------------------------------------------------------------

/// <summary>
/// Process one single URI with a document behind (i.e. no
/// resource URI).
/// Recursively downloads the document, rewrites its links, stores it,
/// and descends into every extracted child link, bounded by both the
/// configured maximum link depth and the internal recursion cap
/// <c>_maxDepth</c>.
/// </summary>
/// <param name="uriInfo">The URI info.</param>
/// <param name="depth">The depth (0 for the seed URI).</param>
private void ProcessUrl(
    DownloadedResourceInformation uriInfo,
    int depth )
{
    Trace.WriteLine(
        string.Format(
        @"Processing URI '{0}', with depth {1}.",
        uriInfo.AbsoluteUri.AbsoluteUri,
        depth ) );

    // A MaximumLinkDepth of 0 or less means "unlimited".
    if ( _settings.Options.MaximumLinkDepth > 0 &&
        depth > _settings.Options.MaximumLinkDepth )
    {
        // Configured hard limit reached; end this branch silently.
        Trace.WriteLine(
            string.Format(
            @"Depth {1} exceeds maximum configured depth. Ending recursion " +
            @"at URI '{0}'.",
            uriInfo.AbsoluteUri.AbsoluteUri,
            depth ) );
    }
    else if ( depth > _maxDepth )
    {
        // Internal recursion cap reached (guards against stack
        // overflows on very deep sites); remember this URI so the outer
        // Process() loop can pick it up again later.
        Trace.WriteLine(
            string.Format(
            @"Depth {1} exceeds maximum allowed recursion depth. " +
            @"Ending recursion at URI '{0}' to possible continue later.",
            uriInfo.AbsoluteUri.AbsoluteUri,
            depth ) );

        // Add myself to start there later.
        // But only if not yet process, otherwise we would never finish.
        if ( _settings.HasDownloadedUri( uriInfo ) )
        {
            Trace.WriteLine(
                string.Format(
                @"URI '{0}' was already downloaded. NOT continuing later.",
                uriInfo.AbsoluteUri.AbsoluteUri ) );
        }
        else
        {
            // NOTE(review): the surrounding comments speak of "continue
            // later", but this calls AddDownloadedResourceInfo rather than
            // AddContinueDownloadedResourceInfos — verify this is intended.
            _settings.AddDownloadedResourceInfo( uriInfo );

            // Finished the function.
            Trace.WriteLine(
                string.Format(
                @"Added URI '{0}' to continue later.",
                uriInfo.AbsoluteUri.AbsoluteUri ) );
        }
    }
    else
    {
        // If we are in asynchron mode, periodically check for stopps.
        if ( processAsyncBackgroundWorker != null )
        {
            if ( processAsyncBackgroundWorker.CancellationPending )
            {
                // NOTE(review): cancellation is currently disabled here —
                // the pending request is checked but never acted upon.
                //throw new StopProcessingException();
            }
        }

        // --
        // Notify event sinks about this URL.
        if ( ProcessingUrl != null )
        {
            ProcessingUrlEventArgs e = new ProcessingUrlEventArgs(
                uriInfo, depth );
            ProcessingUrl( this, e );
        }

        // --
        if ( uriInfo.IsProcessableUri )
        {
            if ( _settings.HasDownloadedUri( uriInfo ) )
            {
                Trace.WriteLine(
                    string.Format(
                    @"URI '{0}' was already downloaded. Skipping.",
                    uriInfo.AbsoluteUri.AbsoluteUri ) );
            }
            else
            {
                Trace.WriteLine(
                    string.Format(
                    @"URI '{0}' was not already downloaded. Processing.",
                    uriInfo.AbsoluteUri.AbsoluteUri ) );

                if ( uriInfo.LinkType == UriType.Resource )
                {
                    // Leaf resource (e.g. image/script): download the
                    // bytes, store them, and persist immediately — no
                    // recursion below a resource.
                    Trace.WriteLine(
                        string.Format(
                        @"Processing resource URI '{0}', with depth {1}.",
                        uriInfo.AbsoluteUri.AbsoluteUri,
                        depth ) );

                    byte[] binaryContent;
                    ResourceDownloader.DownloadBinary(
                        uriInfo.AbsoluteUri,
                        out binaryContent,
                        _settings.Options );

                    ResourceStorer storer = new ResourceStorer( _settings );
                    storer.StoreBinary(
                        binaryContent,
                        uriInfo );

                    _settings.AddDownloadedResourceInfo( uriInfo );
                    _settings.PersistDownloadedResourceInfo( uriInfo );
                }
                else
                {
                    // HTML document: download, extract links, rewrite the
                    // links to their local equivalents, store, then recurse
                    // into each child.
                    Trace.WriteLine(
                        string.Format(
                        @"Processing content URI '{0}', with depth {1}.",
                        uriInfo.AbsoluteUri.AbsoluteUri,
                        depth ) );

                    string textContent;
                    string encodingName;
                    Encoding encoding;
                    byte[] binaryContent;
                    ResourceDownloader.DownloadHtml(
                        uriInfo.AbsoluteUri,
                        out textContent,
                        out encodingName,
                        out encoding,
                        out binaryContent,
                        _settings.Options );

                    // Links are extracted BEFORE the rewrite so the child
                    // URIs still refer to the remote originals.
                    ResourceParser parser = new ResourceParser(
                        _settings,
                        uriInfo,
                        textContent );
                    List<UriResourceInformation> linkInfos = parser.ExtractLinks();

                    ResourceRewriter rewriter = new ResourceRewriter( _settings );
                    textContent = rewriter.ReplaceLinks(
                        textContent,
                        uriInfo );

                    ResourceStorer storer = new ResourceStorer( _settings );
                    storer.StoreHtml(
                        textContent,
                        encoding,
                        uriInfo );

                    // Add before parsing childs.
                    // (Prevents the children from re-entering this URI and
                    // recursing forever on circular links.)
                    _settings.AddDownloadedResourceInfo( uriInfo );

                    foreach ( UriResourceInformation linkInfo in linkInfos )
                    {
                        DownloadedResourceInformation dlInfo =
                            new DownloadedResourceInformation(
                            linkInfo,
                            uriInfo.LocalFolderPath,
                            uriInfo.LocalBaseFolderPath );

                        // Recurse.
                        ProcessUrl( dlInfo, depth + 1 );

                        // Do not return or break immediately if too deep,
                        // because this would omit certain pages at this
                        // recursion level.
                    }

                    // Persist after completely parsed childs.
                    _settings.PersistDownloadedResourceInfo( uriInfo );
                }

                Trace.WriteLine(
                    string.Format(
                    @"Finished processing URI '{0}'.",
                    uriInfo.AbsoluteUri.AbsoluteUri ) );
            }
        }
        else
        {
            Trace.WriteLine(
                string.Format(
                @"URI '{0}' is not processable. Skipping.",
                uriInfo.AbsoluteUri.AbsoluteUri ) );
        }
    }
}
/// <summary>
/// Performs the complete downloading (synchronously).
/// Does return only when completely finished or when an exception
/// occured.
/// Seeds the continue-list with the configured start URI, then drains
/// that list; each drained entry is processed recursively, and any
/// branch that got too deep re-enters the list for a later loop pass.
/// </summary>
public void Process()
{
    // Derive the base URL from the configured download URI:
    // strip a trailing slash and any query string.
    string baseUrl =
        _settings.Options.DownloadUri.OriginalString.TrimEnd( '/' ).
        Split( '?' )[0];

    // If the URI points below the site root (e.g. ".../folder/page"),
    // cut the last path segment so baseUrl names the containing folder.
    if ( _settings.Options.DownloadUri.AbsolutePath.IndexOf( '/' ) >= 0 &&
        _settings.Options.DownloadUri.AbsolutePath.Length > 1 )
    {
        baseUrl = baseUrl.Substring( 0, baseUrl.LastIndexOf( '/' ) );
    }

    // --
    // The URI that is configured to be the start URI.
    Uri baseUri = new Uri( baseUrl, UriKind.Absolute );

    // The initial seed.
    DownloadedResourceInformation seedInfo =
        new DownloadedResourceInformation(
        _settings.Options,
        @"/",
        _settings.Options.DownloadUri,
        baseUri,
        _settings.Options.DestinationFolderPath,
        _settings.Options.DestinationFolderPath,
        UriType.Content );

    // --
    // Add the first one as the seed.
    // (If continue-infos already exist we are resuming a previous,
    // persisted run and must NOT restart from the seed.)
    if ( !_settings.HasContinueDownloadedResourceInfos )
    {
        _settings.AddContinueDownloadedResourceInfos( seedInfo );
    }

    // 2007-07-27, Uwe Keim:
    // Doing a multiple looping, to avoid stack overflows.
    // Since a download-"tree" (i.e. the hierachy of all downloadable
    // pages) can get _very_ deep, process one part at a time only.
    // The state is already persisted, so we need to set up again at
    // the previous position.
    int index = 0;
    while ( _settings.HasContinueDownloadedResourceInfos )
    {
        // Fetch one.
        DownloadedResourceInformation processInfo =
            _settings.PopContinueDownloadedResourceInfos();

        Trace.WriteLine(
            string.Format(
            @"{0}. loop: Starting processing URLs from '{1}'.",
            index + 1,
            processInfo.AbsoluteUri.AbsoluteUri ) );

        // Process the URI, add any continue URIs to start
        // again, later. Depth restarts at 0 for each continue point.
        ProcessUrl( processInfo, 0 );

        index++;
    }

    Trace.WriteLine(
        string.Format(
        @"{0}. loop: Finished processing URLs from seed '{1}'.",
        index + 1,
        _settings.Options.DownloadUri ) );
}
/// <summary>
/// Constructor.
/// Captures the URI being processed and the recursion depth at which
/// the spider encountered it, for delivery to event subscribers.
/// </summary>
/// <param name="uriInfo">The resource about to be processed.</param>
/// <param name="depth">The recursion depth of that resource.</param>
internal ProcessingUrlEventArgs(
    DownloadedResourceInformation uriInfo,
    int depth )
{
    // Plain field captures; the arguments are stored as-is.
    this.depth = depth;
    this.uriInfo = uriInfo;
}
/// <summary>
/// Persist information about a downloaded resource. (Permanent Storage)
/// Moves the matching entry from the temporary bucket into the
/// persistent bucket and saves the store.
/// </summary>
/// <param name="uriInfo">The URI info.</param>
public void PersistDownloadedResourceInfo(
    DownloadedResourceInformation uriInfo)
{
    //Find resource in temporary bucket
    int foundPosition =
        _temporaryDownloadedResourceInfos.IndexOf(
        uriInfo);

    //FIX: IndexOf returns -1 when the resource was never added to the
    //temporary bucket; indexing with -1 would throw
    //ArgumentOutOfRangeException. Nothing to move in that case.
    if (foundPosition < 0)
    {
        return;
    }

    //Grab resource (the stored instance carries the added-by/added-at
    //metadata, so prefer it over the caller's argument).
    DownloadedResourceInformation foundInfo =
        _temporaryDownloadedResourceInfos[foundPosition];

    // Move over from temp to persisted.
    if (_persistentDownloadedResourceInfos.Contains(foundInfo))
    {
        _persistentDownloadedResourceInfos.Remove(foundInfo);
    }
    _persistentDownloadedResourceInfos.Add(foundInfo);

    // Save in spider store.
    Persist();
}
/// <summary>
/// Check whether a file was already downloaded.
/// </summary>
/// <param name="uriInfo">The URI info.</param>
/// <returns>
/// <c>true</c> if [has downloaded URI] [the specified URI info];
/// otherwise, <c>false</c>.
/// </returns>
public bool HasDownloadedUri(
    DownloadedResourceInformation uriInfo)
{
    // Search whether exists in the unsaved list.
    int idx = _temporaryDownloadedResourceInfos.IndexOf(uriInfo);
    if (idx < 0)
    {
        //Not Found.
        return false;
    }

    // Found. Check various attributes.
    DownloadedResourceInformation entry =
        _temporaryDownloadedResourceInfos[idx];

    // Considered downloaded when any of these hold:
    // - it was fetched by the currently running process,
    // - it was fetched less than 10 hours ago,
    // - otherwise, only when its file is still present on disk.
    return entry.AddedByProcessID == Process.GetCurrentProcess().Id
        || entry.DateAdded.AddHours(10) > DateTime.Now
        || entry.FileExists;
}
/// <summary>
/// Registers a URL to continue parsing from when the recursion gets
/// too deep. A resource that is already registered is moved to the
/// end of the list.
/// </summary>
/// <param name="resourceInfo">The resource to continue from later.</param>
public void AddContinueDownloadedResourceInfos(
    DownloadedResourceInformation resourceInfo)
{
    //Re-add at the tail; Remove silently does nothing when absent.
    _continueDownloadedResourceInfos.Remove(resourceInfo);
    _continueDownloadedResourceInfos.Add(resourceInfo);

    //Save
    Persist();
}
// ------------------------------------------------------------------
/// <summary>
/// Process one single URI with a document behind (i.e. no
/// resource URI).
/// This variant additionally filters candidate links through a list of
/// follower patterns and feeds matching pages into "Git collector"
/// requests, dispatching on the link type (Content / Resource / Form).
/// </summary>
/// <param name="uriInfo">The URI info.</param>
/// <param name="depth">The depth.</param>
/// <param name="git">Collector definitions, threaded through the
/// recursion unchanged. NOTE(review): this parameter is never read in
/// this method body — only passed on to the recursive calls; the
/// collector requests actually come from
/// _settings.Options.GitCollectionRequest. Verify it is still needed.</param>
/// <param name="folo">Follower rules; a URI at depth &gt; 0 is only
/// followed when some rule's regex matches it AND that rule's depth
/// equals the current depth exactly.</param>
private void ProcessUrl(
    DownloadedResourceInformation uriInfo,
    int depth,
    List<iCollector> git,
    List<iFollower> folo)
{
    Console.WriteLine(
        string.Format(
        @"Processing URI '{0}', with depth {1}.",
        uriInfo.AbsoluteUri.AbsoluteUri,
        depth));

    // The seed (depth 0) is always followed; deeper URIs must match a rule.
    bool blnFollow = true;

    //Check to see if the uriInfo is followable by an internal list
    // NOTE(review): the rule must match at this exact depth
    // (f.depth == depth) — a rule defined for depth 2 will not allow
    // the same URL at depth 3. Confirm this exact-depth matching is
    // intentional.
    if (depth > 0)
        blnFollow = ((from f in folo.ToList()
                      let matches = f.pattern.Matches(uriInfo.AbsoluteUri.AbsoluteUri.ToString())
                      where matches.Count > 0 && f.depth == depth
                      select f).Count()) > 0 ? true : false; //return true if follow exists.

    // NOTE(review): unlike the other ProcessUrl overload (which treats
    // MaximumLinkDepth <= 0 as unlimited via "> 0"), this one uses
    // "> -1", so a configured depth of 0 limits to the seed only.
    if (_settings.Options.MaximumLinkDepth > -1 &&
        depth > _settings.Options.MaximumLinkDepth)
    {
        Console.WriteLine(
            string.Format(
            @"Depth {1} exceeds maximum configured depth. Ending recursion " +
            @"at URI '{0}'.",
            uriInfo.AbsoluteUri.AbsoluteUri,
            depth));
    }
    else if (!blnFollow)
    {
        // No follower rule matched: skip this URI entirely.
        Console.WriteLine(
            string.Format(
            @"Follower {1} exceeds maximum configured depth. Not following " +
            @" URI '{0}'.",
            uriInfo.AbsoluteUri.AbsoluteUri,
            depth));
        //Fake our way into it.
        //_settings.AddDownloadedResourceInfo(uriInfo);
    }
    else if (depth > _maxDepth)
    {
        // Internal recursion cap reached; remember the URI so an outer
        // loop can resume from here later.
        Console.WriteLine(
            string.Format(
            @"Depth {1} exceeds maximum allowed recursion depth. " +
            @"Ending recursion at URI '{0}' to possible continue later.",
            uriInfo.AbsoluteUri.AbsoluteUri,
            depth));

        // Add myself to start there later.
        // But only if not yet process, otherwise we would never finish.
        if (_settings.HasDownloadedUri(uriInfo))
        {
            Console.WriteLine(
                string.Format(
                @"URI '{0}' was already downloaded. NOT continuing later.",
                uriInfo.AbsoluteUri.AbsoluteUri));
        }
        else
        {
            // NOTE(review): adds to the downloaded list, not to the
            // continue list — verify against the "continue later" intent.
            _settings.AddDownloadedResourceInfo(uriInfo);

            // Finished the function.
            Console.WriteLine(
                string.Format(
                @"Added URI '{0}' to continue later.",
                uriInfo.AbsoluteUri.AbsoluteUri));
        }
    }
    else
    {
        // If we are in asynchron mode, periodically check for stops.
        if (processAsyncBackgroundWorker != null)
        {
            if (processAsyncBackgroundWorker.CancellationPending)
            {
                throw new StopProcessingException();
            }
        }

        // --
        // Notify event sinks about this URL.
        if (ProcessingUrl != null)
        {
            ProcessingUrlEventArgs e = new ProcessingUrlEventArgs(
                uriInfo, depth);
            ProcessingUrl(this, e);
        }

        // --
        if (uriInfo.IsProcessableUri)
        {
            if (_settings.HasDownloadedUri(uriInfo))
            {
                Console.WriteLine(
                    string.Format(
                    @"URI '{0}' was already downloaded. Skipping.",
                    uriInfo.AbsoluteUri.AbsoluteUri));
            }
            else
            {
                Console.WriteLine(
                    string.Format(
                    @"URI '{0}' was not already downloaded. Processing.",
                    uriInfo.AbsoluteUri.AbsoluteUri));

                //Switch case variables.
                string textContent;
                string encodingName;
                Encoding encoding;
                byte[] binaryContent;

                //Local storage.
                ResourceStorer storer = new ResourceStorer(_settings);
                ResourceParser parser = null;
                List<UriResourceInformation> linkInfos = null;
                List<iCollector> req = null;

                // Dispatch on what kind of link this is.
                switch (uriInfo.LinkType)
                {
                    case UriType.Content:
                        // Ordinary HTML page: download, run collectors,
                        // extract links and recurse into each child.
                        Console.WriteLine(string.Format(@"Processing content URI '{0}', with depth {1}.", uriInfo.AbsoluteUri.AbsoluteUri, depth));

                        //Grab the page content.
                        ResourceDownloader.DownloadHtml(
                            uriInfo.AbsoluteUri,
                            out textContent,
                            out encodingName,
                            out encoding,
                            out binaryContent,
                            _settings.Options);

                        //Fire-up resource parser (A, FORMS, IMG) parser.
                        parser = new ResourceParser(
                            _settings,
                            uriInfo,
                            textContent);

                        //Grab all the Git collector requests, that match the Uri.
                        // A request qualifies when its page type is Content,
                        // its pageName is a substring of this URI, and this
                        // URI has not already been parsed for that page type.
                        // Clone() so each page works on its own copy.
                        req = (from g in _settings.Options.GitCollectionRequest
                               where g.pageType == UriType.Content
                               && uriInfo.AbsoluteUri.AbsoluteUri.ToString().Contains(g.pageName)
                               && !(from o in _settings.Parsings
                                    where o.pageType == g.pageType
                                    select o.source.AbsoluteUri.AbsoluteUri)
                                    .Contains(uriInfo.AbsoluteUri.AbsoluteUri)
                               select g.Clone()).ToList();

                        //Have valid requests?
                        if (req.Count() > 0)
                        {
                            //Persist the collector results.
                            _settings.PersistCollectorResultInfo(parser.ExtractCollectorRequest(req));
                        }

                        //Process link extraction.
                        linkInfos = parser.ExtractLinks();

                        // Add before parsing childs.
                        // (Prevents circular links from recursing forever.)
                        _settings.AddDownloadedResourceInfo(uriInfo);

                        foreach (UriResourceInformation linkInfo in linkInfos)
                        {
                            DownloadedResourceInformation dlInfo =
                                new DownloadedResourceInformation(
                                linkInfo,
                                uriInfo.LocalFolderPath,
                                uriInfo.LocalBaseFolderPath,
                                linkInfo.Parent,
                                depth + 1);

                            // Recurse.
                            ProcessUrl(dlInfo, depth + 1, git, folo);

                            // Do not return or break immediately if too deep,
                            // because this would omit certain pages at this
                            // recursion level.
                        }

                        // Persist after completely parsed childs.
                        _settings.PersistDownloadedResourceInfo(uriInfo);
                        break;
                    case UriType.Resource:
                        // Resource downloads (IMG, JS) are intentionally
                        // disabled; the resource is only recorded as done
                        // so it is never retried.
                        //Console.WriteLine(
                        //string.Format(
                        //    @"Processing resource URI '{0}', with depth {1}.",
                        //    uriInfo.AbsoluteUri.AbsoluteUri,
                        //    depth));

                        //Scrape Resource (IMG, JS)
                        //ResourceDownloader.DownloadBinary(
                        //    uriInfo.AbsoluteUri,
                        //    out binaryContent,
                        //    _settings.Options);

                        //storer = new ResourceStorer(_settings);

                        //Save the resource (IMG, JS)
                        //storer.StoreBinary(
                        //    binaryContent,
                        //    uriInfo);

                        //Act like we did it.
                        _settings.AddDownloadedResourceInfo(uriInfo);
                        _settings.PersistDownloadedResourceInfo(uriInfo);
                        break;
                    case UriType.Form:
                        // Form link: submit the form (POST), then treat the
                        // response exactly like a content page.
                        Console.WriteLine(
                            string.Format(
                            @"Processing Form POST to URI '{0}', with depth {1}.",
                            uriInfo.AbsoluteUri.AbsoluteUri,
                            depth));

                        //Grab the Form response content.
                        ResourceDownloader.DownloadForm(
                            uriInfo.AbsoluteUri,
                            out textContent,
                            out encodingName,
                            out encoding,
                            out binaryContent,
                            _settings.Options);

                        //Fire-up resource parser (A, FORMS, IMG) parser.
                        parser = new ResourceParser(
                            _settings,
                            uriInfo,
                            textContent);

                        // NOTE(review): 'w' is assigned but never used —
                        // looks like a leftover debugging aid.
                        var w = _settings.Parsings;

                        //Grab all the Git collector requests, that match the Uri.
                        // Same qualification rules as the Content case, but
                        // for Form-typed requests.
                        req = (from g in _settings.Options.GitCollectionRequest
                               where g.pageType == UriType.Form
                               && uriInfo.AbsoluteUri.AbsoluteUri.ToString().Contains(g.pageName)
                               && !(from o in _settings.Parsings
                                    where o.pageType == g.pageType
                                    select o.source.AbsoluteUri.AbsoluteUri)
                                    .Contains(uriInfo.AbsoluteUri.AbsoluteUri)
                               select g.Clone()).ToList();

                        //Have requests?
                        if (req.Count() > 0)
                        {
                            //Persist the collector results.
                            _settings.PersistCollectorResultInfo(parser.ExtractCollectorRequest(req));
                        }

                        //Process link extraction.
                        linkInfos = parser.ExtractLinks();

                        // Add before parsing childs.
                        _settings.AddDownloadedResourceInfo(uriInfo);

                        foreach (UriResourceInformation linkInfo in linkInfos)
                        {
                            DownloadedResourceInformation dlInfo =
                                new DownloadedResourceInformation(
                                linkInfo,
                                uriInfo.LocalFolderPath,
                                uriInfo.LocalBaseFolderPath,
                                linkInfo.Parent,
                                depth + 1);

                            // Recurse.
                            ProcessUrl(dlInfo, depth + 1, git, folo);

                            // Do not return or break immediately if too deep,
                            // because this would omit certain pages at this
                            // recursion level.
                        }

                        // Persist after completely parsed childs.
                        _settings.PersistDownloadedResourceInfo(uriInfo);
                        break;
                    default:
                        // Unknown link types are ignored.
                        break;
                }

                Console.WriteLine(
                    string.Format(
                    @"Finished processing URI '{0}'.",
                    uriInfo.AbsoluteUri.AbsoluteUri));
            }
        }
        else
        {
            Console.WriteLine(
                string.Format(
                @"URI '{0}' is not processable. Skipping.",
                uriInfo.AbsoluteUri.AbsoluteUri));
        }
    }
}