예제 #1
0
 //Synchronously downloads the resource at Uri and, on success, stores
 //it in DownloadedPage. WebException and NotSupportedException are
 //reported on the console and otherwise swallowed, in which case
 //DownloadedPage is left untouched.
 private void DownloadPage()
 {
     try
     {
         //Create is a static member of WebRequest; calling it through
         //the base type makes clear that the concrete request type is
         //chosen from the URI scheme.
         WebRequest request = WebRequest.Create(Uri);

         //The response owns the underlying connection, so it is disposed
         //together with the reader; otherwise the connection stays open
         //until the garbage collector gets to it.
         using (WebResponse response = request.GetResponse())
         using (StreamReader streamReader =
             new StreamReader(response.GetResponseStream()))
         {
             DownloadedPage = new Page(streamReader.ReadToEnd(), Uri);
         }
     }
     catch (WebException ex)
     {
         Console.WriteLine("Network or protocol error : {0}", ex.Message);
     }
     catch (NotSupportedException ex)
     {
         Console.WriteLine("URI format not supported : {0}", ex.Message);
     }
     finally
     {
         //The completed flag is set true no matter what the
         //outcome, so the dispatcher thread can dispose of
         //the fetcher appropriately.
         Completed = true;
     }
 }
예제 #2
0
 //Persists a page as two files inside the target folder:
 //  <pagehash>.link - a single line holding the page's absolute URI
 //  <pagehash>      - the page document itself
 //Both writes are performed while holding _lockObject, so saves that
 //go through this method are serialized against each other.
 public void Save(Page page)
 {
     String documentFile = Path.Combine(_targetFolder.FullName, page.Hash);
     String linkFile = documentFile + ".link";

     lock (_lockObject)
     {
         //The link file is written and closed before the document is saved.
         using (var linkWriter = new StreamWriter(linkFile))
         {
             linkWriter.WriteLine(page.Uri.AbsoluteUri);
         }

         page.Document.Save(documentFile);
     }
 }
예제 #3
0
 //Called for every page that has been fetched.
 //If at least one handler is attached to NewPageFetched and the page is
 //not yet in the cache, the page is cached and the event is raised.
 //Independently of that, every link on the page that no link filter
 //matches is wrapped in a new Fetcher and added to the queue.
 public void OnPageLoaded(Page page)
 {
     //Snapshot the delegate: checking the event for null and then
     //invoking it directly can throw a NullReferenceException if the
     //last subscriber detaches on another thread in between.
     var handler = NewPageFetched;
     if (handler != null && _cache.Get(page.Uri) == null)
     {
         _cache.Add(page);
         handler(page);
     }

     //A link is followed only when every filter declines to match it.
     foreach (Uri link in page.Links
         .Where(x => _linkFilters.All(y => !y.Matches(x))))
     {
         _queue.Enqueue(new Fetcher(link));
     }
 }
예제 #4
0
        //http://msdn.microsoft.com/en-us/library/86wf6409%28v=vs.71%29.aspx
        // Completion callback for an asynchronous read on the response stream.
        // Each call decodes the bytes that were just read, appends them to
        // state.RequestData and queues the next read. Once the stream reports
        // end of data (EndRead returns 0), or a read fails, the stream is
        // closed and Completed is set; DownloadedPage is assigned only when
        // the stream ended normally and some data was received.
        private void OnStreamRead(IAsyncResult result)
        {
            // Direct cast instead of 'as': a state object of the wrong type
            // fails here with InvalidCastException rather than surfacing as
            // a NullReferenceException on the next line.
            RequestState state = (RequestState)result.AsyncState;
            // Retrieve the ResponseStream that was set in RespCallback.
            Stream responseStream = state.ResponseStream;

            // Stays true unless another read was queued successfully, so that
            // every other outcome (end of stream or failure) finishes the
            // download in the finally block.
            bool finished = true;
            try
            {
                // Number of bytes placed into state.BufferRead by this read.
                int read = responseStream.EndRead(result);
                if (read > 0)
                {
                    // Prepare a Char array buffer for converting to Unicode.
                    Char[] charBuffer = new Char[RequestState.BUFFER_SIZE * 2];

                    // Convert byte stream to Char array and then to String.
                    // len contains the number of characters converted to Unicode.
                    int len =
                       state.StreamDecode.GetChars(state.BufferRead, 0, read, charBuffer, 0);

                    String str = new String(charBuffer, 0, len);

                    // Append the recently read data to the RequestData
                    // object contained in RequestState.
                    state.RequestData.Append(str);

                    // Continue reading data until
                    // responseStream.EndRead returns 0 (end of stream).
                    responseStream.BeginRead(
                       state.BufferRead, 0, RequestState.BUFFER_SIZE,
                       new AsyncCallback(OnStreamRead), state);
                    finished = false;
                }
                else if (state.RequestData.Length > 0)
                {
                    DownloadedPage = new Page(state.RequestData.ToString(), Uri);
                }
            }
            catch (IOException ex)
            {
                Console.WriteLine("Network or protocol error : {0}", ex.Message);
            }
            catch (WebException ex)
            {
                Console.WriteLine("Network or protocol error : {0}", ex.Message);
            }
            finally
            {
                if (finished)
                {
                    // Set the flag before closing so it is raised even if
                    // closing the stream itself fails.
                    Completed = true;
                    // Close down the response stream.
                    responseStream.Close();
                }
            }
        }