// Private methods.

/// <summary>
/// Crawls the feed at the specified parameters.
/// </summary>
/// <param name="asyncResult">The asynchronous result.</param>
/// <param name="feedId">The feed.</param>
/// <param name="timeId">The time.</param>
/// <param name="category">The category.</param>
/// <param name="regionId">The region.</param>
/// <param name="obj">The standard feed object.</param>
/// <returns>The crawl result.</returns>
private CrawlResult CrawlFeed(
    SpiderAsyncResult asyncResult,
    YouTubeStandardFeed feedId,
    YouTubeTimeId timeId,
    string category,
    string regionId,
    ref DbObjectStandardFeed obj)
{
    // If the asynchronous operation has been canceled, do nothing.
    if (asyncResult.IsCanceled) return CrawlResult.Canceled;

    // Compute the feed key.
    string key = this.EncodeFeedKey(feedId, timeId, category, regionId);
    // Compute the feed URI, starting at index 1 and asking for 1 result.
    Uri uri = YouTubeUri.GetStandardFeed(feedId, regionId, category, timeId, 1, 1);

    // Create a new video request.
    YouTubeRequestFeed<Video> request = new YouTubeRequestFeed<Video>(this.crawler.YouTube.Settings);

    // Set the feed URL.
    obj.Url = uri.AbsoluteUri;

    try
    {
        // Begin an asynchronous request for the standard feed.
        AsyncWebResult result = request.Begin(uri, (AsyncWebResult webResult) => { }) as AsyncWebResult;
        // Add the result of the web operation to the collection of web requests.
        AsyncWebOperation operation = asyncResult.AddAsyncWeb(request, result);
        // Wait for the asynchronous operation to complete.
        result.AsyncWaitHandle.WaitOne();
        // Remove the result of the web operation from the collection of web requests.
        asyncResult.RemoveAsyncWeb(operation);
        // Complete the request and get the video feed.
        Feed<Video> feed = request.End(result);
        // The operation completed successfully: set the browsable flag to true.
        obj.Browsable = true;
        // Set the response HTTP code.
        obj.HttpCode = (int)result.Response.StatusCode;
        // Return success if the feed parsed without failures, and a warning otherwise.
        return (feed.FailuresAtom.Count == 0) && (feed.FailuresEntry.Count == 0) ? CrawlResult.Success : CrawlResult.Warning;
    }
    catch (WebException exception)
    {
        if (exception.Status == WebExceptionStatus.RequestCanceled)
        {
            return CrawlResult.Canceled;
        }
        else
        {
            // The operation failed with a web exception: set the browsable flag to false.
            obj.Browsable = false;
            // Set the response HTTP code, if a response is available.
            HttpWebResponse response = exception.Response as HttpWebResponse;
            obj.HttpCode = response != null ? (int?)response.StatusCode : null;
            // Return the result.
            return CrawlResult.Fail;
        }
    }
    catch (Exception)
    {
        // The operation failed with another exception: set the browsable flag to false.
        obj.Browsable = false;
        // Set the response HTTP code to null.
        obj.HttpCode = null;
        // Return the result.
        return CrawlResult.Fail;
    }
}
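// The EncodeFeedKey method referenced above is not included in this excerpt. Below is a
// minimal sketch, assuming the key simply joins the feed, time, category and region into
// a unique string; the actual implementation may differ.

/// <summary>
/// Encodes the feed parameters into a unique string key (hypothetical sketch).
/// </summary>
/// <param name="feedId">The feed.</param>
/// <param name="timeId">The time.</param>
/// <param name="category">The category, or null.</param>
/// <param name="regionId">The region, or null.</param>
/// <returns>The feed key.</returns>
private string EncodeFeedKey(YouTubeStandardFeed feedId, YouTubeTimeId timeId, string category, string regionId)
{
    // Assumption: a dash-separated tuple is unique per feed, time, category and region.
    return string.Format("{0}-{1}-{2}-{3}", (int)feedId, (int)timeId, category ?? string.Empty, regionId ?? string.Empty);
}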
// Public methods.

/// <summary>
/// Begins an asynchronous spider crawl using the specified callback and user state.
/// </summary>
/// <param name="callback">The callback method.</param>
/// <param name="userState">The user state.</param>
/// <returns>The result of the asynchronous spider operation.</returns>
public IAsyncResult BeginCrawl(SpiderCallback callback, object userState = null)
{
    // Update the spider state.
    base.OnStarted();
    try
    {
        // Compute the standard feeds to crawl.
        Dictionary<string, DbObjectStandardFeed> feeds = new Dictionary<string, DbObjectStandardFeed>();
        // For all standard feeds.
        foreach (YouTubeStandardFeed feed in InetApi.YouTube.Api.V2.YouTube.StandardFeeds)
        {
            // If the feed is not selected, continue.
            if (!this.GetFeedSelected(feed)) continue;
            // Get the valid times for this feed.
            YouTubeTimeId[] times = YouTubeUri.GetValidTime(feed);
            // For all times corresponding to this feed.
            foreach (YouTubeTimeId time in times)
            {
                // Create a new standard feed object.
                DbObjectStandardFeed obj = new DbObjectStandardFeed();
                obj.Id = this.EncodeFeedKey(feed, time, null, null);
                obj.FeedId = (int)feed;
                obj.TimeId = (int)time;
                obj.Category = null;
                obj.Region = null;
                feeds.Add(obj.Id, obj);
                // For all assignable and non-deprecated categories.
                foreach (YouTubeCategory category in this.crawler.YouTube.Categories)
                {
                    // If the category supports browsable regions.
                    if (category.Browsable != null)
                    {
                        // Create a new standard feed object.
                        obj = new DbObjectStandardFeed();
                        obj.Id = this.EncodeFeedKey(feed, time, category.Label, null);
                        obj.FeedId = (int)feed;
                        obj.TimeId = (int)time;
                        obj.Category = category.Label;
                        obj.Region = null;
                        feeds.Add(obj.Id, obj);
                        // For all browsable regions.
                        foreach (string region in category.Browsable)
                        {
                            // Create a new standard feed object.
                            obj = new DbObjectStandardFeed();
                            obj.Id = this.EncodeFeedKey(feed, time, category.Label, region);
                            obj.FeedId = (int)feed;
                            obj.TimeId = (int)time;
                            obj.Category = category.Label;
                            obj.Region = region;
                            feeds.Add(obj.Id, obj);
                        }
                    }
                }
            }
        }

        // Raise the crawl feeds started event.
        if (this.FeedsCrawlStarted != null) this.FeedsCrawlStarted(this, new SpiderInfoEventArgs<CrawlInfo>(this, new CrawlInfo(feeds)));

        // Create a new spider asynchronous result.
        SpiderAsyncResult asyncResult = new SpiderAsyncResult(userState);

        // Set the crawl result counters.
        int counterSuccess = 0;
        int counterWarning = 0;
        int counterFailed = 0;
        int counterPending = feeds.Count;

        // Execute the crawl on the thread pool.
        ThreadPool.QueueUserWorkItem((object state) =>
            {
                // Set the feed index.
                int index = 0;
                // For each feed in the feeds collection.
                foreach (KeyValuePair<string, DbObjectStandardFeed> feed in feeds)
                {
                    // Check if the crawl has been canceled.
                    if (asyncResult.IsCanceled) break;
                    // Increment the feed index.
                    index++;
                    // Get the object.
                    DbObjectStandardFeed obj = feed.Value;
                    // Call the feed started event handler.
                    if (this.FeedCrawlStarted != null) this.FeedCrawlStarted(this, new SpiderInfoEventArgs<FeedStartedInfo>(this, new FeedStartedInfo(obj, index, feeds.Count)));
                    // Crawl the feed.
                    CrawlResult result = this.CrawlFeed(
                        asyncResult,
                        (YouTubeStandardFeed)obj.FeedId,
                        (YouTubeTimeId)obj.TimeId,
                        obj.Category,
                        obj.Region,
                        ref obj);
                    // Call the feed finished event handler.
                    if (this.FeedCrawlFinished != null) this.FeedCrawlFinished(this, new SpiderInfoEventArgs<FeedFinishedInfo>(this, new FeedFinishedInfo(obj, index, feeds.Count, result)));
                }
                // Set the result.
                asyncResult.Result = feeds;
                // Raise the crawl feeds finished event.
                if (this.FeedsCrawlFinished != null) this.FeedsCrawlFinished(this, new SpiderInfoEventArgs<CrawlInfo>(this, new CrawlInfo(feeds)));
                // Update the spider state.
                base.OnFinished();
            });

        // Return the spider asynchronous result.
        return asyncResult;
    }
    catch (Exception)
    {
        // If an exception occurs, update the spider state.
        base.OnFinished();
        // Rethrow the exception.
        throw;
    }
}
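// Example usage (hypothetical caller code: the spider field name and the callback
// signature are assumptions, since neither is shown in this excerpt).
//
// // Begin the asynchronous crawl of the selected standard feeds.
// IAsyncResult asyncResult = this.spider.BeginCrawl((SpiderAsyncResult result) =>
//     {
//         // The crawled feeds are available as the result of the asynchronous operation.
//     });
// // The crawl may be canceled at any time through the spider asynchronous result.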