/// <summary>
        /// Processes the initial URLs and then, pass by pass, the URLs found on the pages
        /// processed so far, for at most <c>MaxProcessingDepth</c> passes.
        /// Processing stops early when cancellation is requested, when the
        /// <c>MaxUrlsToProcess</c> budget is used up, or when a pass yields no new URLs.
        /// </summary>
        /// <param name="initialUrls">URLs handed to the processor on the first pass.</param>
        /// <returns>
        /// The results accumulated over all passes, collected in a <c>DistinctList</c>
        /// configured with a <c>ParsedUrlComparer</c>.
        /// </returns>
        public DistinctList<ParsedUrl> Run(IEnumerable<string> initialUrls)
        {
            var parsingManager = new ParsingManager(OnUrlProcessed);
            var processor      = new ParallelProcessInvoker(parsingManager);

            var processedUrls = new DistinctList<ParsedUrl> {
                EqualityComparer = new ParsedUrlComparer()
            };
            var urlsToProcess = initialUrls;

            for (var loopCounter = 1; loopCounter <= MaxProcessingDepth; loopCounter++)
            {
                OnNewLoopStarted(loopCounter);

                var result = processor.Process(urlsToProcess,
                                               CancellationToken,
                                               OnUrlProcessingErrorOccured);
                processedUrls.AddRange(result);

                // If cancellation has been requested, keep what was gathered and stop.
                if (CancellationToken.IsCancellationRequested)
                {
                    break;
                }

                // Remaining budget; once it is used up another pass could not take any URL.
                var itemsToTake = MaxUrlsToProcess - processedUrls.Count;
                if (itemsToTake <= 0)
                {
                    break;
                }

                // Materialize the next batch now. Left as a deferred query it would read
                // processedUrls again every time it is enumerated, including while the next
                // pass is appending to that very list.
                var nextUrls = processedUrls
                               .SelectMany(url => url.FoundUrls)
                               .GetValidOnSiteUrls("hiring.monster.com") //TODO - remove the hardcoded value with a dynamic processing based on url
                               .Distinct()
                               .Except(processedUrls.Select(u => u.Url))
                               .Take(itemsToTake)
                               .ToList();

                // Nothing new was discovered, so further passes would have no input.
                if (nextUrls.Count == 0)
                {
                    break;
                }

                urlsToProcess = nextUrls;
            }

            return processedUrls;
        }
 /// <summary>
 /// Creates an invoker and stores the given parsing manager in <c>_parsingManager</c>.
 /// </summary>
 /// <param name="parsingManager">The parsing manager kept by this instance.</param>
 public ParallelProcessInvoker(ParsingManager parsingManager)
 {
     this._parsingManager = parsingManager;
 }
        /// <summary>
        /// Processes the initial URLs and then, pass by pass, the URLs found on the pages
        /// processed so far, for at most <c>MaxProcessingDepth</c> passes.
        /// Processing stops early when cancellation is requested, when the
        /// <c>MaxUrlsToProcess</c> budget is used up, or when a pass yields no new URLs.
        /// </summary>
        /// <param name="initialUrls">URLs handed to the processor on the first pass.</param>
        /// <returns>
        /// The results accumulated over all passes, collected in a <c>DistinctList</c>
        /// configured with a <c>ParsedUrlComparer</c>.
        /// </returns>
        public DistinctList<ParsedUrl> Run(IEnumerable<string> initialUrls)
        {
            var parsingManager = new ParsingManager(OnUrlProcessed);
            var processor = new ParallelProcessInvoker(parsingManager);

            var processedUrls = new DistinctList<ParsedUrl> {EqualityComparer = new ParsedUrlComparer()};
            var urlsToProcess = initialUrls;

            for (var loopCounter = 1; loopCounter <= MaxProcessingDepth; loopCounter++)
            {
                OnNewLoopStarted(loopCounter);

                var result = processor.Process(urlsToProcess,
                                               CancellationToken,
                                               OnUrlProcessingErrorOccured);
                processedUrls.AddRange(result);

                // If cancellation has been requested, keep what was gathered and stop.
                if (CancellationToken.IsCancellationRequested)
                {
                    break;
                }

                // Remaining budget; once it is used up another pass could not take any URL.
                var itemsToTake = MaxUrlsToProcess - processedUrls.Count;
                if (itemsToTake <= 0)
                {
                    break;
                }

                // Materialize the next batch now. Left as a deferred query it would read
                // processedUrls again every time it is enumerated, including while the next
                // pass is appending to that very list.
                var nextUrls = processedUrls
                    .SelectMany(url => url.FoundUrls)
                    .GetValidOnSiteUrls("hiring.monster.com") //TODO - remove the hardcoded value with a dynamic processing based on url
                    .Distinct()
                    .Except(processedUrls.Select(u => u.Url))
                    .Take(itemsToTake)
                    .ToList();

                // Nothing new was discovered, so further passes would have no input.
                if (nextUrls.Count == 0)
                {
                    break;
                }

                urlsToProcess = nextUrls;
            }

            return processedUrls;
        }
// Example #4
// 0
 /// <summary>
 /// Creates an invoker and stores the given parsing manager in <c>_parsingManager</c>.
 /// </summary>
 /// <param name="parsingManager">The parsing manager kept by this instance.</param>
 public ParallelProcessInvoker(ParsingManager parsingManager)
 {
     this._parsingManager = parsingManager;
 }