/// <summary>
/// Processes the initial URLs and then, pass by pass, the URLs found in the
/// results collected so far. Stops after <c>MaxProcessingDepth</c> passes, when
/// cancellation is requested, or when no further URLs remain to be processed.
/// </summary>
/// <param name="initialUrls">The URLs handed to the processor in the first pass.</param>
/// <returns>The results accumulated over all passes, held in a list that uses <c>ParsedUrlComparer</c>.</returns>
public DistinctList<ParsedUrl> Run(IEnumerable<string> initialUrls)
{
    var parsingManager = new ParsingManager(OnUrlProcessed);
    var processor = new ParallelProcessInvoker(parsingManager);
    var processedUrls = new DistinctList<ParsedUrl> { EqualityComparer = new ParsedUrlComparer() };

    var urlsToProcess = initialUrls;
    var loopCounter = 1;

    while (loopCounter <= MaxProcessingDepth)
    {
        OnNewLoopStarted(loopCounter);

        var result = processor.Process(urlsToProcess, CancellationToken, OnUrlProcessingErrorOccured);
        processedUrls.AddRange(result);

        // If cancellation has been requested, stop processing.
        if (CancellationToken.IsCancellationRequested)
        {
            break;
        }

        // Remaining budget; Take() yields nothing when this is zero or negative.
        var itemsToTake = MaxUrlsToProcess - processedUrls.Count;

        // Materialize now: a deferred query would read processedUrls later, while the
        // next pass is running (and possibly while AddRange is mutating that list),
        // and would be recomputed on every enumeration.
        var nextUrls = processedUrls
            .SelectMany(url => url.FoundUrls)
            .GetValidOnSiteUrls("hiring.monster.com") // TODO - replace the hardcoded value with dynamic processing based on the url
            .Distinct()
            .Except(processedUrls.Select(u => u.Url))
            .Take(itemsToTake)
            .ToList();

        // Nothing new to visit (or the budget is used up): the remaining passes
        // would all run on an empty set, so finish here.
        if (nextUrls.Count == 0)
        {
            break;
        }

        urlsToProcess = nextUrls;
        loopCounter++;
    }

    return processedUrls;
}
/// <summary>
/// Creates an invoker that keeps a reference to the supplied parsing manager.
/// </summary>
/// <param name="parsingManager">The parsing manager stored in <c>_parsingManager</c>.</param>
public ParallelProcessInvoker(ParsingManager parsingManager)
{
    this._parsingManager = parsingManager;
}
/// <summary>
/// Processes the initial URLs and then, pass by pass, the URLs found in the
/// results collected so far. Stops after <c>MaxProcessingDepth</c> passes, when
/// cancellation is requested, or when no further URLs remain to be processed.
/// </summary>
/// <param name="initialUrls">The URLs handed to the processor in the first pass.</param>
/// <returns>The results accumulated over all passes, held in a list that uses <c>ParsedUrlComparer</c>.</returns>
public DistinctList<ParsedUrl> Run(IEnumerable<string> initialUrls)
{
    var parsingManager = new ParsingManager(OnUrlProcessed);
    var processor = new ParallelProcessInvoker(parsingManager);
    var processedUrls = new DistinctList<ParsedUrl> { EqualityComparer = new ParsedUrlComparer() };

    var urlsToProcess = initialUrls;
    var loopCounter = 1;

    while (loopCounter <= MaxProcessingDepth)
    {
        OnNewLoopStarted(loopCounter);

        var result = processor.Process(urlsToProcess, CancellationToken, OnUrlProcessingErrorOccured);
        processedUrls.AddRange(result);

        // If cancellation has been requested, stop processing.
        if (CancellationToken.IsCancellationRequested)
        {
            break;
        }

        // Remaining budget; Take() yields nothing when this is zero or negative.
        var itemsToTake = MaxUrlsToProcess - processedUrls.Count;

        // Materialize now: a deferred query would read processedUrls later, while the
        // next pass is running (and possibly while AddRange is mutating that list),
        // and would be recomputed on every enumeration.
        var nextUrls = processedUrls
            .SelectMany(url => url.FoundUrls)
            .GetValidOnSiteUrls("hiring.monster.com") // TODO - replace the hardcoded value with dynamic processing based on the url
            .Distinct()
            .Except(processedUrls.Select(u => u.Url))
            .Take(itemsToTake)
            .ToList();

        // Nothing new to visit (or the budget is used up): the remaining passes
        // would all run on an empty set, so finish here.
        if (nextUrls.Count == 0)
        {
            break;
        }

        urlsToProcess = nextUrls;
        loopCounter++;
    }

    return processedUrls;
}
/// <summary>
/// Creates an invoker that keeps a reference to the supplied parsing manager.
/// </summary>
/// <param name="parsingManager">The parsing manager stored in <c>_parsingManager</c>.</param>
public ParallelProcessInvoker(ParsingManager parsingManager)
{
    this._parsingManager = parsingManager;
}