/// <summary>
/// Adds a <c>GoogleLanguageDetection</c> pipeline step to the given configuration.
/// </summary>
/// <param name="crawlerConfiguration">Configuration that receives the new pipeline step.</param>
/// <param name="maxDegreeOfParallelism">
/// Value forwarded to the step's constructor; when <c>null</c>,
/// <see cref="Environment.ProcessorCount"/> is used instead.
/// </param>
/// <returns>The same <paramref name="crawlerConfiguration"/> instance, so calls can be chained.</returns>
public static CrawlerConfiguration DetectLanguage(this CrawlerConfiguration crawlerConfiguration, int? maxDegreeOfParallelism = null)
{
    // Fall back to the machine's processor count when the caller gave no limit.
    int parallelism = maxDegreeOfParallelism ?? Environment.ProcessorCount;
    var languageDetectionStep = new GoogleLanguageDetection(parallelism);

    crawlerConfiguration.AddPipelineStep(languageDetectionStep);
    return crawlerConfiguration;
}
/// <summary>
/// Adds a <c>FlurlDownloadPipelineStep</c> to the given configuration.
/// </summary>
/// <param name="crawlerConfiguration">Configuration that receives the new pipeline step.</param>
/// <param name="maxDegreeOfParallelism">
/// Value forwarded to the step's constructor; when <c>null</c>,
/// <see cref="Environment.ProcessorCount"/> is used instead.
/// </param>
/// <returns>The same <paramref name="crawlerConfiguration"/> instance, so calls can be chained.</returns>
public static CrawlerConfiguration FlurlDownload(this CrawlerConfiguration crawlerConfiguration, int? maxDegreeOfParallelism = null)
{
    // Fall back to the machine's processor count when the caller gave no limit.
    int parallelism = maxDegreeOfParallelism ?? Environment.ProcessorCount;
    var downloadStep = new FlurlDownloadPipelineStep(parallelism);

    crawlerConfiguration.AddPipelineStep(downloadStep);
    return crawlerConfiguration;
}
/// <summary>
/// Adds a <c>PdfBoxTextExtractorProcessorPipelineStep</c> to the given configuration.
/// </summary>
/// <param name="crawlerConfiguration">Configuration that receives the new pipeline step.</param>
/// <param name="maxDegreeOfParallelism">
/// Value forwarded to the step's constructor; when <c>null</c>,
/// <see cref="Environment.ProcessorCount"/> is used instead.
/// </param>
/// <returns>The same <paramref name="crawlerConfiguration"/> instance, so calls can be chained.</returns>
public static CrawlerConfiguration PdfTextExtractProcessor(this CrawlerConfiguration crawlerConfiguration, int? maxDegreeOfParallelism = null)
{
    // Fall back to the machine's processor count when the caller gave no limit.
    int parallelism = maxDegreeOfParallelism ?? Environment.ProcessorCount;

    crawlerConfiguration.AddPipelineStep(
        new PdfBoxTextExtractorProcessorPipelineStep(parallelism));
    return crawlerConfiguration;
}
/// <summary>
/// Adds a <c>RobotsPipelineStep</c> to the given configuration.
/// </summary>
/// <param name="crawlerConfiguration">
/// Configuration that receives the new pipeline step; its <c>Logger</c> is handed to the step.
/// </param>
/// <param name="searchPath">
/// Passed through unchanged to the <c>RobotsPipelineStep</c> constructor; defaults to <c>null</c>.
/// </param>
/// <returns>The same <paramref name="crawlerConfiguration"/> instance, so calls can be chained.</returns>
public static CrawlerConfiguration Robots(this CrawlerConfiguration crawlerConfiguration, string searchPath = null)
{
    var robotsStep = new RobotsPipelineStep(searchPath, crawlerConfiguration.Logger);

    crawlerConfiguration.AddPipelineStep(robotsStep);
    return crawlerConfiguration;
}