Пример #1
0
 /// <summary>
 /// Registers a <see cref="GoogleLanguageDetection"/> pipeline step on the given configuration.
 /// </summary>
 /// <param name="crawlerConfiguration">Configuration that receives the new pipeline step.</param>
 /// <param name="maxDegreeOfParallelism">
 /// Value handed to the step's constructor; when <c>null</c>, <see cref="Environment.ProcessorCount"/> is used.
 /// </param>
 /// <returns>The same <paramref name="crawlerConfiguration"/> instance that was passed in.</returns>
 public static CrawlerConfiguration DetectLanguage(this CrawlerConfiguration crawlerConfiguration,
                                                   int? maxDegreeOfParallelism = null)
 {
     // Fall back to the machine's processor count when the caller gave no explicit value.
     int parallelism = maxDegreeOfParallelism ?? Environment.ProcessorCount;
     GoogleLanguageDetection languageDetectionStep = new GoogleLanguageDetection(parallelism);

     crawlerConfiguration.AddPipelineStep(languageDetectionStep);
     return crawlerConfiguration;
 }
Пример #2
0
 /// <summary>
 /// Registers a <see cref="FlurlDownloadPipelineStep"/> on the given configuration.
 /// </summary>
 /// <param name="crawlerConfiguration">Configuration that receives the new pipeline step.</param>
 /// <param name="maxDegreeOfParallelism">
 /// Value handed to the step's constructor; when <c>null</c>, <see cref="Environment.ProcessorCount"/> is used.
 /// </param>
 /// <returns>The same <paramref name="crawlerConfiguration"/> instance that was passed in.</returns>
 public static CrawlerConfiguration FlurlDownload(this CrawlerConfiguration crawlerConfiguration,
                                                  int? maxDegreeOfParallelism = null)
 {
     // Use the explicit value if one was supplied, otherwise the processor count.
     int parallelism = maxDegreeOfParallelism.HasValue
         ? maxDegreeOfParallelism.Value
         : Environment.ProcessorCount;

     FlurlDownloadPipelineStep downloadStep = new FlurlDownloadPipelineStep(parallelism);
     crawlerConfiguration.AddPipelineStep(downloadStep);

     return crawlerConfiguration;
 }
        /// <summary>
        /// Registers a <see cref="PdfBoxTextExtractorProcessorPipelineStep"/> on the given configuration.
        /// </summary>
        /// <param name="crawlerConfiguration">Configuration that receives the new pipeline step.</param>
        /// <param name="maxDegreeOfParallelism">
        /// Value handed to the step's constructor; when <c>null</c>, <see cref="Environment.ProcessorCount"/> is used.
        /// </param>
        /// <returns>The same <paramref name="crawlerConfiguration"/> instance that was passed in.</returns>
        public static CrawlerConfiguration PdfTextExtractProcessor(this CrawlerConfiguration crawlerConfiguration,
                                                                   int? maxDegreeOfParallelism = null)
        {
            // Fall back to the machine's processor count when the caller gave no explicit value.
            int parallelism = maxDegreeOfParallelism ?? Environment.ProcessorCount;

            crawlerConfiguration.AddPipelineStep(new PdfBoxTextExtractorProcessorPipelineStep(parallelism));
            return crawlerConfiguration;
        }
Пример #4
0
 /// <summary>
 /// Registers a <see cref="RobotsPipelineStep"/> on the given configuration.
 /// </summary>
 /// <param name="crawlerConfiguration">
 /// Configuration that receives the new pipeline step; its <c>Logger</c> is passed to the step.
 /// </param>
 /// <param name="searchPath">
 /// Passed unchanged to the <see cref="RobotsPipelineStep"/> constructor; defaults to <c>null</c>.
 /// </param>
 /// <returns>The same <paramref name="crawlerConfiguration"/> instance that was passed in.</returns>
 public static CrawlerConfiguration Robots(this CrawlerConfiguration crawlerConfiguration, string searchPath = null)
 {
     // The step is built with the configuration's own logger.
     RobotsPipelineStep robotsStep = new RobotsPipelineStep(searchPath, crawlerConfiguration.Logger);

     crawlerConfiguration.AddPipelineStep(robotsStep);
     return crawlerConfiguration;
 }