public IEnumerable <Image> GetImages(CrawlerInput input) { var imagesUrls = _parser.GetImagesUrls(input.Url); var imagesPaths = _downloader.GetImagesPaths(imagesUrls, input.ImagesFolderPath); return(imagesPaths.Select(i => new Image(i))); }
// constructor public CrawlerEngine(CrawlerSettings cSetting, CrawlerInput cInput, CrawlerOutput cOutput) { this.crawlerSettings_ = cSetting; this.crawlerInput_ = cInput; this.crawlerOutput_ = cOutput; queueUrls_ = new Queue(); this.threadsRunning_ = new Thread[cSetting.maxThreadCount_]; }
static void Main(string[] args) { string strMIMETypes = @"text/richtext[0,0];text/html[0,0];audio/x-aiff[0,0];"; strMIMETypes += @"audio/basic[0,0];audio/wav[0,0];image/gif[0,0];image/jpeg[0,0];"; strMIMETypes += @"image/pjpeg[0,0];image/tiff[0,0];image/x-png[0,0];image/x-xbitmap[0,0];"; strMIMETypes += @"image/bmp[0,0];image/x-jg[0,0];image/x-emf[0,0];image/x-wmf[0,0];"; strMIMETypes += @"video/avi[0,0];video/mpeg[0,0];application/postscript[0,0];application/base64[0,0];"; strMIMETypes += @"application/macbinhex40[0,0];application/pdf[0,0];application/x-compressed[0,0];"; strMIMETypes += @"application/x-zip-compressed[0,0];application/x-gzip-compressed[0,0];"; strMIMETypes += @"application/java[0,0];application/x-msdownload[0,0];"; CrawlerSettings settings = new CrawlerSettings(); settings.allowAllMIMETypes_ = false; settings.downloadfolder_ = "downloadfolder1"; settings.excludeFiles_ = new string[] { ".gif", ".jpg", ".css", ".zip", ".exe" }; settings.excludeHosts_ = new string[] { "" }; settings.excludeWords_ = new string[] { "" }; settings.keepAlive_ = false; settings.keepSameServer_ = false; settings.lastRequestCount_ = 0; settings.allowedMIMETypes_ = strMIMETypes; settings.requestTimeout_ = 10; settings.sleepConnectTime_ = 0; settings.sleepFetchTime_ = 0; settings.threadsCount_ = 1; settings.maxThreadCount_ = 20; settings.maxDepth_ = 1; settings.filePath_ = "CrawlerConsoleSettings.txt"; settings.lastModified_ = DateTime.Now; settings.version_ = 1; settings.dataTypeName_ = "CrawlerConsoleSettings"; settings.WriteToFile(); settings.ReadFromFile("CrawlerConsoleSettings.txt"); CrawlerInput input = new CrawlerInput(); input.domain_ = "baidu.com"; input.fullUrl_ = "www.baidu.com"; CrawlerOutput output = new CrawlerOutput(); CrawlerEngine engine = new CrawlerEngine(settings, input, output); engine.RunCrawling(); }