/// <summary>
/// Initializes the spider with its start URI and options, and subscribes
/// <c>HandleLinks</c> to the content handler of the options' page processor.
/// </summary>
/// <param name="startUri">
/// Starting URI. Stored in <c>StartUri</c> and used to derive the base URI
/// when <paramref name="options"/> does not already supply one.
/// </param>
/// <param name="options">
/// Spider settings; a default <see cref="WebSpiderOptions"/> is created when null.
/// Note that the passed instance is modified (its <c>BaseUri</c> may be filled in).
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="startUri"/> is null.</exception>
public WebSpider(Uri startUri, WebSpiderOptions options = null)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // further down (or a half-initialized spider when BaseUri is already set).
    if (startUri == null)
    {
        throw new ArgumentNullException("startUri");
    }

    StartUri = startUri;
    spiderOptions = options ?? new WebSpiderOptions();

    // In future this could be null and will process cross-site, but for now must exist
    spiderOptions.BaseUri = spiderOptions.BaseUri ?? new Uri(StartUri.GetLeftPart(UriPartial.Authority));

    webPagesPending = new Queue();
    webPages = new Hashtable();

    spiderOptions.WebPageProcessor.ContentHandler += HandleLinks;
}
/// <summary>
/// Validates the command arguments, runs a <see cref="WebSpider"/> against the
/// requested URL and, when an output format is selected, saves the resulting report.
/// </summary>
/// <returns>0 after the spider has executed; -1 when any argument fails validation.</returns>
public override int Run()
{
    // A missing URL can never become a valid URI. Report it through the same error
    // path as an unparsable URL rather than throwing a NullReferenceException on
    // url.StartsWith below.
    if (string.IsNullOrEmpty(url))
    {
        Logger.Error(CultureInfo.CurrentUICulture, Resources.ParseUrlRunCantCreateUriError);
        return -1;
    }

    // Prepend http:// when the URL does not already start with http:// or https://.
    bool hasHttpScheme =
        url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!hasHttpScheme)
    {
        url = string.Format(CultureInfo.InvariantCulture, "http://{0}", url);
    }

    Uri uri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
    {
        Logger.Error(CultureInfo.CurrentUICulture, Resources.ParseUrlRunCantCreateUriError);
        return -1;
    }

    // count stays -1 when no number was supplied; a supplied number must be an integer >= 1.
    // NOTE(review): -1 presumably means "no limit" - confirm against WebSpiderOptions.
    int count = -1;
    if (!string.IsNullOrEmpty(number) && (!Int32.TryParse(number, out count) || count < 1))
    {
        Logger.Error(Resources.ParseUrlRunNotIntegerError, number);
        return -1;
    }

    // A format string was given but FileFormat resolved to None.
    if (!string.IsNullOrEmpty(outputFileFormat) && FileFormat == Report.OutputFileFormat.None)
    {
        Logger.Error(Resources.ParseUrlRunUnsupportedFormatError, outputFileFormat);
        return -1;
    }

    // Credentials are all-or-nothing: exactly one of username/password being set is an error.
    if (string.IsNullOrEmpty(username) != string.IsNullOrEmpty(password))
    {
        Logger.Error("Username and Password should be both set.");
        return -1;
    }

    WebSpiderOptions options = new WebSpiderOptions
    {
        UriProcessedCountMax = count,
        ShowSuccessUrls = !errorsOnly,
        Username = username,
        Password = password,
        Domain = domain
    };

    Report report = new WebSpider(uri, options).Execute();

    if (FileFormat != Report.OutputFileFormat.None)
    {
        report.SaveReport(FileFormat, htmlTemplate);
    }

    return 0;
}
/// <summary>
/// Creates a page processor and stores the supplied options.
/// </summary>
/// <param name="options">
/// Value assigned to <c>Options</c> as-is; it remains null when the argument is omitted.
/// </param>
public WebPageProcessor(WebSpiderOptions options = null)
{
    this.Options = options;
}