/// <summary>
/// Lazily selects the test methods to run against a crawl result.
/// </summary>
/// <param name="result">The crawl result the tests will be bound to.</param>
/// <param name="tests">Candidate test definitions. This sequence is enumerated up to twice.</param>
/// <returns>
/// First, one <c>TestMethod</c> (without content) for every test whose
/// <c>ParsedObjectType</c> is null; then, if <c>result.ContentType</c> is not null,
/// one <c>TestMethod</c> carrying <c>result.Content</c> for every test whose
/// <c>ParsedObjectType</c> equals <c>result.ContentType</c>.
/// </returns>
public IEnumerable<TestMethod> Select(CrawlResult result, IEnumerable<TestDefinition> tests)
{
    // Pass 1: tests that declare no parsed object type get only the crawl result.
    foreach (var definition in tests)
    {
        if (definition.ParsedObjectType == null)
        {
            yield return new TestMethod(definition, result);
        }
    }

    // Without a content type there is nothing to match typed tests against.
    if (result.ContentType == null)
    {
        yield break;
    }

    // Pass 2: tests typed for this content also receive the parsed content.
    foreach (var definition in tests)
    {
        if (definition.ParsedObjectType == result.ContentType)
        {
            yield return new TestMethod(definition, result, result.Content);
        }
    }
}
/// <summary>
/// Lazily pairs widget elements found in a parsed document with widget tests.
/// </summary>
/// <param name="result">The crawl result; nothing is yielded unless its content type is <c>CQ</c>.</param>
/// <param name="tests">
/// Test definitions; every element is cast to <c>WidgetTestDefinition</c>, so an element
/// of another type causes the cast to throw during enumeration.
/// </param>
/// <returns>
/// One <c>TestMethod</c> per (widget, test, selector) combination where the selector is
/// null/empty or <c>IsMatch</c> accepts the widget. A test with several qualifying
/// selectors is therefore yielded several times for the same widget.
/// </returns>
public IEnumerable<TestMethod> Select(CrawlResult result, IEnumerable<TestDefinition> tests)
{
    if (result.ContentType != typeof(CQ))
    {
        yield break;
    }

    var document = (CQ)result.Content;

    foreach (var widget in document[WidgetSelector])
    {
        foreach (var definition in tests.Cast<WidgetTestDefinition>())
        {
            foreach (var css in definition.CssSelector)
            {
                // A null/empty selector applies to every widget; otherwise the widget must match it.
                if (string.IsNullOrEmpty(css) || IsMatch(widget, css))
                {
                    yield return new TestMethod(definition, result, widget);
                }
            }
        }
    }
}
/// <summary>
/// Creates an observable that, on subscription, drains <c>Uris</c> and fetches each
/// distinct URI over HTTP, publishing one <c>CrawlResult</c> per completed request.
/// </summary>
/// <remarks>
/// Requests are issued one at a time: each response is awaited and published before the
/// next URI is dequeued. The first failure (HTTP fault, cancellation via <c>Token</c>, or
/// an exception while building the result) is reported once through <c>OnError</c> and
/// ends the sequence; otherwise <c>OnCompleted</c> is signalled when the queue is empty.
/// </remarks>
/// <returns>An observable sequence of crawl results.</returns>
public IObservable<CrawlResult> Crawl()
{
    return Observable.Create(
        async (IObserver<CrawlResult> observer) =>
        {
            // The client is only needed while the queue is being drained. GetAsync uses the
            // default completion option, so response content is already buffered when the
            // client is disposed.
            using (var httpClient = new HttpClient())
            {
                var requests = new List<CrawlRequest>();

                try
                {
                    while (Uris.Count > 0)
                    {
                        var crawlRequest = new CrawlRequest(Uris.Dequeue());

                        // TODO: Add in filter checks for Depth, Domain white list, visited list
                        if (requests.Any(r => r.Uri.Equals(crawlRequest.Uri)))
                        {
                            continue;
                        }

                        requests.Add(crawlRequest);

                        var task = httpClient.GetAsync(crawlRequest.Uri, Token).ContinueWith(
                            (responseTask, state) =>
                            {
                                // The antecedent has completed, so Result does not block. If the
                                // request faulted, Result rethrows its AggregateException, which
                                // faults this continuation and is reported by the catch below.
                                var response = responseTask.Result;
                                var stopwatch = (Stopwatch)state;
                                var crawlResult = new CrawlResult(response.RequestMessage, response, stopwatch.Elapsed);

                                if (response.IsSuccessStatusCode)
                                {
                                    var parsedContent = ParseContent(response.Content);
                                    crawlResult.SetContent(response.Content, parsedContent);
                                    // TODO: Find additional links in parsedContent and add to Uris
                                }

                                return crawlResult;
                            },
                            Stopwatch.StartNew(),
                            Token);

                        observer.OnNext(await task);
                    }
                }
                catch (Exception exception)
                {
                    // Single error path: previously OnError was raised inside the continuation
                    // and the same failure was then rethrown by Result.
                    // TODO: observer.OnNext(new CrawlResult(exception))??
                    observer.OnError(exception);
                    return Disposable.Empty;
                }
            }

            // Every request was awaited inside the loop, so nothing is still pending here.
            observer.OnCompleted();
            return Disposable.Empty;
        });
}
/// <summary>
/// Decides whether this instance applies to the given crawl result.
/// </summary>
/// <param name="result">The crawl result to check.</param>
/// <returns><c>true</c> if the implementation considers <paramref name="result"/> compatible; otherwise <c>false</c>.</returns>
public abstract bool CompatibleWith(CrawlResult result);
/// <summary>
/// Reports compatibility with crawl results whose content type is <c>CQ</c>.
/// </summary>
/// <param name="result">The crawl result to check.</param>
/// <returns><c>true</c> when <c>result.ContentType</c> equals <c>typeof(CQ)</c>; otherwise <c>false</c>.</returns>
public override bool CompatibleWith(CrawlResult result)
{
    var contentType = result.ContentType;
    return contentType == typeof(CQ);
}
/// <summary>
/// Reports compatibility with crawl results whose absolute URI matches any pattern in <c>Regex</c>.
/// </summary>
/// <param name="result">The crawl result whose <c>Uri.AbsoluteUri</c> is tested.</param>
/// <returns>
/// <c>true</c> as soon as one pattern matches; <c>false</c> if none match or there are no patterns.
/// </returns>
public override bool CompatibleWith(CrawlResult result)
{
    foreach (var pattern in Regex)
    {
        // The URI is read inside the loop so it is not touched when there are no patterns.
        if (pattern.IsMatch(result.Uri.AbsoluteUri))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Asserts that the widget's <c>id</c> attribute equals <c>"nav"</c>.
/// </summary>
/// <param name="result">The crawl result the widget came from (not used by this test).</param>
/// <param name="widget">The widget element under test.</param>
public void WidgetIdTest(CrawlResult result, IDomElement widget)
{
    PAssert.IsTrue(
        () => "nav".Equals(widget.GetAttribute("id")));
}
/// <summary>
/// Test that accepts a crawl result with <c>JObject</c> content; the assertion always passes.
/// </summary>
/// <param name="result">The crawl result (not used by this test).</param>
/// <param name="content">The parsed content (not used by this test).</param>
public void TwitterTest(CrawlResult result, JObject content)
{
    PAssert.IsTrue(
        () => true);
}
/// <summary>
/// Test that accepts a crawl result with <c>CQ</c> content; the assertion always passes.
/// </summary>
/// <param name="result">The crawl result (not used by this test).</param>
/// <param name="content">The parsed document (not used by this test).</param>
public void HtmlTest(CrawlResult result, CQ content)
{
    PAssert.IsTrue(
        () => true);
}
/// <summary>
/// Test that takes only a crawl result and no parsed content; the assertion always passes.
/// </summary>
/// <param name="result">The crawl result (not used by this test).</param>
public void AllPagesTest(CrawlResult result)
{
    PAssert.IsTrue(
        () => true);
}