/// <summary>
/// Seeds the pending-path queue, loads the exclusion rules, then drains the
/// queue by scheduling fetch tasks on a <c>WorkQueue</c>, and finally refreshes
/// the search template when one is configured.
/// </summary>
/// <returns>The value of <c>Modified</c> once crawling has finished.</returns>
public bool CrawlSite()
{
    // Upper bound on outstanding fetch tasks before the loop waits on the fetch counter.
    const int MaxOutstandingFetches = 5;
    // Timeout value handed to WorkQueue.Complete (presumably milliseconds - confirm).
    const int CompleteTimeout = 1000;

    if (!NoDefaultPages)
    {
        // The search template page is only seeded when template updating is on
        // and a template URI is configured.
        if (UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri))
        {
            AddUri(new Uri(_baseUri, _config.Searching.TemplateUri));
        }
    }

    _excluded.ReadRobotsFile(_baseUri, "HttpClone");
    _excluded.AddRange(_config.ExcludedPaths.SafeEnumeration());

    if (!NoDefaultPages)
    {
        AddUrls(_baseUri, _config.IncludedPaths.SafeEnumeration());
    }

    // A single-slot queue is used while a debugger is attached, ten otherwise
    // (presumably the worker/thread count - confirm against WorkQueue).
    int workerSlots;
    if (System.Diagnostics.Debugger.IsAttached)
    {
        workerSlots = 1;
    }
    else
    {
        workerSlots = 10;
    }

    using (WorkQueue workers = new WorkQueue(workerSlots))
    {
        // Task failures are reported on stderr; only the exception message is written.
        workers.OnError += (sender, args) =>
        {
            Console.Error.WriteLine(args.GetException().Message);
        };

        TaskCounter fetches = new TaskCounter(workers.Enqueue);
        TaskCounter parses = new TaskCounter(workers.Enqueue);

        while (true)
        {
            // Too many fetches in flight: wait on the fetch counter before dequeuing more.
            if (fetches.Count >= MaxOutstandingFetches)
            {
                fetches.WaitOne();
                continue;
            }

            // Snapshot the idle state BEFORE attempting the dequeue, so that the
            // loop only exits when nothing was running prior to finding the queue empty.
            bool idle = fetches.Count == 0 && parses.Count == 0;

            string nextPath;
            if (!_queue.TryDequeue(out nextPath))
            {
                if (idle)
                {
                    break;
                }

                parses.WaitOne();
                continue;
            }

            // Single-element array acts as an out-slot that ShouldFetch may fill in.
            string[] etagSlot = new string[1];
            if (ShouldFetch(nextPath, etagSlot))
            {
                fetches.Run(new FetchUrl(this, nextPath, etagSlot[0], parses.Run).DoWork);
            }
        }

        workers.Complete(true, CompleteTimeout);
    }

    //Post-crawling step(s)
    if (UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri))
    {
        new SearchTemplateBuilder(_data, _baseUri).UpdateTemplate();
    }

    return Modified;
}
/// <summary>
/// Creates an instance that holds on to the given counter and task.
/// </summary>
/// <param name="counter">Counter stored in <c>_counter</c>.</param>
/// <param name="task">Action stored in <c>_task</c>.</param>
public Decrement(TaskCounter counter, Action task)
{
    this._task = task;
    this._counter = counter;
}
/// <summary>
/// Runs one crawl: registers the starting URIs, applies robots/configured
/// exclusions, processes queued paths through fetch tasks until no work
/// remains, and then runs the post-crawl search template update if configured.
/// </summary>
/// <returns>The current value of <c>Modified</c>.</returns>
public bool CrawlSite()
{
    if (!NoDefaultPages && UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri))
    {
        Uri templateUri = new Uri(_baseUri, _config.Searching.TemplateUri);
        AddUri(templateUri);
    }

    // Exclusions: the robots file first, then the configured excluded paths.
    _excluded.ReadRobotsFile(_baseUri, "HttpClone");
    _excluded.AddRange(_config.ExcludedPaths.SafeEnumeration());

    if (!NoDefaultPages)
    {
        AddUrls(_baseUri, _config.IncludedPaths.SafeEnumeration());
    }

    // 1 when a debugger is attached, otherwise 10.
    using (WorkQueue pool = new WorkQueue(System.Diagnostics.Debugger.IsAttached ? 1 : 10))
    {
        // Errors raised by queued work are written to stderr (message only).
        pool.OnError += (source, failure) => Console.Error.WriteLine(failure.GetException().Message);

        TaskCounter pendingHttp = new TaskCounter(pool.Enqueue);
        TaskCounter pendingParse = new TaskCounter(pool.Enqueue);

        for (;;)
        {
            if (pendingHttp.Count < 5)
            {
                // Captured before the dequeue attempt; the loop ends only if both
                // counters were already at zero when the queue turned out to be empty.
                bool nothingRunning = pendingHttp.Count == 0 && pendingParse.Count == 0;

                string target;
                bool dequeued = _queue.TryDequeue(out target);
                if (dequeued)
                {
                    // ShouldFetch receives a one-element array whose slot 0 is then
                    // forwarded to the FetchUrl task.
                    string[] etagBox = new string[1];
                    if (ShouldFetch(target, etagBox))
                    {
                        FetchUrl fetch = new FetchUrl(this, target, etagBox[0], pendingParse.Run);
                        pendingHttp.Run(fetch.DoWork);
                    }
                }
                else if (nothingRunning)
                {
                    break;
                }
                else
                {
                    pendingParse.WaitOne();
                }
            }
            else
            {
                // Five or more HTTP tasks are outstanding; wait on that counter.
                pendingHttp.WaitOne();
            }
        }

        pool.Complete(true, 1000);
    }

    //Post-crawling step(s)
    if (UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri))
    {
        SearchTemplateBuilder templateBuilder = new SearchTemplateBuilder(_data, _baseUri);
        templateBuilder.UpdateTemplate();
    }

    return Modified;
}