Esempio n. 1
0
        /// <summary>
        /// Seeds the crawl queue, then drains it by dispatching a <c>FetchUrl</c> job for every
        /// queued path that <c>ShouldFetch</c> accepts, and finally rebuilds the search template
        /// when that option is enabled and a template URI is configured.
        /// </summary>
        /// <returns>The value of <c>Modified</c> after the crawl has finished.</returns>
        public bool CrawlSite()
        {
            // Seed the search template page first (only when default pages are allowed).
            if (!NoDefaultPages)
            {
                if (UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri))
                {
                    Uri templateUri = new Uri(_baseUri, _config.Searching.TemplateUri);
                    AddUri(templateUri);
                }
            }

            // Exclusions: robots file first, then the configured excluded paths.
            // NOTE(review): "HttpClone" is presumably the user-agent name matched in robots.txt - confirm.
            _excluded.ReadRobotsFile(_baseUri, "HttpClone");
            _excluded.AddRange(_config.ExcludedPaths.SafeEnumeration());

            if (!NoDefaultPages)
            {
                AddUrls(_baseUri, _config.IncludedPaths.SafeEnumeration());
            }

            // A single worker is used while a debugger is attached, ten otherwise.
            int workerCount = System.Diagnostics.Debugger.IsAttached ? 1 : 10;

            using (WorkQueue queue = new WorkQueue(workerCount))
            {
                // Worker failures are reported on stderr; they do not stop the crawl loop below.
                queue.OnError += (sender, args) => Console.Error.WriteLine(args.GetException().Message);

                TaskCounter httpCalls = new TaskCounter(queue.Enqueue);
                TaskCounter parsing   = new TaskCounter(queue.Enqueue);

                for (;;)
                {
                    // Throttle: with five or more outstanding http calls, wait before scheduling more.
                    if (httpCalls.Count >= 5)
                    {
                        httpCalls.WaitOne();
                        continue;
                    }

                    // The idle snapshot must be taken BEFORE polling the queue; it is only
                    // acted upon when the subsequent dequeue comes back empty.
                    bool idle = httpCalls.Count == 0 && parsing.Count == 0;

                    string nextPath;
                    if (!_queue.TryDequeue(out nextPath))
                    {
                        if (idle)
                        {
                            break;
                        }

                        parsing.WaitOne();
                        continue;
                    }

                    // One-element array handed to ShouldFetch; slot 0 is forwarded to FetchUrl.
                    // NOTE(review): presumably ShouldFetch stores the known ETag there - confirm.
                    string[] etagHolder = new string[1];
                    if (ShouldFetch(nextPath, etagHolder))
                    {
                        FetchUrl fetch = new FetchUrl(this, nextPath, etagHolder[0], parsing.Run);
                        httpCalls.Run(fetch.DoWork);
                    }
                }

                queue.Complete(true, 1000);
            }

            // Post-crawling step(s)
            if (UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri))
            {
                SearchTemplateBuilder templateBuilder = new SearchTemplateBuilder(_data, _baseUri);
                templateBuilder.UpdateTemplate();
            }

            return Modified;
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a new instance that holds on to the supplied counter and task.
 /// </summary>
 /// <param name="counter">The counter stored in <c>_counter</c>.</param>
 /// <param name="task">The action stored in <c>_task</c>.</param>
 public Decrement(TaskCounter counter, Action task)
 {
     this._counter = counter;
     this._task = task;
 }
Esempio n. 3
0
 /// <summary>
 /// Captures the action and the counter associated with it for later use by this instance.
 /// </summary>
 /// <param name="counter">Counter kept in the <c>_counter</c> field.</param>
 /// <param name="task">Action kept in the <c>_task</c> field.</param>
 public Decrement(TaskCounter counter, Action task)
 {
     // The two assignments are independent of each other.
     this._task = task;
     this._counter = counter;
 }
Esempio n. 4
0
        /// <summary>
        /// Runs a full crawl: queues the configured starting points, processes the pending
        /// paths through a <c>WorkQueue</c> until nothing is left in flight, then updates the
        /// search template if that is enabled and a template URI is configured.
        /// </summary>
        /// <returns>The current value of <c>Modified</c> when the crawl completes.</returns>
        public bool CrawlSite()
        {
            if (!NoDefaultPages && UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri))
            {
                AddUri(new Uri(_baseUri, _config.Searching.TemplateUri));
            }

            // Build the exclusion list before any included paths are queued.
            _excluded.ReadRobotsFile(_baseUri, "HttpClone");
            _excluded.AddRange(_config.ExcludedPaths.SafeEnumeration());
            if (!NoDefaultPages)
            {
                AddUrls(_baseUri, _config.IncludedPaths.SafeEnumeration());
            }

            // One worker when a debugger is attached, ten otherwise.
            using (WorkQueue queue = new WorkQueue(System.Diagnostics.Debugger.IsAttached ? 1 : 10))
            {
                // Errors raised by queued work are written to stderr.
                queue.OnError += (source, error) => Console.Error.WriteLine(error.GetException().Message);

                TaskCounter httpCalls = new TaskCounter(queue.Enqueue);
                TaskCounter parsing = new TaskCounter(queue.Enqueue);

                bool finished = false;
                while (!finished)
                {
                    if (httpCalls.Count >= 5)
                    {
                        // Five or more http calls outstanding: wait instead of scheduling another.
                        httpCalls.WaitOne();
                    }
                    else
                    {
                        // Captured before the dequeue attempt on purpose; it decides whether an
                        // empty queue means "done" or "wait for parsing".
                        bool nothingPending = httpCalls.Count == 0 && parsing.Count == 0;

                        string current;
                        bool dequeued = _queue.TryDequeue(out current);

                        if (dequeued)
                        {
                            // Single-slot array passed to ShouldFetch; element 0 is then given to FetchUrl.
                            string[] etagSlot = new string[] { null };
                            if (ShouldFetch(current, etagSlot))
                            {
                                httpCalls.Run(new FetchUrl(this, current, etagSlot[0], parsing.Run).DoWork);
                            }
                        }
                        else if (nothingPending)
                        {
                            // Queue empty and no work was outstanding: the crawl is over.
                            finished = true;
                        }
                        else
                        {
                            parsing.WaitOne();
                        }
                    }
                }

                queue.Complete(true, 1000);
            }

            //Post-crawling step(s)
            bool rebuildTemplate = UpdateSearchTemplate && _config.Searching != null && !String.IsNullOrEmpty(_config.Searching.TemplateUri);
            if (rebuildTemplate)
            {
                new SearchTemplateBuilder(_data, _baseUri).UpdateTemplate();
            }

            return Modified;
        }