Exemple #1
0
        /// <summary>
        /// Runs the crawl loop: seeds <c>Unvisited</c> from <c>Frontier</c> when it is empty,
        /// then repeatedly fetches every currently unvisited URL in parallel and persists
        /// every entry of <c>Visited</c> that has not been marked <c>Processed</c> yet.
        /// The loop ends when <c>Unvisited</c> is empty or <c>Visited.Count</c> reaches
        /// <c>MaxAllowedPages</c>. The limit is only checked between batches, so a batch
        /// that is already running is always completed.
        /// </summary>
        /// <remarks>
        /// Nothing in this method removes entries from <c>Unvisited</c>; the loop relies on
        /// <c>PageFromUrl</c> / <c>ProcessNewPaths</c> (not visible here) to do so.
        /// </remarks>
        public void Start()
        {
            // NOTE(review): this accepts ANY server certificate, and the callback is a static,
            // process-wide setting - it affects every request made through ServicePointManager,
            // not only the ones issued by this crawler. Kept as-is because it is deliberate.
            ServicePointManager.ServerCertificateValidationCallback = (obj, certificate, chain, errors) => (true);

            // Seed the work list with the frontier URL unless work is already queued.
            var starter = UrlObject.FromString(Frontier);

            if (!Unvisited.Any())
            {
                Unvisited.Add(starter.GetFullPath(false), starter);
            }

            // One iteration == one batch of everything that is unvisited right now.
            while (Unvisited.Any() && Visited.Count < MaxAllowedPages)
            {
                // Enumerate a snapshot, not the live collection: the workers are expected to
                // change Unvisited while the batch runs, and enumerating a collection that is
                // being modified throws InvalidOperationException for the standard dictionaries.
                // URLs discovered during this batch are picked up by the next iteration.
                var batch = Unvisited.ToList();

                Parallel.ForEach(batch, (urlPair) =>
                {
                    try
                    {
                        var p = PageFromUrl(urlPair.Value);
                        ProcessNewPaths(p, urlPair.Value);
                    }
                    catch (ArgumentException)
                    {
                        // Deliberately ignored (includes ArgumentOutOfRangeException): a URL that
                        // fails this way is skipped without aborting the rest of the batch.
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e);
                    }
                });

                // Persist once per batch, on the calling thread, after all workers are done.
                // Doing this inside each worker let several threads see the same page as
                // unprocessed and persist it more than once, and enumerated Visited while
                // other workers were still adding to it.
                var unprocessed = Visited.Where(x => x.Value.Processed == false).ToList();
                foreach (var page in unprocessed)
                {
                    try
                    {
                        if (this.JobType == SpiderJobType.PAGE_ONLY)
                        {
                            // Page-only jobs do not store links: replace them with an empty list.
                            page.Value.LinkTags = new List<LinkTag>();
                        }
                        PersistenceInserter.PersistData(page.Value);
                        page.Value.Processed = true;
                    }
                    catch (ArgumentException)
                    {
                        // Deliberately ignored, as before; the page stays unprocessed and is
                        // retried by the sweep that follows the next batch.
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e);
                    }
                }
            }
        }