Exemple #1
0
 public void CollectorStacker()
 {
     CollectorPool stack = new CollectorPool();
     Assert.IsTrue(stack.Next() == "google.com");
     Assert.IsTrue(stack.Next() == "facebook.com");
     stack.Store("jubacs.somee.com");
     Assert.IsTrue(stack.Next() == "jubacs.somee.com");
 }
Exemple #2
0
        private static void TestRun1(int WORK_AREA_TOP, int parallelCount, Repository repository, RobotService robots, CollectorPool pool, VisitedUrls history, TldParser tldParser)
        {
            CreateCollectorPool(pool, history, parallelCount).ForEach(
                c =>
                {
                    //ManualResetEvent done = new ManualResetEvent(false);
                    Dictionary<string, object> properties = new Dictionary<string, object>();
                    properties.Add("TldParser", tldParser);

                    Uri uri = c.Link.ToUri();
                    WriteXY(0, WORK_AREA_TOP + c.SeqNo, "{0} (fetching)", uri.ToUriShort());

                    WebDownloader web = new WebDownloader(uri, properties,
                        ea =>
                        {
                            WriteXY(0, WORK_AREA_TOP + c.SeqNo,
                                "{0} [{1}] ({2})",
                                uri.ToUriShort(), ea.Status,
                                (ea.Exception.IsNull() ?
                                    "responded: " + ea.Stream.Length + " bytes" :
                                    "exception: " + ea.Exception.Message));

                            if (ea.Stream.IsNull())
                            {
                                Thread.Sleep(5000);
                            }
                            else
                            {
                                HtmlProcessor processor = new HtmlProcessor(
                                    uri.ToString(), ea.Stream,
                                    ((TldParser)ea.Properties["TldParser"]));

                                WriteXY(0, WORK_AREA_TOP + c.SeqNo,
                                    "{0} (found={1})", uri.ToUriShort(), processor.Links.Count);

                                int pushedLinks = 0;
                                int linkCounter = 1;
                                processor.Links.ForEach(l =>
                                {
                                    WriteXY(0, WORK_AREA_TOP + c.SeqNo,
                                        "{0} (found={1}, prc={2} {3} ({4}))",
                                        uri.ToUriShort(),
                                        processor.Links.Count,
                                        (l.Domain.Length > 10 ? l.Domain.Substring(0, 10) : l.Domain),
                                        l.Tld,
                                        linkCounter);

                                    if (robots.IsAllowed(string.Empty, l.Href.ToUri()))
                                    {
                                        ++pushedLinks;
                                        pool.Store(l.Href);

                                        repository.SaveLink(uri.ToString(), l.Href, string.Empty, l);
                                        repository.SaveLink(l.Href, string.Empty, uri.ToString(), l);
                                        history.Add(uri.ToString());
                                    }

                                    ++linkCounter;
                                });

                                WriteXY(0, WORK_AREA_TOP + c.SeqNo,
                                    "{0} (found={1}, added={2} links) [DONE]",
                                    uri.ToUriShort(), processor.Links.Count, pushedLinks);

                                ea.Stream.Close();
                            }

                            //((ManualResetEvent)ea.Properties["State"]).Set();
                        });

                    web.Download();
                    //done.WaitOne();
                });
        }