示例#1
0
        /// <summary>
        /// Builds a <c>RobotsCache</c> for http://www.yahoo.com and asserts that its
        /// <c>RobotsExclusion</c> collection contains at least one entry.
        /// </summary>
        public void TestRobotsCache()
        {
            CollectorManager manager = new CollectorManager();
            RobotsCache robots = new RobotsCache(manager, "http://www.yahoo.com");

            // An empty exclusion collection means nothing was loaded for the site.
            bool robotsLoaded = robots.RobotsExclusion.Count > 0;
            Assert.IsTrue(robotsLoaded, "Robots not loaded");
        }
示例#2
0
        /// <summary>
        /// Creates the robots cache for a single site. The site URL is normalized through
        /// <see cref="Uri.AbsoluteUri"/>; if <c>TryRetrieveFromCache()</c> returns false,
        /// <c>RetrieveRobots()</c> is called and both the serialized <c>RobotsExclusion</c>
        /// and the current time are written to Redis hashes under the normalized URL.
        /// </summary>
        /// <param name="cm">Collector manager whose <c>PRCM</c> supplies the Redis client.</param>
        /// <param name="domainSchemeAndServer">Absolute URL identifying the site.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="cm"/> or <paramref name="domainSchemeAndServer"/> is null.
        /// </exception>
        /// <exception cref="UriFormatException">
        /// <paramref name="domainSchemeAndServer"/> is not a valid absolute URI.
        /// </exception>
        public RobotsCache(CollectorManager cm, string domainSchemeAndServer)
        {
            if (cm == null)
            {
                throw new ArgumentNullException("cm");
            }

            // Normalize (and thereby validate) the URL before taking a Redis client, so a
            // malformed address fails without a client having been acquired.
            string normalizedDomain = new Uri(domainSchemeAndServer).AbsoluteUri;

            this.Redis = cm.PRCM.GetClient();
            this.DomainSchemeAndServer = normalizedDomain;
            this.RobotID = "urn:domain:robots:data";
            this.RobotLastDateCrawlID = "urn:domain:robots:last-date-crawl";

            if (!TryRetrieveFromCache())
            {
                // Cache miss: fetch the robots data and record it together with the fetch time.
                RetrieveRobots();
                this.Redis.SetEntryInHash(this.RobotID, this.DomainSchemeAndServer,
                    this.RobotsExclusion.JsonSerialize());

                // NOTE(review): the timestamp is local time in the current culture's format;
                // left unchanged because the code that reads it back is not visible here.
                this.Redis.SetEntryInHash(this.RobotLastDateCrawlID, this.DomainSchemeAndServer,
                    DateTime.Now.ToString());
            }
        }
示例#3
0
 /// <summary>
 /// Validates <paramref name="link"/> against <paramref name="backlink"/> with
 /// <c>LinkParser.Validate</c> and, when the result is usable, constructs a
 /// <c>Collector</c> for it.
 /// </summary>
 /// <returns>
 /// The newly constructed collector, or <c>null</c> when the validated link is null or empty.
 /// </returns>
 public static Collector CrawlAndSave(
     CollectorManager cm,
     string link, string backlink, string text, string rel, string kind,
     bool crawlChildLinks, bool poolChildLinksFound)
 {
     string validatedLink = LinkParser.Validate(link, backlink);

     // Unparsable URL: there is nothing to collect.
     if (string.IsNullOrEmpty(validatedLink))
     {
         return null;
     }

     return new Collector(
         cm, validatedLink, backlink, text, rel, kind, crawlChildLinks, poolChildLinksFound);
 }
示例#4
0
        /// <summary>
        /// Stores the manager, a Redis client obtained from <c>cm.PRCM</c>, the link and the
        /// validated backlink, then builds the <c>LinkInfo</c>. Unless that info reports the
        /// link as excluded by robots, the link is saved and, when
        /// <paramref name="crawlChildLinks"/> is true, its child links are crawled.
        /// </summary>
        /// <remarks>
        /// <paramref name="poolChildLinksFound"/> is not read inside this constructor.
        /// </remarks>
        Collector(
            CollectorManager cm,
            string link, string backlink, string text, string rel, string kind,
            bool crawlChildLinks, bool poolChildLinksFound)
        {
            this.CM = cm;
            this.Redis = cm.PRCM.GetClient();
            this.Link = link;
            this.CurrentBacklink = LinkParser.Validate(backlink, string.Empty);
            this.LinkInfo = new LinkInfo(
                cm, this.Link, this.CurrentBacklink, text, rel, kind);

            // Links excluded by robots are neither saved nor crawled.
            if (this.LinkInfo.LinkExcludedInRobots)
            {
                return;
            }

            SaveLink();

            if (crawlChildLinks)
            {
                CrawlChildLinks();
            }
        }
示例#5
0
文件: Form1.cs 项目: fcbaconguis/c
        /// <summary>
        /// Runs <c>InitializeComponent()</c> and then assigns a new
        /// <c>CollectorManager</c> to <c>CM</c>.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            CM = new CollectorManager();
        }
示例#6
0
 /// <summary>
 /// Creates the fixture with its own <c>CollectorManager</c> stored in <c>CM</c>.
 /// </summary>
 public LinkRegistryTest()
 {
     CM = new CollectorManager();
 }
示例#7
0
文件: LinkInfo.cs 项目: quartz12345/c
 /// <summary>
 /// Runs the single-argument constructor with <paramref name="cm"/>, then calls
 /// <c>RetrieveInfo</c> with the supplied link details.
 /// </summary>
 public LinkInfo(
     CollectorManager cm,
     string link, string backlink, string text, string rel, string kind)
     : this(cm)
 {
     // The base initialization above has already run when the details are retrieved.
     RetrieveInfo(link, backlink, text, rel, kind);
 }
示例#8
0
文件: LinkInfo.cs 项目: quartz12345/c
 /// <summary>
 /// Convenience overload: forwards to the six-argument constructor with empty strings
 /// for the backlink, text, rel and kind arguments.
 /// </summary>
 public LinkInfo(CollectorManager cm, string link)
     : this(
         cm,
         link,
         string.Empty,   // backlink
         string.Empty,   // text
         string.Empty,   // rel
         string.Empty)   // kind
 {
 }
示例#9
0
文件: LinkInfo.cs 项目: quartz12345/c
 /// <summary>
 /// Base initialization: starts with an empty <c>Backlinks</c> list and stores
 /// <paramref name="cm"/> in <c>CM</c>.
 /// </summary>
 public LinkInfo(CollectorManager cm)
 {
     Backlinks = new List<string>();
     CM = cm;
 }
示例#10
0
 /// <summary>
 /// Creates a <c>CollectorManager</c> and calls <c>Start</c> with "127.0.0.1".
 /// The test makes no assertions; it only exercises the call.
 /// </summary>
 public void TestCrawlManagerStart()
 {
     const string address = "127.0.0.1";

     CollectorManager manager = new CollectorManager();
     manager.Start(address);
 }