Exemple #1
0
 /// <summary>
 ///     Initializes a new instance of the <see cref = "Discovery" /> class from a Discoveries row.
 /// </summary>
 /// <param name = "discoveriesRow">The discoveries row whose values are copied onto this instance.</param>
 internal Discovery(ArachnodeDataSet.DiscoveriesRow discoveriesRow)
 {
     string storedAbsoluteUri = discoveriesRow.AbsoluteUri;

     // The cache key is built from the stored AbsoluteUri with any "://www." rewritten to "://".
     CacheKey       = new Uri(storedAbsoluteUri.Replace("://www.", "://"));
     DiscoveryState = (DiscoveryState)discoveriesRow.DiscoveryStateID;
     DiscoveryType  = (DiscoveryType)discoveriesRow.DiscoveryTypeID;

     // HACK: the remaining-retries count is not read from the row, so it is reset to a fixed value here.
     HttpWebRequestRetriesRemaining = 5;

     // The ID column may be null; only copy it when a value is present.
     if (!discoveriesRow.IsIDNull())
     {
         ID = discoveriesRow.ID;
     }

     IsStorable              = true;
     InsertDiscovery         = true;
     NumberOfTimesDiscovered = discoveriesRow.NumberOfTimesDiscovered;

     // The Uri drops trailing '/' characters first, and then trailing '#' characters.
     string trimmedAbsoluteUri = storedAbsoluteUri.TrimEnd('/').TrimEnd('#');
     Uri = new Uri(trimmedAbsoluteUri);
 }
        /// <summary>
        ///     Queries the "Discoveries" collection for a document whose "absoluteUri" equals <paramref name = "absoluteUri" />.
        /// </summary>
        /// <param name = "absoluteUri">The absolute URI to look up.</param>
        /// <returns>
        ///     A row created from the discoveries data table and filled from the matching document,
        ///     or <c>null</c> when no document matches or when an exception was caught and recorded.
        /// </returns>
        public new ArachnodeDataSet.DiscoveriesRow GetDiscovery(string absoluteUri)
        {
            try
            {
                IMongoQuery mongoQuery = Query.EQ("absoluteUri", absoluteUri);

                BsonDocument mongoResponse = _mongoDatabase.GetCollection("Discoveries").FindOne(mongoQuery);

                if (mongoResponse != null)
                {
                    ArachnodeDataSet.DiscoveriesRow discoveriesRow = _discoveriesDataTable.NewDiscoveriesRow();

                    // The "id" element may hold a BSON null; leave the row's ID unset in that case.
                    if (mongoResponse["id"].AsNullableInt64 != BsonNull.Value)
                    {
                        discoveriesRow.ID = mongoResponse["id"].AsInt64;
                    }
                    discoveriesRow.AbsoluteUri             = mongoResponse["absoluteUri"].AsString;
                    discoveriesRow.DiscoveryStateID        = (byte)mongoResponse["discoveryStateID"].AsInt32;
                    discoveriesRow.DiscoveryTypeID         = (byte)mongoResponse["discoveryTypeID"].AsInt32;
                    discoveriesRow.ExpectFileOrImage       = mongoResponse["expectFileOrImage"].AsBoolean;
                    discoveriesRow.NumberOfTimesDiscovered = mongoResponse["numberOfTimesDiscovered"].AsInt32;

                    // Return the populated row; it is only returned once every field has been read
                    // successfully, so a conversion failure above never leaks a half-filled row.
                    return(discoveriesRow);
                }
            }
            catch (Exception exception)
            {
                InsertException(absoluteUri, null, exception, false);
            }

            // No matching document, or the lookup failed and the exception was recorded.
            return(null);
        }
Exemple #3
0
        /// <summary>
        ///     Resolves the Discovery for an AbsoluteUri by consulting, in order: the HttpRuntime cache,
        ///     the DAO, and (when configured) the crawler peer manager; otherwise a new Discovery is created.
        /// </summary>
        /// <param name = "absoluteUri">The absolute URI used to construct Discoveries and passed to the peer manager.</param>
        /// <param name = "cacheKey">The key used for the cache lookup, the DAO lookup and the DAO insert.</param>
        /// <param name = "arachnodeDAO">The DAO used for lookups, inserts and exception logging.</param>
        /// <returns>
        ///     The resolved Discovery. When an exception is caught it is recorded through the DAO and a
        ///     placeholder Discovery flagged as new is returned instead.
        /// </returns>
        private Discovery <TArachnodeDAO> GetDiscovery(string absoluteUri, string cacheKey, IArachnodeDAO arachnodeDAO)
        {
            try
            {
                // 1) Cache hit: bump the counter on the cached instance and hand it back.
                Discovery<TArachnodeDAO> cachedDiscovery = HttpRuntime.Cache.Get(cacheKey) as Discovery<TArachnodeDAO>;

                if (cachedDiscovery != null)
                {
                    cachedDiscovery.NumberOfTimesDiscovered++;

                    return cachedDiscovery;
                }

                // 2) Cache miss: check the database...
                ArachnodeDataSet.DiscoveriesRow storedRow = arachnodeDAO.GetDiscovery(cacheKey);

                if (storedRow != null)
                {
                    Discovery<TArachnodeDAO> storedDiscovery = new Discovery<TArachnodeDAO>(storedRow);

                    // Override the constructor's values with the configured retry count and the row/request values.
                    storedDiscovery.HttpWebRequestRetriesRemaining = _applicationSettings.HttpWebRequestRetries;
                    storedDiscovery.DiscoveryState                 = (DiscoveryState)storedRow.DiscoveryStateID;
                    storedDiscovery.DiscoveryType                  = (DiscoveryType)storedRow.DiscoveryTypeID;
                    storedDiscovery.ExpectFileOrImage              = storedRow.ExpectFileOrImage;

                    if (!storedRow.IsIDNull())
                    {
                        storedDiscovery.ID = storedRow.ID;
                    }

                    storedDiscovery.NumberOfTimesDiscovered = storedRow.NumberOfTimesDiscovered;
                    storedDiscovery.Uri                     = new Uri(absoluteUri);

                    AddDiscoveryToInternalCache(cacheKey, storedDiscovery);

                    return storedDiscovery;
                }

                // 3) Not in the database: check the CrawlerPeers...
                Discovery<TArachnodeDAO> resolvedDiscovery;

                if (_crawlerPeerManager != null && _crawlerPeerManager.GetDiscovery(absoluteUri, cacheKey, arachnodeDAO))
                {
                    // A peer reported this Discovery: mark it as already discovered (not added to the internal cache).
                    resolvedDiscovery = new Discovery<TArachnodeDAO>(absoluteUri);

                    resolvedDiscovery.DiscoveryState = DiscoveryState.Discovered;
                    resolvedDiscovery.IsNew          = false;
                }
                else
                {
                    // Nobody reported it: create it as new and cache it.
                    resolvedDiscovery = new Discovery<TArachnodeDAO>(absoluteUri);

                    resolvedDiscovery.IsNew = true;

                    AddDiscoveryToInternalCache(cacheKey, resolvedDiscovery);
                }

                // Insert when disallowed Discoveries are inserted anyway or the pre-request rules allow this one,
                // and both the application setting and the Discovery itself ask for an insert.
                bool passesRules = _applicationSettings.InsertDisallowedDiscoveries || !_ruleManager.IsDisallowed(resolvedDiscovery, CrawlRuleType.PreRequest, arachnodeDAO);

                if (passesRules && _applicationSettings.InsertDiscoveries && resolvedDiscovery.InsertDiscovery)
                {
                    arachnodeDAO.InsertDiscovery(resolvedDiscovery.ID, cacheKey, (int)resolvedDiscovery.DiscoveryState, (int)resolvedDiscovery.DiscoveryType, resolvedDiscovery.ExpectFileOrImage, ++resolvedDiscovery.NumberOfTimesDiscovered);
                }

                return resolvedDiscovery;
            }
            catch (Exception exception)
            {
                arachnodeDAO.InsertException(absoluteUri, absoluteUri, exception, false);

                // Return a placeholder Discovery rather than null.
                Discovery<TArachnodeDAO> placeholderDiscovery = new Discovery<TArachnodeDAO>("http://aninvalidabsoluteuriwasrequestedasadiscovery.com");

                placeholderDiscovery.IsNew = true;

                return placeholderDiscovery;
            }
        }