Exemple #1
0
 /// <summary>
 /// Stamps the record with the current UTC time, updates its entry in
 /// <c>jobSchedule</c>, saves it to the HTML collection and finally sets
 /// <c>processEvent</c>.
 /// </summary>
 /// <param name="record">Record to stamp, reschedule and save.</param>
 public void UpdateEntry(HtmlRecord record)
 {
     DateTime stampedAt = DateTime.UtcNow;
     record.timeStamp = stampedAt;

     jobSchedule.UpdateSchedule(record.recordid);

     var collection = Database.Instance.htmlCollection;
     collection.Save(typeof(HtmlRecord), record);

     // Set only after the save call has returned.
     processEvent.Set();
 }
Exemple #2
0
        /// <summary>
        /// Queries the HTML collection for the single record whose <c>_id</c>
        /// field equals the supplied identifier.
        /// </summary>
        /// <param name="id">Identifier to match against the <c>_id</c> field.</param>
        /// <returns>The result of <c>FindOne</c> for that query.</returns>
        public HtmlRecord RetrieveEntryById(ObjectId id)
        {
            IMongoQuery byId = Query.EQ("_id", id);

            return Database.Instance.htmlCollection.FindOne(byId);
        }
Exemple #3
0
        /// <summary>
        /// Returns the record stored for the given address, or a new
        /// <c>HtmlRecord</c> built from it when the lookup yields nothing.
        /// </summary>
        /// <param name="domain">
        /// Address whose <c>AbsoluteUri</c> is matched against the <c>url</c> field.
        /// </param>
        /// <returns>The stored record if one is found; otherwise a new one.</returns>
        public IHtmlRecord CreateHtmlRecord(Uri domain)
        {
            IHtmlRecord found = Database.Instance.htmlCollection.FindOneAs<HtmlRecord>(
                Query.EQ("url", domain.AbsoluteUri));

            // This method does not save the newly built record.
            return found ?? new HtmlRecord(domain);
        }
        /// <summary>
        /// Chooses a crawler node for the record, remembers the assignment in
        /// <c>jobSet</c> under the record's id, and enqueues the record on that node.
        /// </summary>
        /// <remarks>
        /// Blocks until <c>crawlerNodes</c> contains at least one entry. The wait
        /// sleeps between polls instead of spinning, so an interrupt of the calling
        /// thread surfaces here as a <c>ThreadInterruptedException</c>.
        /// Node preference: fewest queued records for the same host as
        /// <paramref name="record"/>, ties broken by the shortest queue overall.
        /// </remarks>
        /// <param name="record">Record to hand to a crawler node.</param>
        public void DistributeWork(HtmlRecord record)
        {
            // Short pause between polls; keeps the wait from pinning a CPU core.
            const int nodePollIntervalMs = 50;

            while (crawlerNodes.Count == 0)
            {
                System.Threading.Thread.Sleep(nodePollIntervalMs);
            }

            // Primary key: records already queued for this host on the node.
            // Secondary key: the node's total queue length.
            // Equivalent to the previous stable OrderBy(total).OrderBy(sameHost)
            // chain, but sorts once and states the intent directly.
            CrawlerNode node = crawlerNodes
                               .OrderBy(candidate => candidate.Value.messageQueue
                                        .Count(queued => queued.domain.Host == record.domain.Host))
                               .ThenBy(candidate => candidate.Value.messageQueue.Count)
                               .First()
                               .Value;

            jobSet[record.recordid] = node;
            node.EnqueueHtmlRecord(record);
        }
Exemple #5
0
        /// <summary>
        /// Accepts a finished job: unzips and deserializes the record, removes the
        /// job from the crawler manager and passes the record to the data manager.
        /// </summary>
        /// <param name="zippedBytes">Zipped BSON bytes of an <c>HtmlRecord</c>.</param>
        /// <returns>
        /// <c>ServerResponse.Success</c> when every step completes;
        /// <c>ServerResponse.ServerError</c> when any step throws.
        /// </returns>
        public ServerResponse ReturnFinishedJob(byte[] zippedBytes)
        {
            try
            {
                byte[]     unzipped = RestAPI.Unzip(zippedBytes);
                HtmlRecord finished = BSON.Deserialize <HtmlRecord>(unzipped);

                CrawlerManager.Instance.RemoveJob(finished.recordid);
                DataManager.Instance.UpdateEntry(finished);

                return ServerResponse.Success;
            }
            catch (Exception failure)
            {
                // Any failure is reported to the caller as a server error;
                // the details only go to the debug output.
                System.Diagnostics.Debug.Print(failure.ToString());
                return ServerResponse.ServerError;
            }
        }
            /// <summary>
            /// Send loop: takes records from <c>messageQueue</c>, clears the
            /// <c>links</c> of every result, serializes the record to BSON and
            /// submits it through <c>RestAPI.EnqueueJob</c>. A record goes back
            /// on the queue if the reply is not flagged "Successful", if the
            /// parsed response is not <c>ServerResponse.Success</c>, or if a
            /// <c>WebException</c> is thrown.
            /// </summary>
            /// <remarks>
            /// Runs until <c>processDestroyed</c> is set; the flag is only checked
            /// between records.
            /// </remarks>
            public void Send()
            {
                RestAPI client = new RestAPI();

                while (!processDestroyed)
                {
                    HtmlRecord outgoing = messageQueue.Take();

                    // Links are cleared on the record itself before serializing.
                    foreach (HtmlResults entry in outgoing.results.Values)
                    {
                        entry.links = null;
                    }

                    byte[] payload   = outgoing.ToBson <HtmlRecord>();
                    bool   delivered = false;

                    try
                    {
                        JObject reply        = client.EnqueueJob(payload);
                        bool    acknowledged = reply.GetValue("Successful").Value <bool>();

                        // The response is only parsed when the reply was acknowledged.
                        delivered = acknowledged &&
                                    client.ParseResponse(reply) == ServerResponse.Success;

                        if (delivered)
                        {
                            System.Diagnostics.Debug.Print("[" + DateTime.Now.ToString() + "] Sent: " +
                                                           outgoing.domain.AbsoluteUri);
                        }
                    }
                    catch (WebException error)
                    {
                        System.Diagnostics.Debug.Print(error.ToString());
                    }

                    // Single re-queue point for every failure path.
                    if (!delivered)
                    {
                        messageQueue.Add(outgoing);
                    }
                }
            }
Exemple #7
0
        /// <summary>
        /// Scheduler loop: fetches the next job from <c>jobSchedule</c>, waits
        /// until its due time (UTC), loads the matching record and hands it to
        /// the crawler manager, then resets <c>processEvent</c>.
        /// </summary>
        /// <remarks>
        /// If the thread is interrupted, the loop is restarted on a fresh thread
        /// stored in <c>scheduleJobsProc</c>.
        /// </remarks>
        private void ScheduleJobs()
        {
            // Thread.Sleep accepts at most int.MaxValue milliseconds per call.
            TimeSpan maxSleep = TimeSpan.FromMilliseconds(int.MaxValue);

            try
            {
                while (true)
                {
                    KeyValuePair <DateTime, ObjectId> jobPair = jobSchedule.GetJob();

                    // Read the clock once per pass. Reading it separately for the
                    // check and for the subtraction can give a negative delay,
                    // which Thread.Sleep treats as infinite (-1) or rejects.
                    // Sleeping in clamped slices also handles delays that do not
                    // fit into an int number of milliseconds.
                    TimeSpan remaining = jobPair.Key - DateTime.UtcNow;
                    while (remaining > TimeSpan.Zero)
                    {
                        Thread.Sleep(remaining < maxSleep ? remaining : maxSleep);
                        remaining = jobPair.Key - DateTime.UtcNow;
                    }

                    HtmlRecord record = RetrieveEntryById(jobPair.Value);
                    if (record != null)
                    {
                        CrawlerManager.Instance.DistributeWork(record);
                    }
                    else
                    {
                        // A missing record would fail with a NullReferenceException
                        // inside DistributeWork; skip it and keep scheduling.
                        System.Diagnostics.Debug.Print("No record found for scheduled job " + jobPair.Value);
                    }

                    processEvent.Reset();
                }
            }
            catch (ThreadInterruptedException)
            {
                // Restart scheduling on a new thread after an interrupt.
                scheduleJobsProc = new Thread(ScheduleJobs);
                scheduleJobsProc.Start();
            }
        }
 /// <summary>
 /// Adds the record to this node's <c>messageQueue</c>.
 /// </summary>
 /// <param name="record">Record to queue.</param>
 public void EnqueueHtmlRecord(HtmlRecord record)
 {
     this.messageQueue.Add(record);
 }