Esempio n. 1
0
        /// <summary>
        /// Main worker loop. On every iteration it polls the command queue for a state change
        /// and, while the state machine is not idle, pulls one message from the URL queue and
        /// hands it to <c>workerStateMachine.Act</c>. Runs until cancellation is requested.
        /// </summary>
        /// <param name="cancellationToken">
        /// Checked at the top of each iteration; the loop exits once cancellation has been requested.
        /// </param>
        private async Task RunAsync(CancellationToken cancellationToken)
        {
            CloudStorageAccount storageAccount = CloudStorageAccount.Parse(
                ConfigurationManager.AppSettings["StorageConnectionString"]
                );
            CloudQueueClient queueClient = storageAccount.CreateCloudQueueClient();

            CloudQueue commandQueue = queueClient.GetQueueReference(CommandMessage.QUEUE_COMMAND);
            commandQueue.CreateIfNotExists();

            CloudQueue urlQueue = queueClient.GetQueueReference(UrlMessage.QUEUE_URL);
            // Both queues are read below, so both must exist before the loop starts.
            urlQueue.CreateIfNotExists();

            while (!cancellationToken.IsCancellationRequested)
            {
                // The five-minute TimeSpan is the timeout handed to GetMessage for the retrieved command.
                CloudQueueMessage commandMessage = commandQueue.GetMessage(TimeSpan.FromMinutes(5));
                if (commandMessage != null)
                {
                    string command = commandMessage.AsString;
                    if (command == CommandMessage.COMMAND_LOAD)
                    {
                        workerStateMachine.setState(WorkerStateMachine.STATE_LOADING);
                        webLoader = new WebLoader();
                    }
                    else if (command == CommandMessage.COMMAND_IDLE)
                    {
                        workerStateMachine.setState(WorkerStateMachine.STATE_IDLE);
                    }
                    else if (command == CommandMessage.COMMAND_CRAWL)
                    {
                        workerStateMachine.setState(WorkerStateMachine.STATE_CRAWLING);
                        webCrawler = new WebCrawler(statsManager);
                    }

                    // Commands are removed whether or not they were recognised.
                    commandQueue.DeleteMessage(commandMessage);
                }

                if (workerStateMachine.getState() != WorkerStateMachine.STATE_IDLE) // loading or crawling
                {
                    CloudQueueMessage urlMessage = urlQueue.GetMessage();
                    if (urlMessage != null) // got a sitemap or page url from the queue
                    {
                        // Load or crawl depending on the current state; Act reports whether
                        // the message has been consumed and may be removed from the queue.
                        UrlMessage urlEntity     = UrlMessage.Parse(urlMessage.AsString);
                        bool       deleteMessage = workerStateMachine.Act(urlEntity);
                        if (deleteMessage)
                        {
                            urlQueue.DeleteMessage(urlMessage);
                        }
                    }
                    else
                    {
                        // An empty url queue is signalled with null so the state machine can finish crawling.
                        workerStateMachine.Act(null);
                    }
                }

                await Task.Delay(100);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Wraps <paramref name="url"/> in a <c>UrlMessage</c> of type <c>URL_TYPE_HTML</c>
        /// and adds its string form to <c>urlQueue</c>.
        /// </summary>
        /// <param name="url">The url to enqueue.</param>
        private void addUrlToQueue(string url)
        {
            string payload = new UrlMessage(UrlMessage.URL_TYPE_HTML, url).ToString();
            urlQueue.AddMessage(new CloudQueueMessage(payload));
        }
Esempio n. 3
0
    /// <summary>
    /// Page load handler: verifies the login, caches the user's organisation id and derives
    /// coordinates either from the camera referenced by the "ID" query parameter or from the
    /// explicit "X"/"Y" query parameters when "TYPE_ID" is present.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event arguments (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        CheckLogin();
        UserInfo = getLoginUserInfo();
        ORG_ID   = UserInfo.ORG_ID;

        String ID = StringEx.getString(Request.QueryString["ID"]);

        // NOTE(review): X is a local declared here, so the values assigned to it below are
        // not visible outside this method, whereas Y is not declared locally. Confirm whether
        // X was meant to target a member as well.
        String X = StringEx.getString(Request.QueryString["X"]);

        if (ID.Length > 0)
        {
            UrlMessage vMSG = null;
            try
            {
                vMSG = Newtonsoft.Json.JsonConvert.DeserializeObject <UrlMessage>(ID);
            }
            catch
            {
                // Deliberate best-effort: an ID that cannot be deserialized is treated as absent.
            }

            if (vMSG != null)
            {
                XT_CAMERA_Dao dao       = new XT_CAMERA_Dao();
                String        cCameraID = vMSG.Code;
                XT_CAMERA     vo        = dao.FindOne(cCameraID);

                // The camera id comes from the query string; FindOne's result for an unknown
                // id is not visible here, so guard against null instead of dereferencing blindly.
                if (vo != null)
                {
                    double cX = vo.x;
                    double cY = vo.y;
                    if ((cX > 0) && (cY > 0))
                    {
                        X = cX.ToString();
                        Y = cY;
                    }
                }
            }
        }

        TYPE_ID = StringEx.getString(Request.QueryString["TYPE_ID"]);
        if (TYPE_ID.Length > 0)
        {
            // Explicit coordinates in the query string override the camera position,
            // but only when both values are supplied.
            String cX = StringEx.getString(Request.QueryString["X"]);
            String cY = StringEx.getString(Request.QueryString["Y"]);
            if ((cX.Length > 0) && (cY.Length > 0))
            {
                X = StringEx.GetDouble(cX).ToString();
                Y = StringEx.GetDouble(cY);
            }
        }
    }
Esempio n. 4
0
        /// <summary>
        /// Downloads the robots.txt at <paramref name="robotsURL"/> and processes it line by line:
        /// every "Sitemap" directive is enqueued on <c>urlQueue</c> as a sitemap-typed
        /// <c>UrlMessage</c>, and every "Disallow" directive is stored in <c>disallowTable</c>
        /// together with the host of <paramref name="robotsURL"/>. Other lines are ignored.
        /// </summary>
        /// <param name="robotsURL">Absolute url of the robots.txt file to read.</param>
        /// <remarks>
        /// A failed web request is logged and the method returns without queuing anything.
        /// </remarks>
        public void QueueSitemap(string robotsURL)
        {
            WebRequest      request = WebRequest.Create(robotsURL);
            HttpWebResponse response;

            try
            {
                response = (HttpWebResponse)request.GetResponse();
            }
            catch (WebException)
            {
                Logger.Instance.Log(Logger.LOG_ERROR, "QueueSitemap web request failed");
                return;
            }

            // Loop-invariant: the host every Disallow rule is recorded against.
            string host = new Uri(robotsURL).Host;

            // using-blocks release the response, stream and reader even if a line fails to process.
            using (response)
            using (Stream dataStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(dataStream))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // A directive line has the form "<name>: <value>". Blank lines, comments and
                    // anything else without a name before a colon are skipped instead of
                    // crashing the parser.
                    int colon = line.IndexOf(':');
                    if (colon <= 0)
                    {
                        continue;
                    }

                    string directive = line.Substring(0, colon).Trim();
                    string value     = line.Substring(colon + 1).Trim();
                    if (value.Length == 0)
                    {
                        // An empty Sitemap has nothing to queue; an empty Disallow restricts nothing.
                        continue;
                    }

                    // Directive names are matched case-insensitively.
                    if (string.Equals(directive, "Sitemap", StringComparison.OrdinalIgnoreCase))
                    {
                        UrlMessage        urlEntity = new UrlMessage(UrlMessage.URL_TYPE_SITEMAP, value);
                        CloudQueueMessage message   = new CloudQueueMessage(urlEntity.ToString());
                        urlQueue.AddMessage(message);
                    }
                    else if (string.Equals(directive, "Disallow", StringComparison.OrdinalIgnoreCase))
                    {
                        DisallowEntity disallow        = new DisallowEntity(value, host);
                        TableOperation insertOperation = TableOperation.InsertOrReplace(disallow);
                        disallowTable.Execute(insertOperation);
                    }
                }
            }
        }
 /// <summary>
 /// Sends a url message to <paramref name="receiver"/> with <paramref name="url"/> as its media,
 /// using the sender name from <c>_botOptions</c>, then passes the API response to <c>HandleResponse</c>.
 /// </summary>
 /// <param name="receiver">Value assigned to the message's Receiver.</param>
 /// <param name="url">Value assigned to the message's Media.</param>
 /// <remarks>
 /// NOTE(review): this is <c>async void</c>, so callers cannot await it or observe exceptions
 /// thrown after the first await.
 /// </remarks>
 public async void UrlMessage(string receiver, string url)
 {
     var outgoing = new UrlMessage();
     outgoing.Receiver = receiver;
     outgoing.Media    = url;
     outgoing.Sender   = new Sender { Name = _botOptions.Sender.Name };

     HandleResponse(await _api.SendMessage(outgoing));
 }
Esempio n. 6
0
        /// <summary>
        /// Builds a <c>UrlMessage</c> from the payload of <paramref name="message"/> and applies it:
        /// description text (when present), title text and panel link are set, then the
        /// "DownloadImage" coroutine is started with the message's url.
        /// </summary>
        /// <param name="message">Incoming message whose payload is parsed.</param>
        public void ReceiveMessage(Message message)
        {
            UrlMessage parsed = UrlMessage.Build(message.payload);

            bool hasDescription = parsed.description != null;
            if (hasDescription)
            {
                // Turn literal backslash-n sequences in the text into real line breaks.
                description.text = parsed.description.Replace("\\n", "\n");
            }

            title.text = parsed.title;
            panel.Link = parsed.refLink;

            ImageManager.StartCoroutine("DownloadImage", parsed.url);
        }
Esempio n. 7
0
        /// <summary>
        /// Crawls a single page: loads it, stores an index entry (url + title) in <c>urlTable</c>,
        /// marks the url as visited and enqueues every not-yet-seen, valid outgoing link on
        /// <c>urlQueue</c>. Urls already present in <c>visitedUrls</c> are skipped.
        /// </summary>
        /// <param name="url">Url of the page to crawl.</param>
        /// <remarks>
        /// A page that fails to load is logged and skipped; nothing is indexed for it.
        /// </remarks>
        public void Crawl(string url)
        {
            if (visitedUrls.ContainsKey(url))
            {
                return;
            }

            HtmlDocument document;
            try
            {
                document = web.Load(url);
            }
            catch
            {
                Logger.Instance.Log(Logger.LOG_ERROR, "html dom parsing failed in WebCrawler.Crawl() for " + url);
                return;
            }

            Link parentLink = new Link(url);

            // SelectSingleNode yields the first match or null; a page without a <title>
            // keeps the placeholder instead of failing on a null node collection.
            // If there are several titles, the first one is picked arbitrarily.
            string   title     = "Page indexed, but no <title> tag found";
            HtmlNode titleNode = document.DocumentNode.SelectSingleNode("//title");
            if (titleNode != null)
            {
                title = titleNode.InnerHtml;
            }

            IndexEntity    indexEntity     = new IndexEntity(url, title); // also add page date
            TableOperation insertOperation = TableOperation.Insert(indexEntity);
            urlTable.Execute(insertOperation);

            visitedUrls.Add(url, true);

            // SelectNodes returns null (not an empty collection) when the page has no anchors.
            var anchors = document.DocumentNode.SelectNodes("//a");
            if (anchors == null)
            {
                return;
            }

            foreach (HtmlNode anchor in anchors)
            {
                string linkPath = anchor.GetAttributeValue("href", ""); // could be relative or absolute or external
                string link     = parentLink.buildUrl(linkPath);
                if (!visitedUrls.ContainsKey(link) && urlValidator.IsUrlValidCrawling(link))
                {
                    // NOTE(review): the link is recorded as visited at enqueue time, and Crawl
                    // skips urls already in visitedUrls. If the same visitedUrls instance is
                    // used when this link is dequeued, the page will never be indexed. Confirm.
                    visitedUrls.Add(link, true);

                    UrlMessage        urlEntity = new UrlMessage(UrlMessage.URL_TYPE_HTML, link);
                    CloudQueueMessage message   = new CloudQueueMessage(urlEntity.ToString());
                    urlQueue.AddMessage(message);
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Replies to <paramref name="msg"/>. When the text contains a url, a <c>UrlMessage</c> is
        /// constructed and nothing is posted; otherwise the text itself and a closing prompt are
        /// posted to <paramref name="context"/>.
        /// </summary>
        /// <param name="context">Dialog context used to post replies.</param>
        /// <param name="msg">Text to reply with.</param>
        public async Task BotTalk(IDialogContext context, string msg)
        {
            if (msg.IsContainsUrl())
            {
                // NOTE(review): the message is constructed but never sent or executed. Confirm intent.
                IBotMessage urlReply = new UrlMessage(msg);
                return;
            }

            // NOTE(review): this TextMessage is populated but unused; the raw text is posted instead.
            TextMessage textReply = new TextMessage { Title = string.Empty, Content = msg };

            await context.PostAsync(msg);
            await context.PostAsync("Finished. Pls continue chat with me");
        }
Esempio n. 9
0
        /// <summary>
        /// Performs one step of work for the current state.
        /// </summary>
        /// <param name="urlMessage">
        /// The url message taken from the queue, or null when the queue was empty.
        /// </param>
        /// <returns>
        /// true when the message has been handled; false when it was null or when an html url
        /// arrived during loading (the state then switches to idle).
        /// </returns>
        public bool Act(UrlMessage urlMessage)
        {
            // Empty url queue: a crawling worker has run out of work, so it goes idle.
            if (urlMessage == null)
            {
                if (getState() == STATE_CRAWLING)
                {
                    setState(WorkerStateMachine.STATE_IDLE);
                }
                return false;
            }

            nUrlsCrawled++;

            if (getState() == STATE_LOADING)
            {
                // Stats are refreshed manually on every url, during the loading phase only.
                statsManager.UpdateStats();

                if (urlMessage.UrlType == UrlMessage.URL_TYPE_SITEMAP)
                {
                    // Walks the sitemap xml and adds the leaf html urls to the queue.
                    webLoader.parseSitemap(urlMessage.Url);
                    return true;
                }

                if (urlMessage.UrlType == UrlMessage.URL_TYPE_HTML)
                {
                    // An html url while loading means the sitemap messages have been worked
                    // through (FIFO), so loading is finished. The worker goes idle rather than
                    // straight to crawling, and false is returned so the message is not treated
                    // as handled and can be processed once the state is crawling.
                    setState(WorkerStateMachine.STATE_IDLE);
                    return false;
                }

                return true;
            }

            if (getState() == STATE_CRAWLING)
            {
                // Stats are only refreshed every UPDATE_STATS_FREQ urls while crawling.
                // NOTE(review): the previous comment here said "can be null". Confirm whether
                // statsManager may be null at this point.
                bool statsDue = nUrlsCrawled % StatsManager.UPDATE_STATS_FREQ == 0;
                if (statsDue)
                {
                    statsManager.UpdateStats();
                }

                webCrawler.Crawl(urlMessage.Url);
            }

            return true;
        }
Esempio n. 10
0
        /// <summary>
        /// Chooses the bot message type that matches <c>RequestMessage</c> and executes it:
        /// a YouTube url gives a <c>VideoMessage</c>, a plain url gives a <c>UrlMessage</c>,
        /// and everything else (including text that merely contains a url) gives a
        /// <c>TextMessage</c>.
        /// </summary>
        public async Task ProcessResult()
        {
            IBotMessage reply;

            if (RequestMessage.IsYouTubeUrl())
            {
                reply = new VideoMessage(Context, RequestMessage);
            }
            // A CsharpMessage branch (RequestMessage.IsCsharpCode()) used to sit here and is disabled.
            else if (RequestMessage.IsUrl())
            {
                reply = new UrlMessage(Context, RequestMessage);
            }
            else
            {
                if (RequestMessage.IsContainsUrl())
                {
                    // NOTE(review): the first url is read but not used; the reply is still plain text.
                    string firstUrl = RequestMessage.GetUrls()[0];
                }

                reply = new TextMessage(Context, RequestMessage);
            }

            await reply.ExcuteAsync();
        }