/// <summary>
/// Main worker loop: polls the command queue to drive the worker state machine
/// (idle / loading / crawling) and, while not idle, pulls URL messages and
/// hands them to the state machine.
/// </summary>
/// <param name="cancellationToken">Requests a graceful shutdown of the loop.</param>
private async Task RunAsync(CancellationToken cancellationToken)
{
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(
        ConfigurationManager.AppSettings["StorageConnectionString"]);
    CloudQueueClient queueClient = storageAccount.CreateCloudQueueClient();

    CloudQueue commandQueue = queueClient.GetQueueReference(CommandMessage.QUEUE_COMMAND);
    commandQueue.CreateIfNotExists();

    CloudQueue urlQueue = queueClient.GetQueueReference(UrlMessage.QUEUE_URL);
    // BUG FIX: the original called commandQueue.CreateIfNotExists() a second
    // time here, so the URL queue was never created.
    urlQueue.CreateIfNotExists();

    while (!cancellationToken.IsCancellationRequested)
    {
        // 5-minute visibility timeout: if we crash mid-command, the message reappears.
        CloudQueueMessage commandMessage = commandQueue.GetMessage(TimeSpan.FromMinutes(5));
        if (commandMessage != null)
        {
            if (commandMessage.AsString == CommandMessage.COMMAND_LOAD)
            {
                workerStateMachine.setState(WorkerStateMachine.STATE_LOADING);
                webLoader = new WebLoader();
            }
            else if (commandMessage.AsString == CommandMessage.COMMAND_IDLE)
            {
                workerStateMachine.setState(WorkerStateMachine.STATE_IDLE);
            }
            else if (commandMessage.AsString == CommandMessage.COMMAND_CRAWL)
            {
                workerStateMachine.setState(WorkerStateMachine.STATE_CRAWLING);
                webCrawler = new WebCrawler(statsManager);
            }
            commandQueue.DeleteMessage(commandMessage);
        }

        if (workerStateMachine.getState() != WorkerStateMachine.STATE_IDLE) // in a loading or crawling state
        {
            CloudQueueMessage urlMessage = urlQueue.GetMessage();
            if (urlMessage != null) // got url from queue of sitemap or urlset
            {
                // Load or crawl with the UrlMessage depending on current state;
                // Act() returns false when the message must be reprocessed later.
                UrlMessage urlEntity = UrlMessage.Parse(urlMessage.AsString);
                bool deleteMessage = workerStateMachine.Act(urlEntity);
                if (deleteMessage)
                {
                    urlQueue.DeleteMessage(urlMessage);
                }
            }
            else
            {
                workerStateMachine.Act(null); // Act(null) lets the machine finish a crawl on an empty queue
            }
        }

        // Plain delay (no token) so cancellation exits via the while-condition
        // instead of throwing OperationCanceledException.
        await Task.Delay(100);
    }
}
/// <summary>
/// Wraps a raw URL in an HTML-type <c>UrlMessage</c> and enqueues its
/// string form on the URL queue.
/// </summary>
/// <param name="url">The page URL to enqueue for processing.</param>
private void addUrlToQueue(string url)
{
    var entity = new UrlMessage(UrlMessage.URL_TYPE_HTML, url);
    urlQueue.AddMessage(new CloudQueueMessage(entity.ToString()));
}
/// <summary>
/// Page entry point: authenticates the user, then resolves map focus
/// coordinates — either from a camera referenced by a JSON "ID" query
/// parameter, or from explicit "X"/"Y" query parameters when "TYPE_ID" is set.
/// </summary>
protected void Page_Load(object sender, EventArgs e)
{
    CheckLogin();
    UserInfo = getLoginUserInfo();
    ORG_ID = UserInfo.ORG_ID;

    // Query-string inputs.
    // NOTE(review): X is declared as a LOCAL here, but Y (assigned below) is
    // not declared locally, so Y must be a field/property. If the page also
    // has an X member, this local shadows it and the assignments to X below
    // never reach the page — verify against the code-behind class.
    String ID = StringEx.getString(Request.QueryString["ID"]);
    String P = StringEx.getString(Request.QueryString["P"]);   // NOTE(review): unused in this method
    String T = StringEx.getString(Request.QueryString["T"]);   // NOTE(review): unused in this method
    String X = StringEx.getString(Request.QueryString["X"]);
    String DEVICE_ID = StringEx.getString(Request.QueryString["DEVICE_ID"]);
    if (!String.IsNullOrEmpty(DEVICE_ID))
    {
        // NOTE(review): intentionally empty? Looks like an unfinished branch.
    }

    if (ID.Length > 0)
    {
        // "ID" is expected to carry a JSON-serialized UrlMessage; a malformed
        // value is deliberately ignored (best-effort) and leaves vMSG null.
        UrlMessage vMSG = null;
        try
        {
            vMSG = Newtonsoft.Json.JsonConvert.DeserializeObject<UrlMessage>(ID);
        }
        catch
        {
        }

        if (vMSG != null)
        {
            // Center the map on the referenced camera's coordinates.
            // NOTE(review): FindOne may return null for an unknown camera id,
            // which would throw NullReferenceException on vo.x — confirm.
            XT_CAMERA_Dao dao = new XT_CAMERA_Dao();
            String cCameraID = vMSG.Code;
            XT_CAMERA vo = dao.FindOne(cCameraID);
            double cX = vo.x;
            double cY = vo.y;
            if ((cX > 0) && (cY > 0))
            {
                X = cX.ToString();
                Y = cY; // Y is presumably a double member of the page
            }
        }
    }

    //LoginUserInfo vUserInf = this.getLoginUserInfo();
    //X = vUserInf.X;
    //Y = vUserInf.Y;

    // Explicit coordinates override when a TYPE_ID is supplied.
    TYPE_ID = StringEx.getString(Request.QueryString["TYPE_ID"]);
    if (TYPE_ID.Length > 0)
    {
        String cX = StringEx.getString(Request.QueryString["X"]);
        String cY = StringEx.getString(Request.QueryString["Y"]);
        if ((cX.Length > 0) && (cY.Length > 0))
        {
            X = StringEx.GetDouble(cX).ToString();
            Y = StringEx.GetDouble(cY);
        }
    }
}
/// <summary>
/// Downloads a robots.txt file, queues every "Sitemap:" URL it lists, and
/// stores each "Disallow:" rule in the disallow table keyed by host.
/// </summary>
/// <param name="robotsURL">Absolute URL of the robots.txt to process.</param>
public void QueueSitemap(string robotsURL)
{
    WebRequest request = WebRequest.Create(robotsURL);
    HttpWebResponse response;
    try
    {
        response = (HttpWebResponse)request.GetResponse();
    }
    catch (WebException)
    {
        Logger.Instance.Log(Logger.LOG_ERROR, "QueueSitemap web request failed");
        return;
    }

    //urlQueue.FetchAttributes();
    //if (urlQueue.ApproximateMessageCount < 15) { return; } // don't add root sitemap urls to the queue if the queue is not empty

    // BUG FIX: response/stream/reader were never disposed (leaked on any
    // exception); using-blocks release them on every path.
    using (response)
    using (Stream dataStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(dataStream))
    {
        string data;
        while ((data = reader.ReadLine()) != null)
        {
            // BUG FIX: the original scanned char-by-char until a space and
            // threw IndexOutOfRangeException on blank lines or lines without
            // a space (comments, separators). Find the first space instead.
            int space = data.IndexOf(' ');
            if (space < 2)
            {
                continue; // need at least "X: value" to be a directive
            }

            // Directive name without its trailing ':' (same trimming as the
            // original's Substring(0, Length - 2) on the accumulated prefix).
            string directive = data.Substring(0, space - 1);
            string value = data.Substring(space + 1);

            if (directive == "Sitemap")
            {
                UrlMessage urlEntity = new UrlMessage(UrlMessage.URL_TYPE_SITEMAP, value);
                // Add message
                CloudQueueMessage message = new CloudQueueMessage(urlEntity.ToString());
                urlQueue.AddMessage(message);
            }
            else if (directive == "Disallow")
            {
                // Persist the disallow rule for this host.
                DisallowEntity disallow = new DisallowEntity(value, new Uri(robotsURL).Host);
                TableOperation insertOperation = TableOperation.InsertOrReplace(disallow);
                disallowTable.Execute(insertOperation);
            }
        }
    }
}
/// <summary>
/// Sends the given media URL to <paramref name="receiver"/> through the bot
/// API, using the configured sender name, then processes the API response.
/// </summary>
/// <param name="receiver">Identifier of the message recipient.</param>
/// <param name="url">Media URL to send.</param>
/// <remarks>
/// BUG FIX: this was <c>async void</c>, so exceptions thrown by
/// <c>SendMessage</c> were unobservable and could crash the process.
/// Returning <see cref="Task"/> is source-compatible for existing callers.
/// </remarks>
public async Task UrlMessage(string receiver, string url)
{
    var message = new UrlMessage
    {
        Receiver = receiver,
        Media = url,
        Sender = new Sender { Name = _botOptions.Sender.Name }
    };

    var response = await _api.SendMessage(message);
    HandleResponse(response);
}
/// <summary>
/// Populates the link-panel UI (description, title, link) from an incoming
/// message payload and starts the thumbnail image download.
/// </summary>
/// <param name="message">Raw message whose payload encodes a UrlMessage.</param>
public void ReceiveMessage(Message message)
{
    var parsed = UrlMessage.Build(message.payload);

    // Description is optional; escaped newlines arrive as the literal "\n".
    var desc = parsed.description;
    if (desc != null)
    {
        description.text = desc.Replace("\\n", "\n");
    }

    title.text = parsed.title;
    panel.Link = parsed.refLink;
    ImageManager.StartCoroutine("DownloadImage", parsed.url);
}
/// <summary>
/// Crawls a single page: indexes its URL and title, marks it visited, and
/// enqueues every not-yet-visited, valid outgoing link for later crawling.
/// </summary>
/// <param name="url">Absolute URL of the page to crawl.</param>
public void Crawl(string url)
{
    if (visitedUrls.ContainsKey(url))
    {
        return; // already indexed — guard clause instead of wrapping the whole body
    }

    HtmlDocument document;
    try
    {
        document = web.Load(url);
    }
    catch
    {
        Logger.Instance.Log(Logger.LOG_ERROR, "html dom parsing failed in WebCrawler.Crawl() for " + url);
        return;
    }

    Link parentLink = new Link(url);

    // BUG FIX: HtmlAgilityPack's SelectNodes returns null (not an empty
    // collection) when nothing matches, so pages without a <title> crashed
    // with NullReferenceException on .ToArray().
    var titleNodes = document.DocumentNode.SelectNodes("//title");
    string title = "Page indexed, but no <title> tag found";
    if (titleNodes != null && titleNodes.Count > 0)
    {
        // there should only be one title; if there are more, pick the 1st one arbitrarily
        title = titleNodes[0].InnerHtml;
    }

    IndexEntity indexEntity = new IndexEntity(url, title); // also add page date
    urlTable.Execute(TableOperation.Insert(indexEntity));
    visitedUrls.Add(url, true);

    // BUG FIX: same null-on-no-match issue for pages with no <a> elements.
    var anchors = document.DocumentNode.SelectNodes("//a");
    if (anchors == null)
    {
        return;
    }

    foreach (HtmlNode item in anchors)
    {
        string linkPath = item.GetAttributeValue("href", ""); // could be relative or absolute or external
        string link = parentLink.buildUrl(linkPath);
        if (!visitedUrls.ContainsKey(link) && urlValidator.IsUrlValidCrawling(link))
        {
            visitedUrls.Add(link, true);
            UrlMessage urlEntity = new UrlMessage(UrlMessage.URL_TYPE_HTML, link);
            // Add message
            CloudQueueMessage message = new CloudQueueMessage(urlEntity.ToString());
            urlQueue.AddMessage(message);
        }
    }
}
/// <summary>
/// Replies to the user: text messages are echoed back with a closing prompt.
/// </summary>
/// <remarks>
/// NOTE(review): this method looks unfinished. When <c>msg</c> contains a URL,
/// a <c>UrlMessage</c> is assigned to <c>message</c> but never sent or used —
/// the URL branch produces no reply at all. In the text branch, <c>_message</c>
/// is built (Title/Content) but the raw <c>msg</c> string is posted instead.
/// Confirm intended behavior before relying on this.
/// </remarks>
public async Task BotTalk(IDialogContext context, string msg)
{
    IBotMessage message;
    if (msg.IsContainsUrl())
    {
        // NOTE(review): assigned but never used — no reply is sent here.
        message = new UrlMessage(msg);
    }
    else
    {
        // NOTE(review): _message is populated but not posted; msg is posted raw.
        TextMessage _message = new TextMessage();
        _message.Title = string.Empty;
        _message.Content = msg;
        await context.PostAsync(msg);
        await context.PostAsync("Finished. Pls continue chat with me");
    }
}
/// <summary>
/// Advances the worker state machine for one URL message.
/// </summary>
/// <param name="urlMessage">The dequeued message, or null when the URL queue was empty.</param>
/// <returns>
/// True when the caller should delete the queue message; false when the
/// message must remain queued (null input, or an HTML message seen while
/// still in the loading state).
/// </returns>
public bool Act(UrlMessage urlMessage)
{
    if (urlMessage == null) // if url queue is empty
    {
        if (getState() == STATE_CRAWLING) // and we are in the crawling state, we must have finished crawling
        {
            setState(WorkerStateMachine.STATE_IDLE); // so go to idle
        }
        return (false);
    }

    nUrlsCrawled += 1;

    if (getState() == STATE_LOADING) // loading code
    {
        statsManager.UpdateStats(); // manual update stats on each url during loading phase only
        if (urlMessage.UrlType == UrlMessage.URL_TYPE_SITEMAP)
        {
            webLoader.parseSitemap(urlMessage.Url); // crawls xml, adds leaf html urls to queue
        }
        else if (urlMessage.UrlType == UrlMessage.URL_TYPE_HTML)
        {
            // if we are in the loading state, and we receive a URL_TYPE_HTML message, that means we've finished
            // loading and can transition to crawling state (since we've finished the sitemap queue messages, FIFO)
            //setState(WorkerStateMachine.STATE_CRAWLING); // transition to next state
            // NOTE(review): the transition above is commented out; currently the
            // machine drops to IDLE instead of CRAWLING — confirm that is intended.
            setState(WorkerStateMachine.STATE_IDLE);
            // intentionally don't delete queue message, so that it gets processed when the state has been set to crawling
            return (false);
        }
    }
    else if (getState() == STATE_CRAWLING) // crawling code
    {
        // Stats refresh is throttled to every UPDATE_STATS_FREQ urls while crawling.
        if (nUrlsCrawled % StatsManager.UPDATE_STATS_FREQ == 0)
        {
            statsManager.UpdateStats(); // can be null
        }
        webCrawler.Crawl(urlMessage.Url);
    }

    return (true);
}
/// <summary>
/// Dispatches the incoming request to the matching handler: YouTube links get
/// a <c>VideoMessage</c>, bare URLs a <c>UrlMessage</c>, and everything else
/// (including text that merely contains a URL) a <c>TextMessage</c>.
/// </summary>
public async Task ProcessResult()
{
    IBotMessage message;

    if (RequestMessage.IsYouTubeUrl())
    {
        message = new VideoMessage(Context, RequestMessage);
    }
    //else if (RequestMessage.IsCsharpCode())
    //{
    //    message = new CsharpMessage(Context, RequestMessage);
    //}
    else if (RequestMessage.IsUrl())
    {
        message = new UrlMessage(Context, RequestMessage);
    }
    else
    {
        // BUG FIX: the IsContainsUrl branch extracted GetUrls()[0] into an
        // unused local and then did exactly what this else branch does, so the
        // two branches are merged and the dead work removed.
        // TODO(review): if a dedicated contains-URL handler was intended
        // there, restore the branch with its own message type.
        message = new TextMessage(Context, RequestMessage);
    }

    await message.ExcuteAsync(); // "ExcuteAsync" spelling comes from IBotMessage
}