/// <summary>
/// Builds the delete token for an entity: the entity is serialized to JSON,
/// the JSON text is UTF-8 encoded, and the resulting bytes are Base64 encoded.
/// </summary>
/// <param name="model">Entity that is serialized into the token.</param>
/// <returns>Base64 text of the UTF-8 encoded JSON form of <paramref name="model"/>.</returns>
private static string CreateDeleteToken(UrlEntity model)
{
    var json = JsonSerializer.Serialize(model);
    var utf8Json = System.Text.Encoding.UTF8.GetBytes(json);

    return Convert.ToBase64String(utf8Json);
}
/// <summary>
/// Inserts or merges an entity into Azure Table Storage. Both the partition key and the row key must be provided.
/// The partition key is the shortened URL and the row key is the long/original URL.
/// </summary>
/// <remarks>
/// The row key of <paramref name="entity"/> is URL-encoded in place before the operation is executed,
/// so the caller's instance is modified.
/// </remarks>
/// <param name="table">Azure table the operation is executed against.</param>
/// <param name="entity">Entity object to insert or merge.</param>
/// <returns>The operation result cast to <see cref="UrlEntity"/>; null when the result is not a <see cref="UrlEntity"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="table"/> or <paramref name="entity"/> is null.</exception>
/// <exception cref="StorageException">The storage operation failed.</exception>
public static async Task<UrlEntity> InsertOrMergeEntityAsync(CloudTable table, UrlEntity entity)
{
    if (table == null)
    {
        throw new ArgumentNullException(nameof(table));
    }

    if (entity == null)
    {
        throw new ArgumentNullException(nameof(entity));
    }

    // Encode the long URL so it can be stored as a row key.
    entity.RowKey = WebUtility.UrlEncode(entity.RowKey);

    // Create the InsertOrMerge table operation.
    TableOperation insertOrMergeOperation = TableOperation.InsertOrMerge(entity);

    // Execute the operation. A StorageException is deliberately left to propagate unchanged:
    // re-wrapping it in a new StorageException would drop the original exception,
    // its request details and its stack trace.
    TableResult result = await table.ExecuteAsync(insertOrMergeOperation);

    return result.Result as UrlEntity;
}
/// <summary>
/// Generates a collection of inlines from a tweet.
/// </summary>
/// <remarks>
/// Text between entities is emitted as plain runs; hashtags, links, media and mentions
/// are emitted through their dedicated generators. Entities are processed in the order
/// returned by <c>ExtractEntities</c>.
/// </remarks>
/// <param name="tweet">The tweet to generate inlines from.</param>
/// <returns>The generated inlines.</returns>
private static IEnumerable<Inline> GenerateInlines(Status tweet)
{
    var entities = ExtractEntities(tweet).ToArray();

    // A tweet without entities is rendered as one single run.
    if (entities.Length == 0)
    {
        yield return new Run(PrepareText(tweet.Text));
        yield break;
    }

    // Position in the tweet text up to which inlines have been produced.
    int cursor = 0;

    foreach (EntityBase current in entities)
    {
        // Plain text between the previous entity and this one.
        if (current.Start > cursor)
        {
            string gap = tweet.Text.Substring(cursor, current.Start - cursor);
            yield return new Run(PrepareText(gap));
        }

        var hashTag = current as HashTagEntity;
        if (hashTag != null)
        {
            yield return GenerateHashTag(hashTag);
        }
        else if (current is UrlEntity)
        {
            if (current is MediaEntity)
            {
                // Media only gets an inline here when inline media display is switched off.
                if (!Config.Visual.InlineMedia)
                {
                    yield return GenerateMedia((MediaEntity)current);
                }
            }
            else
            {
                var link = (UrlEntity)current;

                // Links that point to a tweet do not get a link inline.
                if (!TwitterHelper.IsTweetUrl(link.ExpandedUrl))
                {
                    yield return GenerateLink(link);
                }
            }
        }
        else if (current is UserMentionEntity)
        {
            yield return GenerateMention((UserMentionEntity)current);
        }

        cursor = current.End;
    }

    // Remaining text after the last entity.
    if (cursor < tweet.Text.Length)
    {
        yield return new Run(PrepareText(tweet.Text.Substring(cursor)));
    }
}
/// <summary>
/// Overwrites the short URL and long URL of the stored record with the given id,
/// sets its creation date to the current local time and saves the change.
/// </summary>
/// <param name="id">Id of the stored record to update.</param>
/// <param name="urlEntity">Source of the new short and long URL values.</param>
/// <exception cref="ArgumentNullException"><paramref name="urlEntity"/> is null.</exception>
/// <exception cref="InvalidOperationException">No stored record has the given id.</exception>
public void Edit(int id, UrlEntity urlEntity)
{
    if (urlEntity == null)
    {
        throw new ArgumentNullException(nameof(urlEntity));
    }

    var dbUrl = _db.stringUrl.FirstOrDefault(c => c.Id == id);

    // FirstOrDefault yields null for an unknown id; fail with a clear message
    // instead of a NullReferenceException on the first property assignment.
    if (dbUrl == null)
    {
        throw new InvalidOperationException("No URL record exists with id " + id + ".");
    }

    dbUrl.ShortUrl = urlEntity.ShortUrl;
    dbUrl.LongUrl = urlEntity.LongUrl;
    dbUrl.CreateDate = DateTime.Now;

    _db.stringUrl.Update(dbUrl);
    SaveChanges();
}
/// <summary>
/// Inserts a new URL entity with partition "v1" and the given identifier into the URL table.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>: callers cannot await the insert or observe its failure.
/// </remarks>
/// <param name="identifier">Second constructor argument of the new entity.</param>
/// <param name="fullPath">Value stored in the entity's <c>fullPath</c> member.</param>
public async void createUrl(string identifier, string fullPath)
{
    var entity = new UrlEntity("v1", identifier) { fullPath = fullPath };

    await _urltable.ExecuteAsync(TableOperation.Insert(entity));
}
/// <summary>
/// Creates a view model around a URL entity.
/// </summary>
/// <param name="urlEntity">Entity that backs this view model; its expanded and display URLs are copied.</param>
public EntityViewModel(UrlEntity urlEntity)
{
    // Keep the entity itself as the model.
    Model = urlEntity;

    // Copy the two URL forms from the entity.
    ExpandedUrl = urlEntity.ExpandedUrl;
    DisplayUrl = urlEntity.DisplayUrl;

    // Shared notice instance.
    Notice = Notice.Instance;
}
/// <summary>
/// Adds the URL entity to the repository and saves, unless the repository
/// already returns an entity for the same URL.
/// </summary>
/// <param name="urlEntity">Entity to store.</param>
public void AddUrl(UrlEntity urlEntity)
{
    bool alreadyStored = urlRepository.GetUrlEntity(urlEntity.URL) != null;

    if (!alreadyStored)
    {
        urlRepository.AddUrlEntity(urlEntity);
        urlRepository.SaveChanges();
    }
}
/// <summary>
/// Creates the context menu for a link. The menu holds a single item whose header is
/// <c>Strings.CopyUrl</c>; no command is attached to it here.
/// </summary>
/// <param name="entity">The link entity (not read by this method).</param>
/// <returns>The created context menu.</returns>
private static ContextMenu CreateLinkContextMenu(UrlEntity entity)
{
    ContextMenu linkMenu = new ContextMenu();

    MenuItem copyItem = new MenuItem();
    copyItem.Header = Strings.CopyUrl;
    linkMenu.Items.Add(copyItem);

    return linkMenu;
}
/// <summary>
/// Applies an edit to the URL with the given id and redirects to the "Index" action.
/// When the model state is invalid the edit view is shown again with the submitted values.
/// </summary>
/// <param name="id">Id of the URL to edit.</param>
/// <param name="urlEntity">Submitted values.</param>
/// <returns>The edit view on validation failure; otherwise a redirect to "Index".</returns>
/// <exception cref="ArgumentNullException"><paramref name="urlEntity"/> is null.</exception>
public IActionResult Edit(int id, UrlEntity urlEntity)
{
    // A missing argument is a null-argument error, not an out-of-range one.
    if (urlEntity is null)
    {
        throw new ArgumentNullException(nameof(urlEntity));
    }

    if (!ModelState.IsValid)
    {
        return View(urlEntity);
    }

    _urlDisplayer.Edit(id, urlEntity);

    return RedirectToAction("Index");
}
/// <summary>
/// Creates the context menu for a link. The menu holds a single "copy URL" item that runs
/// <c>GlobalCommands.CopyToClipboardCommand</c> with the entity's expanded URL as parameter.
/// </summary>
/// <param name="entity">The link entity whose expanded URL is passed to the command.</param>
/// <returns>The created context menu.</returns>
private static ContextMenu CreateLinkContextMenu(UrlEntity entity)
{
    ContextMenu linkMenu = new ContextMenu();

    MenuItem copyItem = new MenuItem
    {
        Header = Strings.CopyUrl,
        Command = GlobalCommands.CopyToClipboardCommand,
        CommandParameter = entity.ExpandedUrl
    };
    linkMenu.Items.Add(copyItem);

    return linkMenu;
}
/// <summary>
/// Generates an inline from a link entity.
/// </summary>
/// <remarks>
/// The hyperlink shows the entity's display URL, opens the expanded URL through
/// <c>GlobalCommands.OpenUrlCommand</c>, uses the expanded URL as tooltip, takes its
/// foreground from the "LinkBrush" resource and gets the link context menu.
/// </remarks>
/// <param name="entity"> The entity to generate the inline from. </param>
/// <returns> The generated inline. </returns>
private static Inline GenerateLink(UrlEntity entity)
{
    var hyperlink = new Hyperlink();

    // Visible text.
    hyperlink.Inlines.Add(entity.DisplayUrl);

    // Navigation target; the parameter is assigned before the command, as before.
    hyperlink.CommandParameter = new Uri(entity.ExpandedUrl);
    hyperlink.Command = GlobalCommands.OpenUrlCommand;

    // Presentation.
    hyperlink.ToolTip = entity.ExpandedUrl;
    hyperlink.SetResourceReference(TextElement.ForegroundProperty, "LinkBrush");
    hyperlink.ContextMenu = CreateLinkContextMenu(entity);

    return hyperlink;
}
/// <summary>
/// Deletes the URL entity identified by version and identifier from the URL table
/// and writes the outcome to the console.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>: callers cannot await the delete or observe its failure.
/// </remarks>
/// <param name="version">First lookup argument passed to <c>retrieveUrl</c>.</param>
/// <param name="identifier">Second lookup argument passed to <c>retrieveUrl</c>.</param>
public async void deleteUrl(string version, string identifier)
{
    UrlEntity url = retrieveUrl(version, identifier);

    // Report a missing entity and stop; previously this message was also
    // printed after every successful delete.
    if (url == null)
    {
        Console.WriteLine("Url does not exist");
        return;
    }

    TableOperation deleteOperation = TableOperation.Delete(url);
    await _urltable.ExecuteAsync(deleteOperation);

    Console.WriteLine(url.RowKey + "\t" + url.PartitionKey + "\t" + url.fullPath + " has been deleted");
}
/// <summary>
/// Checks a delete token against a stored entity. The token is Base64 decoded, read as
/// UTF-8 JSON and deserialized to a <see cref="UrlEntity"/>; its validation string must
/// equal the validation string of <paramref name="dbItem"/>.
/// </summary>
/// <param name="deleteToken">Base64 token supplied by the caller.</param>
/// <param name="dbItem">Stored entity to compare against; may be null.</param>
/// <returns>
/// False when the token is null, empty, not valid Base64, not valid JSON, deserializes to
/// null, or carries a different validation string. True when the validation strings match,
/// and also when <paramref name="dbItem"/> is null and the token could be decoded.
/// </returns>
private static bool ValidateDeleteToken(string deleteToken, UrlEntity dbItem)
{
    if (string.IsNullOrEmpty(deleteToken))
    {
        return false;
    }

    UrlEntity item;
    try
    {
        var bToken = Convert.FromBase64String(deleteToken);
        item = JsonSerializer.Deserialize<UrlEntity>(System.Text.Encoding.UTF8.GetString(bToken));
    }
    catch (FormatException)
    {
        // Not Base64: an untrusted, malformed token is simply invalid.
        return false;
    }
    catch (JsonException)
    {
        // Base64 decoded, but the payload is not JSON for a UrlEntity.
        return false;
    }

    // NOTE(review): a missing stored item is still treated as "valid", as before —
    // confirm that every caller checks for a missing item separately.
    if (dbItem == null)
    {
        return true;
    }

    // A JSON "null" payload deserializes to null and can never match.
    if (item == null)
    {
        return false;
    }

    return String.Equals(dbItem.validationString, item.validationString);
}
/// <summary>
/// Extracts anchor links from a downloaded page and stores every new link that
/// belongs to the page's own host. Any exception is logged and swallowed.
/// </summary>
/// <param name="obj">
/// A <c>Dictionary&lt;string, object&gt;</c> with the keys "Result" (page HTML), "Host",
/// "Url" (address of the page) and "Depth" (an int).
/// </param>
private void MatchWebUrl(object obj)
{
    try
    {
        Dictionary<string, object> dic = (Dictionary<string, object>)obj;
        string html = dic["Result"].ToString();
        string host = dic["Host"].ToString();
        string webUrl = dic["Url"].ToString();
        int depth = (int)dic["Depth"];

        // Named group "href" captures the link target.
        // NOTE(review): the class [\s^\s] only allows whitespace or '^' between "<a" and "href",
        // so anchors with other attributes before href do not match — confirm this is intended.
        string matchStr = "<a[\\s^\\s]+href=\"(?<href>\\S+)\"";
        MatchCollection collection = Regex.Matches(html, matchStr, RegexOptions.IgnoreCase | RegexOptions.Compiled);

        foreach (Match item in collection)
        {
            string href = item.Groups["href"].Value;

            // Invalid links: empty, bare fragment or script pseudo-links.
            // Ordinal, case-insensitive search avoids culture-dependent lower-casing.
            if (string.IsNullOrEmpty(href) || href == "#" || href.IndexOf("javascript", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                continue;
            }

            // Links without a domain: prefix scheme and host, inserting the separating
            // slash when the link does not start with one ("page.html" -> "http://host/page.html").
            if (!href.Contains("http") && !href.Contains("ftp"))
            {
                string separator = href.StartsWith("/", StringComparison.Ordinal) ? string.Empty : "/";
                href = "http://" + host + separator + href;
            }

            // External links are skipped.
            if (!href.Contains("http://" + host))
            {
                continue;
            }

            // Already stored.
            if (UrlBLL.IsExistUrl(href))
            {
                continue;
            }

            UrlEntity urlEnt = new UrlEntity();
            urlEnt.AddDate = DateTime.Now;
            urlEnt.Depth = depth;
            urlEnt.Host = host;
            urlEnt.Url = href;
            urlEnt.WebUrl = webUrl;
            UrlBLL.Insert(urlEnt);
        }
    }
    catch (Exception ex)
    {
        LogHelper.LogTrace(ex.Message);
    }
}
/// <summary>
/// Adds every URL of the dictionary that is not yet among the repository's URLs.
/// </summary>
/// <param name="urls">Map from URL to the iteration id stored with it.</param>
/// <exception cref="ArgumentNullException"><paramref name="urls"/> is null.</exception>
public void AddUrlDictionary(Dictionary<string, int> urls)
{
    if (urls == null)
    {
        throw new ArgumentNullException(nameof(urls));
    }

    // Read the stored URLs once into a set so that each lookup is a hash probe
    // instead of a scan of all stored URLs per dictionary entry.
    var knownUrls = new HashSet<string>(urlRepository.GetAllUrls().Select(x => x.URL));

    foreach (var url in urls)
    {
        if (knownUrls.Contains(url.Key))
        {
            continue;
        }

        var urlEntity = new UrlEntity() { URL = url.Key, IterationId = url.Value };

        // AddUrl performs its own existence check before storing.
        AddUrl(urlEntity);
    }
}
/// <summary>
/// Creates a URL entity for the given full URL under a newly allocated identifier.
/// </summary>
/// <param name="fullUrl">Value stored in the entity's <c>fullPath</c> member.</param>
/// <returns>
/// The Base62 encoded identifier of the new entity, or "_failed" when no index
/// could be obtained (the index call returned -1).
/// </returns>
public async Task<string> CreateUrl(string fullUrl)
{
    long nextIndex = await getIndexAndIncrement();
    if (nextIndex == -1)
    {
        return "_failed";
    }

    string shortId = Base62Encode.Encode(nextIndex);

    var entity = new UrlEntity(_version, shortId) { fullPath = fullUrl };
    await _urltable.ExecuteAsync(TableOperation.Insert(entity));

    return shortId;
}
/// <summary>
/// Creates the model for a URL entity. The expanded URL is used when the entity has one;
/// otherwise the entity's plain URL takes its place.
/// </summary>
/// <param name="entity">Entity to wrap.</param>
public UrlEntityModel(UrlEntity entity) : base(entity)
{
    // Address used for display and as expanded URL: the expanded one, else the plain one.
    var effectiveUrl = entity.ExpandedUrl ?? entity.Url;

    // Host with every occurrence of "www" removed, shown in brackets as the truncated form.
    DisplayUrl = effectiveUrl.Host.Replace("www", "");
    TruncatedUrl = "[" + DisplayUrl + "]";
    ExpandedUrl = effectiveUrl;
    Url = entity.Url;
}
/// <summary>
/// Appends a hyperlink for the URL entity to the text block. The link shows the entity's
/// display URL and executes the command with the entity's URL when navigation is requested.
/// </summary>
/// <param name="textBlock">Text block that receives the hyperlink.</param>
/// <param name="url">Entity providing the target and display URL.</param>
/// <param name="command">Command executed with the URL on navigation.</param>
private static void ReplaceUrl(TextBlock textBlock, UrlEntity url, ICommand command)
{
    var link = new Hyperlink
    {
        NavigateUri = new Uri(url.Url),
        Foreground = new SolidColorBrush(Colors.DodgerBlue)
    };

    // Route navigation requests to the supplied command.
    link.RequestNavigate += (source, args) => command.Execute(url.Url);

    var label = new Run { Text = url.DisplayUrl };
    link.Inlines.Add(label);

    textBlock.Inlines.Add(link);
}
/// <summary>
/// Downloads a page, records it in the URL table and queues the links found on it.
/// Pages without a title are ignored entirely.
/// </summary>
/// <param name="url">Address of the page to download and parse.</param>
private void ParseUrl(string url)
{
    // If the page yields no title, ignore the page.
    string pageContent = DownloadText(url);
    string title = searcher.GetTitle(pageContent);
    if (string.IsNullOrEmpty(title))
    {
        return;
    }

    DateTime date = searcher.GetPubDate(pageContent);
    connection.AddToUrlTable(UrlEntity.CreateEntitiesFromString(title, url, date));

    foreach (Match match in searcher.GetAllLinks(pageContent))
    {
        string linkToAdd = match.Groups[1].Value;

        if (linkToAdd.StartsWith("//", StringComparison.Ordinal))
        {
            // Protocol-relative link: assume http.
            linkToAdd = "http:" + linkToAdd;
        }

        if (linkToAdd.StartsWith("/", StringComparison.Ordinal))
        {
            // Relative link: add the root of the domain the current page belongs to.
            if (url.Contains("cnn.com"))
            {
                linkToAdd = "http://cnn.com" + linkToAdd;
            }
            else if (url.Contains("bleacherreport.com"))
            {
                // The domain was previously misspelled here, so relative links
                // on bleacherreport pages were always dropped.
                linkToAdd = "http://bleacherreport.com" + linkToAdd;
            }
            else
            {
                continue;
            }
        }

        if (!visitedUrls.Contains(linkToAdd))
        {
            AddToUrlQueue(linkToAdd);
        }
    }
}
/// <summary>
/// Extracts the distinct "/wiki/" links of an HTML document.
/// </summary>
/// <remarks>
/// An anchor is kept when its href starts with "/wiki/" and its parent node either has
/// the class "mw-redirect" or has no attributes at all.
/// </remarks>
/// <param name="rawDocument">HTML text of the page.</param>
/// <param name="iteration">Iteration id stored on every created entity.</param>
/// <returns>The entities for the distinct links found, or null when there are none.</returns>
public IEnumerable<UrlEntity> ExtractAllUrlsFromPage(string rawDocument, int iteration)
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(rawDocument);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    IEnumerable<HtmlNode> allUrls = document.DocumentNode.SelectNodes("//a[@href]");
    if (allUrls == null)
    {
        return null;
    }

    List<UrlEntity> wikiUrls = new List<UrlEntity>();

    // Hrefs already emitted, so each link is reported once without rescanning the list.
    HashSet<string> seenHrefs = new HashSet<string>();

    foreach (HtmlNode anchor in allUrls)
    {
        string href = anchor.Attributes["href"].Value;
        if (!href.StartsWith("/wiki/"))
        {
            continue;
        }

        var parentClass = anchor.ParentNode.Attributes["class"];
        bool parentAccepted = parentClass != null
            ? parentClass.Value.Equals("mw-redirect")
            : !anchor.ParentNode.Attributes.Any();
        if (!parentAccepted)
        {
            continue;
        }

        if (seenHrefs.Add(href))
        {
            wikiUrls.Add(new UrlEntity() { URL = href, IterationId = iteration });
        }
    }

    // Callers receive null, not an empty sequence, when no link qualifies.
    return wikiUrls.Count > 0 ? wikiUrls : null;
}
/// <summary>
/// Finds the occurrences of the given URLs in a text and yields a URL entity model for each match.
/// </summary>
/// <remarks>
/// URLs with the same absolute URI are searched only once (the first of each group is used).
/// The search pattern is <c>Const.URLPATTERN</c> formatted with the regex-escaped absolute URI.
/// Each entity's indices hold the match position followed by 0.
/// </remarks>
/// <param name="text">Text to search.</param>
/// <param name="urls">URL entities to look for.</param>
/// <returns>One model per match, lazily produced.</returns>
public static IEnumerable<IEntity> ParseUrls(string text, IList<IUrlEntity> urls)
{
    var distinctUrls = urls
        .GroupBy(candidate => candidate.Url.AbsoluteUri)
        .Select(sameUri => sameUri.First());

    foreach (var source in distinctUrls)
    {
        string pattern = string.Format(Const.URLPATTERN, Regex.Escape(source.Url.AbsoluteUri));

        foreach (Match hit in Regex.Matches(text, pattern))
        {
            var found = new UrlEntity
            {
                DisplayUrl = source.DisplayUrl,
                ExpandedUrl = source.ExpandedUrl,
                Url = source.Url,
                Indices = new int[] { hit.Index, 0 }
            };

            yield return new UrlEntityModel(found);
        }
    }
}
/// <summary>
/// Verifies that processing the search JSON fills the URL entities of the first status:
/// exactly one URL with the expected short, expanded and display forms and positions.
/// </summary>
public void ProcessResults_Populates_Entity_Urls()
{
    // Arrange
    var processor = new SearchRequestProcessor<Search>
    {
        BaseUrl = "https://api.twitter.com/1.1/search/"
    };

    // Act
    List<Search> searches = processor.ProcessResults(SearchJson);

    // Assert
    List<UrlEntity> urlEntities = searches.First().Statuses.First().Entities.UrlEntities;
    Assert.IsNotNull(urlEntities);
    Assert.AreEqual(1, urlEntities.Count);

    UrlEntity onlyUrl = urlEntities.First();
    Assert.AreEqual("http://t.co/Cc85Yzpj", onlyUrl.Url);
    Assert.AreEqual("http://bit.ly/PSOVso", onlyUrl.ExpandedUrl);
    Assert.AreEqual("bit.ly/PSOVso", onlyUrl.DisplayUrl);
    Assert.AreEqual(68, onlyUrl.Start);
    Assert.AreEqual(88, onlyUrl.End);
}
/// <summary>
/// Creates a short URL for the posted long URL.
/// </summary>
/// <remarks>
/// The numeric id is the server prefix followed by the current counter value, parsed as a long.
/// The mapping is stored through the repository and the short form is the id converted by
/// <c>DataConvertor.ToBaseN</c>.
/// </remarks>
/// <param name="longUrl">Long URL read from the request body.</param>
/// <returns>An entity holding the long URL and its short form.</returns>
public ActionResult<UrlEntity> Post([FromBody] string longUrl)
{
    var counter = Global.GetCounter();
    long numericId = long.Parse(Global.Prefix + "" + counter);
    var encodedId = DataConvertor.ToBaseN(numericId);

    // Persist the mapping; the stored document itself is not needed afterwards.
    _tinyUrlRepository.PostLongUrl(new MongoUrlEntity()
    {
        LongUrl = longUrl,
        ShortUrlId = new BsonInt64(numericId),
        ServerPrefix = Global.Prefix
    });

    return new UrlEntity()
    {
        LongUrl = longUrl,
        ShortUrl = encodedId
    };
}
/// <summary>
/// Matches the links of a web page and stores every new link that belongs to the page's own host.
/// </summary>
/// <param name="html">HTML text of the page.</param>
/// <param name="host">Host the page was loaded from; links to other hosts are skipped.</param>
/// <param name="webUrl">Address of the page, stored with every found link.</param>
/// <param name="depth">Depth value stored with every found link.</param>
private void MatchWebUrl(string html, string host, string webUrl, int depth)
{
    // Named group "href" captures the link target.
    // NOTE(review): the class [\s^\s] only allows whitespace or '^' between "<a" and "href",
    // so anchors with other attributes before href do not match — confirm this is intended.
    string matchStr = "<a[\\s^\\s]+href=\"(?<href>\\S+)\"";
    MatchCollection collection = Regex.Matches(html, matchStr, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    foreach (Match item in collection)
    {
        string href = item.Groups["href"].Value;

        // Invalid links: empty, bare fragment or script pseudo-links.
        // Ordinal, case-insensitive search avoids culture-dependent lower-casing.
        if (string.IsNullOrEmpty(href) || href == "#" || href.IndexOf("javascript", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            continue;
        }

        // Links without a domain: prefix scheme and host, inserting the separating
        // slash when the link does not start with one ("page.html" -> "http://host/page.html").
        if (!href.Contains("http") && !href.Contains("ftp"))
        {
            string separator = href.StartsWith("/", StringComparison.Ordinal) ? string.Empty : "/";
            href = "http://" + host + separator + href;
        }

        // External links are skipped.
        if (!href.Contains("http://" + host))
        {
            continue;
        }

        // Already stored.
        if (UrlBLL.IsExistUrl(href))
        {
            continue;
        }

        UrlEntity urlEnt = new UrlEntity();
        urlEnt.AddDate = DateTime.Now;
        urlEnt.Depth = depth;
        urlEnt.Host = host;
        urlEnt.Url = href;
        urlEnt.WebUrl = webUrl;
        UrlBLL.Insert(urlEnt);
    }
}
/// <summary>
/// Stores a new link for the model's long URL and fills in the model's delete token and short path.
/// </summary>
/// <param name="model">Model carrying the long URL; it is updated and returned.</param>
/// <returns>
/// The same model with <c>deleteToken</c> and <c>ShortPath</c> set, or null when the
/// long URL is not an absolute URI.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
public UrlModel Create(UrlModel model)
{
    if (model == null)
    {
        throw new ArgumentNullException(nameof(model));
    }

    if (!Uri.TryCreate(model.LongUrl, UriKind.Absolute, out _))
    {
        return null;
    }

    // The validation string ends up in the delete token, so it must not be predictable:
    // draw it from the cryptographic generator instead of System.Random.
    // The value keeps its previous shape, a non-negative 32-bit integer as text.
    byte[] randomBytes = new byte[sizeof(int)];
    using (var generator = System.Security.Cryptography.RandomNumberGenerator.Create())
    {
        generator.GetBytes(randomBytes);
    }
    int validationValue = BitConverter.ToInt32(randomBytes, 0) & int.MaxValue;

    var item = new UrlEntity
    {
        LongUrl = model.LongUrl,
        validationString = validationValue.ToString(),
        CreationDate = DateTime.Now
    };

    _dbContext.Links.Add(item);
    _dbContext.SaveChanges();

    // Add the "token" for deleting the link to the returned model.
    model.deleteToken = CreateDeleteToken(item);

    // The short path is derived from the id read back after saving.
    model.ShortPath = WebEncoders.Base64UrlEncode(BitConverter.GetBytes(item.Id));

    return model;
}
/// <summary>
/// Returns the shortened form of a URL, creating and storing one when the URL is not known yet.
/// </summary>
/// <param name="originalUrl">URL to shorten; must be accepted by the <see cref="Uri"/> constructor.</param>
/// <returns>The stored short URL when one exists; otherwise the newly generated one.</returns>
/// <exception cref="InvalidDataException">The URL could not be parsed; the parse failure is the inner exception.</exception>
public string Shorten(string originalUrl)
{
    // Validation only: the parsed value itself is not used.
    try
    {
        Uri validated = new Uri(originalUrl);
    }
    catch (Exception parseFailure)
    {
        throw new InvalidDataException("Invalid URL", parseFailure); //TODO:
    }

    UrlEntity existing = FindUrl(originalUrl);
    if (existing == null)
    {
        string created = GenerateShortenedUrl(originalUrl);
        SaveToStore(originalUrl, created, DateTime.Now);
        return created;
    }

    return existing.ShortUrl;
}
/// <summary>
/// Verifies that a single direct-message "show" response is parsed completely:
/// event header, message-create data, text, all entity kinds and the app details.
/// </summary>
public void ProcessResults_ForShow_HandlesSingleResult()
{
    // Arrange
    var processor = new DirectMessageEventsRequestProcessor<DirectMessageEvents>
    {
        BaseUrl = "https://api.twitter.com/1.1/",
        Type = DirectMessageEventsType.Show
    };

    // Act
    List<DirectMessageEvents> parsed = processor.ProcessResults(TestQuerySingleResponse);

    // Assert: event header
    Assert.IsNotNull(parsed?.SingleOrDefault());
    DirectMessageEvents firstResult = parsed.First();
    Assert.IsNotNull(firstResult);
    DirectMessageEventsValue resultValue = firstResult.Value;
    Assert.IsNotNull(resultValue);
    DMEvent dmEvent = resultValue.DMEvent;
    Assert.IsNotNull(dmEvent);
    Assert.AreEqual("message_create", dmEvent.Type);
    Assert.AreEqual("917929712638246916", dmEvent.ID);
    Assert.AreEqual("1507686472459", dmEvent.CreatedTimestamp);
    Assert.AreEqual(new DateTime(2017, 10, 11, 01, 47, 52, 459), dmEvent.CreatedAt);

    // Assert: message-create data
    DirectMessageCreate messageCreate = dmEvent.MessageCreate;
    Assert.IsNotNull(messageCreate);
    Assert.AreEqual("15411837", messageCreate.SenderID);
    Assert.AreEqual("472356", messageCreate.SourceAppID);
    DirectMessageTarget target = messageCreate.Target;
    Assert.IsNotNull(target);
    Assert.AreEqual("16761255", target.RecipientID);
    DirectMessageData messageData = messageCreate.MessageData;
    Assert.IsNotNull(messageData);
    Assert.AreEqual(
        "DM from @JoeMayo to @Linq2Twitr of $MSFT & $TSLA with #TwitterAPI #chatbot at https://t.co/ikGq2AnAir and https://t.co/nLvwwjz40q on 10/10/2017 6:47:46 PM!'",
        messageData.Text);
    Entities entities = messageData.Entities;
    Assert.IsNotNull(entities);

    // Assert: hashtags
    List<HashTagEntity> hashTags = entities.HashTagEntities;
    Assert.IsNotNull(hashTags);
    Assert.AreEqual(2, hashTags.Count);

    HashTagEntity firstTag = hashTags[0];
    Assert.IsNotNull(firstTag);
    Assert.AreEqual("TwitterAPI", firstTag.Text);
    int[] firstTagIndices = firstTag.Indices;
    Assert.IsNotNull(firstTagIndices);
    Assert.AreEqual(58, firstTagIndices[0]);
    Assert.AreEqual(69, firstTagIndices[1]);

    HashTagEntity secondTag = hashTags[1];
    Assert.IsNotNull(secondTag);
    Assert.AreEqual("chatbot", secondTag.Text);
    int[] secondTagIndices = secondTag.Indices;
    Assert.IsNotNull(secondTagIndices);
    Assert.AreEqual(70, secondTagIndices[0]);
    Assert.AreEqual(78, secondTagIndices[1]);

    // Assert: symbols
    List<SymbolEntity> symbols = entities.SymbolEntities;
    Assert.IsNotNull(symbols);
    Assert.AreEqual(2, symbols.Count);

    SymbolEntity firstSymbol = symbols[0];
    Assert.IsNotNull(firstSymbol);
    Assert.AreEqual("MSFT", firstSymbol.Text);
    int[] firstSymbolIndices = firstSymbol.Indices;
    Assert.IsNotNull(firstSymbolIndices);
    Assert.AreEqual(35, firstSymbolIndices[0]);
    Assert.AreEqual(40, firstSymbolIndices[1]);

    SymbolEntity secondSymbol = symbols[1];
    Assert.IsNotNull(secondSymbol);
    Assert.AreEqual("TSLA", secondSymbol.Text);
    int[] secondSymbolIndices = secondSymbol.Indices;
    Assert.IsNotNull(secondSymbolIndices);
    Assert.AreEqual(47, secondSymbolIndices[0]);
    Assert.AreEqual(52, secondSymbolIndices[1]);

    // Assert: user mentions
    List<UserMentionEntity> mentions = entities.UserMentionEntities;
    Assert.IsNotNull(mentions);
    Assert.AreEqual(2, mentions.Count);

    UserMentionEntity firstMention = mentions[0];
    Assert.IsNotNull(firstMention);
    Assert.AreEqual("JoeMayo", firstMention.ScreenName);
    Assert.AreEqual("Joe Mayo", firstMention.Name);
    Assert.AreEqual(15411837ul, firstMention.Id);
    Assert.AreEqual("15411837", firstMention.IdStr);
    int[] firstMentionIndices = firstMention.Indices;
    Assert.IsNotNull(firstMentionIndices);
    Assert.AreEqual(2, firstMentionIndices.Length);
    Assert.AreEqual(8, firstMentionIndices[0]);
    Assert.AreEqual(16, firstMentionIndices[1]);

    UserMentionEntity secondMention = mentions[1];
    Assert.IsNotNull(secondMention);
    Assert.AreEqual("Linq2Twitr", secondMention.ScreenName);
    Assert.AreEqual("LINQ to Twitr", secondMention.Name);
    Assert.AreEqual(16761255ul, secondMention.Id);
    Assert.AreEqual("16761255", secondMention.IdStr);
    int[] secondMentionIndices = secondMention.Indices;
    Assert.IsNotNull(secondMentionIndices);
    Assert.AreEqual(2, secondMentionIndices.Length);
    Assert.AreEqual(20, secondMentionIndices[0]);
    Assert.AreEqual(31, secondMentionIndices[1]);

    // Assert: URLs
    List<UrlEntity> urlEntities = entities.UrlEntities;
    Assert.IsNotNull(urlEntities);
    Assert.AreEqual(2, urlEntities.Count);

    UrlEntity firstUrl = urlEntities[0];
    Assert.IsNotNull(firstUrl);
    Assert.AreEqual("https://t.co/ikGq2AnAir", firstUrl.Url);
    Assert.AreEqual("http://bit.ly/2xSJWJk", firstUrl.ExpandedUrl);
    Assert.AreEqual("bit.ly/2xSJWJk", firstUrl.DisplayUrl);
    int[] firstUrlIndices = firstUrl.Indices;
    Assert.IsNotNull(firstUrlIndices);
    Assert.AreEqual(2, firstUrlIndices.Length);
    Assert.AreEqual(82, firstUrlIndices[0]);
    Assert.AreEqual(105, firstUrlIndices[1]);

    UrlEntity secondUrl = urlEntities[1];
    Assert.IsNotNull(secondUrl);
    Assert.AreEqual("https://t.co/nLvwwjz40q", secondUrl.Url);
    Assert.AreEqual("http://amzn.to/2gD09X6", secondUrl.ExpandedUrl);
    Assert.AreEqual("amzn.to/2gD09X6", secondUrl.DisplayUrl);
    int[] secondUrlIndices = secondUrl.Indices;
    Assert.IsNotNull(secondUrlIndices);
    Assert.AreEqual(2, secondUrlIndices.Length);
    Assert.AreEqual(110, secondUrlIndices[0]);
    Assert.AreEqual(133, secondUrlIndices[1]);

    // Assert: app details, looked up by the source app id
    JsonElement apps = resultValue.Apps;
    JsonElement appDetail = apps.GetProperty("472356");
    Assert.AreEqual("472356", appDetail.GetString("id"));
    Assert.AreEqual("LINQ to Tweeter", appDetail.GetString("name"));
    Assert.AreEqual("https://github.com/JoeMayo/LinqToTwitter", appDetail.GetString("url"));
}
/// <summary>
/// Verifies that creating a direct-message event for a recipient id returns the fully
/// parsed event: header, message-create data, text and all entity kinds.
/// </summary>
public async Task NewDirectMessageEventsAsync_WithRecipientID_ReturnsCreatedDM()
{
    // Arrange
    const ulong RecipientID = 1;
    const string Text = "Hi";
    var ctx = InitializeTwitterContext();

    // Act
    DirectMessageEvents created = await ctx.NewDirectMessageEventAsync(RecipientID, Text);

    // Assert: event header
    Assert.IsNotNull(created);
    DirectMessageEventsValue createdValue = created.Value;
    Assert.IsNotNull(createdValue);
    DMEvent dmEvent = createdValue.DMEvent;
    Assert.IsNotNull(dmEvent);
    Assert.AreEqual("message_create", dmEvent.Type);
    Assert.AreEqual("917929712638246916", dmEvent.ID);
    Assert.AreEqual("1507686472459", dmEvent.CreatedTimestamp);
    Assert.AreEqual(new DateTime(2017, 10, 11, 01, 47, 52, 459), dmEvent.CreatedAt);

    // Assert: message-create data
    DirectMessageCreate messageCreate = dmEvent.MessageCreate;
    Assert.IsNotNull(messageCreate);
    Assert.AreEqual("15411837", messageCreate.SenderID);
    DirectMessageTarget target = messageCreate.Target;
    Assert.IsNotNull(target);
    Assert.AreEqual("16761255", target.RecipientID);
    DirectMessageData messageData = messageCreate.MessageData;
    Assert.IsNotNull(messageData);
    Assert.AreEqual(
        "DM from @JoeMayo to @Linq2Twitr of $MSFT & $TSLA with #TwitterAPI #chatbot at https://t.co/ikGq2AnAir and https://t.co/nLvwwjz40q on 10/10/2017 6:47:46 PM!'",
        messageData.Text);
    Entities entities = messageData.Entities;
    Assert.IsNotNull(entities);

    // Assert: hashtags
    List<HashTagEntity> hashTags = entities.HashTagEntities;
    Assert.IsNotNull(hashTags);
    Assert.AreEqual(2, hashTags.Count);

    HashTagEntity firstTag = hashTags[0];
    Assert.IsNotNull(firstTag);
    Assert.AreEqual("TwitterAPI", firstTag.Text);
    int[] firstTagIndices = firstTag.Indices;
    Assert.IsNotNull(firstTagIndices);
    Assert.AreEqual(58, firstTagIndices[0]);
    Assert.AreEqual(69, firstTagIndices[1]);

    HashTagEntity secondTag = hashTags[1];
    Assert.IsNotNull(secondTag);
    Assert.AreEqual("chatbot", secondTag.Text);
    int[] secondTagIndices = secondTag.Indices;
    Assert.IsNotNull(secondTagIndices);
    Assert.AreEqual(70, secondTagIndices[0]);
    Assert.AreEqual(78, secondTagIndices[1]);

    // Assert: symbols
    List<SymbolEntity> symbols = entities.SymbolEntities;
    Assert.IsNotNull(symbols);
    Assert.AreEqual(2, symbols.Count);

    SymbolEntity firstSymbol = symbols[0];
    Assert.IsNotNull(firstSymbol);
    Assert.AreEqual("MSFT", firstSymbol.Text);
    int[] firstSymbolIndices = firstSymbol.Indices;
    Assert.IsNotNull(firstSymbolIndices);
    Assert.AreEqual(35, firstSymbolIndices[0]);
    Assert.AreEqual(40, firstSymbolIndices[1]);

    SymbolEntity secondSymbol = symbols[1];
    Assert.IsNotNull(secondSymbol);
    Assert.AreEqual("TSLA", secondSymbol.Text);
    int[] secondSymbolIndices = secondSymbol.Indices;
    Assert.IsNotNull(secondSymbolIndices);
    Assert.AreEqual(47, secondSymbolIndices[0]);
    Assert.AreEqual(52, secondSymbolIndices[1]);

    // Assert: user mentions
    List<UserMentionEntity> mentions = entities.UserMentionEntities;
    Assert.IsNotNull(mentions);
    Assert.AreEqual(2, mentions.Count);

    UserMentionEntity firstMention = mentions[0];
    Assert.IsNotNull(firstMention);
    Assert.AreEqual("JoeMayo", firstMention.ScreenName);
    Assert.AreEqual("Joe Mayo", firstMention.Name);
    Assert.AreEqual(15411837ul, firstMention.Id);
    Assert.AreEqual("15411837", firstMention.IdStr);
    int[] firstMentionIndices = firstMention.Indices;
    Assert.IsNotNull(firstMentionIndices);
    Assert.AreEqual(2, firstMentionIndices.Length);
    Assert.AreEqual(8, firstMentionIndices[0]);
    Assert.AreEqual(16, firstMentionIndices[1]);

    UserMentionEntity secondMention = mentions[1];
    Assert.IsNotNull(secondMention);
    Assert.AreEqual("Linq2Twitr", secondMention.ScreenName);
    Assert.AreEqual("LINQ to Twitr", secondMention.Name);
    Assert.AreEqual(16761255ul, secondMention.Id);
    Assert.AreEqual("16761255", secondMention.IdStr);
    int[] secondMentionIndices = secondMention.Indices;
    Assert.IsNotNull(secondMentionIndices);
    Assert.AreEqual(2, secondMentionIndices.Length);
    Assert.AreEqual(20, secondMentionIndices[0]);
    Assert.AreEqual(31, secondMentionIndices[1]);

    // Assert: URLs
    List<UrlEntity> urlEntities = entities.UrlEntities;
    Assert.IsNotNull(urlEntities);
    Assert.AreEqual(2, urlEntities.Count);

    UrlEntity firstUrl = urlEntities[0];
    Assert.IsNotNull(firstUrl);
    Assert.AreEqual("https://t.co/ikGq2AnAir", firstUrl.Url);
    Assert.AreEqual("http://bit.ly/2xSJWJk", firstUrl.ExpandedUrl);
    Assert.AreEqual("bit.ly/2xSJWJk", firstUrl.DisplayUrl);
    int[] firstUrlIndices = firstUrl.Indices;
    Assert.IsNotNull(firstUrlIndices);
    Assert.AreEqual(2, firstUrlIndices.Length);
    Assert.AreEqual(82, firstUrlIndices[0]);
    Assert.AreEqual(105, firstUrlIndices[1]);

    UrlEntity secondUrl = urlEntities[1];
    Assert.IsNotNull(secondUrl);
    Assert.AreEqual("https://t.co/nLvwwjz40q", secondUrl.Url);
    Assert.AreEqual("http://amzn.to/2gD09X6", secondUrl.ExpandedUrl);
    Assert.AreEqual("amzn.to/2gD09X6", secondUrl.DisplayUrl);
    int[] secondUrlIndices = secondUrl.Indices;
    Assert.IsNotNull(secondUrlIndices);
    Assert.AreEqual(2, secondUrlIndices.Length);
    Assert.AreEqual(110, secondUrlIndices[0]);
    Assert.AreEqual(133, secondUrlIndices[1]);
}
/// <summary>
/// Worker loop. Every 50 ms it handles one pending admin command ("load", "start", "stop",
/// "clear") and, while crawling is started, crawls one URL from the crawl queue: the page
/// title is indexed word by word, same-site links are queued, and a statistics message is published.
/// </summary>
public override void Run()
{
    while (true)
    {
        Thread.Sleep(50);

        // --- admin commands ---
        CloudQueueMessage adminMsg = ic.adminQueue.GetMessage();
        if (adminMsg != null)
        {
            if (adminMsg.AsString == "load")
            {
                spider = new Crawler("bleacherreport.com", "cnn.com", ic.urlQueue);
                ic.statusQueue.AddMessage(new CloudQueueMessage("Loading complete"));
            }
            else if (adminMsg.AsString == "start")
            {
                hasStarted = true;
            }
            else if (adminMsg.AsString == "stop")
            {
                hasStarted = false;
            }
            else if (adminMsg.AsString == "clear")
            {
                hasStarted = false;
                totalUrlsCrawled = 0;
                index = 0;
            }

            ic.adminQueue.DeleteMessage(adminMsg);
        }

        // --- crawling ---
        // The crawler is only created by "load"; a "start" that arrives first must not
        // dereference a missing crawler and take the whole loop down.
        if (!hasStarted || spider == null)
        {
            continue;
        }

        CloudQueueMessage urlMsg = spider.toBeCrawled.GetMessage();
        if (urlMsg == null)
        {
            continue;
        }

        string url = urlMsg.AsString;
        try
        {
            totalUrlsCrawled++;
            HtmlDocument doc = new HtmlWeb().Load(url);

            // Counted only after the page could be loaded.
            index++;

            string title = doc.DocumentNode.SelectSingleNode("//title").InnerText;
            HtmlNode pubNode = doc.DocumentNode.SelectSingleNode("//meta[@name='pubdate']");
            HtmlNode lastmodNode = doc.DocumentNode.SelectSingleNode("//meta[@name='lastmod']");

            // Article date: pubdate meta tag, else lastmod meta tag, else now.
            DateTime articleDate;
            if (pubNode != null)
            {
                articleDate = DateTime.Parse(pubNode.Attributes["content"].Value);
            }
            else if (lastmodNode != null)
            {
                articleDate = DateTime.Parse(lastmodNode.Attributes["content"].Value);
            }
            else
            {
                articleDate = DateTime.Now;
            }

            // One table row per space-separated word of the title.
            string[] pageTitleTokens = title.Trim().Split(' ');
            foreach (string token in pageTitleTokens)
            {
                UrlEntity urlEntity = new UrlEntity(title, token, url, articleDate);
                TableOperation insertOperation = TableOperation.Insert(urlEntity);
                ic.titlesTable.Execute(insertOperation);
            }

            string[] roots = new string[2] { "cnn.com", "bleacherreport.com/articles" };
            List<string> filtered = new List<string>();

            // Filter the urls: only links of the root domain of the current page are wanted.
            string root = url.Contains(roots[0]) ? roots[0] : roots[1];

            // SelectNodes returns null when the page has no anchors at all.
            var links = doc.DocumentNode.SelectNodes("//a[@href]");
            if (links != null)
            {
                foreach (HtmlNode link in links)
                {
                    string rawUrl = link.Attributes["href"].Value;
                    if (rawUrl.StartsWith("/") && rawUrl != "/users/undefined")
                    {
                        filtered.Add("http://" + root + rawUrl);
                    }
                    else if (rawUrl.Contains(root))
                    {
                        // Links that already carry a scheme are kept unchanged; previously an
                        // https link was turned into "http://https://...".
                        if (rawUrl.StartsWith("http://") || rawUrl.StartsWith("https://"))
                        {
                            filtered.Add(rawUrl);
                        }
                        else
                        {
                            filtered.Add("http://" + rawUrl);
                        }
                    }
                }
            }

            // Filter pt. 2: not disallowed and not already marked.
            foreach (string filteredUrl in filtered)
            {
                if (!spider.marked.Contains(filteredUrl) && !spider.disallowed.Contains(filteredUrl))
                {
                    spider.marked.Add(filteredUrl);
                    spider.toBeCrawled.AddMessage(new CloudQueueMessage(filteredUrl));
                }
            }
        }
        catch (Exception e)
        {
            // A failing page is reported and then dropped like a crawled one.
            ic.errorQueue.AddMessage(new CloudQueueMessage(url + "|" + e.Message));
        }

        // --- statistics: cpu, memory, crawled count, queue size, index ---
        float cpuUsage = cpuPerformance.NextValue();
        float memUsage = memPerformance.NextValue();
        ic.urlQueue.FetchAttributes();
        ClearQueues();
        string statsMessage = cpuUsage + "," + memUsage + "," + totalUrlsCrawled + "," + ic.urlQueue.ApproximateMessageCount + "," + index;
        ic.statsQueue.AddMessage(new CloudQueueMessage(statsMessage));

        spider.toBeCrawled.DeleteMessage(urlMsg);
    }
}
/// <summary>
/// Creates the instance and keeps the given URL entity.
/// </summary>
/// <param name="url">URL entity stored in the <c>_url</c> member.</param>
public UrlComplete(UrlEntity url)
{
    _url = url;
}