/// <summary>
/// Downloads the document at <paramref name="Text"/> and builds one <see cref="ExternalLink"/>
/// for every anchor element (<c>&lt;a ...&gt;...&lt;/a&gt;</c>) found in it.
/// </summary>
/// <param name="EnableStopWord">
/// When true, an anchor is skipped if its lower-cased markup equals the <c>SWord</c> of any
/// entry returned by <c>StopWord.ListStopWord()</c>.
/// </param>
/// <param name="Text">
/// Address to download. Nothing is downloaded unless <c>Ultilities.UrlCheck</c> returns true for it.
/// </param>
/// <returns>
/// The collected links ordered by <c>Name</c>. The sequence is empty when the URL check fails,
/// and contains only what was collected so far when downloading or parsing throws
/// (the exception is written to the console, not rethrown).
/// </returns>
public IEnumerable<ExternalLink> GetExternalLinkList(bool EnableStopWord, string Text)
{
    List<ExternalLink> ExternalLinkList = new List<ExternalLink>();

    if (Ultilities.UrlCheck(Text))
    {
        try
        {
            string HTMLDoc;

            // WebClient is IDisposable; release it as soon as the page has been fetched.
            using (WebClient webClient = new WebClient())
            {
                HTMLDoc = webClient.DownloadString(Text);
            }

            // Fetch the stop-word list once instead of once per anchor.
            var stopWords = EnableStopWord ? StopWord.ListStopWord() : null;

            // \b keeps the pattern from starting a match on <abbr>, <article>, <aside>, ...
            MatchCollection MatchedString = Regex.Matches(HTMLDoc, @"(<a\b.*?>.*?</a>)", RegexOptions.Singleline);

            foreach (Match MatchedItem in MatchedString)
            {
                string Word = MatchedItem.Groups[1].Value;

                if (EnableStopWord)
                {
                    // NOTE(review): this compares the whole anchor markup (tags included) with each
                    // stop word, so it can only match a stop word that is itself anchor markup.
                    // Possibly the visible link text was meant - confirm before changing.
                    string lowered = Word.ToLower();
                    if (stopWords.Any(x => x.SWord.Equals(lowered)))
                    {
                        continue;
                    }
                }

                ExternalLink ExternalLinkItem = new ExternalLink();

                // Link stays at its default when the anchor has no double-quoted href attribute.
                Match MatchHref = Regex.Match(Word, @"href=\""(.*?)\""", RegexOptions.Singleline);
                if (MatchHref.Success)
                {
                    ExternalLinkItem.Link = MatchHref.Groups[1].Value;
                }

                // Strip every tag (and the whitespace around it) to leave the visible link text.
                ExternalLinkItem.Name = Regex.Replace(Word, @"\s*<.*?>\s*", "", RegexOptions.Singleline);

                ExternalLinkList.Add(ExternalLinkItem);
            }
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and return whatever was collected.
            Console.Write(ex.ToString());
        }
    }

    return ExternalLinkList.OrderBy(x => x.Name);
}
/// <summary>
/// Produces word-occurrence entries for <paramref name="Text"/>, treating it either as an
/// address to download or as the text to analyse directly.
/// </summary>
/// <param name="EnableStopWord">Passed through unchanged to the <c>Ultilities</c> processing helper.</param>
/// <param name="Text">
/// When non-empty and accepted by <c>Ultilities.UrlCheck</c>, the document at this address is
/// downloaded and handed to <c>Ultilities.ProcessHTMLDoc</c>; otherwise the value itself is
/// handed to <c>Ultilities.ProcessString</c>.
/// </param>
/// <returns>
/// The entries ordered by <c>Count</c>, highest first. Empty when the download or HTML
/// processing throws (the exception is written to the console, not rethrown).
/// </returns>
public IEnumerable<WordOccurance> GetWordOccurance(bool EnableStopWord, string Text)
{
    List<WordOccurance> wo = new List<WordOccurance>();

    if (Text != string.Empty && Ultilities.UrlCheck(Text))
    {
        try
        {
            // WebClient is IDisposable; the using block releases it even if the download throws.
            using (WebClient w = new WebClient())
            {
                string s = w.DownloadString(Text);
                wo = Ultilities.ProcessHTMLDoc(s, EnableStopWord).ToList();
            }
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and fall through with the empty list.
            Console.Write(ex.ToString());
        }
    }
    else
    {
        wo = Ultilities.ProcessString(Text, EnableStopWord).ToList();
    }

    return wo.OrderByDescending(x => x.Count);
}
/// <summary>
/// Downloads the document at <paramref name="text"/> and builds one <see cref="MetaTag"/>
/// for every <c>&lt;meta ...&gt;</c> element found in it.
/// </summary>
/// <param name="EnableStopWord">
/// When true, a tag is skipped if its lower-cased markup equals the <c>SWord</c> of any
/// entry returned by <c>StopWord.ListStopWord()</c>.
/// </param>
/// <param name="text">
/// Address to download. Nothing is downloaded unless <c>Ultilities.UrlCheck</c> returns true for it.
/// </param>
/// <returns>
/// The meta tags in document order. A tag with no <c>name</c> attribute is still returned
/// (with <c>Name</c> left at its default); a tag whose <c>name</c> attribute is empty is omitted.
/// The list is empty when the URL check fails, and holds only what was collected so far when
/// downloading or parsing throws (the exception is written to the console, not rethrown).
/// </returns>
public IEnumerable<MetaTag> GetMetaTagList(bool EnableStopWord, string text)
{
    List<MetaTag> MetaTagList = new List<MetaTag>();

    if (Ultilities.UrlCheck(text))
    {
        try
        {
            string HtmlDoc;

            // WebClient is IDisposable; release it as soon as the page has been fetched.
            using (WebClient webClient = new WebClient())
            {
                HtmlDoc = webClient.DownloadString(text);
            }

            // Fetch the stop-word list once instead of once per tag.
            var stopWords = EnableStopWord ? StopWord.ListStopWord() : null;

            MatchCollection MatchedString = Regex.Matches(HtmlDoc, @"(<meta.*?>)", RegexOptions.Singleline);

            foreach (Match MatchItem in MatchedString)
            {
                string Word = MatchItem.Groups[1].Value;

                if (EnableStopWord)
                {
                    // NOTE(review): this compares the whole <meta ...> markup with each stop word,
                    // so it can only match a stop word that is itself tag markup - confirm intent.
                    string lowered = Word.ToLower();
                    if (stopWords.Any(x => x.SWord.Equals(lowered)))
                    {
                        continue;
                    }
                }

                MetaTag MetaTagItem = new MetaTag();

                Match MatchName = Regex.Match(Word, @"name=""(.*?)""", RegexOptions.Singleline);
                if (MatchName.Success)
                {
                    if (MatchName.Groups[1].Value == String.Empty)
                    {
                        // An empty name only disqualifies this tag; keep scanning the remaining
                        // tags instead of abandoning the rest of the document.
                        continue;
                    }

                    MetaTagItem.Name = MatchName.Groups[1].Value;
                }

                Match MatchContent = Regex.Match(Word, @"content=""(.*?)""", RegexOptions.Singleline);
                if (MatchContent.Success)
                {
                    MetaTagItem.Content = MatchContent.Groups[1].Value;
                }

                MetaTagList.Add(MetaTagItem);
            }
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and return whatever was collected.
            Console.Write(ex.ToString());
        }
    }

    return MetaTagList;
}