Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the page addressed by <paramref name="Text"/> and collects its
        /// anchor (<c>&lt;a ...&gt;...&lt;/a&gt;</c>) elements as <c>ExternalLink</c> items.
        /// </summary>
        /// <param name="EnableStopWord">
        /// When true, an anchor is skipped if its lower-cased markup equals the
        /// <c>SWord</c> of any entry returned by <c>StopWord.ListStopWord()</c>.
        /// </param>
        /// <param name="Text">
        /// Address of the page to scan. Nothing is downloaded unless
        /// <c>Ultilities.UrlCheck</c> accepts it.
        /// </param>
        /// <returns>
        /// The collected links ordered by <c>Name</c>. The sequence is empty when the
        /// URL check fails; if the download or parsing throws, the exception is written
        /// to the console and whatever was collected so far is returned.
        /// </returns>
        public IEnumerable <ExternalLink> GetExternalLinkList(bool EnableStopWord, string Text)
        {
            List <ExternalLink> links = new List <ExternalLink>();

            if (!Ultilities.UrlCheck(Text))
            {
                return(links.OrderBy(x => x.Name));
            }

            try
            {
                string html;

                // WebClient is IDisposable: release it as soon as the page is downloaded.
                using (WebClient webClient = new WebClient())
                {
                    html = webClient.DownloadString(Text);
                }

                // Fetch the stop-word list once per call instead of once per anchor.
                var stopWords = EnableStopWord ? StopWord.ListStopWord() : null;

                foreach (Match anchorMatch in Regex.Matches(html, @"(<a.*?>.*?</a>)", RegexOptions.Singleline))
                {
                    string anchor = anchorMatch.Groups[1].Value;

                    if (EnableStopWord)
                    {
                        // NOTE(review): this compares the complete <a>...</a> markup with each
                        // stop word, so it can only match if a stop word is itself an anchor
                        // element. Presumably the visible link text was meant - confirm
                        // before changing, existing behaviour is kept here.
                        // Invariant lower-casing keeps the comparison independent of the
                        // current thread culture.
                        string loweredAnchor = anchor.ToLowerInvariant();
                        if (stopWords.Any(x => x.SWord.Equals(loweredAnchor)))
                        {
                            continue;
                        }
                    }

                    ExternalLink link = new ExternalLink();

                    // Link is only assigned when the anchor carries a double-quoted href.
                    Match href = Regex.Match(anchor, @"href=\""(.*?)\""", RegexOptions.Singleline);
                    if (href.Success)
                    {
                        link.Link = href.Groups[1].Value;
                    }

                    // Name is what remains after removing every tag (and the whitespace
                    // directly around each tag) from the anchor markup.
                    link.Name = Regex.Replace(anchor, @"\s*<.*?>\s*", "", RegexOptions.Singleline);
                    links.Add(link);
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and return what has been gathered.
                Console.Write(ex.ToString());
            }

            return(links.OrderBy(x => x.Name));
        }
        /// <summary>
        /// Computes word occurrences either for a web page or for a literal string.
        /// </summary>
        /// <param name="EnableStopWord">Passed through unchanged to the <c>Ultilities</c> processing call.</param>
        /// <param name="Text">
        /// When non-empty and accepted by <c>Ultilities.UrlCheck</c>, the page at this
        /// address is downloaded and handed to <c>Ultilities.ProcessHTMLDoc</c>;
        /// otherwise the value itself is handed to <c>Ultilities.ProcessString</c>.
        /// </param>
        /// <returns>
        /// The occurrences ordered by <c>Count</c>, highest first. If the download or
        /// the HTML processing throws, the exception is written to the console and an
        /// empty sequence is returned.
        /// </returns>
        public IEnumerable <WordOccurance> GetWordOccurance(bool EnableStopWord, string Text)
        {
            List <WordOccurance> occurrences = new List <WordOccurance>();

            if (Text != string.Empty && Ultilities.UrlCheck(Text))
            {
                try
                {
                    string html;

                    // WebClient is IDisposable: release it as soon as the page is downloaded.
                    using (WebClient webClient = new WebClient())
                    {
                        html = webClient.DownloadString(Text);
                    }

                    occurrences = Ultilities.ProcessHTMLDoc(html, EnableStopWord).ToList();
                }
                catch (Exception ex)
                {
                    // Best effort: report the failure and fall through with an empty list.
                    Console.Write(ex.ToString());
                }
            }
            else
            {
                occurrences = Ultilities.ProcessString(Text, EnableStopWord).ToList();
            }

            return(occurrences.OrderByDescending(x => x.Count));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Downloads the page addressed by <paramref name="text"/> and collects its
        /// <c>&lt;meta ...&gt;</c> elements as <c>MetaTag</c> items.
        /// </summary>
        /// <param name="EnableStopWord">
        /// When true, a meta element is skipped if its lower-cased markup equals the
        /// <c>SWord</c> of any entry returned by <c>StopWord.ListStopWord()</c>.
        /// </param>
        /// <param name="text">
        /// Address of the page to scan. Nothing is downloaded unless
        /// <c>Ultilities.UrlCheck</c> accepts it.
        /// </param>
        /// <returns>
        /// The collected tags in the order they appear in the page. Elements with an
        /// empty <c>name</c> attribute are skipped; elements without a <c>name</c>
        /// attribute are still returned with <c>Name</c> left unassigned. The list is
        /// empty when the URL check fails; if the download or parsing throws, the
        /// exception is written to the console and whatever was collected so far is
        /// returned.
        /// </returns>
        public IEnumerable <MetaTag> GetMetaTagList(bool EnableStopWord, string text)
        {
            List <MetaTag> metaTags = new List <MetaTag>();

            if (!Ultilities.UrlCheck(text))
            {
                return(metaTags);
            }

            try
            {
                string html;

                // WebClient is IDisposable: release it as soon as the page is downloaded.
                using (WebClient webClient = new WebClient())
                {
                    html = webClient.DownloadString(text);
                }

                // Fetch the stop-word list once per call instead of once per meta element.
                var stopWords = EnableStopWord ? StopWord.ListStopWord() : null;

                foreach (Match metaMatch in Regex.Matches(html, @"(<meta.*?>)", RegexOptions.Singleline))
                {
                    string markup = metaMatch.Groups[1].Value;

                    if (EnableStopWord)
                    {
                        // NOTE(review): this compares the complete <meta ...> markup with each
                        // stop word, so it can only match if a stop word is itself a meta
                        // element. Confirm the intended target of the filter; existing
                        // behaviour is kept here.
                        // Invariant lower-casing keeps the comparison independent of the
                        // current thread culture.
                        string loweredMarkup = markup.ToLowerInvariant();
                        if (stopWords.Any(x => x.SWord.Equals(loweredMarkup)))
                        {
                            continue;
                        }
                    }

                    MetaTag tag = new MetaTag();

                    Match nameMatch = Regex.Match(markup, @"name=""(.*?)""", RegexOptions.Singleline);
                    if (nameMatch.Success)
                    {
                        if (nameMatch.Groups[1].Value == String.Empty)
                        {
                            // Skip only this element. The previous code used 'break', which
                            // silently discarded every meta element after the first one
                            // with an empty name attribute.
                            continue;
                        }

                        tag.Name = nameMatch.Groups[1].Value;
                    }

                    Match contentMatch = Regex.Match(markup, @"content=""(.*?)""", RegexOptions.Singleline);
                    if (contentMatch.Success)
                    {
                        tag.Content = contentMatch.Groups[1].Value;
                    }

                    metaTags.Add(tag);
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and return what has been gathered.
                Console.Write(ex.ToString());
            }

            return(metaTags);
        }