Пример #1
0
        // GET: SayfaUrlSirala
        /// <summary>
        /// Scores every URL in <paramref name="urls"/> against the keywords in
        /// <paramref name="words"/> and renders the result, ordered both
        /// ascending and descending by total score.
        /// </summary>
        /// <param name="urls">URL list as a single string; split on ','.</param>
        /// <param name="words">Keyword list as a single string; split on ','.</param>
        /// <returns>The view bound to the populated <c>SayfaUrlSiralaModel</c>.</returns>
        public ActionResult Index(string urls, string words)
        {
            var model = new SayfaUrlSiralaModel
            {
                Words = StringOperations.GetListBySplit(words, ','),
                Urls  = StringOperations.GetListBySplit(urls, ',')
            };

            foreach (var pageUrl in model.Urls)
            {
                // Download the page, lower-case it, then pass it through the popup filter.
                var rawHtml = HtmlPack.GetHtmlExludePopup(SiteSource.GetHtml(pageUrl).ToLower());
                // Text used for keyword counting, lower-cased again after cleaning.
                var plainText = SiteSource.GetCleanHtml(rawHtml).ToLower();

                var page = new UrlDetail
                {
                    Url        = pageUrl,
                    SourceHtml = rawHtml,
                    CleanHtml  = plainText
                };

                foreach (var term in model.Words)
                {
                    var entry = new Keyword { Url = pageUrl, Word = term, Count = 0 };

                    // Expand the term into its lower-case variants, de-duplicated,
                    // and add up the occurrences of every variant.
                    var variants = StringOperations.GetDifferentWords(
                        StringOperations.GetLanguageLowerCompatible(term));
                    foreach (var variant in variants)
                    {
                        entry.Count += StringOperations.GetCountWordInSentence(plainText, variant);
                    }

                    page.Keywords.Add(entry);
                }

                List<int> hitCounts = page.Keywords.Select(k => k.Count).ToList();

                page.PointByCount  = PageRank.GetPointByCount(hitCounts);
                page.PointByMeta   = PageRank.GetPointByMeta(rawHtml, model.Words);
                page.PointByHeader = PageRank.GetPointByHead(rawHtml, model.Words);
                page.PointByTitle  = PageRank.GetPointByTitle(rawHtml, model.Words);
                page.PointSum     += page.PointByCount + page.PointByMeta + page.PointByHeader + page.PointByTitle;

                model.UrlDetails.Add(page);
            }

            model.UrlDetailsAsc  = model.UrlDetails.OrderBy(d => d.PointSum).ToList();
            model.UrlDetailsDesc = model.UrlDetails.OrderByDescending(d => d.PointSum).ToList();

            return View(model);
        }
Пример #2
0
        /// <summary>
        /// Recursively visits the links found on <paramref name="url"/>, scores each
        /// not-yet-seen link by keyword occurrence count and appends the result to
        /// <c>subUrlDetails</c>. Visited links are recorded in <c>links</c> so that
        /// each link is processed at most once.
        /// </summary>
        /// <param name="url">Page whose outgoing links are examined.</param>
        /// <param name="depth">Current recursion level; nothing is done once it reaches 3.</param>
        /// <param name="id">Id assigned to every link found on this page; children receive <c>id + 1</c>.</param>
        /// <param name="words">Keywords counted in each linked page.</param>
        public static void SiteOrderByDepth(string url, int depth, int id, List<string> words)
        {
            // Recursion limit. ">=" (rather than "==") also stops calls that start
            // beyond the limit instead of letting them recurse without a bound.
            const int MaxDepth = 3;
            if (depth >= MaxDepth)
            {
                return;
            }

            string       html      = SiteSource.GetHtml(url).ToLower();
            List<string> htmlLinks = SiteSource.GetSubUrls(html);

            htmlLinks = SiteSource.GetUrlsWithoutExtenscions(htmlLinks, url);

            foreach (string htmlLink in htmlLinks)
            {
                if (links.Contains(htmlLink))
                {
                    continue;
                }

                // A page that links to itself has already been downloaded above, so
                // reuse that source. The link must be compared with the page URL;
                // comparing it with the page's HTML text never matches.
                string htmlUrl = htmlLink == url
                    ? html
                    : SiteSource.GetHtml(htmlLink).ToLower();
                htmlUrl = HtmlPack.GetHtmlExludePopup(htmlUrl);

                // Always derive the clean text from the popup-filtered source so both
                // the reused and the freshly downloaded case are scored the same way.
                string cleanHtmlUrl = SiteSource.GetCleanHtml(htmlUrl);

                // NOTE(review): every link found on the same page gets the same Id
                // (id is not advanced inside this loop) - confirm this is intended.
                SubUrlDetail subUrlDetail = new SubUrlDetail();
                subUrlDetail.Depth                = depth;
                subUrlDetail.Id                   = id;
                subUrlDetail.Url                  = htmlLink;
                subUrlDetail.ParentId             = id - 1;
                subUrlDetail.UrlDetail.Depth      = depth;
                subUrlDetail.UrlDetail.SourceHtml = htmlUrl;
                subUrlDetail.UrlDetail.CleanHtml  = cleanHtmlUrl;
                subUrlDetail.UrlDetail.Url        = htmlLink;

                foreach (var word in words)
                {
                    Keyword keyword = new Keyword();
                    keyword.Url   = htmlLink;
                    keyword.Word  = word;
                    keyword.Count = 0;

                    // Count every distinct lower-case variant of the keyword.
                    List<string> languageCompatibles = StringOperations.GetLanguageLowerCompatible(keyword.Word);
                    languageCompatibles = StringOperations.GetDifferentWords(languageCompatibles);
                    foreach (var languageCompatible in languageCompatibles)
                    {
                        keyword.Count += StringOperations.GetCountWordInSentence(cleanHtmlUrl, languageCompatible);
                    }
                    subUrlDetail.UrlDetail.Keywords.Add(keyword);
                }

                List<int> countList = new List<int>();
                foreach (var keyword in subUrlDetail.UrlDetail.Keywords)
                {
                    countList.Add(keyword.Count);
                }
                subUrlDetail.UrlDetail.PointByCount = PageRank.GetPointByCount(countList);
                subUrlDetail.UrlDetail.PointSum    += subUrlDetail.UrlDetail.PointByCount;
                subUrlDetails.Add(subUrlDetail);

                // Mark as visited before descending so cycles cannot re-enter this link.
                links.Add(htmlLink);
                SiteOrderByDepth(htmlLink, depth + 1, id + 1, words);
            }
        }