// GET: SayfaUrlSirala
/// <summary>
/// Scores every URL in <paramref name="urls"/> against the keywords in
/// <paramref name="words"/> and renders the view with the results ordered
/// both ascending and descending by total score.
/// </summary>
/// <param name="urls">URLs to score, split on ',' by <c>StringOperations.GetListBySplit</c>.</param>
/// <param name="words">Keywords to look for, split on ',' by <c>StringOperations.GetListBySplit</c>.</param>
/// <returns>The view bound to the populated <c>SayfaUrlSiralaModel</c>.</returns>
public ActionResult Index(string urls, string words)
{
    var model = new SayfaUrlSiralaModel();
    model.Words = StringOperations.GetListBySplit(words, ',');
    model.Urls = StringOperations.GetListBySplit(urls, ',');

    foreach (var url in model.Urls)
    {
        var urldetail = new UrlDetail { Url = url };

        // The page is fetched once; the lower-cased source is stored first and then
        // replaced by the version returned from HtmlPack.GetHtmlExludePopup.
        urldetail.SourceHtml = SiteSource.GetHtml(url).ToLower();
        urldetail.SourceHtml = HtmlPack.GetHtmlExludePopup(urldetail.SourceHtml);
        urldetail.CleanHtml = SiteSource.GetCleanHtml(urldetail.SourceHtml).ToLower();

        foreach (var word in model.Words)
        {
            var keyword = new Keyword { Url = url, Word = word, Count = 0 };

            // Count the occurrences of every distinct variant of the word in the clean text.
            List<string> variants = StringOperations.GetDifferentWords(
                StringOperations.GetLanguageLowerCompatible(word));
            foreach (var variant in variants)
            {
                keyword.Count += StringOperations.GetCountWordInSentence(urldetail.CleanHtml, variant);
            }

            urldetail.Keywords.Add(keyword);
        }

        // Per-keyword hit counts, in keyword order, feed the count-based score.
        List<int> countList = urldetail.Keywords.Select(k => k.Count).ToList();

        urldetail.PointByCount = PageRank.GetPointByCount(countList);
        urldetail.PointByMeta = PageRank.GetPointByMeta(urldetail.SourceHtml, model.Words);
        urldetail.PointByHeader = PageRank.GetPointByHead(urldetail.SourceHtml, model.Words);
        urldetail.PointByTitle = PageRank.GetPointByTitle(urldetail.SourceHtml, model.Words);
        urldetail.PointSum += urldetail.PointByCount + urldetail.PointByMeta + urldetail.PointByHeader + urldetail.PointByTitle;

        model.UrlDetails.Add(urldetail);
    }

    model.UrlDetailsAsc = model.UrlDetails.OrderBy(u => u.PointSum).ToList();
    model.UrlDetailsDesc = model.UrlDetails.OrderByDescending(u => u.PointSum).ToList();

    return View(model);
}
/// <summary>
/// Recursively visits the links found on <paramref name="url"/>, records a
/// <c>SubUrlDetail</c> with keyword counts for every link not yet present in
/// <c>links</c>, and descends into each of them until the depth limit is reached.
/// Results are appended to the static <c>subUrlDetails</c> and <c>links</c> collections.
/// </summary>
/// <param name="url">Page whose outgoing links are examined.</param>
/// <param name="depth">Current recursion depth; nothing is done once it reaches the limit (3).</param>
/// <param name="id">Identifier stored on every record created at this level; the parent id is <c>id - 1</c>.</param>
/// <param name="words">Keywords whose occurrences are counted on every visited page.</param>
public static void SiteOrderByDepth(string url, int depth, int id, List<string> words)
{
    const int MaxDepth = 3;

    // ">=" rather than "==" so a caller that starts above the limit cannot crawl unbounded.
    if (depth >= MaxDepth)
    {
        return;
    }

    string html = SiteSource.GetHtml(url).ToLower();
    List<string> htmlLinks = SiteSource.GetSubUrls(html);
    htmlLinks = SiteSource.GetUrlsWithoutExtenscions(htmlLinks, url);

    foreach (string htmlLink in htmlLinks)
    {
        if (links.Contains(htmlLink))
        {
            continue;
        }

        // Reuse the page already downloaded above when the link points back at it.
        // The previous code compared the link with the page's HTML text instead of its
        // URL, so this shortcut never triggered and the page was downloaded again.
        // Links come from lower-cased HTML, hence the case-insensitive comparison.
        bool isSelfLink = string.Equals(htmlLink, url, System.StringComparison.OrdinalIgnoreCase);
        string rawHtml = isSelfLink ? html : SiteSource.GetHtml(htmlLink).ToLower();

        // Same pipeline for both cases: strip popups first, then derive the clean text.
        string htmlUrl = HtmlPack.GetHtmlExludePopup(rawHtml);
        string cleanHtmlUrl = SiteSource.GetCleanHtml(htmlUrl);

        SubUrlDetail subUrlDetail = new SubUrlDetail();
        subUrlDetail.Depth = depth;
        subUrlDetail.Id = id;
        subUrlDetail.Url = htmlLink;
        subUrlDetail.ParentId = id - 1;
        subUrlDetail.UrlDetail.Depth = depth;
        subUrlDetail.UrlDetail.SourceHtml = htmlUrl;
        subUrlDetail.UrlDetail.CleanHtml = cleanHtmlUrl;
        subUrlDetail.UrlDetail.Url = htmlLink;

        foreach (var word in words)
        {
            Keyword keyword = new Keyword();
            keyword.Url = htmlLink;
            keyword.Word = word;
            keyword.Count = 0;

            // Count the occurrences of every distinct variant of the word in the clean text.
            List<string> languageCompatibles = StringOperations.GetLanguageLowerCompatible(keyword.Word);
            languageCompatibles = StringOperations.GetDifferentWords(languageCompatibles);
            foreach (var languageCompatible in languageCompatibles)
            {
                keyword.Count += StringOperations.GetCountWordInSentence(cleanHtmlUrl, languageCompatible);
            }

            subUrlDetail.UrlDetail.Keywords.Add(keyword);
        }

        List<int> countList = new List<int>();
        foreach (var keyword in subUrlDetail.UrlDetail.Keywords)
        {
            countList.Add(keyword.Count);
        }

        subUrlDetail.UrlDetail.PointByCount = PageRank.GetPointByCount(countList);
        subUrlDetail.UrlDetail.PointSum += subUrlDetail.UrlDetail.PointByCount;

        subUrlDetails.Add(subUrlDetail);

        // Mark the link as visited before descending so the recursion cannot revisit it.
        links.Add(htmlLink);
        SiteOrderByDepth(htmlLink, depth + 1, id + 1, words);
    }
}