/// <summary>
/// Runs <c>PageRank.Perform</c> with a damping factor of 1 on a two-node transform
/// whose second row is { 0, 1.0 }, and expects the single result row to be (0, 1)
/// within the algorithm's iteration threshold.
/// </summary>
public void TestPageRankWithSelfRing()
{
    var initialState = new[] { 0.5, 0.5 };
    var transitions = new double[,]
    {
        { 0.5, 0.5 },
        { 0, 1.0 },
    };
    var algo = new PageRank { DampingFactor = 1 };

    PageRankResult result = algo.Perform(initialState, transitions);
    var matrix = result.Matrix;

    // Shape: a two-dimensional array holding one row of two elements.
    Assert.AreEqual(2, matrix.Rank);
    Assert.AreEqual(2, matrix.Length);
    Assert.AreEqual(1, matrix.GetLength(0));
    Assert.AreEqual(2, matrix.GetLength(1));

    // Values: compared using the algorithm's own iteration threshold as tolerance.
    var tolerance = algo.IterationThreshold;
    Assert.AreEqual(0, matrix[0, 0], tolerance);
    Assert.AreEqual(1, matrix[0, 1], tolerance);
}
/// <summary>
/// Updates the in/out degree counters of the nodes in <paramref name="graph"/>, then runs
/// the Floyd-Warshall pass (recording its elapsed time) and the PageRank pass
/// (recording the value returned by its <c>DoWork</c> call).
/// </summary>
/// <param name="graph">
/// The graph whose <c>Neighborhood</c> entries, <c>FloydTime</c> and <c>Iterations</c> are written.
/// </param>
public void CreateReport(Graph graph)
{
    foreach (var entry in graph.Neighborhood)
    {
        foreach (var neighbour in entry.Value.Neighbours)
        {
            // In-degree: only neighbours that are not marked invalid are counted.
            if (neighbour.Status != NodeStatus.Invalid)
            {
                graph.Neighborhood[neighbour.Uri].InDegree++;
            }

            // Out-degree: every listed neighbour counts for the owning node, valid or not.
            graph.Neighborhood[entry.Key].OutDegree++;
        }
    }

    var shortestPaths = new FloydWarshall();
    var timer = Stopwatch.StartNew();
    shortestPaths.DoWork(graph);
    timer.Stop();
    graph.FloydTime = timer.Elapsed;

    var ranker = new PageRank(graph);
    graph.Iterations = ranker.DoWork();
}
/// <summary>
/// Runs <c>PageRank.Perform</c> on a four-node transform with a damping factor of 0.8,
/// an iteration threshold of 1e-8 and an iteration limit of 50, then checks the shape
/// of the result and its four values to a tolerance of 0.001.
/// </summary>
public void TestPageRankWithDamping()
{
    var initialState = new[] { 0.25, 0.25, 0.25, 0.25 };
    var transitions = new double[,]
    {
        { 0, 1.0 / 3, 1.0 / 3, 1.0 / 3 },
        { 0, 0, 0, 1 },
        { 0.5, 0, 0, 0.5 },
        { 0, 1, 0, 0 },
    };
    var algo = new PageRank
    {
        IterationThreshold = 1e-8,
        IterationLimit = 50,
        DampingFactor = 0.8,
    };

    PageRankResult result = algo.Perform(initialState, transitions);
    var matrix = result.Matrix;

    // Shape: a two-dimensional array holding one row of four elements.
    Assert.AreEqual(2, matrix.Rank);
    Assert.AreEqual(4, matrix.Length);
    Assert.AreEqual(1, matrix.GetLength(0));
    Assert.AreEqual(4, matrix.GetLength(1));

    // Values: checked column by column, in order, against the expected ranks.
    double[] expected = { 0.078, 0.418, 0.071, 0.433 };
    for (int column = 0; column < expected.Length; column++)
    {
        Assert.AreEqual(expected[column], matrix[0, column], 0.001);
    }
}
/// <summary>
/// DoWork-style handler: builds a <c>PageRank</c> from <c>TransitionMatrix</c> and the default
/// damping factor, and stores the vector returned by
/// <c>GetAmelioratedPageRankVector</c> in <c>e.Result</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event arguments whose <c>Result</c> receives the computed vector.</param>
private void CalculateAmeliPageRank(object sender, DoWorkEventArgs e)
{
    var ranker = new PageRank(TransitionMatrix, PageRank.DefaultDampingFactor);

    // The iteration count comes back through the out parameter but is not used here.
    ulong iterationCount;
    var rankVector = ranker.GetAmelioratedPageRankVector(InitialPageRankVector, AprPrecision, out iterationCount);

    e.Result = rankVector;
}
/// <summary>
/// Builds an extractive summary of <paramref name="paragraph"/>: the tagged sentences are ranked
/// with <c>PageRank&lt;string&gt;</c>, the highest-ranked ones are selected under a word budget,
/// and the selected sentences are concatenated in the order they appear in the tagged list.
/// </summary>
/// <param name="paragraph">Text to summarise.</param>
/// <param name="wordLength">
/// Word budget. Sentences are taken in rank order while the running count of
/// whitespace-separated tokens stays below this value.
/// </param>
/// <returns>
/// The selected sentences appended without a separator; an empty string when nothing is selected
/// or when ranking yields no result.
/// </returns>
public string ExtractParagraphSummary(string paragraph, int wordLength = 100)
{
    var taggedSentences = SentencePOSTagger.GetTaggedSentences(paragraph);
    var directedGraph = GraphUtil.GraphInstance.BuildPOSGraph<string>(taggedSentences);
    var rankedDictionary = new PageRank<string>().Rank(directedGraph);

    // Highest rank first; stays null when ranking produced no dictionary.
    var rankedSentences = rankedDictionary?
        .OrderByDescending(p => p.Value)
        .Select(p => p.Key)
        .ToList();

    var topSentences = new List<string>();
    if (rankedSentences != null)
    {
        int wordCount = 0;
        int takeCount = 0;
        foreach (var sentence in rankedSentences)
        {
            wordCount += sentence.Split(null).Length;
            if (wordCount >= wordLength)
            {
                // This sentence would reach the budget, so it is not included.
                break;
            }

            takeCount++;
        }

        // NOTE(review): when every sentence fits under the budget, the lowest-ranked one is
        // still dropped (and a single-sentence input yields an empty summary). This is kept
        // as-is because it looks deliberate - confirm with the owner.
        if (wordCount < wordLength)
        {
            takeCount--;
        }

        topSentences = rankedSentences.Take(takeCount).ToList();
    }

    // Emit the selected sentences in their original order rather than in rank order.
    var summary = new StringBuilder("");
    foreach (var sent in taggedSentences)
    {
        foreach (var selected in topSentences)
        {
            if (selected.Equals(sent))
            {
                summary.Append(selected);
            }
        }
    }

    return summary.ToString();
}
/// <summary>
/// Smoke test: calls <c>PageRank.RankProfiles</c> with the argument 10 and makes no assertions.
/// </summary>
public void RankProfilesTest()
{
    const int argument = 10;
    PageRank.RankProfiles(argument);

    // Disabled checks, kept for reference. They refer to a vector `v` that this test
    // does not currently obtain:
    //   - Math.Abs(v.OneNorm() - 1) < 0.00000001
    //   - for every i in [0, v.Rows): v.GetValue(i) >= 0 and v.GetValue(i) < 1
}
/// <summary>
/// Extracts keywords from a sentence: the POS-tagged tokens are ranked with
/// <c>PageRank&lt;string&gt;</c>, the top third of the ranked entries (integer division) is kept,
/// and adjacent keywords are joined via <c>ExtractUtil.instance.JoinAdjacentWords</c>.
/// </summary>
/// <param name="sentence">Sentence to analyse.</param>
/// <param name="lang">Language argument passed through to the POS tagger.</param>
/// <returns>
/// The joined keywords, or <c>null</c> when ranking produced no dictionary.
/// </returns>
public List<string> GetKeyWordsList(string sentence, string lang)
{
    var taggedList = WordPOSTagger.GetPosTaggedTokens(sentence, lang);
    var directedGraph = GraphUtil.GraphInstance.BuildPOSGraph<string>(taggedList);
    var rankedDictionary = new PageRank<string>().Rank(directedGraph);
    var wordList = ExtractUtil.instance.GetNormalizedUniqueWordList(taggedList);

    var keywords = rankedDictionary?
        .OrderByDescending(p => p.Value)
        .Take(rankedDictionary.Count / 3)
        .Select(p => p.Key)
        .ToList();

    if (keywords == null)
    {
        return null;
    }

    IList<string> joinedKeywords = ExtractUtil.instance.JoinAdjacentWords(wordList, keywords);

    // A bare `as List<string>` would turn any other IList<string> implementation
    // (array, read-only wrapper, ...) into null and silently lose the keywords.
    // Reuse the instance when it already is a List<string>, otherwise copy it.
    return (joinedKeywords as List<string>) ?? joinedKeywords?.ToList();
}
/// <summary>
/// At the start of an iteration, recomputes PageRank over the rotated link matrix of the
/// current targets and stores the scores, scaled by <c>scoreUnit</c> and converted to
/// integers, in <c>ranks</c>. Does nothing when no matrix is available.
/// </summary>
public override void onStartIteration()
{
    var matrix = wRecord.context.targets.GetLinkMatrixRotated();
    if (matrix == null)
    {
        return;
    }

    pageRank = new PageRank(matrix, alpha, convergence, checkSteps);
    double[] scores = pageRank.ComputePageRank();

    // Scale each score by scoreUnit and convert it to an integer.
    List<int> scaled = new List<int>();
    for (int i = 0; i < scores.Length; i++)
    {
        scaled.Add(Convert.ToInt32(scores[i] * scoreUnit));
    }

    ranks = wRecord.context.targets.linkMatrix.MapToX(scaled);
}
// GET: SayfaUrlSirala
/// <summary>
/// Scores each URL against the given words. For every URL the page HTML is fetched and
/// lower-cased, passed through <c>HtmlPack.GetHtmlExludePopup</c>, and cleaned; occurrences
/// of each word (and its language-compatible variants) are counted in the cleaned text;
/// and the count, meta, header and title points are added into <c>PointSum</c>.
/// The model also receives the results ordered by <c>PointSum</c>, ascending and descending.
/// </summary>
/// <param name="urls">Comma-separated list of URLs.</param>
/// <param name="words">Comma-separated list of words.</param>
/// <returns>The view rendered with the populated model.</returns>
public ActionResult Index(string urls, string words)
{
    var model = new SayfaUrlSiralaModel
    {
        Words = StringOperations.GetListBySplit(words, ','),
        Urls = StringOperations.GetListBySplit(urls, ','),
    };

    foreach (var url in model.Urls)
    {
        var detail = new UrlDetail { Url = url };
        detail.SourceHtml = SiteSource.GetHtml(url).ToLower();
        detail.SourceHtml = HtmlPack.GetHtmlExludePopup(detail.SourceHtml);
        detail.CleanHtml = SiteSource.GetCleanHtml(detail.SourceHtml).ToLower();

        foreach (var word in model.Words)
        {
            var keyword = new Keyword { Url = url, Word = word, Count = 0 };

            // Count the word under each of its distinct language-compatible spellings.
            List<string> variants = StringOperations.GetLanguageLowerCompatible(word);
            variants = StringOperations.GetDifferentWords(variants);
            foreach (var variant in variants)
            {
                keyword.Count += StringOperations.GetCountWordInSentence(detail.CleanHtml, variant);
            }

            detail.Keywords.Add(keyword);
        }

        List<int> counts = detail.Keywords.Select(k => k.Count).ToList();

        detail.PointByCount = PageRank.GetPointByCount(counts);
        detail.PointByMeta = PageRank.GetPointByMeta(detail.SourceHtml, model.Words);
        detail.PointByHeader = PageRank.GetPointByHead(detail.SourceHtml, model.Words);
        detail.PointByTitle = PageRank.GetPointByTitle(detail.SourceHtml, model.Words);
        detail.PointSum += detail.PointByCount + detail.PointByMeta + detail.PointByHeader + detail.PointByTitle;

        model.UrlDetails.Add(detail);
    }

    model.UrlDetailsAsc = model.UrlDetails.OrderBy(u => u.PointSum).ToList();
    model.UrlDetailsDesc = model.UrlDetails.OrderByDescending(u => u.PointSum).ToList();

    return View(model);
}
/// <summary>
/// Prepares per-domain graph scores for the given context. For every domain the ID matrix
/// of its web site graph is stored in <c>p_matrix</c>; depending on <c>algorithm</c>, either
/// a recalculated <c>HITSRank</c> is stored in <c>p_hits</c>, or PageRank scores scaled by
/// <c>scoreUnit</c> and mapped through the matrix are stored in <c>p_rank</c>.
/// </summary>
/// <param name="context">The context supplying the domains and their graphs.</param>
/// <param name="log">The log passed to <c>GetByDomain</c>.</param>
public override void Prepare(DocumentSelectResult context, ILogBuilder log)
{
    var byDomain = context.GetByDomain(log);

    foreach (var pair in byDomain)
    {
        var domain = pair.Key;
        WebSiteGraph webSiteGraph = context.domainNameToGraph[domain];
        var matrix = webSiteGraph.GetIDMatrix(scoreUnit);
        p_matrix.Add(domain, matrix);

        if (algorithm == GraphFactorAlgorithm.HITS)
        {
            HITSRank hits = new HITSRank();
            hits.recalculate(matrix, convergence, steps);
            p_hits.Add(domain, hits);
        }
        else if (algorithm == GraphFactorAlgorithm.PageRank)
        {
            var pageRank = new PageRank(matrix.GetMatrix(), alpha, convergence, steps);
            double[] scores = pageRank.ComputePageRank();

            // Scale each score by scoreUnit and convert it to an integer.
            List<int> scaled = new List<int>();
            for (int i = 0; i < scores.Length; i++)
            {
                scaled.Add(Convert.ToInt32(scores[i] * scoreUnit));
            }

            Dictionary<string, int> ranks = matrix.MapToX(scaled);
            p_rank.Add(domain, ranks);
        }
    }
}
/// <summary>
/// Loads the base links and the base/test ratings, converts their indexes to zero-based,
/// and runs <c>PageRank.TryTopN</c> once per value in a fixed list of step values,
/// followed by one more run with 0.9.
/// </summary>
public static void PageRankTopNTest()
{
    // The PageRank algorithm can be converged for top-N recommendation.
    List<Link> trainLinks = Tools.GetLinks(BaseRatingFile);
    List<Rating> trainRatings = Tools.GetRatings(BaseRatingFile);
    List<Rating> heldOutRatings = Tools.GetRatings(TestRatingFile);

    Tools.UpdateIndexesToZeroBased(trainLinks);
    Tools.UpdateIndexesToZeroBased(trainRatings);
    Tools.UpdateIndexesToZeroBased(heldOutRatings);

    var maxIds = Tools.GetMaxNodeId(trainLinks);
    int nodeCount = Tools.TransformLinkedToId(trainLinks, maxIds.Item1, maxIds.Item2);

    PageRank ranker = new PageRank(nodeCount);

    double[] steps = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 0.9 };
    for (int i = 0; i < steps.Length; i++)
    {
        ranker.TryTopN(trainLinks, trainRatings, heldOutRatings, maxIds.Item1, 50, steps[i], 1e-6);
        Console.WriteLine();
    }

    // One additional run with 0.9, after the sweep above.
    ranker.TryTopN(trainLinks, trainRatings, heldOutRatings, maxIds.Item1, 50, 0.9, 1e-6);
}
/// <summary>
/// Entry point: constructs an empty <c>DirectedGraph&lt;string&gt;</c> and a
/// <c>PageRank&lt;string&gt;</c>; neither instance is used afterwards.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var graph = new DirectedGraph<string>();
    PageRank<string> ranker = new PageRank<string>();
}
/// <summary>
/// Reciprocal ("friend") link check. For every grid row whose <c>blFlag</c> cell is not yet
/// true, downloads the page named in <c>txtWebURL</c>, writes "正常" to <c>txtStatus</c> when
/// the page contains an anchor whose href points at the site entered in <c>txtMyURL</c>
/// ("不正常" otherwise), and, when <c>chkPR</c> is checked, fills <c>txtPR</c> via
/// <c>PageRank.GetGooglePR</c>.
/// </summary>
/// <remarks>
/// Any exception is caught and its message shown in a message box; rows already flagged
/// stay flagged. Only URLs starting with <c>http://</c> are recognised as a site address.
/// </remarks>
public void ThreadProc()
{
    try
    {
        string myurl = txtMyURL.Text.Trim();

        // EndsWith is safe on an empty string, unlike Substring(Length - 1).
        if (myurl.EndsWith("/", StringComparison.Ordinal))
        {
            myurl = myurl.Substring(0, myurl.Length - 1);
        }

        // Reduce the address to "http://host".
        string mydomain = Regex.Match(myurl, "http://[^/]+").Value;
        if (mydomain.Length == 0)
        {
            // An empty domain would produce a pattern that matches any anchor at all,
            // reporting every page as linking back.
            MessageBox.Show("请输入以 http:// 开头的网址");
            return;
        }

        // The pattern was originally tried out with http://regexlib.com/RETester.aspx.
        // The domain is escaped so that '.' only matches a literal dot, and the three
        // quoting variants sit inside one group so that each of them still requires the
        // leading "<a ... href=" and the trailing "> ... </a>".
        string escapedDomain = Regex.Escape(mydomain);
        string pattion = "<a[^<>]+href\\s*=\\s*[\"']*(?:\"" + escapedDomain + "/*\"|'" + escapedDomain + "/*'|" + escapedDomain + "/*)[^<>]*?>([\\s\\S]+?)</a>";
        Regex linkPattern = new Regex(pattion, RegexOptions.IgnoreCase);

        bool flags = false;
        for (int i = 0; i < this.dataGridView1.RowCount; i++)
        {
            var row = this.dataGridView1.Rows[i];
            if (Convert.ToBoolean(row.Cells["blFlag"].Value))
            {
                // Already handled in an earlier pass.
                continue;
            }

            flags = true;
            row.Cells["blFlag"].Value = true;

            string nowurl = Convert.ToString(row.Cells["txtWebURL"].Value).ToLower();
            string tempstr = CaiJi.GetHtmlSource(nowurl);

            row.Cells["txtStatus"].Value = linkPattern.IsMatch(tempstr) ? "正常" : "不正常";

            if (chkPR.Checked)
            {
                row.Cells["txtPR"].Value = PageRank.GetGooglePR(nowurl);
            }
        }

        // Run another pass whenever this one processed a row; the pass that finds no
        // unflagged rows ends the chain (presumably this picks up rows added meanwhile).
        if (flags)
        {
            ThreadProc();
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Recursively walks the links found on <paramref name="url"/>. For every link not already in
/// <c>links</c>, records a <c>SubUrlDetail</c> holding the page HTML, its cleaned text, the
/// per-word occurrence counts and the resulting count points, then descends into that link.
/// </summary>
/// <param name="url">Page whose links are examined.</param>
/// <param name="depth">Current depth; the walk stops once this reaches 3.</param>
/// <param name="id">
/// Id assigned to every link found on this page; <c>ParentId</c> is set to <c>id - 1</c>
/// and children are visited with <c>id + 1</c>.
/// </param>
/// <param name="words">Words whose occurrences are counted on each page.</param>
public static void SiteOrderByDepth(string url, int depth, int id, List<string> words)
{
    // ">=" rather than "==" so that a call starting beyond the limit still terminates.
    const int maxDepth = 3;
    if (depth >= maxDepth)
    {
        return;
    }

    string html = SiteSource.GetHtml(url).ToLower();
    List<string> htmlLinks = SiteSource.GetSubUrls(html);
    htmlLinks = SiteSource.GetUrlsWithoutExtenscions(htmlLinks, url);

    foreach (string htmlLink in htmlLinks)
    {
        if (links.Contains(htmlLink))
        {
            continue;
        }

        // Reuse the HTML already downloaded when the link is the current page itself.
        // The link has to be compared with the URL: comparing it with the page HTML can
        // never match, which made every self-link trigger a second download.
        string rawHtml = htmlLink == url ? html : SiteSource.GetHtml(htmlLink).ToLower();
        string htmlUrl = HtmlPack.GetHtmlExludePopup(rawHtml);
        string cleanHtmlUrl = SiteSource.GetCleanHtml(htmlUrl);

        SubUrlDetail subUrlDetail = new SubUrlDetail();
        subUrlDetail.Depth = depth;
        subUrlDetail.Id = id;
        subUrlDetail.Url = htmlLink;
        subUrlDetail.ParentId = id - 1;
        subUrlDetail.UrlDetail.Depth = depth;
        subUrlDetail.UrlDetail.SourceHtml = htmlUrl;
        subUrlDetail.UrlDetail.CleanHtml = cleanHtmlUrl;
        subUrlDetail.UrlDetail.Url = htmlLink;

        foreach (var word in words)
        {
            Keyword keyword = new Keyword();
            keyword.Url = htmlLink;
            keyword.Word = word;
            keyword.Count = 0;

            // Count the word under each of its distinct language-compatible spellings.
            List<string> languageCompatibles = StringOperations.GetLanguageLowerCompatible(keyword.Word);
            languageCompatibles = StringOperations.GetDifferentWords(languageCompatibles);
            foreach (var languageCompatible in languageCompatibles)
            {
                keyword.Count += StringOperations.GetCountWordInSentence(cleanHtmlUrl, languageCompatible);
            }

            subUrlDetail.UrlDetail.Keywords.Add(keyword);
        }

        List<int> countList = new List<int>();
        foreach (var keyword in subUrlDetail.UrlDetail.Keywords)
        {
            countList.Add(keyword.Count);
        }

        subUrlDetail.UrlDetail.PointByCount = PageRank.GetPointByCount(countList);
        subUrlDetail.UrlDetail.PointSum += subUrlDetail.UrlDetail.PointByCount;

        subUrlDetails.Add(subUrlDetail);

        // Mark the link as visited before descending so that cycles are not followed again.
        links.Add(htmlLink);
        SiteOrderByDepth(htmlLink, depth + 1, id + 1, words);
    }
}