public void TestPageRankWithSelfRing()
        {
            // Scenario: two nodes starting from a uniform state. The second row of the
            // transform is { 0, 1.0 }, i.e. the second node only points at itself, and
            // the damping factor is set to 1 before the run.
            double[] initialState = { 0.5, 0.5 };

            double[,] transitions =
            {
                { 0.5, 0.5 },
                { 0, 1.0 }
            };

            var pageRank = new PageRank { DampingFactor = 1 };

            PageRankResult outcome = pageRank.Perform(initialState, transitions);
            var matrix = outcome.Matrix;

            // Shape: a two-dimensional array made of a single row with two entries.
            Assert.AreEqual(2, matrix.Rank);
            Assert.AreEqual(2, matrix.Length);
            Assert.AreEqual(1, matrix.GetLength(0));
            Assert.AreEqual(2, matrix.GetLength(1));

            // Values: first entry expected at 0 and second at 1, compared within the
            // algorithm's own iteration threshold.
            Assert.AreEqual(0, matrix[0, 0], pageRank.IterationThreshold);
            Assert.AreEqual(1, matrix[0, 1], pageRank.IterationThreshold);
        }
        /// <summary>
        /// Updates the degree counters of the nodes in <paramref name="graph"/>, then runs the
        /// Floyd-Warshall and PageRank workers and stores their results on the graph.
        /// </summary>
        /// <param name="graph">
        /// Graph whose <c>Neighborhood</c> entries are updated in place and which receives
        /// <c>FloydTime</c> (elapsed time of the Floyd-Warshall run) and <c>Iterations</c>
        /// (the value returned by the PageRank worker).
        /// </param>
        public void CreateReport(Graph graph)
        {
            foreach (var entry in graph.Neighborhood)
            {
                foreach (var neighbour in entry.Value.Neighbours)
                {
                    // A neighbour gains an incoming edge only when it is not marked invalid.
                    bool countsAsIncoming = neighbour.Status != NodeStatus.Invalid;
                    if (countsAsIncoming)
                    {
                        graph.Neighborhood[neighbour.Uri].InDegree++;
                    }

                    // Every listed neighbour counts as an outgoing edge of the current node.
                    graph.Neighborhood[entry.Key].OutDegree++;
                }
            }

            // Time only the Floyd-Warshall pass.
            var floydWarshall = new FloydWarshall();
            var timer         = Stopwatch.StartNew();

            floydWarshall.DoWork(graph);
            timer.Stop();
            graph.FloydTime = timer.Elapsed;

            var pageRankWorker = new PageRank(graph);

            graph.Iterations = pageRankWorker.DoWork();
        }
        public void TestPageRankWithDamping()
        {
            // Scenario: four nodes starting from a uniform state, run with a damping factor
            // of 0.8, an iteration threshold of 1e-8 and at most 50 iterations.
            double[] initialState = { 0.25, 0.25, 0.25, 0.25 };

            double[,] transitions =
            {
                { 0, 1.0 / 3, 1.0 / 3, 1.0 / 3 },
                { 0, 0, 0, 1 },
                { 0.5, 0, 0, 0.5 },
                { 0, 1, 0, 0 }
            };

            var pageRank = new PageRank
            {
                IterationThreshold = 1e-8,
                IterationLimit     = 50,
                DampingFactor      = 0.8
            };

            PageRankResult outcome = pageRank.Perform(initialState, transitions);
            var matrix = outcome.Matrix;

            // Shape: a two-dimensional array made of a single row with four entries.
            Assert.AreEqual(2, matrix.Rank);
            Assert.AreEqual(4, matrix.Length);
            Assert.AreEqual(1, matrix.GetLength(0));
            Assert.AreEqual(4, matrix.GetLength(1));

            // Values: each entry is compared with its expected rank to three decimals.
            const double tolerance = 0.001;
            Assert.AreEqual(0.078, matrix[0, 0], tolerance);
            Assert.AreEqual(0.418, matrix[0, 1], tolerance);
            Assert.AreEqual(0.071, matrix[0, 2], tolerance);
            Assert.AreEqual(0.433, matrix[0, 3], tolerance);
        }
Beispiel #4
0
        /// <summary>
        /// Background-worker handler: computes the ameliorated PageRank vector from
        /// <c>TransitionMatrix</c> and <c>InitialPageRankVector</c> and publishes it as the
        /// work result.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event arguments whose <c>Result</c> receives the computed vector.</param>
        private void CalculateAmeliPageRank(object sender, DoWorkEventArgs e)
        {
            var ranker = new PageRank(TransitionMatrix, PageRank.DefaultDampingFactor);

            // The iteration count is reported through the out parameter but is not used here.
            ulong iterationCount;
            var amelioratedVector = ranker.GetAmelioratedPageRankVector(
                InitialPageRankVector, AprPrecision, out iterationCount);

            e.Result = amelioratedVector;
        }
        /// <summary>
        /// Builds an extractive summary of <paramref name="paragraph"/>. The tagged sentences are
        /// ranked with PageRank, the highest-ranked ones are kept while their cumulative word
        /// count stays below <paramref name="wordLength"/>, and the kept sentences are
        /// concatenated in the order in which they occur in the tagged sentence list.
        /// </summary>
        /// <param name="paragraph">Text to summarise.</param>
        /// <param name="wordLength">
        /// Word budget. The sentence that makes the running word count reach this value is
        /// excluded, together with every lower-ranked sentence.
        /// </param>
        /// <returns>
        /// The selected sentences appended without any separator; an empty string when no
        /// sentence is selected or when ranking yields no result.
        /// </returns>
        public string ExtractParagraphSummary(string paragraph, int wordLength = 100)
        {
            var taggedSentences  = SentencePOSTagger.GetTaggedSentences(paragraph);
            var directedGraph    = GraphUtil.GraphInstance.BuildPOSGraph <string>(taggedSentences);
            var rankedDictionary = new PageRank <string>().Rank(directedGraph);

            // Sentences ordered from highest to lowest rank; null when ranking returned nothing.
            var rankedSentences = rankedDictionary?.ToList().OrderByDescending(p => p.Value).Select(x => x.Key).ToList();
            var topSentences    = new List <string>();

            if (rankedSentences != null)
            {
                int wordCount = 0;
                int keepCount = 0;

                foreach (var sentence in rankedSentences)
                {
                    // Split on any whitespace. The explicit char[] cast keeps the call unambiguous
                    // on frameworks with several single-argument Split overloads, and empty
                    // tokens from repeated or trailing whitespace are not counted as words.
                    wordCount += sentence.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries).Length;
                    if (wordCount >= wordLength)
                    {
                        break;
                    }

                    keepCount++;
                }

                // When the loop ends without reaching the budget, every sentence fits and all of
                // them are kept. (Previously the count was decremented in that case, which dropped
                // the lowest-ranked sentence and emptied single-sentence summaries.)
                topSentences = rankedSentences.Take(keepCount).ToList();
            }

            // Emit the kept sentences in the order of the tagged sentence list, not in rank order.
            var summary = new StringBuilder();

            foreach (var sent in taggedSentences)
            {
                foreach (var candidate in topSentences)
                {
                    if (candidate.Equals(sent))
                    {
                        summary.Append(candidate);
                    }
                }
            }

            return summary.ToString();
        }
        public void RankProfilesTest()
        {
            // Smoke test: the call is made without asserting anything about its outcome.
            const int argument = 10;

            PageRank.RankProfiles(argument);

            /*
             * Disabled checks, kept for reference. They refer to a vector `v` that is not
             * available in this method:
             *
             * double OneNorm = v.OneNorm();
             * Assert.IsTrue(Math.Abs(OneNorm - 1) < 0.00000001);
             *
             * int i;
             * for (i = 0; i < v.Rows; i++)
             * {
             *  Assert.IsTrue(v.GetValue(i) >= 0);
             *  Assert.IsTrue(v.GetValue(i) < 1);
             * }
             */
        }
Beispiel #7
0
        /// <summary>
        /// Extracts keywords from <paramref name="sentence"/>: the POS-tagged tokens are ranked
        /// with PageRank, the top third of the ranked entries is kept, and the kept keywords are
        /// passed to <c>JoinAdjacentWords</c> together with the normalized unique word list.
        /// </summary>
        /// <param name="sentence">Text to extract keywords from.</param>
        /// <param name="lang">Language identifier forwarded to the POS tagger.</param>
        /// <returns>
        /// The joined keywords, or <c>null</c> when ranking or joining produced no result.
        /// </returns>
        public List <string> GetKeyWordsList(string sentence, string lang)
        {
            var taggedList       = WordPOSTagger.GetPosTaggedTokens(sentence, lang);
            var directedGraph    = GraphUtil.GraphInstance.BuildPOSGraph <string>(taggedList);
            var rankedDictionary = new PageRank <string>().Rank(directedGraph);

            var wordList = ExtractUtil.instance.GetNormalizedUniqueWordList(taggedList);

            if (rankedDictionary == null)
            {
                return null;
            }

            // Keep the highest-ranked third of the entries (integer division).
            var keywords = rankedDictionary
                           .OrderByDescending(p => p.Value)
                           .Take(rankedDictionary.Count / 3)
                           .Select(p => p.Key)
                           .ToList();

            IList <string> joinedKeywords = ExtractUtil.instance.JoinAdjacentWords(wordList, keywords);

            if (joinedKeywords == null)
            {
                return null;
            }

            // A bare `as` cast would silently return null whenever the helper hands back an
            // IList<string> that is not a List<string>; copy in that case instead of losing data.
            return joinedKeywords as List <string> ?? new List <string>(joinedKeywords);
        }
Beispiel #8
0
        /// <summary>
        /// At the start of an iteration, recomputes PageRank over the rotated link matrix of the
        /// current targets and stores the scaled integer scores in <c>ranks</c>. Nothing is
        /// changed when no matrix is available.
        /// </summary>
        public override void onStartIteration()
        {
            var rotatedMatrix = wRecord.context.targets.GetLinkMatrixRotated();

            if (rotatedMatrix == null)
            {
                return;
            }

            pageRank = new PageRank(rotatedMatrix, alpha, convergence, checkSteps);

            double[] scores = pageRank.ComputePageRank();

            // Scale each score by scoreUnit and convert it to an integer.
            List <int> scaledScores = new List <int>();
            for (int i = 0; i < scores.Length; i++)
            {
                scaledScores.Add(Convert.ToInt32(scores[i] * scoreUnit));
            }

            ranks = wRecord.context.targets.linkMatrix.MapToX(scaledScores);
        }
Beispiel #9
0
        // GET: SayfaUrlSirala
        // Splits `urls` and `words` on commas, scores every URL against the words
        // (occurrence count, meta, header and title points) and returns the view with the
        // URL details ordered by total points, both ascending and descending.
        public ActionResult Index(string urls, string words)
        {
            var model = new SayfaUrlSiralaModel
            {
                Words = StringOperations.GetListBySplit(words, ','),
                Urls  = StringOperations.GetListBySplit(urls, ',')
            };

            foreach (var url in model.Urls)
            {
                var detail = new UrlDetail { Url = url };

                // Lower-cased page source with popups stripped, then its cleaned form.
                string sourceHtml = HtmlPack.GetHtmlExludePopup(SiteSource.GetHtml(url).ToLower());
                detail.SourceHtml = sourceHtml;
                detail.CleanHtml  = SiteSource.GetCleanHtml(sourceHtml).ToLower();

                foreach (var word in model.Words)
                {
                    var keyword = new Keyword { Url = url, Word = word, Count = 0 };

                    // Count occurrences of every distinct language-compatible variant of the word.
                    List <string> variants = StringOperations.GetDifferentWords(
                        StringOperations.GetLanguageLowerCompatible(word));
                    foreach (var variant in variants)
                    {
                        keyword.Count += StringOperations.GetCountWordInSentence(detail.CleanHtml, variant);
                    }

                    detail.Keywords.Add(keyword);
                }

                var counts = new List <int>();
                foreach (var keyword in detail.Keywords)
                {
                    counts.Add(keyword.Count);
                }

                detail.PointByCount  = PageRank.GetPointByCount(counts);
                detail.PointByMeta   = PageRank.GetPointByMeta(detail.SourceHtml, model.Words);
                detail.PointByHeader = PageRank.GetPointByHead(detail.SourceHtml, model.Words);
                detail.PointByTitle  = PageRank.GetPointByTitle(detail.SourceHtml, model.Words);
                detail.PointSum     += detail.PointByCount + detail.PointByMeta + detail.PointByHeader + detail.PointByTitle;

                model.UrlDetails.Add(detail);
            }

            model.UrlDetailsAsc  = model.UrlDetails.OrderBy(u => u.PointSum).ToList();
            model.UrlDetailsDesc = model.UrlDetails.OrderByDescending(u => u.PointSum).ToList();

            return View(model);
        }
Beispiel #10
0
        /// <summary>
        /// Prepares per-domain graph scores for the given selection result. For every domain the
        /// site graph's ID matrix is stored in <c>p_matrix</c>; depending on <c>algorithm</c>,
        /// either a HITS ranking is stored in <c>p_hits</c> or scaled PageRank scores are stored
        /// in <c>p_rank</c>.
        /// </summary>
        /// <param name="context">The selection result providing the domains and their graphs.</param>
        /// <param name="log">The log passed on to the per-domain grouping.</param>
        public override void Prepare(DocumentSelectResult context, ILogBuilder log)
        {
            var groupedByDomain = context.GetByDomain(log);

            foreach (var domainEntry in groupedByDomain)
            {
                WebSiteGraph siteGraph = context.domainNameToGraph[domainEntry.Key];

                var idMatrix = siteGraph.GetIDMatrix(scoreUnit);
                p_matrix.Add(domainEntry.Key, idMatrix);

                switch (algorithm)
                {
                case GraphFactorAlgorithm.HITS:
                {
                    var hitsRank = new HITSRank();
                    hitsRank.recalculate(idMatrix, convergence, steps);
                    p_hits.Add(domainEntry.Key, hitsRank);
                    break;
                }

                case GraphFactorAlgorithm.PageRank:
                {
                    var ranker = new PageRank(idMatrix.GetMatrix(), alpha, convergence, steps);

                    double[] scores = ranker.ComputePageRank();

                    // Scale each score by scoreUnit and convert it to an integer.
                    List <Int32> scaledScores = new List <Int32>();
                    for (int i = 0; i < scores.Length; i++)
                    {
                        scaledScores.Add(Convert.ToInt32(scores[i] * scoreUnit));
                    }

                    Dictionary <String, Int32> domainRanks = idMatrix.MapToX(scaledScores);

                    p_rank.Add(domainEntry.Key, domainRanks);
                    break;
                }
                }
            }
        }
Beispiel #11
0
        public static void PageRankTopNTest()
        {
            // Original author's note: the PageRank algorithm can converge in the top-N
            // recommendation setting.
            // Loads the base links/ratings and the test ratings, converts their indexes to
            // zero-based, then runs TryTopN once per value in `dampingSteps` and once more
            // with 0.9.
            List <Link>   trainLinks   = Tools.GetLinks(BaseRatingFile);
            List <Rating> trainRatings = Tools.GetRatings(BaseRatingFile);
            List <Rating> heldOut      = Tools.GetRatings(TestRatingFile);

            Tools.UpdateIndexesToZeroBased(trainLinks);
            Tools.UpdateIndexesToZeroBased(trainRatings);
            Tools.UpdateIndexesToZeroBased(heldOut);

            var maxIds    = Tools.GetMaxNodeId(trainLinks);
            int nodeCount = Tools.TransformLinkedToId(trainLinks, maxIds.Item1, maxIds.Item2);

            PageRank ranker = new PageRank(nodeCount);

            double[] dampingSteps = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 0.9 };
            for (int i = 0; i < dampingSteps.Length; i++)
            {
                ranker.TryTopN(trainLinks, trainRatings, heldOut, maxIds.Item1, 50, dampingSteps[i], 1e-6);
                Console.WriteLine();
            }

            ranker.TryTopN(trainLinks, trainRatings, heldOut, maxIds.Item1, 50, 0.9, 1e-6);
        }
Beispiel #12
0
 static void Main(string[] args)
 {
     // Only constructs an empty directed graph and a PageRank instance;
     // neither object is used afterwards.
     var graph  = new DirectedGraph <string>();
     var ranker = new PageRank <string>();
 }
Beispiel #13
0
        /// <summary>
        /// Processes every grid row whose <c>blFlag</c> cell is not yet set: marks the row as
        /// processed, fetches the page named in <c>txtWebURL</c>, writes to <c>txtStatus</c>
        /// whether the page source matches a link pattern built from the domain entered in
        /// <c>txtMyURL</c>, and, when <c>chkPR</c> is checked, fills <c>txtPR</c>.
        /// </summary>
        /// <remarks>
        /// If at least one row was processed the method calls itself again, so it stops after
        /// the first pass that finds no unprocessed row. Any exception is caught and its
        /// message is shown in a message box.
        /// </remarks>
        public void ThreadProc()
        {
            try
            {
                bool   flags = false;
                string myurl = txtMyURL.Text.Trim();

                // Drop a single trailing slash before extracting the domain.
                if (myurl.Substring(myurl.Length - 1) == "/")
                {
                    myurl = myurl.Substring(0, myurl.Length - 1);
                }

                // NOTE(review): only "http://" is recognised here; an "https://" URL yields an
                // empty domain and a much looser pattern — confirm whether that is intended.
                string mydomain = Regex.Match(myurl, "http://[^/]+").Value;

                // The domain is literal text. Escape it so that '.' and any other regex
                // metacharacter in it are matched literally instead of being interpreted.
                string domainPattern = Regex.Escape(mydomain);

                // Friendly-link checking tool; regex tester: http://regexlib.com/RETester.aspx
                //<a[^<>]+href\s*=\s*["']*([^"' ]*)["']*[^<>]*?>([\s\S]+?)</a>
                // <a[\s\S]*?href=("(?<href>[^"]*)"|'(?<href>[^']*)'|(?<href>[^>\s]*))[^>]*?>(?<title>[\s\S]*?)</a>
                // @"<a[\s\S]*?href=(""(?<href>[^""]*)""|'(?<href>[^']*)'|(?<href>[^>\s]*))[^>]*?>(?<title>[\s\S]*?)</a>",RegexOptions.IgnoreCase   |   RegexOptions.Compiled);
                //string pattion = "<a[^<>]+href\\s*=\\s*[\"']*(" + mydomain + "/*)[\"']*[^<>]*?>([\\s\\S]+?)</a>";
                //string pattion = @"<a[\s\S]+?href=(""[^""]*"")|('[^']*')|([^>\s]*)[^>]*?>[\s\S]*?</a>";

                string pattion = "<a[^<>]+href\\s*=\\s*[\"']*(\"" + domainPattern + "/*\")|('" + domainPattern + "/*')|(" + domainPattern + "/*)[^<>]*?>([\\s\\S]+?)</a>";

                for (int i = 0; i < this.dataGridView1.RowCount; i++)
                {
                    var row = this.dataGridView1.Rows[i];

                    // Skip rows that were already processed.
                    if (Convert.ToBoolean(row.Cells["blFlag"].Value))
                    {
                        continue;
                    }

                    flags = true;

                    // The flag is known to be false at this point, so negating it marks the row as processed.
                    row.Cells["blFlag"].Value = true;

                    string nowurl  = Convert.ToString(row.Cells["txtWebURL"].Value).ToLower();
                    string tempstr = CaiJi.GetHtmlSource(nowurl);

                    if (Regex.IsMatch(tempstr, pattion, RegexOptions.IgnoreCase))
                    {
                        row.Cells["txtStatus"].Value = "正常";
                    }
                    else
                    {
                        row.Cells["txtStatus"].Value = "不正常";
                    }

                    if (chkPR.Checked)
                    {
                        row.Cells["txtPR"].Value = PageRank.GetGooglePR(nowurl);
                    }
                }

                // Run another pass while the previous one still found work to do.
                if (flags)
                {
                    ThreadProc();
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
Beispiel #14
0
        /// <summary>
        /// Recursively walks the links found on <paramref name="url"/>. For every link that is
        /// not yet in <c>links</c>, a <c>SubUrlDetail</c> with keyword counts is added to
        /// <c>subUrlDetails</c>, the link is added to <c>links</c>, and the link itself is
        /// visited one level deeper.
        /// </summary>
        /// <param name="url">Page whose links are examined.</param>
        /// <param name="depth">Current depth; the method returns immediately when it equals 3.</param>
        /// <param name="id">
        /// Identifier stored on each record created at this level; <c>id - 1</c> is stored as
        /// the parent identifier.
        /// </param>
        /// <param name="words">Words whose occurrences are counted in each linked page.</param>
        public static void SiteOrderByDepth(string url, int depth, int id, List <string> words)
        {
            if (depth == 3)
            {
                return;
            }

            string        pageHtml      = SiteSource.GetHtml(url).ToLower();
            string        pageCleanHtml = SiteSource.GetCleanHtml(pageHtml);
            List <string> pageLinks     = SiteSource.GetUrlsWithoutExtenscions(SiteSource.GetSubUrls(pageHtml), url);

            foreach (string link in pageLinks)
            {
                if (links.Contains(link))
                {
                    continue;
                }

                // NOTE(review): this compares the link with the page's HTML text rather than with
                // `url`, so the reuse branch looks practically unreachable — confirm the intent.
                bool reusePageHtml = link == pageHtml;

                string linkHtml = HtmlPack.GetHtmlExludePopup(
                    reusePageHtml ? pageHtml : SiteSource.GetHtml(link).ToLower());
                string linkCleanHtml = reusePageHtml
                    ? pageCleanHtml
                    : SiteSource.GetCleanHtml(linkHtml);

                var record = new SubUrlDetail
                {
                    Depth    = depth,
                    Id       = id,
                    Url      = link,
                    ParentId = id - 1
                };
                record.UrlDetail.Depth      = depth;
                record.UrlDetail.SourceHtml = linkHtml;
                record.UrlDetail.CleanHtml  = linkCleanHtml;
                record.UrlDetail.Url        = link;

                foreach (var word in words)
                {
                    var keyword = new Keyword { Url = link, Word = word.ToString(), Count = 0 };

                    // Count occurrences of every distinct language-compatible variant of the word.
                    List <string> variants = StringOperations.GetDifferentWords(
                        StringOperations.GetLanguageLowerCompatible(keyword.Word));
                    foreach (var variant in variants)
                    {
                        keyword.Count += StringOperations.GetCountWordInSentence(linkCleanHtml, variant);
                    }

                    record.UrlDetail.Keywords.Add(keyword);
                }

                var counts = new List <int>();
                foreach (var keyword in record.UrlDetail.Keywords)
                {
                    counts.Add(keyword.Count);
                }

                record.UrlDetail.PointByCount = PageRank.GetPointByCount(counts);
                record.UrlDetail.PointSum    += record.UrlDetail.PointByCount;
                subUrlDetails.Add(record);

                // Remember the link before descending so deeper levels do not revisit it.
                links.Add(link);
                SiteOrderByDepth(link, depth + 1, id + 1, words);
            }
        }