예제 #1
0
        /**
         * This method returns a rank for the anchor url
         */
        private int getRankOfAnchor(LinkItem item)
        {
            //These variables will contain the max and avg of the match levels of the Anchor Url
            int maxMatchLevelForAnchor = 0;
            int avgMatchLevelForAnchor = 0;

            if (item.getAnchor() == null)
            {
                return(0);
            }

            StreamWriter sw = null;

            if (LogDebuggerControl.getInstance().debugRanker)
            {
                sw = new
                     StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true);
                sw.WriteLine(" ***** REQUEST FOR ANCHOR URL RANK************************************ ");
                sw.WriteLine(" URL : " + item.getLink());
                sw.WriteLine(" CONTENT OF ANCHOR:");
                sw.WriteLine(item.getAnchor());
                sw.Close();
            }

            //calculate the min and max of the match levels of the anchor url to the categories.
            if (anchorOptions == null)
            {
                anchorOptions = getOptions("anchor");
            }

            List <int> matchLevelsForAnchor = categorizer.classifyContentToAllCategories(item.getAnchor(), anchorOptions);

            maxMatchLevelForAnchor = calculateMax(matchLevelsForAnchor);
            avgMatchLevelForAnchor = calculateAvg(matchLevelsForAnchor);

            if (LogDebuggerControl.getInstance().debugRanker)
            {
                sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true);
                sw.WriteLine(" .MAX MATCH LEVEL OF ANCHOR: ");
                sw.WriteLine(maxMatchLevelForAnchor);
                sw.WriteLine(" .AVG MATCH LEVEL OF ANCHOR: ");
                sw.WriteLine(avgMatchLevelForAnchor);
                //sw.WriteLine(" .RANK OF ANCHOR: ");
                //sw.WriteLine((int)(RankParams.MinAndMaxRATIO * maxMatchLevelForAnchor + (1 - RankParams.MinAndMaxRATIO) * avgMatchLevelForAnchor));
                //sw.WriteLine(" * END ****************************************************************** ");
                sw.Close();
            }

            return((int)(RankerOptions.MinAndMaxRATIO * maxMatchLevelForAnchor + (1 - RankerOptions.MinAndMaxRATIO) * avgMatchLevelForAnchor));
        }
예제 #2
0
        /**
         * This method calculates the rank of a given url and returns it.
         */
        public int rankUrl(ResourceContent parentResource, LinkItem item)
        {
            //These variables will contain the ranks for the whole content match and nearby text match and
            //anchor match and the parentrank.
            int rankParentUrl = parentResource.getRankOfUrl();
            int anchorRank    = 0;
            //int wholePageRank = 0;
            int nearbyTextRank = 0;

            int neighborhood = 0;
            int context      = 0;
            int inherited    = 0;

            char[] separators = { ' ', '\t', '\n' };
            NumOfLinks++;
            sumOfTotalNearbyWords += item.getText().Split(separators).Length;
            sumOfTotalAnchorWords += item.getAnchor().Split(separators, StringSplitOptions.RemoveEmptyEntries).Length;

            StreamWriter sw = null;

            if (LogDebuggerControl.getInstance().debugRanker)
            {
                sw = new
                     StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true);
                sw.WriteLine(" *********HEAD REQUEST *********************************************");
                sw.WriteLine(" ***** DATA FOR RANKER******************************************** ");
                sw.WriteLine(" URL : " + item.getLink());
                sw.WriteLine(" PARENT URL : " + item.getParentUrl());
                sw.Close();
            }

            //rank of the whole page
            if (!((lastResourceContent != null) && (lastResourceContent.Equals(parentResource.getResourceContent()))))
            {
                lastResourceContent = parentResource.getResourceContent();
                wholePageRank       = getRankOfWholeContent(parentResource);
            }

            //rank of the nearby text
            nearbyTextRank = getRankOfNearbyText(item);

            DateTime endTimeOfNearby = DateTime.Now;

            //rank of the anchor url
            anchorRank = getRankOfAnchor(item);

            //rank of the neighborhood,that includes rank of the anchor and the nearby text
            if (anchorRank > RankerOptions.ConfidenceLevelOfAnchor)
            {
                context = 100;
            }
            else
            {
                //nearbyTextRank = getRankOfNearbyText(item);
                context = nearbyTextRank;
            }
            neighborhood = (int)(RankerOptions.BETTA * anchorRank + (1 - RankerOptions.BETTA) * context);

            //rank of the inherited,that includes the rank of the parentUrl and paren content
            inherited = (int)(RankerOptions.ALPHA * rankParentUrl + (1 - RankerOptions.ALPHA) * wholePageRank);

            if (LogDebuggerControl.getInstance().debugRanker)
            {
                sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true);
                sw.WriteLine("************************DATA CONCLUSION*************************");
                sw.WriteLine(" .PARENT RANK: ");
                sw.WriteLine(rankParentUrl);
                sw.WriteLine(" .RANK OF NEARBY TEXT: ");
                sw.WriteLine(nearbyTextRank);
                sw.WriteLine(" .AVG OF NEARBY WORDS");
                sw.WriteLine((int)(sumOfTotalNearbyWords / NumOfLinks));
                sw.WriteLine(" .RANK OF ANCHOR: ");
                sw.WriteLine(anchorRank);
                sw.WriteLine(" .AVG OF ANCHOR TEXT");
                sw.WriteLine((int)(sumOfTotalAnchorWords / NumOfLinks));
                sw.WriteLine(" .NEIGHBORHOOD: ");
                sw.WriteLine(neighborhood);
                sw.WriteLine(" .RANK OF WHOLE CONTENT: ");
                sw.WriteLine(wholePageRank);
                sw.WriteLine(" .INHERITED: ");
                sw.WriteLine(inherited);
                sw.WriteLine(" .RANK OF THE URL: ");
                sw.WriteLine((int)(RankerOptions.GAMMA * inherited + (1 - RankerOptions.GAMMA) * neighborhood));
                // sw.WriteLine(" * END ****************************************************************** ");
                sw.Close();
            }

            //Console.WriteLine(totalRankingTime.TotalSeconds);
            return((int)(RankerOptions.GAMMA * inherited + (1 - RankerOptions.GAMMA) * neighborhood));
        }