/**
 * Returns a rank for the anchor text of the given link.
 *
 * The anchor is classified against all categories (using the lazily loaded
 * "anchor" options); the resulting match levels are reduced to their maximum
 * and their average, and the rank is the weighted blend
 *     MinAndMaxRATIO * max + (1 - MinAndMaxRATIO) * avg
 * truncated to an int.
 *
 * item    - the link whose anchor is ranked.
 * returns - 0 when the link has no anchor, otherwise the blended rank.
 *
 * When LogDebuggerControl's debugRanker flag is set, the request and the
 * computed levels are appended to a per-thread "DataForRank<threadId>.txt" file.
 */
private int getRankOfAnchor(LinkItem item)
{
    // A link without an anchor contributes nothing to the rank.
    if (item.getAnchor() == null)
    {
        return 0;
    }

    if (LogDebuggerControl.getInstance().debugRanker)
    {
        // 'using' guarantees the log file is released even if a write throws.
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine(" ***** REQUEST FOR ANCHOR URL RANK************************************ ");
            sw.WriteLine(" URL : " + item.getLink());
            sw.WriteLine(" CONTENT OF ANCHOR:");
            sw.WriteLine(item.getAnchor());
        }
    }

    // The categorizer options for anchors are loaded once, on first use.
    if (anchorOptions == null)
    {
        anchorOptions = getOptions("anchor");
    }

    // Calculate the max and the average of the match levels of the anchor to the categories.
    List<int> matchLevelsForAnchor = categorizer.classifyContentToAllCategories(item.getAnchor(), anchorOptions);
    int maxMatchLevelForAnchor = calculateMax(matchLevelsForAnchor);
    int avgMatchLevelForAnchor = calculateAvg(matchLevelsForAnchor);

    if (LogDebuggerControl.getInstance().debugRanker)
    {
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine(" .MAX MATCH LEVEL OF ANCHOR: ");
            sw.WriteLine(maxMatchLevelForAnchor);
            sw.WriteLine(" .AVG MATCH LEVEL OF ANCHOR: ");
            sw.WriteLine(avgMatchLevelForAnchor);
        }
    }

    return (int)(RankerOptions.MinAndMaxRATIO * maxMatchLevelForAnchor + (1 - RankerOptions.MinAndMaxRATIO) * avgMatchLevelForAnchor);
}
/**
 * Calculates the rank of a given url and returns it.
 *
 * The rank blends two components:
 *   neighborhood = BETTA * anchorRank + (1 - BETTA) * context
 *       where context is 100 when anchorRank exceeds
 *       RankerOptions.ConfidenceLevelOfAnchor, otherwise the nearby-text rank;
 *   inherited    = ALPHA * parentRank + (1 - ALPHA) * wholePageRank
 * and the result is GAMMA * inherited + (1 - GAMMA) * neighborhood,
 * each step truncated to an int.
 *
 * parentResource - the page on which the link was found; supplies the parent
 *                  rank and the content used for the whole-page rank.
 * item           - the link to rank.
 * returns        - the combined rank of the link.
 *
 * Side effects: updates the running link/word counters, and re-computes the
 * cached whole-page rank when the parent content differs from the last one
 * seen. When LogDebuggerControl's debugRanker flag is set, details are
 * appended to a per-thread "DataForRank<threadId>.txt" file.
 */
public int rankUrl(ResourceContent parentResource, LinkItem item)
{
    int rankParentUrl = parentResource.getRankOfUrl();

    // Running statistics; they are only reported (as per-link averages) in the debug log.
    char[] separators = { ' ', '\t', '\n' };
    NumOfLinks++;
    sumOfTotalNearbyWords += item.getText().Split(separators).Length;

    // getRankOfAnchor accepts a link without an anchor (rank 0), so the
    // statistics must not fail on a null anchor either.
    string anchor = item.getAnchor();
    if (anchor != null)
    {
        sumOfTotalAnchorWords += anchor.Split(separators, StringSplitOptions.RemoveEmptyEntries).Length;
    }

    if (LogDebuggerControl.getInstance().debugRanker)
    {
        // 'using' guarantees the log file is released even if a write throws.
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine(" *********HEAD REQUEST *********************************************");
            sw.WriteLine(" ***** DATA FOR RANKER******************************************** ");
            sw.WriteLine(" URL : " + item.getLink());
            sw.WriteLine(" PARENT URL : " + item.getParentUrl());
        }
    }

    // Rank of the whole page: re-computed only when the parent content changed
    // since the previous call, otherwise the cached wholePageRank is reused.
    if (lastResourceContent == null || !lastResourceContent.Equals(parentResource.getResourceContent()))
    {
        lastResourceContent = parentResource.getResourceContent();
        wholePageRank = getRankOfWholeContent(parentResource);
    }

    // Rank of the nearby text, then of the anchor (this order also fixes the
    // order of their debug-log entries).
    int nearbyTextRank = getRankOfNearbyText(item);
    int anchorRank = getRankOfAnchor(item);

    // Rank of the neighborhood: a confident anchor match overrides the nearby text.
    int context;
    if (anchorRank > RankerOptions.ConfidenceLevelOfAnchor)
    {
        context = 100;
    }
    else
    {
        context = nearbyTextRank;
    }
    int neighborhood = (int)(RankerOptions.BETTA * anchorRank + (1 - RankerOptions.BETTA) * context);

    // Rank of the inherited part: the parent url rank and the parent content rank.
    int inherited = (int)(RankerOptions.ALPHA * rankParentUrl + (1 - RankerOptions.ALPHA) * wholePageRank);

    // Computed once so the logged value and the returned value cannot diverge.
    int rank = (int)(RankerOptions.GAMMA * inherited + (1 - RankerOptions.GAMMA) * neighborhood);

    if (LogDebuggerControl.getInstance().debugRanker)
    {
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine("************************DATA CONCLUSION*************************");
            sw.WriteLine(" .PARENT RANK: ");
            sw.WriteLine(rankParentUrl);
            sw.WriteLine(" .RANK OF NEARBY TEXT: ");
            sw.WriteLine(nearbyTextRank);
            sw.WriteLine(" .AVG OF NEARBY WORDS");
            sw.WriteLine((int)(sumOfTotalNearbyWords / NumOfLinks));
            sw.WriteLine(" .RANK OF ANCHOR: ");
            sw.WriteLine(anchorRank);
            sw.WriteLine(" .AVG OF ANCHOR TEXT");
            sw.WriteLine((int)(sumOfTotalAnchorWords / NumOfLinks));
            sw.WriteLine(" .NEIGHBORHOOD: ");
            sw.WriteLine(neighborhood);
            sw.WriteLine(" .RANK OF WHOLE CONTENT: ");
            sw.WriteLine(wholePageRank);
            sw.WriteLine(" .INHERITED: ");
            sw.WriteLine(inherited);
            sw.WriteLine(" .RANK OF THE URL: ");
            sw.WriteLine(rank);
        }
    }

    return rank;
}