/**
 * Returns a rank for the text that surrounds the extracted url described by item.
 *
 * The nearby text (item.getText()) is classified against all categories using the
 * "nearby" options. Those options are fetched through getOptions the first time
 * they are needed and kept in nearbyOptions for later calls. The result is
 *
 *     MinAndMaxRATIO * max + (1 - MinAndMaxRATIO) * avg
 *
 * where max and avg are what calculateMax and calculateAvg return for the match
 * levels, and the blend is truncated to int.
 *
 * When the debugRanker flag of LogDebuggerControl is set, the request and the
 * computed levels are appended to the per-thread "DataForRank" log file.
 */
private int getRankOfNearbyText(LinkItem item)
{
    if (LogDebuggerControl.getInstance().debugRanker)
    {
        // using releases the file handle even when one of the writes throws, so the
        // log is never left locked for the second append further down.
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine(" ***** REQUEST FOR NEARBY TEXT RANK************************************ ");
            sw.WriteLine(" URL : " + item.getLink());
            sw.WriteLine(" CONTENT OF NEARBY TEXT:");
            sw.WriteLine(item.getText());
        }
    }

    // The classification options for nearby text are loaded lazily and reused.
    if (nearbyOptions == null)
    {
        nearbyOptions = getOptions("nearby");
    }

    // Match levels of the nearby text against every category, reduced to max and avg.
    List<int> matchLevelsForNearby = categorizer.classifyContentToAllCategories(item.getText(), nearbyOptions);
    int maxMatchLevelForNearby = calculateMax(matchLevelsForNearby);
    int avgMatchLevelForNearby = calculateAvg(matchLevelsForNearby);

    if (LogDebuggerControl.getInstance().debugRanker)
    {
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine(" .MAX MATCH LEVEL OF NEARBY TEXT: ");
            sw.WriteLine(maxMatchLevelForNearby);
            sw.WriteLine(" .AVG MATCH LEVEL OF NEARBY TEXT: ");
            sw.WriteLine(avgMatchLevelForNearby);
        }
    }

    return (int)(RankerOptions.MinAndMaxRATIO * maxMatchLevelForNearby + (1 - RankerOptions.MinAndMaxRATIO) * avgMatchLevelForNearby);
}
/**
 * Calculates the rank of the url described by item, which was extracted from
 * parentResource, and returns it.
 *
 * The rank is built from two components, each truncated to int:
 *
 *     neighborhood = BETTA * anchorRank + (1 - BETTA) * context
 *     inherited    = ALPHA * parentRank + (1 - ALPHA) * wholePageRank
 *     result       = GAMMA * inherited  + (1 - GAMMA) * neighborhood
 *
 * context is 100 when the anchor rank exceeds ConfidenceLevelOfAnchor and the
 * nearby-text rank otherwise.
 *
 * Side effects: increments NumOfLinks, adds the word counts of the nearby text and
 * of the anchor to sumOfTotalNearbyWords / sumOfTotalAnchorWords, and, when the
 * parent content differs from the one seen on the previous call, refreshes
 * lastResourceContent and wholePageRank. When the debugRanker flag of
 * LogDebuggerControl is set, the inputs and every intermediate rank are appended to
 * the per-thread "DataForRank" log file.
 */
public int rankUrl(ResourceContent parentResource, LinkItem item)
{
    int rankParentUrl = parentResource.getRankOfUrl();

    // Running totals used for the average word counts written to the debug log.
    // Both counts drop empty entries so that runs of whitespace are not counted
    // as words (the nearby-text count previously included them).
    char[] separators = { ' ', '\t', '\n' };
    NumOfLinks++;
    sumOfTotalNearbyWords += item.getText().Split(separators, StringSplitOptions.RemoveEmptyEntries).Length;
    sumOfTotalAnchorWords += item.getAnchor().Split(separators, StringSplitOptions.RemoveEmptyEntries).Length;

    if (LogDebuggerControl.getInstance().debugRanker)
    {
        // using releases the file handle even when a write throws; the helper
        // methods called below append to the same file.
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine(" *********HEAD REQUEST *********************************************");
            sw.WriteLine(" ***** DATA FOR RANKER******************************************** ");
            sw.WriteLine(" URL : " + item.getLink());
            sw.WriteLine(" PARENT URL : " + item.getParentUrl());
        }
    }

    // Rank of the whole page: recomputed only when the parent content is not the
    // one cached from the previous call.
    if (lastResourceContent == null || !lastResourceContent.Equals(parentResource.getResourceContent()))
    {
        lastResourceContent = parentResource.getResourceContent();
        wholePageRank = getRankOfWholeContent(parentResource);
    }

    // Nearby text is ranked before the anchor so the debug log keeps its order.
    int nearbyTextRank = getRankOfNearbyText(item);
    int anchorRank = getRankOfAnchor(item);

    // A sufficiently confident anchor overrides the nearby text.
    // NOTE(review): 100 is presumably the top of the rank scale - confirm.
    int context = anchorRank > RankerOptions.ConfidenceLevelOfAnchor ? 100 : nearbyTextRank;

    // Neighborhood: anchor rank blended with the context.
    int neighborhood = (int)(RankerOptions.BETTA * anchorRank + (1 - RankerOptions.BETTA) * context);

    // Inherited: rank of the parent url blended with the rank of the parent content.
    int inherited = (int)(RankerOptions.ALPHA * rankParentUrl + (1 - RankerOptions.ALPHA) * wholePageRank);

    int rank = (int)(RankerOptions.GAMMA * inherited + (1 - RankerOptions.GAMMA) * neighborhood);

    if (LogDebuggerControl.getInstance().debugRanker)
    {
        using (StreamWriter sw = new StreamWriter("DataForRank" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
        {
            sw.WriteLine("************************DATA CONCLUSION*************************");
            sw.WriteLine(" .PARENT RANK: ");
            sw.WriteLine(rankParentUrl);
            sw.WriteLine(" .RANK OF NEARBY TEXT: ");
            sw.WriteLine(nearbyTextRank);
            sw.WriteLine(" .AVG OF NEARBY WORDS");
            sw.WriteLine((int)(sumOfTotalNearbyWords / NumOfLinks));
            sw.WriteLine(" .RANK OF ANCHOR: ");
            sw.WriteLine(anchorRank);
            sw.WriteLine(" .AVG OF ANCHOR TEXT");
            sw.WriteLine((int)(sumOfTotalAnchorWords / NumOfLinks));
            sw.WriteLine(" .NEIGHBORHOOD: ");
            sw.WriteLine(neighborhood);
            sw.WriteLine(" .RANK OF WHOLE CONTENT: ");
            sw.WriteLine(wholePageRank);
            sw.WriteLine(" .INHERITED: ");
            sw.WriteLine(inherited);
            sw.WriteLine(" .RANK OF THE URL: ");
            sw.WriteLine(rank);
        }
    }

    return rank;
}