/// <summary>
/// Evaluates a link by matching the query of its target against the aggregate
/// link-token and page-token documents held by the crawl context.
/// </summary>
/// <remarks>
/// When the query matches neither aggregate document the link receives the full
/// <c>scoreUnit</c>. Otherwise the score is <c>scoreUnit</c> reduced by
/// (weighted similarity * <c>scoreUnit</c>), where the link similarity is weighted by
/// <c>target_sd</c> and the page similarity by <c>page_sd</c>. If
/// <c>doAdjustScoreByLanguageDetection</c> is set, the score is additionally
/// multiplied by the square of the language evaluation <c>ratioA</c> of the query terms.
/// Every step is reported to <c>wRecord.logBuilder</c>.
/// </remarks>
/// <param name="link">The link being evaluated.</param>
/// <returns>The evaluation result carrying the computed score.</returns>
public override spiderEvalRuleResult evaluate(spiderLink link)
{
    spiderEvalRuleResult result = new spiderEvalRuleResult(this);

    spiderTarget target = wRecord.context.targets.GetOrCreateTarget(link, false, false);
    termQueryDocument query = target.getQuery(expansionSteps, wRecord.logBuilder);

    wRecord.logBuilder.AppendLine("Target [" + link.url + "] query => [" + query.GetAllTermString().toCsvInLine(",") + "]");

    weightTableMatchCollection<termSpark, termSpark> matchLinks = query.GetSparkMatchAgainst<termSpark>((termDocument)wRecord.context.targets.dlTargetLinkTokens.AggregateDocument);
    weightTableMatchCollection<termSpark, termSpark> matchPage = query.GetSparkMatchAgainst<termSpark>((termDocument)wRecord.context.targets.dlTargetPageTokens.AggregateDocument);

    // Nothing in common with either aggregate document: award the full unit and stop.
    if ((!matchLinks.Any()) && (!matchPage.Any()))
    {
        result.score = scoreUnit;
        wRecord.logBuilder.AppendLine("D[" + link.url + "][" + target.tokens.GetAllTermString().toCsvInLine(",") + "] = no matches with query");
        return result;
    }

    wRecord.logBuilder.AppendLine("matchLinks => " + matchLinks.ToString());
    wRecord.logBuilder.AppendLine("matchPage => " + matchPage.ToString());

    // Weighted similarity against the link tokens and the page tokens.
    double pLSim = matchLinks.GetSemanticSimilarity() * target_sd;
    double pPSim = matchPage.GetSemanticSimilarity() * page_sd;
    double sim = pLSim + pPSim;

    // The score is the part of the unit that is NOT explained by similarity.
    double unit = (double)scoreUnit;
    double score = unit - (sim * unit);

    if (doAdjustScoreByLanguageDetection)
    {
        // Scale the score by how well the query terms pass the language evaluation.
        List<string> tkns = new List<string>();
        foreach (IWeightTableTerm spark in query)
        {
            tkns.Add(spark.nominalForm);
        }

        textEvaluation evaluation = new textEvaluation(wRecord.aJob.langTextEvaluator, null);
        evaluation.evaluateTokens(tkns, null, false);

        double evalAdj = Math.Pow(evaluation.ratioA, 2);
        result.score = Convert.ToInt32(score * evalAdj);

        wRecord.logBuilder.AppendLine();
        wRecord.logBuilder.AppendLine("Score is adjusted by language evaluation ratioA ^ 2: " + evalAdj);
    }
    else
    {
        // Without the language adjustment the computed score must still be stored;
        // previously it was dropped and the result kept its constructor default.
        result.score = Convert.ToInt32(score);
    }

    wRecord.logBuilder.AppendLine("D[" + link.url + "][" + target.tokens.GetAllTermString().toCsvInLine(",") + "]=[pL:" + pLSim.ToString("P2") + "][pP:" + pPSim.ToString("P2") + "]=" + sim.ToString("#0.0000") + " (" + result.score + ")");

    return result;
}
/// <summary>
/// Returns the query document of this target, building it on the first call.
/// </summary>
/// <remarks>
/// The query is built from the nominal forms of all terms in <c>tokens</c> and then
/// kept in <c>query</c>. Later calls return that stored instance as-is, so
/// <paramref name="expansionSteps"/> and <paramref name="loger"/> are only used by
/// the call that actually builds the query.
/// </remarks>
/// <param name="expansionSteps">Expansion steps passed to <c>AddQueryTerms</c>.</param>
/// <param name="loger">Log builder passed to <c>AddQueryTerms</c>.</param>
/// <returns>The stored or newly built query document.</returns>
public termQueryDocument getQuery(int expansionSteps, ILogBuilder loger)
{
    // Already built by an earlier call: hand back the stored instance.
    if (query != null)
    {
        return query;
    }

    query = new termQueryDocument();

    // Collect the nominal form of every token to use as the query terms.
    List<string> nominalForms = new List<string>();
    foreach (IWeightTableTerm token in tokens)
    {
        nominalForms.Add(token.nominalForm);
    }

    query.AddQueryTerms(nominalForms, expansionSteps, loger);

    return query;
}