Пример #1
0
        /// <summary>
        /// Scores a link by comparing the query built from its target against the aggregate
        /// link-token and page-token documents held in the crawl context.
        /// </summary>
        /// <remarks>
        /// If the query matches neither aggregate document the result receives the full
        /// <c>scoreUnit</c>. Otherwise the two semantic similarities are weighted by
        /// <c>target_sd</c> and <c>page_sd</c>, summed, and the score becomes
        /// <c>scoreUnit - (sum * scoreUnit)</c>. When <c>doAdjustScoreByLanguageDetection</c> is set,
        /// that score is additionally multiplied by the square of the language evaluation's
        /// <c>ratioA</c>. Every step is written to <c>wRecord.logBuilder</c>.
        /// </remarks>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>The result created for this rule, with its score assigned.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            spiderTarget         target = wRecord.context.targets.GetOrCreateTarget(link, false, false);

            termQueryDocument query = target.getQuery(expansionSteps, wRecord.logBuilder);

            wRecord.logBuilder.AppendLine("Target [" + link.url + "] query => [" + query.GetAllTermString().toCsvInLine(",") + "]");

            weightTableMatchCollection <termSpark, termSpark> matchLinks = query.GetSparkMatchAgainst <termSpark>((termDocument)wRecord.context.targets.dlTargetLinkTokens.AggregateDocument);
            weightTableMatchCollection <termSpark, termSpark> matchPage  = query.GetSparkMatchAgainst <termSpark>((termDocument)wRecord.context.targets.dlTargetPageTokens.AggregateDocument);

            // No overlap with either aggregate document: award the full score unit and stop.
            if ((!matchLinks.Any()) && (!matchPage.Any()))
            {
                result.score = scoreUnit;
                wRecord.logBuilder.AppendLine("D[" + link.url + "][" + target.tokens.GetAllTermString().toCsvInLine(",") + "] = no matches with query");
                return(result);
            }

            wRecord.logBuilder.AppendLine("matchLinks => " + matchLinks.ToString());

            wRecord.logBuilder.AppendLine("matchPage => " + matchPage.ToString());

            double pLSim = matchLinks.GetSemanticSimilarity() * target_sd;
            double pPSim = matchPage.GetSemanticSimilarity() * page_sd;

            double sim = (pLSim + pPSim);

            double sc = sim * (double)scoreUnit;

            // The higher the combined similarity, the lower the resulting score.
            double score = ((double)scoreUnit) - sc;

            if (doAdjustScoreByLanguageDetection)
            {
                // Modification of the diversity score: scale it by the language evaluation of the query terms.
                List <string> tkns = new List <string>();
                foreach (IWeightTableTerm spark in query)
                {
                    tkns.Add(spark.nominalForm);
                }
                textEvaluation evaluation = new textEvaluation(wRecord.aJob.langTextEvaluator, null);
                evaluation.evaluateTokens(tkns, null, false);

                double evalAdj = Math.Pow(evaluation.ratioA, 2);
                result.score = Convert.ToInt32(score * evalAdj);
                wRecord.logBuilder.AppendLine();
                wRecord.logBuilder.AppendLine("Score is adjusted by language evaluation ratioA ^ 2: " + evalAdj);
            }
            else
            {
                // Previously the computed score was dropped when the language adjustment was
                // disabled, leaving result.score at whatever the constructor set.
                result.score = Convert.ToInt32(score);
            }

            wRecord.logBuilder.AppendLine("D[" + link.url + "][" + target.tokens.GetAllTermString().toCsvInLine(",") + "]=[pL:" + pLSim.ToString("P2") + "][pP:" + pPSim.ToString("P2") + "]=" + sim.ToString("#0.0000") + " (" + result.score + ")");

            return(result);
        }
Пример #2
0
 /// <summary>
 /// Returns the query document for this instance, building it on first use from the
 /// nominal forms of the entries in <c>tokens</c>.
 /// </summary>
 /// <param name="expansionSteps">Passed to <c>AddQueryTerms</c>; only used when the query is first built.</param>
 /// <param name="loger">Log builder passed to <c>AddQueryTerms</c>; only used when the query is first built.</param>
 /// <returns>The stored query document.</returns>
 public termQueryDocument getQuery(int expansionSteps, ILogBuilder loger)
 {
     // Already built by an earlier call: hand back the stored instance.
     if (query != null)
     {
         return query;
     }

     // The field is assigned before the terms are added, so it is set even if adding terms fails.
     query = new termQueryDocument();

     List<string> nominalForms = new List<string>();
     foreach (IWeightTableTerm token in tokens)
     {
         nominalForms.Add(token.nominalForm);
     }

     query.AddQueryTerms(nominalForms, expansionSteps, loger);
     return query;
 }