/// <summary>
/// Sums the dictionary weights of the entry's terms that pass the feature and query filters.
/// </summary>
/// <param name="entry">The entry whose space document supplies the terms.</param>
/// <param name="context">The context; its selected features (when present and non-empty) restrict the terms that count.</param>
/// <param name="log">The log.</param>
/// <returns>Sum of <c>weightDictionary</c> values for all accepted terms; 0 when no term is accepted.</returns>
public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    Double total = 0;

    foreach (String token in entry.spaceDocument.terms.GetTokens())
    {
        // A non-empty set of selected features acts as a whitelist.
        Boolean rejectedByFeatures = context.selectedFeatures != null
            && context.selectedFeatures.Count > 0
            && !context.selectedFeatures.ContainsKey(token);
        if (rejectedByFeatures)
        {
            continue;
        }

        // When query terms are defined, only terms that appear in the query contribute.
        if (queryTerms.Any() && !queryTerms.Contains(token))
        {
            continue;
        }

        total += weightDictionary.GetValue(token);
    }

    return total;
}
/// <summary>
/// Scores the entry either with the classifier or from the precomputed score dictionary.
/// </summary>
/// <param name="entry">The entry to score.</param>
/// <param name="context">The context; its space model is used when term weights are computed.</param>
/// <param name="log">The log, forwarded to the classifier.</param>
/// <returns>
/// With <c>useMachineLearning</c> set: the value returned by <c>classifier.DoScore</c>.
/// Otherwise: the compressed vector stored in <c>scoreDictionary</c> for the entry, or 0 when there is none.
/// </returns>
public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    if (!useMachineLearning)
    {
        if (!scoreDictionary.ContainsKey(entry.AssignedID))
        {
            return 0;
        }

        var storedVector = scoreDictionary[entry.AssignedID];
        return storedVector.CompressNumericVector(vectorCompression);
    }

    // Build the feature vector for this document from the weights of the selected terms.
    WeightDictionary termWeights = TermWeightModel.GetWeights(SelectedTerms.GetKeys(), entry.spaceDocument, context.spaceModel);
    var featureVector = fvConstructor.ConstructFeatureVector(termWeights, entry.AssignedID);

    // -1 is passed to the classifier when no id is registered for the entry in sc_id.
    Int32 labelId = sc_id.ContainsKey(entry.AssignedID) ? sc_id[entry.AssignedID] : -1;

    Double result = classifier.DoScore(featureVector, log, labelId);
    return result;
}
/// <summary>
/// Collects the primary terms of every projected item together with the score of the matching entry.
/// </summary>
/// <param name="projection">The projection, keyed by the same IDs as the score entries.</param>
/// <param name="scores">The scored entries; looked up through <c>GetByAssignedID</c>.</param>
/// <param name="logger">The logger, forwarded to <c>GetByAssignedID</c>.</param>
/// <returns>A dictionary fed with the primary terms and entry score for each projection key that has a non-null entry.</returns>
public static TokenFrequencyAndScoreDictionary ProjectPrimaryTermsToScores(ProjectionDictionary projection, DocumentSelectResult scores, ILogBuilder logger)
{
    var entriesById = scores.GetByAssignedID(logger);
    var result = new TokenFrequencyAndScoreDictionary();

    foreach (var projected in projection)
    {
        // Projection items without a scored entry are skipped.
        if (!entriesById.ContainsKey(projected.Key))
        {
            continue;
        }

        DocumentSelectResultEntry matched = entriesById[projected.Key];
        if (matched == null)
        {
            continue;
        }

        Double entryScore = matched.score;
        result.Add(projected.Value.primary.terms, entryScore);
    }

    return result;
}
/// <summary>
/// Scores the entry from the number of its inbound and/or outbound links in the web site graph,
/// optionally normalized by graph-level or entry-level link/node counts.
/// </summary>
/// <param name="entry">The entry; its <c>DomainID</c> selects the graph and its <c>AssignedID</c> selects the node.</param>
/// <param name="context">The context that maps domain names to graphs.</param>
/// <param name="log">The log.</param>
/// <returns>
/// The link count selected by <c>functionFlags</c>, divided by every divisor selected by <c>functionFlags</c>.
/// Returns 0 when no graph is registered for the domain, when the link count is 0,
/// or when any selected divisor is 0 (the ratio is undefined, so the entry gets no score
/// instead of an infinite one).
/// </returns>
public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    WebSiteGraph webSiteGraph = context.domainNameToGraph[entry.DomainID];

    // The graph registered for a domain may be null (it is stored via null-conditional access when the context is prepared).
    if (webSiteGraph == null)
    {
        return 0;
    }

    freeGraphNodeAndLinks outLinks = webSiteGraph.GetLinks(entry.AssignedID, true, false);
    freeGraphNodeAndLinks inLinks = webSiteGraph.GetLinks(entry.AssignedID, false, true);

    Double score = 0;

    if (functionFlags.HasFlag(GraphFactorFunctionEnum.count_outbound))
    {
        score += outLinks.Count;
    }

    if (functionFlags.HasFlag(GraphFactorFunctionEnum.count_inbound))
    {
        score += inLinks.Count;
    }

    // Nothing to normalize.
    if (score == 0)
    {
        return score;
    }

    if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_graphlinks))
    {
        score = DivideOrZero(score, webSiteGraph.CountLinks());
    }

    if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_graphnodes))
    {
        score = DivideOrZero(score, webSiteGraph.CountNodes());
    }

    // Only one link direction may have been counted above, so the other direction can legitimately be 0 here.
    if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_inbound))
    {
        score = DivideOrZero(score, inLinks.Count);
    }

    if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_outbound))
    {
        score = DivideOrZero(score, outLinks.Count);
    }

    if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_linkCount))
    {
        score = DivideOrZero(score, inLinks.Count + outLinks.Count);
    }

    return score;
}

/// <summary>
/// Divides <paramref name="value"/> by <paramref name="divisor"/>, returning 0 instead of Infinity/NaN when the divisor is 0.
/// </summary>
private static Double DivideOrZero(Double value, Double divisor)
{
    return divisor == 0 ? 0 : value / divisor;
}
/// <summary>
/// Returns the statistic selected by <c>functionName</c> for the given entry.
/// </summary>
/// <param name="entry">The entry; its <c>AssignedID</c> selects the statistics record.</param>
/// <param name="context">The context.</param>
/// <param name="log">The log.</param>
/// <returns>
/// The selected metric of the entry's statistics after recalculation. For <c>Ordinal</c>, the number of
/// assigned IDs minus the index of the entry's ID. Unrecognized values fall back to <c>Count</c>.
/// </returns>
public override Double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    var stats = statsByAssignedID[entry.AssignedID];

    // Refresh all derived statistics before any of them is read.
    stats.reCalculate(instanceCountCollection<string>.preCalculateTasks.all);

    switch (functionName)
    {
        case ScoreModelMetricFactorEnum.varianceFreq:
            return stats.varianceFreq;

        case ScoreModelMetricFactorEnum.TotalScore:
            return stats.TotalScore;

        case ScoreModelMetricFactorEnum.standardDeviation:
            return stats.standardDeviation;

        case ScoreModelMetricFactorEnum.entropyFreq:
            return stats.entropyFreq;

        case ScoreModelMetricFactorEnum.avgFreq:
            return stats.avgFreq;

        case ScoreModelMetricFactorEnum.Ordinal:
            return assignedIDs.Count - assignedIDs.IndexOf(entry.AssignedID);

        case ScoreModelMetricFactorEnum.Count:
        default:
            return stats.Count;
    }
}
/// <summary>
/// Scores the entry from its feature vector stored in <c>vectorDictionary</c>.
/// </summary>
/// <param name="entry">The entry; its <c>DomainID</c> and <c>AssignedID</c> select the vector.</param>
/// <param name="context">The context.</param>
/// <param name="log">The log; receives a message when no vector is found for the entry.</param>
/// <returns>
/// 0 when no vector is found. Otherwise the value selected by the first matching <c>computation</c> flag
/// (offset, variance, distance; the first dimension when none matches), negated when the inverse flag is set.
/// A NaN result is replaced by 0.
/// </returns>
public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    FeatureVectorWithLabelID fv = vectorDictionary.Get(entry.DomainID, entry.AssignedID);

    if (fv == null)
    {
        log.log("Can't find vector dictionary entry for [" + entry.DomainID + "]>[" + entry.AssignedID + "]");
        return 0;
    }

    Double sc = 0;

    if (computation.HasFlag(ScoreComputationModeEnum.offset))
    {
        sc = fv.CompressByTrueDimension(fv.labelID);
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.variance))
    {
        sc = fv.dimensions.GetVarianceCoefficient();
    }
    else if (computation.HasFlag(ScoreComputationModeEnum.distance))
    {
        sc = fv.CompressNumericVector(imbSCI.Core.enums.operation.max);
    }
    else
    {
        sc = fv.dimensions[0];
    }

    if (computation.HasFlag(ScoreComputationModeEnum.inverse))
    {
        sc = -sc;
    }

    // NaN never compares equal to anything (including itself), so "sc == Double.NaN" is always false;
    // Double.IsNaN is the only reliable check.
    if (Double.IsNaN(sc))
    {
        sc = 0;
    }

    return sc;
}
/// <summary>
/// Returns the precomputed graph rank (HITS or PageRank, depending on <c>algorithm</c>) of the entry.
/// </summary>
/// <param name="entry">The entry; its <c>DomainID</c> selects the per-domain ranks and its <c>AssignedID</c> selects the value.</param>
/// <param name="context">The context.</param>
/// <param name="log">The log.</param>
/// <returns>
/// For HITS: the stored value multiplied by <c>scoreUnit</c>. For PageRank: the stored value.
/// 0 when the domain or the entry has no stored rank, or when <c>algorithm</c> is neither of the two.
/// </returns>
public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
{
    Double score = 0;

    switch (algorithm)
    {
        case GraphFactorAlgorithm.HITS:
            if (p_hits.ContainsKey(entry.DomainID))
            {
                HITSRank hits = p_hits[entry.DomainID];
                if (hits.ContainsKey(entry.AssignedID))
                {
                    score = hits[entry.AssignedID] * scoreUnit;
                }
            }
            break;

        case GraphFactorAlgorithm.PageRank:
            // Same domain guard as the HITS branch: a domain without computed ranks scores 0 instead of throwing.
            if (p_rank.ContainsKey(entry.DomainID))
            {
                var ranks = p_rank[entry.DomainID];
                if (ranks.ContainsKey(entry.AssignedID))
                {
                    score = ranks[entry.AssignedID];
                }
            }
            break;
    }

    return score;
}
/// <summary>
/// Computes the score of the specified entry. Must be implemented by every concrete subclass.
/// </summary>
/// <param name="entry">The entry to score.</param>
/// <param name="context">The document selection result that serves as the scoring context.</param>
/// <param name="log">The log builder available to the implementation.</param>
/// <returns>The score of the entry.</returns>
public abstract Double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log);
/// <summary>
/// Builds a <see cref="DocumentSelectResult"/> from the operation context: copies the shared models,
/// registers the graph of every web site, creates one result entry per web document and finally
/// lets the ranking model (if any) prepare itself on the new context.
/// </summary>
/// <param name="context">The operation context supplying the space model, selected features and web sites.</param>
/// <param name="ranking">The ranking method; may be null, in which case no name/query/description is set and no model is prepared.</param>
/// <param name="folder">The folder assigned to the result.</param>
/// <param name="log">The log; receives a message for every entry that ends up without text or space model.</param>
/// <returns>The populated selection context.</returns>
public static DocumentSelectResult PrepareContext(this OperationContext context, DocumentRankingMethod ranking, folderNode folder, ILogBuilder log)
{
    DocumentSelectResult selectContext = new DocumentSelectResult();
    selectContext.stemmingContext = context.stemmContext;
    selectContext.spaceModel = context.spaceModel;
    selectContext.folder = folder;

    // Must be assigned before the description below is built; otherwise the description reports
    // whatever a fresh DocumentSelectResult holds instead of the features actually in use.
    selectContext.selectedFeatures = context.SelectedFeatures;

    if (ranking != null)
    {
        selectContext.name = ranking.model.GetSignature();
        selectContext.query = ranking.query;

        builderForText builder = new builderForText();
        ranking.Describe(builder);
        builder.AppendLine("Selected features [" + selectContext.selectedFeatures?.description + "].");

        selectContext.description = builder.GetContent().Replace(Environment.NewLine, "");
    }

    foreach (KeyValuePair<string, WebSiteDocuments> pair in context.webSiteByDomain)
    {
        selectContext.domainNameToGraph.Add(pair.Key, pair.Value?.extensions?.graph);

        // The graph lookup above tolerates a null site; the document loop must tolerate it as well.
        if (pair.Value == null)
        {
            continue;
        }

        string dn = pair.Value.domain;

        foreach (WebSiteDocument doc in pair.Value.documents)
        {
            DocumentSelectResultEntry entry = new DocumentSelectResultEntry();

            // No text document is looked up here; entries are created from the space model only.
            TextDocument text = null;
            string err = "";

            SpaceDocumentModel spaceDocument = context.spaceModel.documents.FirstOrDefault(x => x.name == doc.AssignedID);
            if (spaceDocument == null)
            {
                err += "Failed to find space model document for [" + doc.AssignedID + "]";
            }

            entry.SetEntry(dn, doc, spaceDocument, text);

            if (!entry.HasTextOrSpaceModel)
            {
                log.log(err);
            }

            selectContext.items.Add(entry);
        }
    }

    // PREPARATION OF MODEL
    if (ranking != null)
    {
        ranking.model.Prepare(selectContext, log);
    }

    return selectContext;
}
/// <summary>
/// Applies the limits of the query to the scored items: optional per-domain normalization,
/// descending sort by score, score threshold and size limit (global or per domain).
/// </summary>
/// <param name="query">The query holding the options, threshold and size limit.</param>
/// <param name="context">The context whose items are sorted and trimmed in place.</param>
/// <param name="log">The log.</param>
/// <returns>The same <paramref name="context"/> instance, after trimming.</returns>
public static DocumentSelectResult ExecuteLimit(this DocumentSelectQuery query, DocumentSelectResult context, ILogBuilder log)
{
    if (query.options.HasFlag(DocumentSelectQueryOptions.DomainLevelNormalization))
    {
        log.log("DS Scores normalized on website / domain level");
        context.items.NormalizeWithinDomain(log);
    }

    // Re-order the items from the highest to the lowest score.
    List<DocumentSelectResultEntry> ranked = context.items.OrderByDescending(x => x.score).ToList();
    context.items.Clear();
    context.items.AddRange(ranked);

    // A threshold of exactly 0.0 means "no threshold".
    if (query.TrasholdLimit != 0.0)
    {
        List<DocumentSelectResultEntry> belowThreshold = context.items.Where(x => x.score < query.TrasholdLimit).ToList();
        foreach (DocumentSelectResultEntry rejected in belowThreshold)
        {
            context.items.Remove(rejected);
        }
    }

    if (query.SizeLimit <= 0)
    {
        return context;
    }

    if (!query.options.HasFlag(DocumentSelectQueryOptions.ApplyDomainLevelLimits))
    {
        // Global limit: keep only the first SizeLimit entries.
        if (context.Count > query.SizeLimit)
        {
            context.RemoveRange(query.SizeLimit, context.Count - query.SizeLimit);
        }

        return context;
    }

    // Per-domain limit: collect, for every domain, the entries beyond the limit.
    List<DocumentSelectResultEntry> surplus = new List<DocumentSelectResultEntry>();

    foreach (var domain in context.GetByDomain(log))
    {
        List<DocumentSelectResultEntry> domainRanked = domain.Value.OrderByDescending(x => x.score).ToList();

        // A forced home page is taken out of the candidates (so it is never dropped) but still uses one slot.
        Int32 reservedSlots = 0;
        if (query.options.HasFlag(DocumentSelectQueryOptions.ForceHomePage))
        {
            DocumentSelectResultEntry homePage = domainRanked.GetDocWithShortestID();
            if (domainRanked.Remove(homePage))
            {
                reservedSlots = 1;
            }
        }

        for (Int32 position = 0; position < domainRanked.Count; position++)
        {
            if (reservedSlots + position >= query.SizeLimit)
            {
                surplus.Add(domainRanked[position]);
            }
        }
    }

    foreach (DocumentSelectResultEntry dropped in surplus)
    {
        context.Remove(dropped);
    }

    return context;
}