/// <summary>
        /// Sums the dictionary weights of the entry's tokens that pass the feature filter and the query filter.
        /// </summary>
        /// <param name="entry">Entry whose <c>spaceDocument.terms</c> tokens are scored.</param>
        /// <param name="context">Selection context; when its <c>selectedFeatures</c> is non-null and non-empty, tokens it does not contain are skipped.</param>
        /// <param name="log">The log (not used by this implementation).</param>
        /// <returns>Sum of <c>weightDictionary.GetValue(token)</c> over the accepted tokens; 0 when no token is accepted.</returns>
        public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            Double total = 0;

            foreach (String token in entry.spaceDocument.terms.GetTokens())
            {
                // Feature filter: only applied when a non-empty feature selection exists.
                if (context.selectedFeatures != null
                    && context.selectedFeatures.Count > 0
                    && !context.selectedFeatures.ContainsKey(token))
                {
                    continue;
                }

                // Query filter: only applied when there are query terms at all.
                if (queryTerms.Any() && !queryTerms.Contains(token))
                {
                    continue;
                }

                total += weightDictionary.GetValue(token);
            }

            return total;
        }
        /// <summary>
        /// Scores the entry either with the classifier (when <c>useMachineLearning</c> is set) or from the precomputed score dictionary.
        /// </summary>
        /// <param name="entry">The entry to score; its <c>AssignedID</c> is the lookup key.</param>
        /// <param name="context">The context; its <c>spaceModel</c> is used when term weights are computed.</param>
        /// <param name="log">The log, forwarded to the classifier.</param>
        /// <returns>
        /// The classifier score in machine-learning mode; otherwise the compressed value of the stored vector,
        /// or 0 when the score dictionary has no vector for the entry.
        /// </returns>
        public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            if (!useMachineLearning)
            {
                // Precomputed path: a missing vector scores 0.
                if (!scoreDictionary.ContainsKey(entry.AssignedID))
                {
                    return 0;
                }

                return scoreDictionary[entry.AssignedID].CompressNumericVector(vectorCompression);
            }

            WeightDictionary termWeights = TermWeightModel.GetWeights(SelectedTerms.GetKeys(), entry.spaceDocument, context.spaceModel);
            var featureVector = fvConstructor.ConstructFeatureVector(termWeights, entry.AssignedID);

            // -1 is passed to the classifier when no label id is registered for this entry.
            Int32 labelId = sc_id.ContainsKey(entry.AssignedID) ? sc_id[entry.AssignedID] : -1;

            return classifier.DoScore(featureVector, log, labelId);
        }
示例#3
0
        /// <summary>
        /// Builds a token frequency-and-score dictionary from a projection: for every projection pair whose key
        /// maps to a non-null scored entry, the pair's primary terms are added together with that entry's score.
        /// </summary>
        /// <param name="projection">Pairs of assigned ID and projected item; <c>Value.primary.terms</c> is what gets added.</param>
        /// <param name="scores">Scored entries, looked up through <c>GetByAssignedID</c>.</param>
        /// <param name="logger">The log, forwarded to <c>GetByAssignedID</c>.</param>
        /// <returns>The populated dictionary; pairs without a matching entry contribute nothing.</returns>
        public static TokenFrequencyAndScoreDictionary ProjectPrimaryTermsToScores(ProjectionDictionary projection, DocumentSelectResult scores, ILogBuilder logger)
        {
            var entriesById = scores.GetByAssignedID(logger);
            TokenFrequencyAndScoreDictionary result = new TokenFrequencyAndScoreDictionary();

            foreach (var pair in projection)
            {
                if (!entriesById.ContainsKey(pair.Key))
                {
                    continue;
                }

                DocumentSelectResultEntry match = entriesById[pair.Key];
                if (match == null)
                {
                    continue;
                }

                Double score = match.score;
                result.Add(pair.Value.primary.terms, score);
            }

            return result;
        }
示例#4
0
        /// <summary>
        /// Scores the entry from its link counts in the web site graph of its domain.
        /// </summary>
        /// <remarks>
        /// The score starts as the sum of the outbound and/or inbound link counts selected by
        /// <c>functionFlags</c>, and is then divided by each divisor whose <c>divide_by_*</c> flag is set.
        /// A divisor equal to zero is skipped, so the result is always finite; previously such a
        /// division turned the score into infinity.
        /// </remarks>
        /// <param name="entry">The entry to score; <c>DomainID</c> selects the graph, <c>AssignedID</c> selects the node.</param>
        /// <param name="context">The context providing <c>domainNameToGraph</c>.</param>
        /// <param name="log">The log (not used by this implementation).</param>
        /// <returns>The link-based score; 0 when the selected link counts sum to zero.</returns>
        public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            WebSiteGraph webSiteGraph = context.domainNameToGraph[entry.DomainID];

            freeGraphNodeAndLinks outLinks = webSiteGraph.GetLinks(entry.AssignedID, true, false);
            freeGraphNodeAndLinks inLinks  = webSiteGraph.GetLinks(entry.AssignedID, false, true);

            Double score = 0;

            if (functionFlags.HasFlag(GraphFactorFunctionEnum.count_outbound))
            {
                score += outLinks.Count;
            }

            if (functionFlags.HasFlag(GraphFactorFunctionEnum.count_inbound))
            {
                score += inLinks.Count;
            }

            // Nothing counted: no point in applying the divisors.
            if (score == 0)
            {
                return(score);
            }

            if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_graphlinks))
            {
                score = DivideUnlessZero(score, webSiteGraph.CountLinks());
            }

            if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_graphnodes))
            {
                score = DivideUnlessZero(score, webSiteGraph.CountNodes());
            }

            // The inbound/outbound counts can be zero even though the score is not,
            // e.g. when only the opposite direction was counted above.
            if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_inbound))
            {
                score = DivideUnlessZero(score, inLinks.Count);
            }

            if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_outbound))
            {
                score = DivideUnlessZero(score, outLinks.Count);
            }

            if (functionFlags.HasFlag(GraphFactorFunctionEnum.divide_by_linkCount))
            {
                score = DivideUnlessZero(score, inLinks.Count + outLinks.Count);
            }

            return(score);
        }

        /// <summary>
        /// Divides <paramref name="value"/> by <paramref name="divisor"/>, returning <paramref name="value"/> unchanged when the divisor is zero.
        /// </summary>
        private static Double DivideUnlessZero(Double value, Double divisor)
        {
            return divisor == 0 ? value : value / divisor;
        }
        /// <summary>
        /// Returns the statistic selected by <c>functionName</c> for the entry, after recalculating the entry's statistics.
        /// </summary>
        /// <param name="entry">The entry to score; its <c>AssignedID</c> selects the statistics record.</param>
        /// <param name="context">The context (not used by this implementation).</param>
        /// <param name="log">The log (not used by this implementation).</param>
        /// <returns>
        /// The selected metric. For <c>Ordinal</c> it is the number of assigned IDs minus the index of this
        /// entry's ID in <c>assignedIDs</c>; any unlisted function falls back to <c>Count</c>.
        /// </returns>
        public override Double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            var stats = statsByAssignedID[entry.AssignedID];

            // Statistics are refreshed on every call, before any of them is read.
            stats.reCalculate(instanceCountCollection<string>.preCalculateTasks.all);

            switch (functionName)
            {
            case ScoreModelMetricFactorEnum.varianceFreq:
                return stats.varianceFreq;

            case ScoreModelMetricFactorEnum.TotalScore:
                return stats.TotalScore;

            case ScoreModelMetricFactorEnum.standardDeviation:
                return stats.standardDeviation;

            case ScoreModelMetricFactorEnum.entropyFreq:
                return stats.entropyFreq;

            case ScoreModelMetricFactorEnum.avgFreq:
                return stats.avgFreq;

            case ScoreModelMetricFactorEnum.Ordinal:
                return assignedIDs.Count - assignedIDs.IndexOf(entry.AssignedID);

            case ScoreModelMetricFactorEnum.Count:
            default:
                return stats.Count;
            }
        }
        /// <summary>
        /// Scores the entry from its stored feature vector, using the mode selected by <c>computation</c>.
        /// </summary>
        /// <remarks>
        /// Mode precedence: <c>offset</c> (compress by the vector's label dimension), then <c>variance</c>
        /// (variance coefficient of the dimensions), then <c>distance</c> (maximum of the vector); with none
        /// of these set, the first dimension is used. The <c>inverse</c> flag negates the result.
        /// A NaN result is replaced with 0.
        /// </remarks>
        /// <param name="entry">The entry to score; <c>DomainID</c> and <c>AssignedID</c> select the vector.</param>
        /// <param name="context">The context (not used by this implementation).</param>
        /// <param name="log">The log; receives a message when no vector is found.</param>
        /// <returns>The computed score, or 0 when the vector is missing or the result is NaN.</returns>
        public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            FeatureVectorWithLabelID fv = vectorDictionary.Get(entry.DomainID, entry.AssignedID);

            if (fv == null)
            {
                log.log("Can't find vector dictionary entry for [" + entry.DomainID + "]>[" + entry.AssignedID + "]");
                return(0);
            }

            Double sc = 0;

            if (computation.HasFlag(ScoreComputationModeEnum.offset))
            {
                sc = fv.CompressByTrueDimension(fv.labelID);
            }
            else if (computation.HasFlag(ScoreComputationModeEnum.variance))
            {
                sc = fv.dimensions.GetVarianceCoefficient();
            }
            else if (computation.HasFlag(ScoreComputationModeEnum.distance))
            {
                sc = fv.CompressNumericVector(imbSCI.Core.enums.operation.max);
            }
            else
            {
                sc = fv.dimensions[0];
            }

            if (computation.HasFlag(ScoreComputationModeEnum.inverse))
            {
                sc = -sc;
            }

            // NaN never compares equal to anything (itself included), so "sc == Double.NaN"
            // was always false; Double.IsNaN is the only reliable test.
            if (Double.IsNaN(sc))
            {
                sc = 0;
            }

            return(sc);
        }
示例#7
0
        /// <summary>
        /// Scores the entry with the precomputed graph ranking selected by <c>algorithm</c>.
        /// </summary>
        /// <remarks>
        /// For HITS the stored value is multiplied by <c>scoreUnit</c>; for PageRank the stored value is
        /// returned as-is. In both cases a domain or document without a stored value scores 0. The
        /// PageRank branch previously indexed the domain without checking it first, unlike the HITS branch.
        /// </remarks>
        /// <param name="entry">The entry to score; <c>DomainID</c> selects the ranking, <c>AssignedID</c> the value in it.</param>
        /// <param name="context">The context (not used by this implementation).</param>
        /// <param name="log">The log (not used by this implementation).</param>
        /// <returns>The ranking-based score, or 0 when no value is stored or the algorithm is not handled.</returns>
        public override double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log)
        {
            Double score = 0;

            switch (algorithm)
            {
            case GraphFactorAlgorithm.HITS:
                if (p_hits.ContainsKey(entry.DomainID))
                {
                    HITSRank hits = p_hits[entry.DomainID];

                    if (hits.ContainsKey(entry.AssignedID))
                    {
                        score = hits[entry.AssignedID] * scoreUnit;
                    }
                }

                break;

            case GraphFactorAlgorithm.PageRank:
                // Same domain guard as the HITS branch: an unknown domain scores 0 instead of throwing.
                if (p_rank.ContainsKey(entry.DomainID))
                {
                    var rank = p_rank[entry.DomainID];

                    if (rank.ContainsKey(entry.AssignedID))
                    {
                        score = rank[entry.AssignedID];
                    }
                }

                break;
            }

            return(score);
        }
 /// <summary>
 /// Computes the score of a single entry. Declared abstract: the computation is supplied by derived classes.
 /// </summary>
 /// <param name="entry">The entry to score.</param>
 /// <param name="context">The selection context passed to the computation.</param>
 /// <param name="log">The log.</param>
 /// <returns>The score of the entry.</returns>
 public abstract Double Score(DocumentSelectResultEntry entry, DocumentSelectResult context, ILogBuilder log);
示例#9
0
        /// <summary>
        /// Builds a <see cref="DocumentSelectResult"/> from the operation context: copies the shared models,
        /// registers the graph of every web site and creates one result entry per web site document.
        /// </summary>
        /// <remarks>
        /// The selected features are assigned before the description is built, because the description
        /// text includes them; previously they were read while still unassigned.
        /// </remarks>
        /// <param name="context">The operation context supplying the space model, selected features and web sites.</param>
        /// <param name="ranking">Optional ranking method; when given, it names and describes the result and its model is prepared at the end.</param>
        /// <param name="folder">The folder stored on the result.</param>
        /// <param name="log">The log; receives a message for each entry that has neither text nor space model.</param>
        /// <returns>The populated selection context.</returns>
        public static DocumentSelectResult PrepareContext(this OperationContext context, DocumentRankingMethod ranking, folderNode folder, ILogBuilder log)
        {
            DocumentSelectResult selectContext = new DocumentSelectResult();

            selectContext.stemmingContext = context.stemmContext;
            selectContext.spaceModel      = context.spaceModel;
            selectContext.folder          = folder;

            // Must precede the description below, which reads selectedFeatures.
            selectContext.selectedFeatures = context.SelectedFeatures;

            if (ranking != null)
            {
                selectContext.name  = ranking.model.GetSignature();
                selectContext.query = ranking.query;

                builderForText builder = new builderForText();
                ranking.Describe(builder);

                // Null-safe: other code in this project treats selectedFeatures as possibly null.
                builder.AppendLine("Selected features [" + selectContext.selectedFeatures?.description + "].");

                selectContext.description = builder.GetContent().Replace(Environment.NewLine, "");
            }

            foreach (KeyValuePair <string, WebSiteDocuments> pair in context.webSiteByDomain)
            {
                selectContext.domainNameToGraph.Add(pair.Key, pair.Value?.extensions?.graph);

                // The line above already allows a null site; a null site has no documents to add.
                if (pair.Value == null)
                {
                    continue;
                }

                foreach (WebSiteDocument doc in pair.Value.documents)
                {
                    DocumentSelectResultEntry entry = new DocumentSelectResultEntry();

                    // Text document lookup is currently disabled, so the entry is always created without text.
                    TextDocument text = null;

                    string err = "";

                    SpaceDocumentModel spaceDocument = context.spaceModel.documents.FirstOrDefault(x => x.name == doc.AssignedID);

                    if (spaceDocument == null)
                    {
                        err += "Failed to find space model document for [" + doc.AssignedID + "]";
                    }

                    string dn = pair.Value.domain;
                    entry.SetEntry(dn, doc, spaceDocument, text);

                    if (!entry.HasTextOrSpaceModel)
                    {
                        log.log(err);
                    }

                    // The entry is added even when incomplete; the problem is only logged.
                    selectContext.items.Add(entry);
                }
            }

            // PREPARATION OF MODEL
            if (ranking != null)
            {
                ranking.model.Prepare(selectContext, log);
            }

            return(selectContext);
        }
        /// <summary>
        /// Applies the query's normalization, score threshold and size limits to the scored items of the context.
        /// </summary>
        /// <remarks>
        /// Steps, in order: optional per-domain normalization; re-ordering of the items by descending score;
        /// removal of items scoring below <c>TrasholdLimit</c> (when it is not 0); size limiting (when
        /// <c>SizeLimit</c> is positive), either per domain or over the whole result. With per-domain limits
        /// and <c>ForceHomePage</c>, the entry returned by <c>GetDocWithShortestID</c> is taken out of the
        /// domain's candidates and counted against the limit, so it is never marked for removal.
        /// </remarks>
        /// <param name="query">The query supplying the options, threshold and size limit.</param>
        /// <param name="context">The context whose items are filtered in place.</param>
        /// <param name="log">The log.</param>
        /// <returns>The same <paramref name="context"/> instance, after filtering.</returns>
        public static DocumentSelectResult ExecuteLimit(this DocumentSelectQuery query, DocumentSelectResult context, ILogBuilder log)
        {
            if (query.options.HasFlag(DocumentSelectQueryOptions.DomainLevelNormalization))
            {
                log.log("DS Scores normalized on website / domain level");
                context.items.NormalizeWithinDomain(log);
            }

            // Put the items back in descending score order.
            List<DocumentSelectResultEntry> ranked = context.items.OrderByDescending(item => item.score).ToList();
            context.items.Clear();
            context.items.AddRange(ranked);

            if (query.TrasholdLimit != 0.0)
            {
                // Selected from the sorted snapshot, so the live list is not modified while enumerated.
                List<DocumentSelectResultEntry> belowThreshold = ranked.FindAll(item => item.score < query.TrasholdLimit);

                foreach (DocumentSelectResultEntry rejected in belowThreshold)
                {
                    context.items.Remove(rejected);
                }
            }

            if (query.SizeLimit <= 0)
            {
                return context;
            }

            if (!query.options.HasFlag(DocumentSelectQueryOptions.ApplyDomainLevelLimits))
            {
                // Global limit: cut everything after the first SizeLimit entries.
                if (context.Count > query.SizeLimit)
                {
                    context.RemoveRange(query.SizeLimit, context.Count - query.SizeLimit);
                }

                return context;
            }

            List<DocumentSelectResultEntry> overLimit = new List<DocumentSelectResultEntry>();

            foreach (var pair in context.GetByDomain(log))
            {
                List<DocumentSelectResultEntry> domainRanked = pair.Value.OrderByDescending(item => item.score).ToList();

                // Number of slots already taken in this domain (the forced home page, if any).
                Int32 kept = 0;

                if (query.options.HasFlag(DocumentSelectQueryOptions.ForceHomePage))
                {
                    DocumentSelectResultEntry homeEntry = domainRanked.GetDocWithShortestID();
                    if (domainRanked.Remove(homeEntry))
                    {
                        kept = 1;
                    }
                }

                for (Int32 position = 0; position < domainRanked.Count; position++)
                {
                    if (kept + position >= query.SizeLimit)
                    {
                        overLimit.Add(domainRanked[position]);
                    }
                }
            }

            foreach (DocumentSelectResultEntry excess in overLimit)
            {
                context.Remove(excess);
            }

            return context;
        }