Exemple #1
0
        /// <summary>
        /// Builds a range from the two similarity criteria of this instance.
        /// </summary>
        /// <returns>
        /// A new <c>rangeFinder</c> that has learned <c>MinScoreInRangeCriterion</c>
        /// followed by <c>MinScoreInRangeMaxCriterion</c>.
        /// </returns>
        public rangeFinder GetMinSimilarityRange()
        {
            var criteriaRange = new rangeFinder();
            criteriaRange.Learn(MinScoreInRangeCriterion);
            criteriaRange.Learn(MinScoreInRangeMaxCriterion);

            return criteriaRange;
        }
        /// <summary>
        /// Replaces the <c>ranger</c> member with a fresh instance and feeds it every
        /// element of every row of this collection.
        /// </summary>
        /// <returns>The newly assigned <c>ranger</c>.</returns>
        public rangeFinder DetectMinMax()
        {
            ranger = new rangeFinder();

            // A zero ArtificialMaximum is not fed to the ranger.
            if (ArtificialMaximum != 0)
            {
                ranger.Learn(ArtificialMaximum);
            }

            // Walk the two-level structure row by row, element by element.
            for (int row = 0; row < this.Count; ++row)
            {
                for (int column = 0; column < this[row].Count; ++column)
                {
                    ranger.Learn(this[row][column]);
                }
            }

            return ranger;
        }
Exemple #3
0
        /// <summary>
        /// Computes the range covered by the values stored in <c>frequency</c>.
        /// </summary>
        /// <returns>
        /// A new <c>rangeFinder</c> that has learned the <c>Value</c> of every entry
        /// enumerated from <c>frequency</c>.
        /// </returns>
        public rangeFinder GetRange()
        {
            var valueRange = new rangeFinder();

            foreach (var entry in frequency)
            {
                valueRange.Learn(entry.Value);
            }

            return valueRange;
        }
        /// <summary>
        /// Converts <c>HexColorA</c> and <c>HexColorB</c> to HSV points, feeds each of
        /// their A, S, V and H components into the matching range member, and stores
        /// the point built from <c>HexColorA</c> as <c>BaseColor</c>.
        /// </summary>
        protected void Prepare()
        {
            var firstColor = new ColorHSVPoint(HexColorA);
            var secondColor = new ColorHSVPoint(HexColorB);

            // A component
            RangeA.Learn(firstColor.A);
            RangeA.Learn(secondColor.A);

            // S component
            RangeS.Learn(firstColor.S);
            RangeS.Learn(secondColor.S);

            // V component
            RangeV.Learn(firstColor.V);
            RangeV.Learn(secondColor.V);

            // H component
            RangeH.Learn(firstColor.H);
            RangeH.Learn(secondColor.H);

            BaseColor = firstColor;
        }
Exemple #5
0
        /*
         * /// <summary>
         * /// Prepares the context.
         * /// </summary>
         * /// <param name="context">The context.</param>
         * /// <param name="log">The log.</param>
         * /// <returns></returns>
         * public DocumentSelectResult PrepareContext(OperationContext context, ILogBuilder log)
         * {
         *  DocumentSelectResult selectContext = new DocumentSelectResult();
         *  selectContext.stemmingContext = context.stemmContext;
         *  selectContext.spaceModel = context.spaceModel;
         *  selectContext.query = query;
         *
         *  selectContext.selectedFeatures = context.SelectedFeatures;
         *
         *  foreach (KeyValuePair<string, WebSiteDocuments> pair in context.webSiteByDomain)
         *  {
         *      selectContext.domainNameToGraph.Add(pair.Key, pair.Value.extensions.graph);
         *
         *      foreach (WebSiteDocument doc in pair.Value.documents)
         *      {
         *          DocumentSelectResultEntry entry = new DocumentSelectResultEntry();
         *          TextDocument text = context.textDocuments[doc.AssociatedID];
         *          SpaceDocumentModel spaceDocument = context.spaceModel.documents.FirstOrDefault(x => x.name == doc.AssociatedID);
         *
         *          string dn = pair.Value.domain;
         *          entry.SetEntry(dn, doc, spaceDocument, text);
         *          selectContext.Add(entry);
         *          //entry.SetEntry( context.context.webDocumentByAssignedID[pair.Key], webDocIDToDomain[aID], webDocumentRegistry[aID], spaceDocumentRegistry[aID], textDocumentRegistry[aID]);
         *      }
         *
         *  }
         *
         *  // PREPARATION OF MODEL
         *
         *  model.Prepare(selectContext, log);
         *
         *  return selectContext;
         *
         * }
         */
        /*
         * /// <summary>
         * /// Prepares the context.
         * /// </summary>
         * /// <param name="space">The space.</param>
         * /// <param name="sites">The sites.</param>
         * /// <param name="documents">The documents.</param>
         * /// <param name="stemmingContext">The stemming context.</param>
         * /// <returns></returns>
         * public DocumentSelectResult PrepareContext(SpaceModel space, IEnumerable<WebSiteDocuments> sites, IEnumerable<TextDocument> documents, StemmingContext stemmingContext)
         * {
         *  DocumentSelectResult context = new DocumentSelectResult();
         *  context.query = query;
         *
         *  context.stemmingContext = stemmingContext;
         *  context.spaceModel = space;
         *
         *  List<String> associatedIDs = new List<string>();
         *
         *  Dictionary<String, TextDocument> textDocumentRegistry = new Dictionary<string, TextDocument>();
         *  foreach (TextDocument textDocument in documents)
         *  {
         *      textDocumentRegistry.Add(textDocument.name, textDocument);
         *  }
         *
         *  Dictionary<String, SpaceDocumentModel> spaceDocumentRegistry = new Dictionary<string, SpaceDocumentModel>();
         *  foreach (var textDocument in space.documents)
         *  {
         *      spaceDocumentRegistry.Add(textDocument.name, textDocument);
         *  }
         *
         *
         *  Dictionary<String, String> webDocIDToDomain = new Dictionary<string, string>();
         *
         *  Dictionary<String, WebSiteDocument> webDocumentRegistry = new Dictionary<string, WebSiteDocument>();
         *
         *  foreach (WebSiteDocuments site in sites)
         *  {
         *      context.domainNameToGraph.Add(site.domain, site.extensions.graph);
         *
         *      foreach (WebSiteDocument webDocument in site.documents)
         *      {
         *          webDocumentRegistry.Add(webDocument.AssociatedID, webDocument);
         *          associatedIDs.Add(webDocument.AssociatedID);
         *          webDocIDToDomain.Add(webDocument.AssociatedID, site.domain);
         *      }
         *  }
         *
         *  foreach (String aID in associatedIDs)
         *  {
         *      DocumentSelectResultEntry entry = new DocumentSelectResultEntry();
         *      entry.SetEntry(webDocIDToDomain[aID], webDocumentRegistry[aID], spaceDocumentRegistry[aID], textDocumentRegistry[aID]);
         *      context.Add(entry);
         *  }
         *
         *  return context;
         * }
         */


        /// <summary>
        /// Scores every entry of the context with each factor of the model, optionally
        /// normalizes the scores per factor, applies the factor weight and finally asks
        /// each entry to sum its factor scores.
        /// </summary>
        /// <param name="context">Selection result whose <c>items</c> are scored in place.</param>
        /// <param name="log">Log builder handed to each factor's <c>Score</c> call.</param>
        /// <returns>The same <paramref name="context"/> instance that was passed in.</returns>
        public DocumentSelectResult ExecuteEvaluation(DocumentSelectResult context, ILogBuilder log)
        {
            // SCORE COMPUTATION
            foreach (IScoreModelFactor factor in model.Factors)
            {
                rangeFinder ranger = new rangeFinder();

                // First pass: store the raw score and collect the score range.
                foreach (DocumentSelectResultEntry entry in context.items)
                {
                    Double score = factor.Score(entry, context, log);
                    entry.SetScore(factor, score);

                    // NaN never compares equal to anything (itself included), so the
                    // former "score != Double.NaN" was always true and NaN scores were
                    // fed to the ranger. Double.IsNaN is the correct test.
                    if (factor.doNormalize && !Double.IsNaN(score))
                    {
                        ranger.Learn(score);
                    }
                }

                // Second pass: normalize (when requested) and apply the factor weight.
                foreach (DocumentSelectResultEntry entry in context.items)
                {
                    Double score = entry.GetScore(factor);

                    // Same NaN pitfall as above: the range has to be tested with IsNaN.
                    // NOTE(review): a range of exactly 0 (all scores equal) still produces
                    // 0/0 = NaN here - confirm what the desired normalized value is.
                    if (factor.doNormalize && !Double.IsNaN(ranger.Range))
                    {
                        score = score - ranger.Minimum;
                        score = score / ranger.Range;
                    }

                    score = score * factor.weight;

                    // NOTE(review): the meaning of the third argument is defined by
                    // SetScore, which is not visible here.
                    entry.SetScore(factor, score, false);
                }
            }

            foreach (DocumentSelectResultEntry entry in context.items)
            {
                entry.SumFactorScores();
            }

            return context;
        }
Exemple #6
0
 /// <summary>
 /// When <c>DoFactorNormalization</c> is set, replaces every value in <c>index</c>
 /// with its position within the range of all values, as reported by
 /// <c>rangeFinder.GetPositionInRange</c>.
 /// </summary>
 /// <param name="log">Receives the normalization flag and the learned range.</param>
 public void DoIndexNormalization(ILogBuilder log)
 {
     log.log("Global factor normalization [" + DoFactorNormalization + "]");

     if (!DoFactorNormalization)
     {
         return;
     }

     var valueRange = new rangeFinder();
     foreach (var entry in index)
     {
         valueRange.Learn(entry.Value);
     }

     // Enumerate a copy, because the loop body writes back into index.
     foreach (var entry in index.ToList())
     {
         index[entry.Key] = valueRange.GetPositionInRange(entry.Value);
     }

     log.log("Global factor [" + shortName + "] range [" + valueRange.Range.ToString("F5") + "] before normalization.");
 }
        /// <summary>
        /// Rescales the score of every entry relative to the score range of its own domain.
        /// </summary>
        /// <param name="context">Entries to normalize; their <c>score</c> is overwritten.</param>
        /// <param name="log">Log builder passed to <c>GetByDomain</c>.</param>
        /// <returns>
        /// One <c>rangeFinder</c> per domain group, keyed by the ranger's <c>id</c>; each
        /// holds the scores learned before they were overwritten.
        /// </returns>
        public static Dictionary <String, rangeFinder> NormalizeWithinDomain(this IEnumerable <DocumentSelectResultEntry> context, ILogBuilder log)
        {
            var rangeByDomain = new Dictionary <string, rangeFinder>();
            var entriesByDomain = context.GetByDomain(log);

            foreach (var domainGroup in entriesByDomain)
            {
                var domainRange = new rangeFinder(domainGroup.Key);

                // The full range must be known before any score is rewritten.
                foreach (DocumentSelectResultEntry member in domainGroup.Value)
                {
                    domainRange.Learn(member.score);
                }

                foreach (DocumentSelectResultEntry member in domainGroup.Value)
                {
                    member.score = domainRange.GetPositionInRange(member.score);
                }

                rangeByDomain.Add(domainRange.id, domainRange);
            }

            return rangeByDomain;
        }
        /// <summary>
        /// Adds term and feature statistics of the operation context to the report data.
        /// </summary>
        /// <param name="report">Report whose <c>data</c> receives the new fields.</param>
        /// <param name="context">Context supplying the space model and the selected features.</param>
        /// <param name="afterFeatureSelection">
        /// <c>false</c>: record the labeled / unlabeled input term counts.
        /// <c>true</c>: record the same counts as "selected" terms, plus the number of
        /// selected features and the smallest selected feature weight.
        /// </param>
        public static void SetReportDataFields(this classificationReport report, OperationContext context, Boolean afterFeatureSelection = false)
        {
            if (!afterFeatureSelection)
            {
                report.data.Add(nameof(ReportDataFieldEnum.labeled_terms), context.spaceModel.terms_known_label.Count.ToString(), "Number of labeled input terms");
                report.data.Add(nameof(ReportDataFieldEnum.unlabeled_terms), context.spaceModel.terms_unknown_label.Count.ToString(), "Number of unlabeled input terms");
                return;
            }

            report.data.Add(nameof(ReportDataFieldEnum.labeled_selected_terms), context.spaceModel.terms_known_label.Count.ToString(), "Number of labeled selected terms");
            report.data.Add(nameof(ReportDataFieldEnum.unlabeled_selected_terms), context.spaceModel.terms_unknown_label.Count.ToString(), "Number of unlabeled selected terms");

            report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatures), context.SelectedFeatures.Count.ToString(), "Number of selected features");

            // Collect the weight range of the selected features; only the minimum is reported.
            var weightRange = new rangeFinder();
            foreach (var feature in context.SelectedFeatures.index)
            {
                weightRange.Learn(feature.Value.weight);
            }

            report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatureMin), weightRange.Minimum.ToString("F5"), "Smallest weight of a selected feature");
        }
Exemple #9
0
        /// <summary>
        /// Groups the documents of a similarity result into clusters.
        /// </summary>
        /// <param name="result">Similarity result providing the documents and their pairwise results.</param>
        /// <param name="collectionName">Name for the collection.</param>
        /// <param name="scoreSelector">
        /// Extracts the score from a result pair. When <c>null</c>, the selector obtained
        /// from <c>settings.SimilarityScoreSource</c> is used.
        /// </param>
        /// <param name="minSimScore">
        /// Threshold that a score's position in the overall score range must exceed for a
        /// document to join a cluster. <c>Double.MinValue</c> (the default) means
        /// "use <c>settings.MinScoreInRangeCriterion</c>".
        /// </param>
        /// <returns>The cluster collection, after empty clusters have been removed.</returns>
        public DocumentClusterCollection GetClusters(DocumentSimilarityResult result, String collectionName = "Clusters", Func <DocumentSimilarityResultPair, Double> scoreSelector = null, Double minSimScore = Double.MinValue)
        {
            // Double.MinValue is the "not specified" sentinel of the default argument.
            if (minSimScore == Double.MinValue)
            {
                minSimScore = settings.MinScoreInRangeCriterion;
            }

            if (scoreSelector == null)
            {
                scoreSelector = settings.SimilarityScoreSource.GetSelector();
            }

            DocumentClusterCollection output = new DocumentClusterCollection()
            {
                name = collectionName
            };

            var documents = result.GetDocuments();

            // Learn the overall score range. The previous code first ran
            // OrderByDescending(x => scoreSelector), which used the delegate itself as
            // the sort key (a no-op); the order is irrelevant for learning a range,
            // so the sort is dropped altogether.
            rangeFinder similarityRange = new rangeFinder();

            foreach (var pair in result.GetAllResults())
            {
                similarityRange.Learn(scoreSelector(pair));
            }

            // Safety limit: never run more iterations than there were documents to begin with.
            Int32 limit = documents.Count;
            Int32 i     = 0;

            // Phase 1: take the first remaining document as a seed and pull in every
            // document whose score position exceeds the threshold.
            while (documents.Any())
            {
                i++;
                var doc = documents.FirstOrDefault();
                if (doc == null)
                {
                    break;
                }

                var results = result.GetResultsFor(doc);

                DocumentCluster currentCluster = output.NewCluster <DocumentCluster>();
                currentCluster.ClusterSeed = doc;

                foreach (KeyValuePair <HtmlNode, DocumentSimilarityResultPair> pair in results)
                {
                    Double scoreAtRange = similarityRange.GetPositionInRange(scoreSelector(pair.Value));
                    if (scoreAtRange > minSimScore)
                    {
                        currentCluster.Add(pair.Key, scoreAtRange);
                        documents.Remove(pair.Key);
                    }
                }

                if (currentCluster.items.Count == 0)
                {
                    // Nothing was similar enough: park the seed in the null cluster.
                    output.NullCluster.Add(doc, 0);
                    documents.Remove(doc);
                }
                else
                {
                    documents.Remove(doc);
                    currentCluster.items.Add(doc);
                    output.AddCluster(currentCluster);
                }

                if (i > limit)
                {
                    break;
                }
            }

            // Phase 2: try to attach each null-cluster document to the cluster whose seed
            // it scores highest against. Iterate over a snapshot, because a successful
            // attachment removes the item from NullCluster and would otherwise modify
            // the collection while it is being enumerated.
            foreach (var item in output.NullCluster.items.ToList())
            {
                var             results         = result.GetResultsFor(item);
                Double          maxScore        = Double.MinValue;
                DocumentCluster selectedCluster = null;

                foreach (var cluster in output.GetClusters <DocumentCluster>(false))
                {
                    Double score = scoreSelector(results[cluster.ClusterSeed]);
                    if (score > maxScore)
                    {
                        maxScore        = score;
                        selectedCluster = cluster;
                    }
                }

                // selectedCluster stays null when there is no cluster to choose from
                // (or no score beat the initial maximum); skip instead of dereferencing null.
                if (selectedCluster != null && similarityRange.GetPositionInRange(maxScore) > minSimScore)
                {
                    selectedCluster.Add(item, maxScore);
                    output.NullCluster.Remove(item);
                }
            }

            // Phase 3 (optional): keep every item only in the cluster whose seed it
            // scores highest against.
            if (settings.ExclusiveClusterMembership)
            {
                var itemToCluster = output.GetItemToClusterAssociations <DocumentCluster>();

                foreach (var pair in itemToCluster)
                {
                    if (pair.Value.Count > 1)
                    {
                        Dictionary <HtmlNode, DocumentSimilarityResultPair> results = result.GetResultsFor(pair.Key);
                        Double          maxScore        = Double.MinValue;
                        DocumentCluster selectedCluster = null;

                        foreach (var cluster in pair.Value)
                        {
                            Double score = scoreSelector(results[cluster.ClusterSeed]);
                            if (score > maxScore)
                            {
                                maxScore        = score;
                                selectedCluster = cluster;
                            }
                        }

                        foreach (var cluster in pair.Value)
                        {
                            if (cluster != selectedCluster)
                            {
                                cluster.Remove(pair.Key);
                            }
                        }
                    }
                }
            }

            output.RemoveEmptyClusters();

            return output;
        }