コード例 #1
0
        /// <summary>
        /// Builds a range from the two minimum-similarity criteria.
        /// </summary>
        /// <returns>
        /// A new <see cref="rangeFinder"/> that has learned <c>MinScoreInRangeCriterion</c>
        /// followed by <c>MinScoreInRangeMaxCriterion</c>.
        /// </returns>
        public rangeFinder GetMinSimilarityRange()
        {
            var criteriaRange = new rangeFinder();
            criteriaRange.Learn(MinScoreInRangeCriterion);
            criteriaRange.Learn(MinScoreInRangeMaxCriterion);

            return criteriaRange;
        }
コード例 #2
0
        /// <summary>
        /// Builds a range covering the value of every entry in <c>frequency</c>.
        /// </summary>
        /// <returns>A new <see cref="rangeFinder"/> that has learned each entry's <c>Value</c>.</returns>
        public rangeFinder GetRange()
        {
            var valueRange = new rangeFinder();

            foreach (var entry in frequency)
            {
                valueRange.Learn(entry.Value);
            }

            return valueRange;
        }
コード例 #3
0
 /// <summary>
 /// Writes the contents of a range to the text renderer: a heading line followed by
 /// one key/value pair per dictionary entry, indented one tab level.
 /// </summary>
 /// <param name="ranger">Range whose <c>GetDictionary(prefix)</c> entries are reported.</param>
 /// <param name="output">Renderer that receives the heading and the pairs.</param>
 /// <param name="rangeName">Name shown inside the heading line.</param>
 /// <param name="prefix">Passed through to <c>GetDictionary</c>.</param>
 public void Report(rangeFinder ranger, ITextRender output, String rangeName, String prefix)
 {
     String heading = "Range [" + rangeName + "]";
     output.AppendLine(heading);

     output.nextTabLevel();

     var entries = ranger.GetDictionary(prefix);
     foreach (var entry in entries)
     {
         // Values are rendered with three decimal places.
         String formatted = entry.Value.ToString("F3");
         output.AppendPair(entry.Key, formatted);
     }

     output.prevTabLevel();
 }
コード例 #4
0
        /*
         * /// <summary>
         * /// Prepares the context.
         * /// </summary>
         * /// <param name="context">The context.</param>
         * /// <param name="log">The log.</param>
         * /// <returns></returns>
         * public DocumentSelectResult PrepareContext(OperationContext context, ILogBuilder log)
         * {
         *  DocumentSelectResult selectContext = new DocumentSelectResult();
         *  selectContext.stemmingContext = context.stemmContext;
         *  selectContext.spaceModel = context.spaceModel;
         *  selectContext.query = query;
         *
         *  selectContext.selectedFeatures = context.SelectedFeatures;
         *
         *  foreach (KeyValuePair<string, WebSiteDocuments> pair in context.webSiteByDomain)
         *  {
         *      selectContext.domainNameToGraph.Add(pair.Key, pair.Value.extensions.graph);
         *
         *      foreach (WebSiteDocument doc in pair.Value.documents)
         *      {
         *          DocumentSelectResultEntry entry = new DocumentSelectResultEntry();
         *          TextDocument text = context.textDocuments[doc.AssociatedID];
         *          SpaceDocumentModel spaceDocument = context.spaceModel.documents.FirstOrDefault(x => x.name == doc.AssociatedID);
         *
         *          string dn = pair.Value.domain;
         *          entry.SetEntry(dn, doc, spaceDocument, text);
         *          selectContext.Add(entry);
         *          //entry.SetEntry( context.context.webDocumentByAssignedID[pair.Key], webDocIDToDomain[aID], webDocumentRegistry[aID], spaceDocumentRegistry[aID], textDocumentRegistry[aID]);
         *      }
         *
         *  }
         *
         *  // PREPARATION OF MODEL
         *
         *  model.Prepare(selectContext, log);
         *
         *  return selectContext;
         *
         * }
         */
        /*
         * /// <summary>
         * /// Prepares the context.
         * /// </summary>
         * /// <param name="space">The space.</param>
         * /// <param name="sites">The sites.</param>
         * /// <param name="documents">The documents.</param>
         * /// <param name="stemmingContext">The stemming context.</param>
         * /// <returns></returns>
         * public DocumentSelectResult PrepareContext(SpaceModel space, IEnumerable<WebSiteDocuments> sites, IEnumerable<TextDocument> documents, StemmingContext stemmingContext)
         * {
         *  DocumentSelectResult context = new DocumentSelectResult();
         *  context.query = query;
         *
         *  context.stemmingContext = stemmingContext;
         *  context.spaceModel = space;
         *
         *  List<String> associatedIDs = new List<string>();
         *
         *  Dictionary<String, TextDocument> textDocumentRegistry = new Dictionary<string, TextDocument>();
         *  foreach (TextDocument textDocument in documents)
         *  {
         *      textDocumentRegistry.Add(textDocument.name, textDocument);
         *  }
         *
         *  Dictionary<String, SpaceDocumentModel> spaceDocumentRegistry = new Dictionary<string, SpaceDocumentModel>();
         *  foreach (var textDocument in space.documents)
         *  {
         *      spaceDocumentRegistry.Add(textDocument.name, textDocument);
         *  }
         *
         *
         *  Dictionary<String, String> webDocIDToDomain = new Dictionary<string, string>();
         *
         *  Dictionary<String, WebSiteDocument> webDocumentRegistry = new Dictionary<string, WebSiteDocument>();
         *
         *  foreach (WebSiteDocuments site in sites)
         *  {
         *      context.domainNameToGraph.Add(site.domain, site.extensions.graph);
         *
         *      foreach (WebSiteDocument webDocument in site.documents)
         *      {
         *          webDocumentRegistry.Add(webDocument.AssociatedID, webDocument);
         *          associatedIDs.Add(webDocument.AssociatedID);
         *          webDocIDToDomain.Add(webDocument.AssociatedID, site.domain);
         *      }
         *  }
         *
         *  foreach (String aID in associatedIDs)
         *  {
         *      DocumentSelectResultEntry entry = new DocumentSelectResultEntry();
         *      entry.SetEntry(webDocIDToDomain[aID], webDocumentRegistry[aID], spaceDocumentRegistry[aID], textDocumentRegistry[aID]);
         *      context.Add(entry);
         *  }
         *
         *  return context;
         * }
         */


        /// <summary>
        /// Computes the score of every entry in <paramref name="context"/> for each factor of the model,
        /// optionally normalizes the factor scores into the learned range, applies the factor weight
        /// and finally sums the factor scores per entry.
        /// </summary>
        /// <param name="context">The selection result whose <c>items</c> are scored; it is modified in place.</param>
        /// <param name="log">The log, passed through to each factor's <c>Score</c> call.</param>
        /// <returns>The same <paramref name="context"/> instance, with scores set.</returns>
        public DocumentSelectResult ExecuteEvaluation(DocumentSelectResult context, ILogBuilder log)
        {
            // SCORE COMPUTATION
            foreach (IScoreModelFactor factor in model.Factors)
            {
                rangeFinder ranger = new rangeFinder();

                // Pass 1: compute raw scores and learn their range.
                foreach (DocumentSelectResultEntry entry in context.items)
                {
                    Double score = factor.Score(entry, context, log);
                    entry.SetScore(factor, score);

                    // NaN never compares equal to anything (including itself), so the former
                    // "score != Double.NaN" test was always true; Double.IsNaN is the only valid check.
                    if (factor.doNormalize && !Double.IsNaN(score))
                    {
                        ranger.Learn(score);
                    }
                }

                // Normalization is only meaningful when the learned range is an actual number.
                // NOTE(review): a zero range (all scores equal) still divides by zero below, as before - confirm desired handling.
                Boolean normalize = factor.doNormalize && !Double.IsNaN(ranger.Range);

                // Pass 2: normalize (if requested) and apply the factor weight.
                foreach (DocumentSelectResultEntry entry in context.items)
                {
                    Double score = entry.GetScore(factor);

                    if (normalize)
                    {
                        score = score - ranger.Minimum;

                        score = score / ranger.Range;
                    }

                    score = score * factor.weight;

                    entry.SetScore(factor, score, false);
                }
            }

            foreach (DocumentSelectResultEntry entry in context.items)
            {
                entry.SumFactorScores();
            }

            return context;
        }
コード例 #5
0
        /// <summary>
        /// Replaces <c>ranger</c> with a fresh <see cref="rangeFinder"/> and feeds it every value
        /// reachable through the two-level indexer, plus <c>ArtificialMaximum</c> when it is non-zero.
        /// </summary>
        /// <returns>The newly assigned <c>ranger</c>.</returns>
        public rangeFinder DetectMinMax()
        {
            ranger = new rangeFinder();

            // A zero ArtificialMaximum means "not set" and is not learned.
            if (ArtificialMaximum != 0)
            {
                ranger.Learn(ArtificialMaximum);
            }

            for (int outer = 0; outer < this.Count; outer++)
            {
                for (int inner = 0; inner < this[outer].Count; inner++)
                {
                    ranger.Learn(this[outer][inner]);
                }
            }

            return ranger;
        }
コード例 #6
0
 /// <summary>
 /// Rescales every value in <c>index</c> to its position within the range of all index values,
 /// when <c>DoFactorNormalization</c> is enabled. Otherwise only the setting is logged.
 /// </summary>
 /// <param name="log">Receives the normalization setting and, when normalizing, the pre-normalization range.</param>
 public void DoIndexNormalization(ILogBuilder log)
 {
     log.log("Global factor normalization [" + DoFactorNormalization + "]");

     if (!DoFactorNormalization)
     {
         return;
     }

     var valueRange = new rangeFinder();
     foreach (var entry in index)
     {
         valueRange.Learn(entry.Value);
     }

     // Iterate over a snapshot because the loop body writes back into index.
     foreach (var entry in index.ToList())
     {
         index[entry.Key] = valueRange.GetPositionInRange(entry.Value);
     }

     // The range was learned before the values were rewritten, hence "before normalization".
     log.log("Global factor [" + shortName + "] range [" + valueRange.Range.ToString("F5") + "] before normalization.");
 }
コード例 #7
0
        /// <summary>
        /// Normalizes entry scores separately for each domain: every entry's <c>score</c> is replaced
        /// by its position within the score range of the entries grouped under the same domain.
        /// </summary>
        /// <param name="context">The entries to normalize; their <c>score</c> is modified in place.</param>
        /// <param name="log">The log, passed through to <c>GetByDomain</c>.</param>
        /// <returns>The per-domain ranges that were learned, keyed by each range's <c>id</c>.</returns>
        public static Dictionary<String, rangeFinder> NormalizeWithinDomain(this IEnumerable<DocumentSelectResultEntry> context, ILogBuilder log)
        {
            Dictionary<String, List<DocumentSelectResultEntry>> entriesByDomain = context.GetByDomain(log);
            var rangeByDomain = new Dictionary<String, rangeFinder>();

            foreach (var domainGroup in entriesByDomain)
            {
                List<DocumentSelectResultEntry> domainEntries = domainGroup.Value;
                var domainRange = new rangeFinder(domainGroup.Key);

                // First pass: learn the score range of this domain.
                foreach (DocumentSelectResultEntry item in domainEntries)
                {
                    domainRange.Learn(item.score);
                }

                // Second pass: replace each score with its position in that range.
                foreach (DocumentSelectResultEntry item in domainEntries)
                {
                    item.score = domainRange.GetPositionInRange(item.score);
                }

                rangeByDomain.Add(domainRange.id, domainRange);
            }

            return rangeByDomain;
        }
コード例 #8
0
        /// <summary>
        /// Adds term-count data fields to the report. Before feature selection only the labeled and
        /// unlabeled input term counts are added; after feature selection the selected term counts,
        /// the number of selected features and the smallest selected feature weight are added.
        /// </summary>
        /// <param name="report">The report whose <c>data</c> collection receives the fields.</param>
        /// <param name="context">Source of the space model term lists and the selected features.</param>
        /// <param name="afterFeatureSelection">Selects which set of fields is written.</param>
        public static void SetReportDataFields(this classificationReport report, OperationContext context, Boolean afterFeatureSelection = false)
        {
            if (!afterFeatureSelection)
            {
                report.data.Add(nameof(ReportDataFieldEnum.labeled_terms), context.spaceModel.terms_known_label.Count.ToString(), "Number of labeled input terms");
                report.data.Add(nameof(ReportDataFieldEnum.unlabeled_terms), context.spaceModel.terms_unknown_label.Count.ToString(), "Number of unlabeled input terms");
                return;
            }

            report.data.Add(nameof(ReportDataFieldEnum.labeled_selected_terms), context.spaceModel.terms_known_label.Count.ToString(), "Number of labeled selected terms");
            report.data.Add(nameof(ReportDataFieldEnum.unlabeled_selected_terms), context.spaceModel.terms_unknown_label.Count.ToString(), "Number of unlabeled selected terms");

            report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatures), context.SelectedFeatures.Count.ToString(), "Number of selected features");

            // Learn the weight range of the selected features to report its minimum.
            var weightRange = new rangeFinder();
            foreach (var feature in context.SelectedFeatures.index)
            {
                weightRange.Learn(feature.Value.weight);
            }

            String smallestWeight = weightRange.Minimum.ToString("F5");
            report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatureMin), smallestWeight, "Smallest weight of a selected feature");
        }
コード例 #9
0
        /// <summary>
        /// Renders the specified <see cref="HeatMapModel"/> into an SVG document: one rectangle per
        /// model cell, colored by the cell value's position in the model's value range, plus optional
        /// axis labels and a vertical value scale. Optionally saves the output SVG.
        /// </summary>
        /// <param name="model">The model. Its min/max range is (re)detected via <c>DetectMinMax()</c>.</param>
        /// <param name="filePath">The file path. When not empty the document is saved there; ".svg" is appended if missing.</param>
        /// <returns>The rendered SVG document.</returns>
        public Svg.SvgDocument Render(HeatMapModel model, String filePath = "")
        {
            rangeFinder valueRange = model.DetectMinMax();

            var lColor = style.LowColor.GetColorVersionWithAlpha(style.MinOpacity);  //.ColorToHex();
            var hColor = style.HighColor.GetColorVersionWithAlpha(style.MaxOpacity); //.ColorToHex();



            ColorGradient colorGradient = new ColorGradient(lColor, hColor, ColorGradientFunction.AllAToB);


            cursorZoneSpatialSettings format = style.fieldContainer.GetFormatSetup();

            format.spatialUnit       = 8;
            format.spatialUnitHeight = 10;

            // NOTE(review): model.weight is used as the horizontal cell count here and as the x-loop bound below - presumably a misnamed "width"; confirm.
            Int32 width  = (model.weight * format.width) + format.margin.right;
            Int32 height = (model.height * format.height) + format.margin.bottom;

            Svg.SvgDocument output = new Svg.SvgDocument
            {
                Width  = width,
                Height = height,
                Ppi    = 100
            };

            var mainContainer = new SvgGroup();

            output.Children.Add(mainContainer);

            //(new SvgLength(width), new SvgLength(height));

            // output.ViewBox = new SvgViewBox(-100, -100, width+100, height+100);

            // "group" receives the cell rectangles; "layerTwo" (added after it) receives labels and the scale.
            var group = new SvgGroup();

            mainContainer.Children.Add(group);

            var layerTwo = new SvgGroup();

            mainContainer.Children.Add(layerTwo);

            prepareLabels(model);

            // Horizontal labels are always built into "hor", but "hor" is only attached to the document when the option is set.
            var hor = new SvgGroup();

            if (style.options.HasFlag(HeatMapRenderOptions.addHorizontalLabels))
            {
                layerTwo.Children.Add(hor);
            }

            for (int x = 0; x < model.weight; x++)
            {
                Int32 xStart = x * format.width;

                Svg.SvgText label = xLabels[x].GetSvgText(format, x, -1);

                //Svg.SvgText label = new SvgText(xLabels[x])
                //{
                //    X = (xStart + (format.width / 2) - format.margin.right).Get_px(),
                //    Y = (-format.height / 2).Get_px(),
                //    Color = new SvgColourServer(Color.Black),
                //    Font = "Gulliver"

                //};

                hor.Children.Add(label);

                // A "vert" group is created and attached for every column, but it is only populated while x == 0 (see below).
                var vert = new SvgGroup();
                layerTwo.Children.Add(vert);

                var vertLabels = new SvgGroup();
                var vertValues = new SvgGroup();
                var vertScale  = new SvgGroup();

                if (style.options.HasFlag(HeatMapRenderOptions.addVerticalLabels))
                {
                    vert.Children.Add(vertLabels);
                }
                if (style.options.HasFlag(HeatMapRenderOptions.addVerticalValueScale))
                {
                    vert.Children.Add(vertScale);
                }
                // NOTE(review): same flag as the check above - vertValues may have been meant to use a separate option; confirm.
                if (style.options.HasFlag(HeatMapRenderOptions.addVerticalValueScale))
                {
                    vert.Children.Add(vertValues);
                }

                for (int y = 0; y < model.height; y++)
                {
                    Int32 yStart = y * format.height;

                    // The vertical scale, row labels and scale values are emitted once, while processing the first column.
                    if (x == 0)
                    {
                        // NOTE(review): the ratio is computed from the row index y, not from a model value - confirm this is intended.
                        Double ratio        = valueRange.GetPositionInRange(y); //model.GetRatioForScale(y, style.minimalOpacity, model.height); //(1+ style.minimalOpacity).GetRatio(y+1);
                        Double scaleFactor2 = ratio;
                        if (!style.options.HasFlag(HeatMapRenderOptions.resizeFields))
                        {
                            scaleFactor2 = 1;
                        }

                        // Only ratio is clamped; scaleFactor2 was copied before the clamp and may still exceed 1.
                        if (ratio > 1)
                        {
                            ratio = 1;
                        }
                        var lbl2 = format.GetRectangle((-format.width * 2), yStart, colorGradient.GetColor(ratio), Convert.ToSingle(ratio), scaleFactor2);
                        vertScale.Children.Add(lbl2);

                        Svg.SvgText label2 = yLabels[y].GetSvgText(format, -1, y);
                        vertLabels.Children.Add(label2);

                        //Svg.SvgText label = xLabels[x].GetSvgText(format, x, -1);

                        //Svg.SvgText label2 = new SvgText(yLabels[y])
                        //{
                        //    X = (format.margin.left - format.width).Get_px(),
                        //    Y = (yStart+(format.height / 2)).Get_px(),
                        //    Color = new SvgColourServer(Color.Black),

                        //    Font = "Gulliver"
                        //};

                        // NOTE(review): xp is not used by any live code in this method (only by the commented-out block below).
                        Int32 xp = Convert.ToInt32((-Convert.ToDouble(format.width) * 2.5) + format.margin.left);

                        //Double vl = (1.GetRatio(y + 1)) * model.ranger.Maximum;

                        Double vl = model.GetValueForScaleY(y);

                        Svg.SvgText value = vl.ToString(style.valueFormat).GetSvgText(format, -3, y);

                        //Svg.SvgText value = new SvgText()
                        //{
                        //    X = (xp- format.margin.right).Get_px(),
                        //    Y = (yStart + (format.height / 2) ).Get_px(),
                        //    Fill = new SvgColourServer(Color.Black),
                        //   // Color = new SvgColourServer(Color.White),
                        //    Font = "Gulliver"
                        //};

                        vertValues.Children.Add(value);
                    }

                    // Cell rectangle: color (and, with resizeFields, size) follow the value's position in the detected range.
                    Double val  = valueRange.GetPositionInRange(model[x, y]); // model.GetRatioValue(x, y, style.minimalOpacity);
                    Color  valC = colorGradient.GetColor(val);


                    Double scaleFactor = val;

                    if (!style.options.HasFlag(HeatMapRenderOptions.resizeFields))
                    {
                        scaleFactor = 1;
                    }
                    var rct = format.GetRectangle(xStart, yStart, valC, Convert.ToSingle(val), scaleFactor);

                    group.Children.Add(rct);
                }
            }

            if (!filePath.isNullOrEmpty())
            {
                // Extension check ignores case and uses the current culture.
                if (!filePath.EndsWith(".svg", true, CultureInfo.CurrentCulture))
                {
                    filePath += ".svg";
                }


                output.Save(filePath);

                //  throw new NotImplementedException();

                /* var code = output.GetXML();  //Encoding.UTF8.GetString(stream.GetBuffer());
                 *
                 */
            }

            return(output);
        }
コード例 #10
0
        /// <summary>
        /// Groups the documents of a similarity result into clusters. Each remaining document in turn
        /// becomes a cluster seed and pulls in every document whose similarity to it, expressed as a
        /// position in the overall score range, exceeds <paramref name="minSimScore"/>. Documents that
        /// attract nothing go to the null cluster and are afterwards offered to the best matching cluster.
        /// </summary>
        /// <param name="result">The similarity result to cluster.</param>
        /// <param name="collectionName">Name for the collection.</param>
        /// <param name="scoreSelector">
        /// Selects the score from a result pair. When <c>null</c>, the selector provided by
        /// <c>settings.SimilarityScoreSource</c> is used.
        /// </param>
        /// <param name="minSimScore">
        /// Exclusive lower bound for the position-in-range of a score. <see cref="Double.MinValue"/>
        /// (the default) means "use <c>settings.MinScoreInRangeCriterion</c>".
        /// </param>
        /// <returns>The cluster collection, with empty clusters removed.</returns>
        public DocumentClusterCollection GetClusters(DocumentSimilarityResult result, String collectionName = "Clusters", Func <DocumentSimilarityResultPair, Double> scoreSelector = null, Double minSimScore = Double.MinValue)
        {
            if (minSimScore == Double.MinValue)
            {
                minSimScore = settings.MinScoreInRangeCriterion;
            }

            if (scoreSelector == null)
            {
                scoreSelector = settings.SimilarityScoreSource.GetSelector();
            }

            DocumentClusterCollection output = new DocumentClusterCollection()
            {
                name = collectionName
            };

            var documents = result.GetDocuments();

            // Order by the selected score. The previous key selector returned the delegate itself,
            // so every element had the same key and no ordering took place.
            var sortedResults = result.GetAllResults().OrderByDescending(x => scoreSelector(x)).ToList();

            // Learn the overall score range so individual scores can be expressed as a position in it.
            rangeFinder similarityRange = new rangeFinder();

            foreach (var pair in sortedResults)
            {
                similarityRange.Learn(scoreSelector(pair));
            }

            // Safety limit: at least one document is removed per iteration, so the loop should never
            // need more iterations than there were documents at the start.
            Int32 limit = documents.Count;
            Int32 i     = 0;

            while (documents.Any())
            {
                i++;
                var doc = documents.FirstOrDefault();
                if (doc == null)
                {
                    break;
                }

                var results = result.GetResultsFor(doc);

                DocumentCluster currentCluster = output.NewCluster <DocumentCluster>();
                currentCluster.ClusterSeed = doc;

                foreach (KeyValuePair <HtmlNode, DocumentSimilarityResultPair> pair in results)
                {
                    Double scoreAtRange = similarityRange.GetPositionInRange(scoreSelector(pair.Value));
                    if (scoreAtRange > minSimScore)
                    {
                        currentCluster.Add(pair.Key, scoreAtRange);
                        documents.Remove(pair.Key);
                    }
                }

                if (currentCluster.items.Count == 0)
                {
                    // Nothing was similar enough: park the seed in the null cluster.
                    output.NullCluster.Add(doc, 0);
                    documents.Remove(doc);
                }
                else
                {
                    documents.Remove(doc);
                    currentCluster.items.Add(doc);
                    output.AddCluster(currentCluster);
                }

                if (i > limit)
                {
                    break;
                }
            }

            // Second chance for null-cluster documents: attach each to the cluster whose seed it is most similar to.
            // Enumerate a snapshot, because matching items are removed from the null cluster inside the loop.
            foreach (var item in output.NullCluster.items.ToList())
            {
                var             results         = result.GetResultsFor(item);
                Double          maxScore        = Double.MinValue;
                DocumentCluster selectedCluster = null;

                foreach (var cluster in output.GetClusters <DocumentCluster>(false))
                {
                    Double score = scoreSelector(results[cluster.ClusterSeed]);
                    if (score > maxScore)
                    {
                        maxScore        = score;
                        selectedCluster = cluster;
                    }
                }

                // selectedCluster stays null when there are no clusters (or no score beats Double.MinValue).
                if (selectedCluster != null && similarityRange.GetPositionInRange(maxScore) > minSimScore)
                {
                    // NOTE(review): the raw score is stored here, while seeds' members above store the position in range - confirm.
                    selectedCluster.Add(item, maxScore);
                    output.NullCluster.Remove(item);
                }
            }

            if (settings.ExclusiveClusterMembership)
            {
                // Keep each multi-cluster item only in the cluster whose seed it scores highest against.
                var itemToCluster = output.GetItemToClusterAssociations <DocumentCluster>();

                foreach (var pair in itemToCluster)
                {
                    if (pair.Value.Count > 1)
                    {
                        Dictionary <HtmlNode, DocumentSimilarityResultPair> results = result.GetResultsFor(pair.Key);
                        Double          maxScore        = Double.MinValue;
                        DocumentCluster selectedCluster = null;

                        foreach (var cluster in pair.Value)
                        {
                            Double score = scoreSelector(results[cluster.ClusterSeed]);
                            if (score > maxScore)
                            {
                                maxScore        = score;
                                selectedCluster = cluster;
                            }
                        }

                        foreach (var cluster in pair.Value)
                        {
                            if (cluster != selectedCluster)
                            {
                                cluster.Remove(pair.Key);
                            }
                        }
                    }
                }
            }

            output.RemoveEmptyClusters();

            return output;
        }