/// <summary>
/// Builds a range out of the two minimum-similarity criteria.
/// </summary>
/// <returns>
/// A new range that has learned <c>MinScoreInRangeCriterion</c> and <c>MinScoreInRangeMaxCriterion</c>, in that order.
/// </returns>
public rangeFinder GetMinSimilarityRange()
{
    var criteriaRange = new rangeFinder();

    criteriaRange.Learn(MinScoreInRangeCriterion);
    criteriaRange.Learn(MinScoreInRangeMaxCriterion);

    return criteriaRange;
}
/// <summary>
/// Replaces <c>ranger</c> with a fresh range and feeds it every value stored in this collection.
/// </summary>
/// <returns>The newly populated <c>ranger</c>.</returns>
public rangeFinder DetectMinMax()
{
    ranger = new rangeFinder();

    // A non-zero artificial maximum is learned before any stored value.
    if (ArtificialMaximum != 0)
    {
        ranger.Learn(ArtificialMaximum);
    }

    for (int rowIndex = 0; rowIndex < this.Count; rowIndex++)
    {
        var row = this[rowIndex];

        for (int columnIndex = 0; columnIndex < row.Count; columnIndex++)
        {
            ranger.Learn(row[columnIndex]);
        }
    }

    return ranger;
}
/// <summary>
/// Computes the range covered by the values stored in <c>frequency</c>.
/// </summary>
/// <returns>A new range that has learned the value of every <c>frequency</c> entry.</returns>
public rangeFinder GetRange()
{
    var valueRange = new rangeFinder();

    foreach (var entry in frequency)
    {
        valueRange.Learn(entry.Value);
    }

    return valueRange;
}
/// <summary>
/// Converts <c>HexColorA</c> and <c>HexColorB</c> to HSV points, feeds their A, S, V and H
/// components into the matching ranges, and stores the first point as <c>BaseColor</c>.
/// </summary>
protected void Prepare()
{
    var first = new ColorHSVPoint(HexColorA);
    var second = new ColorHSVPoint(HexColorB);

    // Each range still learns the component of the first point before that of the second.
    foreach (var point in new[] { first, second })
    {
        RangeA.Learn(point.A);
        RangeS.Learn(point.S);
        RangeV.Learn(point.V);
        RangeH.Learn(point.H);
    }

    BaseColor = first;
}
/*
 * /// <summary>
 * /// Prepares the context.
 * /// </summary>
 * /// <param name="context">The context.</param>
 * /// <param name="log">The log.</param>
 * /// <returns></returns>
 * public DocumentSelectResult PrepareContext(OperationContext context, ILogBuilder log)
 * {
 *     DocumentSelectResult selectContext = new DocumentSelectResult();
 *     selectContext.stemmingContext = context.stemmContext;
 *     selectContext.spaceModel = context.spaceModel;
 *     selectContext.query = query;
 *
 *     selectContext.selectedFeatures = context.SelectedFeatures;
 *
 *     foreach (KeyValuePair<string, WebSiteDocuments> pair in context.webSiteByDomain)
 *     {
 *         selectContext.domainNameToGraph.Add(pair.Key, pair.Value.extensions.graph);
 *
 *         foreach (WebSiteDocument doc in pair.Value.documents)
 *         {
 *             DocumentSelectResultEntry entry = new DocumentSelectResultEntry();
 *             TextDocument text = context.textDocuments[doc.AssociatedID];
 *             SpaceDocumentModel spaceDocument = context.spaceModel.documents.FirstOrDefault(x => x.name == doc.AssociatedID);
 *
 *             string dn = pair.Value.domain;
 *             entry.SetEntry(dn, doc, spaceDocument, text);
 *             selectContext.Add(entry);
 *             //entry.SetEntry( context.context.webDocumentByAssignedID[pair.Key], webDocIDToDomain[aID], webDocumentRegistry[aID], spaceDocumentRegistry[aID], textDocumentRegistry[aID]);
 *         }
 *
 *     }
 *
 *     // PREPARATION OF MODEL
 *
 *     model.Prepare(selectContext, log);
 *
 *     return selectContext;
 *
 * }
 */

/*
 * /// <summary>
 * /// Prepares the context.
 * /// </summary>
 * /// <param name="space">The space.</param>
 * /// <param name="sites">The sites.</param>
 * /// <param name="documents">The documents.</param>
 * /// <param name="stemmingContext">The stemming context.</param>
 * /// <returns></returns>
 * public DocumentSelectResult PrepareContext(SpaceModel space, IEnumerable<WebSiteDocuments> sites, IEnumerable<TextDocument> documents, StemmingContext stemmingContext)
 * {
 *     DocumentSelectResult context = new DocumentSelectResult();
 *     context.query = query;
 *
 *     context.stemmingContext = stemmingContext;
 *     context.spaceModel = space;
 *
 *     List<String> associatedIDs = new List<string>();
 *
 *     Dictionary<String, TextDocument> textDocumentRegistry = new Dictionary<string, TextDocument>();
 *     foreach (TextDocument textDocument in documents)
 *     {
 *         textDocumentRegistry.Add(textDocument.name, textDocument);
 *     }
 *
 *     Dictionary<String, SpaceDocumentModel> spaceDocumentRegistry = new Dictionary<string, SpaceDocumentModel>();
 *     foreach (var textDocument in space.documents)
 *     {
 *         spaceDocumentRegistry.Add(textDocument.name, textDocument);
 *     }
 *
 *
 *     Dictionary<String, String> webDocIDToDomain = new Dictionary<string, string>();
 *
 *     Dictionary<String, WebSiteDocument> webDocumentRegistry = new Dictionary<string, WebSiteDocument>();
 *
 *     foreach (WebSiteDocuments site in sites)
 *     {
 *         context.domainNameToGraph.Add(site.domain, site.extensions.graph);
 *
 *         foreach (WebSiteDocument webDocument in site.documents)
 *         {
 *             webDocumentRegistry.Add(webDocument.AssociatedID, webDocument);
 *             associatedIDs.Add(webDocument.AssociatedID);
 *             webDocIDToDomain.Add(webDocument.AssociatedID, site.domain);
 *         }
 *     }
 *
 *     foreach (String aID in associatedIDs)
 *     {
 *         DocumentSelectResultEntry entry = new DocumentSelectResultEntry();
 *         entry.SetEntry(webDocIDToDomain[aID], webDocumentRegistry[aID], spaceDocumentRegistry[aID], textDocumentRegistry[aID]);
 *         context.Add(entry);
 *     }
 *
 *     return context;
 * }
 */

/// <summary>
/// Scores every entry of <paramref name="context"/> with each factor of the model. For factors
/// flagged with <c>doNormalize</c> the scores are min-max normalized across all entries; every
/// score is then multiplied by the factor weight. Finally the factor scores of each entry are summed.
/// </summary>
/// <param name="context">Selection result whose <c>items</c> are scored in place.</param>
/// <param name="log">Log builder handed to each factor's <c>Score</c> call.</param>
/// <returns>The same <paramref name="context"/> instance that was passed in.</returns>
public DocumentSelectResult ExecuteEvaluation(DocumentSelectResult context, ILogBuilder log)
{
    // SCORE COMPUTATION
    foreach (IScoreModelFactor factor in model.Factors)
    {
        rangeFinder ranger = new rangeFinder();

        // First pass: raw scores; the range learns only usable scores of normalized factors.
        foreach (DocumentSelectResultEntry entry in context.items)
        {
            Double score = factor.Score(entry, context, log);
            entry.SetScore(factor, score);

            // NaN never compares equal to anything, itself included, so the former
            // "score != Double.NaN" test was always true. Double.IsNaN is the reliable check.
            if (factor.doNormalize && !Double.IsNaN(score))
            {
                ranger.Learn(score);
            }
        }

        // Second pass: normalize (when requested and possible) and apply the factor weight.
        foreach (DocumentSelectResultEntry entry in context.items)
        {
            Double score = entry.GetScore(factor);

            if (factor.doNormalize && !Double.IsNaN(ranger.Range))
            {
                score = score - ranger.Minimum;

                // A zero range would turn the division into 0/0 = NaN and poison the summed
                // score; in that case the score stays at its offset from the minimum.
                if (ranger.Range != 0)
                {
                    score = score / ranger.Range;
                }
            }

            score = score * factor.weight;
            entry.SetScore(factor, score, false);
        }
    }

    foreach (DocumentSelectResultEntry entry in context.items)
    {
        entry.SumFactorScores();
    }

    return context;
}
/// <summary>
/// Rescales every value in <c>index</c> to its position within the range of all index values,
/// provided <c>DoFactorNormalization</c> is set. The setting is logged in either case.
/// </summary>
/// <param name="log">Log builder that receives the status messages.</param>
public void DoIndexNormalization(ILogBuilder log)
{
    log.log("Global factor normalization [" + DoFactorNormalization + "]");

    if (!DoFactorNormalization)
    {
        return;
    }

    var valueRange = new rangeFinder();
    foreach (var entry in index)
    {
        valueRange.Learn(entry.Value);
    }

    // Iterate over a snapshot, because the loop body writes back into index.
    foreach (var entry in index.ToList())
    {
        index[entry.Key] = valueRange.GetPositionInRange(entry.Value);
    }

    log.log("Global factor [" + shortName + "] range [" + valueRange.Range.ToString("F5") + "] before normalization.");
}
/// <summary>
/// Rescales the score of every entry to its position within the score range of the entry's own domain.
/// </summary>
/// <param name="context">Entries to normalize; their <c>score</c> is overwritten in place.</param>
/// <param name="log">Log builder forwarded to <c>GetByDomain</c>.</param>
/// <returns>The score range learned for each domain, keyed by the range's <c>id</c>.</returns>
public static Dictionary<String, rangeFinder> NormalizeWithinDomain(this IEnumerable<DocumentSelectResultEntry> context, ILogBuilder log)
{
    Dictionary<String, List<DocumentSelectResultEntry>> entriesByDomain = context.GetByDomain(log);
    var rangeByDomain = new Dictionary<String, rangeFinder>();

    foreach (var domainGroup in entriesByDomain)
    {
        var domainRange = new rangeFinder(domainGroup.Key);
        List<DocumentSelectResultEntry> domainEntries = domainGroup.Value;

        // The range must see every score of the domain before any score is rewritten.
        foreach (DocumentSelectResultEntry domainEntry in domainEntries)
        {
            domainRange.Learn(domainEntry.score);
        }

        foreach (DocumentSelectResultEntry domainEntry in domainEntries)
        {
            domainEntry.score = domainRange.GetPositionInRange(domainEntry.score);
        }

        rangeByDomain.Add(domainRange.id, domainRange);
    }

    return rangeByDomain;
}
/// <summary>
/// Adds term and feature statistics from <paramref name="context"/> to the report data.
/// </summary>
/// <param name="report">Report whose <c>data</c> receives the new fields.</param>
/// <param name="context">Operation context supplying the space model and the selected features.</param>
/// <param name="afterFeatureSelection">
/// When <c>false</c>, only the labeled/unlabeled input term counts are added. When <c>true</c>, the
/// same counts are added under the "selected" field names, together with the number of selected
/// features and the smallest selected feature weight.
/// </param>
public static void SetReportDataFields(this classificationReport report, OperationContext context, Boolean afterFeatureSelection = false)
{
    // Both modes report the same two counts; only field names and descriptions differ.
    var labeledCount = context.spaceModel.terms_known_label.Count.ToString();
    var unlabeledCount = context.spaceModel.terms_unknown_label.Count.ToString();

    if (!afterFeatureSelection)
    {
        report.data.Add(nameof(ReportDataFieldEnum.labeled_terms), labeledCount, "Number of labeled input terms");
        report.data.Add(nameof(ReportDataFieldEnum.unlabeled_terms), unlabeledCount, "Number of unlabeled input terms");
        return;
    }

    report.data.Add(nameof(ReportDataFieldEnum.labeled_selected_terms), labeledCount, "Number of labeled selected terms");
    report.data.Add(nameof(ReportDataFieldEnum.unlabeled_selected_terms), unlabeledCount, "Number of unlabeled selected terms");
    report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatures), context.SelectedFeatures.Count.ToString(), "Number of selected features");

    var weightRange = new rangeFinder();
    foreach (var feature in context.SelectedFeatures.index)
    {
        weightRange.Learn(feature.Value.weight);
    }

    report.data.Add(nameof(ReportDataFieldEnum.SelectedFeatureMin), weightRange.Minimum.ToString("F5"), "Smallest weight of a selected feature");
}
/// <summary>
/// Groups the documents of a similarity result into clusters.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. The first remaining document becomes a cluster seed; every document whose similarity to the
///    seed, expressed as a position within the overall similarity range, exceeds
///    <paramref name="minSimScore"/> joins the cluster. Seeds that attract no document go to the null cluster.
/// 2. Each null-cluster document is offered to the cluster whose seed it is most similar to.
/// 3. If <c>settings.ExclusiveClusterMembership</c> is set, a document that belongs to several
///    clusters is kept only in the one with the most similar seed.
/// 4. Empty clusters are removed.
/// </remarks>
/// <param name="result">The similarity result providing the documents and their pairwise results.</param>
/// <param name="collectionName">Name for the collection.</param>
/// <param name="scoreSelector">
/// Extracts the score from a result pair; when <c>null</c>, the selector of
/// <c>settings.SimilarityScoreSource</c> is used.
/// </param>
/// <param name="minSimScore">
/// Threshold the in-range score has to exceed; when left at <c>Double.MinValue</c>,
/// <c>settings.MinScoreInRangeCriterion</c> is used.
/// </param>
/// <returns>The populated cluster collection.</returns>
public DocumentClusterCollection GetClusters(DocumentSimilarityResult result, String collectionName = "Clusters", Func<DocumentSimilarityResultPair, Double> scoreSelector = null, Double minSimScore = Double.MinValue)
{
    // Double.MinValue is the "not specified" sentinel of the optional parameter.
    if (minSimScore == Double.MinValue)
    {
        minSimScore = settings.MinScoreInRangeCriterion;
    }

    if (scoreSelector == null)
    {
        scoreSelector = settings.SimilarityScoreSource.GetSelector();
    }

    DocumentClusterCollection output = new DocumentClusterCollection()
    {
        name = collectionName
    };

    var documents = result.GetDocuments();

    // The sort key has to be the selected score. The previous lambda returned the selector
    // delegate itself, which gave every element the same key and sorted nothing.
    var sortedResults = result.GetAllResults().OrderByDescending(x => scoreSelector(x)).ToList();

    rangeFinder similarityRange = new rangeFinder();
    foreach (var pair in sortedResults)
    {
        similarityRange.Learn(scoreSelector(pair));
    }

    // Safety limit: the loop runs at most once per initial document (plus one).
    Int32 limit = documents.Count;
    Int32 i = 0;

    while (documents.Any())
    {
        i++;

        var doc = documents.FirstOrDefault();
        if (doc == null)
        {
            break;
        }

        var results = result.GetResultsFor(doc);

        DocumentCluster currentCluster = output.NewCluster<DocumentCluster>();
        currentCluster.ClusterSeed = doc;

        foreach (KeyValuePair<HtmlNode, DocumentSimilarityResultPair> pair in results)
        {
            Double scoreAtRange = similarityRange.GetPositionInRange(scoreSelector(pair.Value));
            if (scoreAtRange > minSimScore)
            {
                currentCluster.Add(pair.Key, scoreAtRange);
                documents.Remove(pair.Key);
            }
        }

        if (currentCluster.items.Count == 0)
        {
            // The seed attracted nothing: park it in the null cluster for the second pass.
            output.NullCluster.Add(doc, 0);
            documents.Remove(doc);
        }
        else
        {
            documents.Remove(doc);
            currentCluster.items.Add(doc);
            output.AddCluster(currentCluster);
        }

        if (i > limit)
        {
            break;
        }
    }

    // Enumerate a snapshot: the loop body removes entries from the null cluster, which
    // presumably mutates "items" and would otherwise invalidate the running enumerator.
    foreach (var item in output.NullCluster.items.ToList())
    {
        var results = result.GetResultsFor(item);

        Double maxScore = Double.MinValue;
        DocumentCluster selectedCluster = null;

        foreach (var cluster in output.GetClusters<DocumentCluster>(false))
        {
            Double score = scoreSelector(results[cluster.ClusterSeed]);
            if (score > maxScore)
            {
                maxScore = score;
                selectedCluster = cluster;
            }
        }

        // selectedCluster stays null when there is no cluster to compare against.
        if (selectedCluster != null && similarityRange.GetPositionInRange(maxScore) > minSimScore)
        {
            selectedCluster.Add(item, maxScore);
            output.NullCluster.Remove(item);
        }
    }

    if (settings.ExclusiveClusterMembership)
    {
        var itemToCluster = output.GetItemToClusterAssociations<DocumentCluster>();

        foreach (var pair in itemToCluster)
        {
            if (pair.Value.Count > 1)
            {
                Dictionary<HtmlNode, DocumentSimilarityResultPair> results = result.GetResultsFor(pair.Key);

                Double maxScore = Double.MinValue;
                DocumentCluster selectedCluster = null;

                foreach (var cluster in pair.Value)
                {
                    Double score = scoreSelector(results[cluster.ClusterSeed]);
                    if (score > maxScore)
                    {
                        maxScore = score;
                        selectedCluster = cluster;
                    }
                }

                // Keep the item only in the cluster with the most similar seed.
                foreach (var cluster in pair.Value)
                {
                    if (cluster != selectedCluster)
                    {
                        cluster.Remove(pair.Key);
                    }
                }
            }
        }
    }

    output.RemoveEmptyClusters();

    return output;
}