/// <summary>
/// Builds classifier training data from the content items that reference the taxonomy tags,
/// stores the result in the training-data field of <c>InnerItem</c> and returns it.
/// </summary>
/// <returns>
/// A tuple whose first element is <c>true</c> when at least one training row was produced; the
/// second element is then the training data, one row per line. When no row could be produced the
/// first element is <c>false</c> and the second element is an explanatory message (nothing is
/// written to <c>InnerItem</c> in that case).
/// </returns>
public Tuple <bool, string> GetTrainingData()
{
    //get tags from taxonomy mapping folder field
    var tagItems = ContentSearchService.GetTagsByTemplate(TaxonomyFolderId, LanguageCode, InnerItem.Database.Name, TaxonomyItemTemplateIds);

    // Template ids of the mapped content types. This does not depend on the tag, so it is built
    // once; a set also tolerates the same template appearing in more than one mapping.
    var contentTypes = new HashSet <Guid>(ContentMappings.Select(a => a.TemplateField.Guid));

    // Ids of content items already processed, so an item carrying several tags is only used once.
    var processedItems = new HashSet <Guid>();
    var trainingData = new List <string>();

    foreach (var tag in tagItems)
    {
        var tagItem = tag.GetItem();
        if (tagItem == null)
        {
            // The tag could not be resolved to an item, so there is nothing to look up referrers for.
            continue;
        }

        // Find the items that reference this tag and keep those whose template is a mapped content type.
        // A link whose source item can no longer be resolved yields null and is skipped.
        var links = Globals.LinkDatabase.GetItemReferrers(tagItem, false);
        var contentItems = links
            .Select(a => a.GetSourceItem())
            .Where(a => a != null && contentTypes.Contains(a.TemplateID.Guid))
            .Take(ItemTrainingCount)
            .ToList();

        //todo: if there aren't enough content items for any given tag, we could log that and mention it as an issue for training
        foreach (var c in contentItems)
        {
            //dedupe
            if (!processedItems.Add(c.ID.Guid))
            {
                continue;
            }

            //pull content for the items
            var contentMap = ContentMappings.First(a => a.TemplateField.Guid == c.TemplateID.Guid);
            var trainingRow = ContentService.GetTrainingData(c, SourceTagsFieldId, contentMap.ContentFields);
            if (string.IsNullOrWhiteSpace(trainingRow))
            {
                continue;
            }

            trainingData.Add(trainingRow);
        }
    }

    if (trainingData.Count == 0)
    {
        return Tuple.Create(false, "There was no training data");
    }

    var trainingDataString = string.Join(Environment.NewLine, trainingData);

    DataWrapper.UpdateFields(InnerItem, new Dictionary <ID, string>
    {
        { Settings.TrainingDataFieldId, trainingDataString }
    });

    // NOTE: a stricter check requiring ItemTrainingCount items for every tag
    // (ItemTrainingCount * tagItems.Count rows in total) is intentionally not enforced here.
    return Tuple.Create(true, trainingDataString);
}
/// <summary>
/// Runs the classifier against already-tagged content and records how well its suggestions match
/// the existing tags. The averaged accuracy, overage and confidence are written to the
/// corresponding fields of <c>InnerItem</c>.
/// </summary>
/// <returns>
/// <c>(true, "")</c> when the statistics were computed and stored. <c>(false, message)</c> when
/// fewer than <c>ItemTestingCount</c> test items are available, or when none of the test items
/// could be scored; no fields are updated in either failure case.
/// </returns>
public Tuple <bool, string> TestClassifier()
{
    var database = InnerItem.Database.Name;

    //start testing content for accuracy
    var testItems = ContentSearchService.GetContent(database, LanguageCode, ContentMappings.Select(a => a.TemplateField).ToList(), ItemTestingCount);
    if (testItems.Count < ItemTestingCount)
    {
        return Tuple.Create(false, $"There's only {testItems.Count} of the required {ItemTestingCount} items (change in settings).");
    }

    var mappings = ContentService.GetContentMappings(database);
    var accuracySum = 0f;
    var overageSum = 0f;
    var confidenceSum = 0f;
    var counted = 0;

    foreach (var t in testItems)
    {
        if (!mappings.ContainsKey(t.TemplateId.Guid))
        {
            continue;
        }

        var map = mappings[t.TemplateId.Guid];
        Item contentItem = DataWrapper.GetItemById(t.ItemId, database);
        if (contentItem == null)
        {
            continue;
        }

        var testingTags = ContentService.GetTags(contentItem, SourceTagsFieldId);
        var testingContent = ContentService.GetTrimmedContent(contentItem, map.ContentFields);
        if (!testingTags.Any() || string.IsNullOrWhiteSpace(testingContent))
        {
            continue;
        }

        var tags = NaturalLanguageClassifier.Classify(ClassifierId, testingContent)?.classes;
        if (tags == null || !tags.Any())
        {
            continue;
        }

        var suggestedTagNames = tags.Select(a => a.class_name).ToList();

        // Confidence of the suggested classes that are also real tags of the item.
        var confidenceScores = tags.Where(a => testingTags.Contains(a.class_name)).Select(b => b.confidence);

        // Fraction of the item's tags that the classifier suggested.
        var tagsMatched = testingTags.Count(a => suggestedTagNames.Contains(a));
        accuracySum += (float)tagsMatched / testingTags.Count;

        // Number of suggestions beyond the number of real tags (never negative).
        overageSum += Math.Max(0, suggestedTagNames.Count - testingTags.Count);

        // Average() throws on an empty sequence, which happens when no suggestion matches a real
        // tag; such an item contributes a confidence of 0, in line with its accuracy of 0.
        confidenceSum += confidenceScores.DefaultIfEmpty().Average();

        counted++;
    }

    if (counted == 0)
    {
        // Dividing by zero below would produce NaN and store "NaN%" in the result fields.
        return Tuple.Create(false, "None of the test items could be scored (no mapped template, tags, content or classifier results).");
    }

    var accuracy = accuracySum / counted;
    var accuracyStr = $"{accuracy * 100:F0}%";
    var overage = overageSum / counted;
    var overageStr = $"{overage:F2}";
    var confidence = confidenceSum / counted;
    var confidenceStr = $"{confidence * 100:F0}%";

    DataWrapper.UpdateFields(InnerItem, new Dictionary <ID, string>
    {
        { Settings.AccuracyFieldId, accuracyStr },
        { Settings.OverageFieldId, overageStr },
        { Settings.ConfidenceFieldId, confidenceStr }
    });

    return Tuple.Create(true, "");
}