Ejemplo n.º 1
0
        /// <summary>
        /// Looks up the content type registered in <c>ContentMappings</c> for the extension of a file name.
        /// </summary>
        /// <param name="fileName">File name or path whose extension selects the content type; may be null.</param>
        /// <returns>
        /// The mapped content type, or an empty string when <paramref name="fileName"/> is null
        /// or its extension has no entry in <c>ContentMappings</c>.
        /// </returns>
        private static string GetContentType(string fileName)
        {
            //Path.GetExtension returns null for a null path; dictionary lookups reject a null key
            //with ArgumentNullException, so treat that case like any other unmapped extension
            var extension = Path.GetExtension(fileName);
            if (extension == null)
            {
                return("");
            }

            string contentType;

            return(ContentMappings.TryGetValue(extension, out contentType) ? contentType : "");
        }
        /// <summary>
        /// Builds the classifier training data from content items that reference the taxonomy tags
        /// and stores the result in the training data field of <c>InnerItem</c>.
        /// </summary>
        /// <remarks>
        /// For every tag, up to <c>ItemTrainingCount</c> referring items whose template appears in
        /// <c>ContentMappings</c> are considered. Each content item contributes at most one row, even
        /// when it refers to several tags. Rows that come back empty are left out.
        /// </remarks>
        /// <returns>
        /// <c>(false, message)</c> when no training rows could be produced; otherwise <c>(true, data)</c>
        /// where <c>data</c> is the rows joined with <see cref="Environment.NewLine"/>.
        /// </returns>
        public Tuple <bool, string> GetTrainingData()
        {
            //get tags from taxonomy mapping folder field
            var tagItems = ContentSearchService.GetTagsByTemplate(TaxonomyFolderId, LanguageCode, InnerItem.Database.Name, TaxonomyItemTemplateIds);

            //template guid -> mapping, built once instead of once per tag; when several mappings
            //share a template the first one wins (GroupBy keeps source order)
            var mappingsByTemplate = ContentMappings
                                     .GroupBy(a => a.TemplateField.Guid)
                                     .ToDictionary(g => g.Key, g => g.First());

            var seenItemIds  = new HashSet <Guid>();
            var trainingData = new List <string>();

            foreach (var tag in tagItems)
            {
                //NOTE(review): a search result may not resolve to an item (e.g. stale index) - skip it
                var tagItem = tag.GetItem();
                if (tagItem == null)
                {
                    continue;
                }

                //content items of a mapped template that refer to this tag
                var links        = Globals.LinkDatabase.GetItemReferrers(tagItem, false);
                var contentItems = links
                                   .Select(a => a.GetSourceItem())
                                   //a link whose source item cannot be resolved yields null
                                   .Where(a => a != null && mappingsByTemplate.ContainsKey(a.TemplateID.Guid))
                                   .Take(ItemTrainingCount)
                                   .ToList();

                //todo: if there aren't enough content items for any given tag, we could log that and mention it as an issue for training
                foreach (var c in contentItems)
                {
                    //dedupe: Add returns false when the item was already collected for another tag
                    if (!seenItemIds.Add(c.ID.Guid))
                    {
                        continue;
                    }

                    //pull content for the items
                    var contentMap  = mappingsByTemplate[c.TemplateID.Guid];
                    var trainingRow = ContentService.GetTrainingData(c, SourceTagsFieldId, contentMap.ContentFields);
                    if (string.IsNullOrWhiteSpace(trainingRow))
                    {
                        continue;
                    }

                    trainingData.Add(trainingRow);
                }
            }

            if (trainingData.Count == 0)
            {
                return(Tuple.Create(false, "There was no training data"));
            }

            var trainingDataString = string.Join(Environment.NewLine, trainingData);

            DataWrapper.UpdateFields(InnerItem, new Dictionary <ID, string>
            {
                { Settings.TrainingDataFieldId, trainingDataString }
            });

            //disabled check, kept for reference: fail when fewer rows than ItemTrainingCount per tag were found
            //var range = ItemTrainingCount * tagItems.Count;
            //if (trainingData.Count < range)
            //    return Tuple.Create(false, $"There's only {trainingData.Count} of the required {range} items. {ItemTrainingCount} items per tag (change in settings).");

            return(Tuple.Create(true, trainingDataString));
        }
 /// <summary>
 /// Tells whether the template of the given item is covered by one of the <c>ContentMappings</c>.
 /// </summary>
 /// <param name="itemToTag">Item whose template id is compared against the mapped templates.</param>
 /// <returns><c>true</c> when at least one mapping targets the item's template; otherwise <c>false</c>.</returns>
 public override bool SupportsThisItem(Item itemToTag)
 {
     //lazily filtered, so the item is only inspected while mappings are being enumerated
     var mappingsForTemplate = ContentMappings
                               .Where(mapping => mapping.TemplateField.Guid == itemToTag.TemplateID.Guid);

     return(mappingsForTemplate.Any());
 }
        /// <summary>
        /// Classifies a sample of already-tagged content items and stores the resulting accuracy,
        /// overage and confidence figures on <c>InnerItem</c>.
        /// </summary>
        /// <remarks>
        /// Per evaluated item: accuracy is the share of the item's tags that the classifier suggested,
        /// overage is the number of suggestions beyond the item's tag count, and confidence is the mean
        /// confidence of the suggestions that match the item's tags (zero when none match). Items without
        /// a content mapping, tags, content or classifier response are skipped and do not count.
        /// </remarks>
        /// <returns>
        /// <c>(false, message)</c> when fewer than <c>ItemTestingCount</c> test items are available or
        /// none of them could be evaluated; otherwise <c>(true, "")</c> after the fields were updated.
        /// </returns>
        public Tuple <bool, string> TestClassifier()
        {
            var database = InnerItem.Database.Name;

            //start testing content for accuracy
            var testItems = ContentSearchService.GetContent(database, LanguageCode, ContentMappings.Select(a => a.TemplateField).ToList(), ItemTestingCount);

            if (testItems.Count < ItemTestingCount)
            {
                return(Tuple.Create(false, $"There's only {testItems.Count} of the required {ItemTestingCount} items (change in settings)."));
            }

            var mappings      = ContentService.GetContentMappings(database);
            var accuracySum   = 0f;
            var overageSum    = 0f;
            var confidenceSum = 0f;
            var counted       = 0;

            foreach (var t in testItems)
            {
                if (!mappings.ContainsKey(t.TemplateId.Guid))
                {
                    continue;
                }

                var  map         = mappings[t.TemplateId.Guid];
                Item contentItem = DataWrapper.GetItemById(t.ItemId, database);
                if (contentItem == null)
                {
                    continue;
                }

                //only items that already carry tags and have content can be scored
                var testingTags    = ContentService.GetTags(contentItem, SourceTagsFieldId);
                var testingContent = ContentService.GetTrimmedContent(contentItem, map.ContentFields);
                if (!testingTags.Any() || string.IsNullOrWhiteSpace(testingContent))
                {
                    continue;
                }

                var tags = NaturalLanguageClassifier.Classify(ClassifierId, testingContent)?.classes;
                if (tags == null || !tags.Any())
                {
                    continue;
                }

                var suggestedTagNames = tags.Select(a => a.class_name).ToList();
                var tagsMatched       = testingTags.Count(a => suggestedTagNames.Contains(a));

                //confidences of the suggestions that are real tags of the item; empty when nothing matched
                var confidenceScores = tags.Where(a => testingTags.Contains(a.class_name)).Select(b => b.confidence);

                accuracySum += (float)tagsMatched / testingTags.Count;
                overageSum  += (suggestedTagNames.Count > testingTags.Count) ? suggestedTagNames.Count - testingTags.Count : 0;

                //Average() throws on an empty sequence, so an item without any matching suggestion
                //contributes zero confidence instead of aborting the whole test run
                confidenceSum += confidenceScores.DefaultIfEmpty().Average();
                counted++;
            }

            //without a single evaluated item the averages would be NaN; report that instead of storing "NaN%"
            if (counted == 0)
            {
                return(Tuple.Create(false, "None of the test items could be evaluated, so no statistics were stored."));
            }

            var accuracy    = accuracySum / counted;
            var accuracyStr = $"{accuracy * 100:F0}%";

            var overage    = overageSum / counted;
            var overageStr = $"{overage:F2}";

            var confidence    = confidenceSum / counted;
            var confidenceStr = $"{confidence * 100:F0}%";

            DataWrapper.UpdateFields(InnerItem, new Dictionary <ID, string>
            {
                { Settings.AccuracyFieldId, accuracyStr },
                { Settings.OverageFieldId, overageStr },
                { Settings.ConfidenceFieldId, confidenceStr }
            });

            return(Tuple.Create(true, ""));
        }