/// <summary>
/// Creates a new <see cref="ItemMapperSettings"/> and fills it from a deserialized data set.
/// </summary>
/// <param name="deserializedData">
/// Deserialized data; its <c>CategoryHierarchy</c> is processed first, then its <c>Items</c>.
/// </param>
/// <returns>The freshly created and populated settings instance.</returns>
public static ItemMapperSettings FromDeserialized(IDeserializedData deserializedData)
{
    var result = new ItemMapperSettings();

    // Category-derived values (category count and index tables).
    var hierarchy = deserializedData.CategoryHierarchy;
    GetSettingsFromCatgoryHierarchy(result, hierarchy);

    // Item-derived values (date bounds and per-attribute value ranges).
    var sourceItems = deserializedData.Items;
    GetSettingsFromItems(result, sourceItems);

    return result;
}
/// <summary>
/// Derives the category-related settings from the category hierarchy:
/// <c>CategoryIndex</c> (start index of every category), <c>SubCategoryIndex</c>
/// (position of every sub-category inside its category) and <c>CategoryCount</c>
/// (total number of index slots).
/// </summary>
/// <param name="settings">Settings instance that receives the computed values.</param>
/// <param name="categoryHierarchy">Maps each category to the sequence of its sub-categories.</param>
/// <remarks>
/// Every category occupies one slot per sub-category plus a fixed gap of 10 slots, so the
/// start index of a category is the sum of the slots of all categories enumerated before it.
/// A category that lists the same sub-category twice causes <c>ToDictionary</c> to throw an
/// <see cref="ArgumentException"/>.
/// </remarks>
private static void GetSettingsFromCatgoryHierarchy(ItemMapperSettings settings, IDictionary<string, IEnumerable<string>> categoryHierarchy)
{
    // Extra index slots added after the sub-categories of every category.
    // NOTE(review): the reason for the value 10 is not visible here - confirm before changing it.
    const int categoryPadding = 10;

    var categoryIndex = new Dictionary<string, int>();
    var subCategoryIndex = new Dictionary<string, Dictionary<string, int>>();
    var nextIndex = 0;

    foreach (var pair in categoryHierarchy)
    {
        // Enumerate the sub-categories exactly once; the indexed Select overload supplies the
        // position directly instead of a ToList().IndexOf(...) search per element.
        var positions = pair.Value
            .Select((subCategory, position) => new { subCategory, position })
            .ToDictionary(entry => entry.subCategory, entry => entry.position);

        categoryIndex.Add(pair.Key, nextIndex);
        subCategoryIndex.Add(pair.Key, positions);

        nextIndex += positions.Count + categoryPadding;
    }

    // After the loop nextIndex equals (number of categories * padding) + (number of sub-categories).
    settings.CategoryCount = nextIndex;
    settings.SubCategoryIndex = subCategoryIndex;
    settings.CategoryIndex = categoryIndex;
}
// TODO: For colors, money, etc.: use only one range for all attributes.
/// <summary>
/// Scans all items and widens the value ranges stored in <paramref name="settings"/>:
/// the oldest/newest incident date, the L/A/B ranges of every color attribute and the
/// min/max of every other attribute whose value is a <c>MoneyValue</c> or an
/// <see cref="IConvertible"/>. Finally sets <c>DateRange</c> to <c>NewestDate - OldestDate</c>.
/// </summary>
/// <param name="settings">Settings instance that is updated in place.</param>
/// <param name="items">Items whose incident dates and attributes are inspected.</param>
/// <remarks>
/// Ranges are only ever widened, so the result depends on the initial min/max/date values of
/// the settings objects (assumed to start at sentinel extremes - they are defined elsewhere).
/// Color attributes do not contribute to <c>DataCount</c>; string values are counted in
/// <c>DataCount</c> but have no numeric range.
/// </remarks>
private static void GetSettingsFromItems(ItemMapperSettings settings, IEnumerable<LostAndFoundIndexedItem> items)
{
    var colorConverter = new ColorConverter();

    foreach (var item in items)
    {
        if (settings.OldestDate > item.DateOfIncident)
        {
            settings.OldestDate = item.DateOfIncident;
        }

        if (settings.NewestDate < item.DateOfIncident)
        {
            // Bug fix: this branch used to assign OldestDate, so NewestDate never advanced,
            // OldestDate was overwritten with later dates and DateRange came out wrong.
            settings.NewestDate = item.DateOfIncident;
        }

        foreach (var attribute in item.Attributes)
        {
            if (attribute is ColorValueAttribute)
            {
                var parsedColor = colorConverter.ConvertFromInvariantString(attribute.GetValue().ToString()) as Color?;
                if (parsedColor == null)
                {
                    // The converter did not yield a Color; nothing to record for this attribute.
                    continue;
                }

                var colorObject = parsedColor.Value;
                var rgb = new Rgb { R = colorObject.R, G = colorObject.G, B = colorObject.B };
                var lab = rgb.To<Lab>();

                // Single lookup; create and register the settings on first sight of this attribute ID.
                IColorAttributeMapperSettings colorAttributeSettings;
                if (!settings.ColorAttributes.TryGetValue(attribute.ID, out colorAttributeSettings))
                {
                    colorAttributeSettings = new ColorAttributeMapperSettings();
                    settings.ColorAttributes.Add(attribute.ID, colorAttributeSettings);
                }

                if (lab.L < colorAttributeSettings.LuminescenceSettings.MinValue)
                {
                    colorAttributeSettings.LuminescenceSettings.MinValue = lab.L;
                }

                if (lab.L > colorAttributeSettings.LuminescenceSettings.MaxValue)
                {
                    colorAttributeSettings.LuminescenceSettings.MaxValue = lab.L;
                }

                if (lab.A < colorAttributeSettings.ASettings.MinValue)
                {
                    colorAttributeSettings.ASettings.MinValue = lab.A;
                }

                if (lab.A > colorAttributeSettings.ASettings.MaxValue)
                {
                    colorAttributeSettings.ASettings.MaxValue = lab.A;
                }

                if (lab.B < colorAttributeSettings.BSettings.MinValue)
                {
                    colorAttributeSettings.BSettings.MinValue = lab.B;
                }

                if (lab.B > colorAttributeSettings.BSettings.MaxValue)
                {
                    colorAttributeSettings.BSettings.MaxValue = lab.B;
                }

                continue;
            }

            IAttributeMapperSettings attributeSettings;
            if (!settings.Attributes.TryGetValue(attribute.ID, out attributeSettings))
            {
                attributeSettings = new AttributeMapperSettings();
                settings.Attributes.Add(attribute.ID, attributeSettings);
            }

            // Every non-color attribute occurrence is counted, even when it has no numeric range.
            attributeSettings.DataCount++;

            var value = attribute.GetValue();
            if (value is string)
            {
                continue;
            }

            var moneyValue = value as MoneyValue;
            if (moneyValue != null)
            {
                // Convert once instead of once per comparison/assignment.
                var amount = Convert.ToDouble(moneyValue.Value);

                if (amount < attributeSettings.MinValue)
                {
                    attributeSettings.MinValue = amount;
                }

                if (amount > attributeSettings.MaxValue)
                {
                    attributeSettings.MaxValue = amount;
                }

                continue;
            }

            var convertible = value as IConvertible;
            if (convertible == null)
            {
                // Null or a type that cannot be converted to a number: no range to track.
                continue;
            }

            var doubleValue = convertible.ToDouble(CultureInfo.InvariantCulture);

            if (doubleValue < attributeSettings.MinValue)
            {
                attributeSettings.MinValue = doubleValue;
            }

            if (doubleValue > attributeSettings.MaxValue)
            {
                attributeSettings.MaxValue = doubleValue;
            }
        }
    }

    settings.DateRange = settings.NewestDate - settings.OldestDate;
}
/// <summary>
/// Deserializes the file named by <c>Filename</c>, builds matching and non-matching item
/// pairs, splits them into training and test sets and returns them together with the
/// metadata of the attributes that actually occur in those pairs.
/// </summary>
/// <returns>
/// A <c>LearningData</c> built from the used attribute metadata (keyed by attribute ID),
/// the test pairs and the training pairs.
/// </returns>
/// <remarks>
/// Legacy items are dropped unless <c>UseLegacyData</c> is set. The result is additionally
/// exported as JSON and/or binary when <c>ExportSerializedAsJson</c> /
/// <c>ExportSerializedAsBinary</c> are set.
/// </remarks>
public LearningData Load()
{
    var stopWatch = Stopwatch.StartNew();

    var deserializer = new ReindexDeserializer(Filename);
    var deserializedData = deserializer.Deserialize();
    var items = deserializedData.Items;
    if (!UseLegacyData)
    {
        items = items.Where(item => !item.IsLegacyObject).ToList();
    }

    stopWatch.Stop();
    Logger.DebugFormat("JSON deserialization took {0}", stopWatch.Elapsed);

    // Disabled ad-hoc diagnostics, kept for reference:
    // Logger.DebugFormat("Public attributes: " + string.Join(", ", items.SelectMany(item => item.PublicAttributes.Select(attr => attr.Name.DE)).Distinct()));
    // Logger.DebugFormat("Attributes: {0}", string.Join(", ", items.SelectMany(item => item.Attributes.Select(attr => attr.ID).Distinct())));
    // Logger.DebugFormat("Categories: {0}", string.Join(", ", items.Select(item => item.CategoryID)));
    // Logger.DebugFormat("Attributes: {0}", string.Join(", ", deserializedData.AttributeMetadata.Select(metadata => metadata.Attribute.GetType().FullName).Distinct()));
    // var enumValues = items.SelectMany(item => item.Attributes.OfType<EnumValueAttribute>());
    // Logger.DebugFormat("EnumValues: {0}", string.Join(", ", enumValues.Select(enumValue => $"ID: {enumValue.ID} Value: {enumValue.Value}")));
    // Logger.DebugFormat("Dates: {0}", string.Join(", ", items.GroupBy(item => item.DateOfIncident.ToShortDateString()).OrderByDescending(group => group.Count()).Select(group => $"{group.Key}: {group.Count()}")));
    // Logger.DebugFormat("Attributes with ID null: {0}",
    //     string.Join(", ", deserializedData.AttributeMetadata.Where(attr => attr.Attribute.ID == null).Select(attr => attr.Attribute.GetType().FullName)));
    // Logger.DebugFormat("MoneyValue with ID: {0} Without: {1}",
    //     deserializedData.AttributeMetadata.Count(attr => attr.Attribute is MoneyValueAttribute && attr.Attribute.ID != null),
    //     deserializedData.AttributeMetadata.Count(attr => attr.Attribute is MoneyValueAttribute && attr.Attribute.ID == null));
    // Logger.DebugFormat("Items with more than one color: {0}", items.Count(item => item.Attributes.OfType<ColorValueAttribute>()
    //     .Count(color => !string.IsNullOrEmpty(color.Value?.Trim()) && color.Value != "#000000" && color.Value != "#ffffff") > 1));
    // Logger.DebugFormat("Items with more than one money: {0}", items.Count(item => item.Attributes.OfType<MoneyValueAttribute>().Count(color => color.Value != null && color.Value.Value != 0) > 1));
    // Logger.DebugFormat("Items with more than one color: {0}", string.Join(", ",
    //     items.Where(item => item.Attributes.OfType<ColorValueAttribute>().Count(color => !string.IsNullOrEmpty(color.Value?.Trim()) && color.Value != "#000000" && color.Value != "#ffffff") > 1)
    //         .Take(10).Select(i => i.Description)));
    // File.WriteAllLines("/tmp/text.txt", items.SelectMany(item => new[] {item.Description, item.PublicDescription}.Where(s => !string.IsNullOrEmpty(s))));
    // Logger.DebugFormat("Legacy: {0} Not legacy: {1}", items.Count(item => item.IsLegacyObject), items.Count(item => !item.IsLegacyObject));
    // return null;

    // Time the pair generation and data-set assembly separately from deserialization.
    stopWatch.Restart();

    // NOTE(review): the settings are derived from the complete deserialized data, i.e. they
    // also cover legacy items when UseLegacyData is false - confirm that this is intended.
    var mapperSettings = ItemMapperSettings.FromDeserialized(deserializedData);
    var mapper = new MatchedItemsMapper(mapperSettings);

    var matcher = new MatchingItemMatcher(mapper);
    var matches = matcher.GetMatchingPairs(items);

    // TODO: Generate the unmatched pairs based on the matches?
    // NOTE(review): the meaning of the second constructor argument (15) is not visible here.
    var unmatcher = new RandomNotMatchingItemMatcher(mapper, 15);
    var unmatched = unmatcher.GetMatchingPairs(items);

    // Disabled filter, kept for reference:
    // unmatched = unmatched.Where(pair => pair.LossAttributes.Any(attr => attr.Value.Length >= 1 && attr.Value[0] != 0.0) &&
    //     pair.FindingAttributes.Any(attr => attr.Value.Length >= 1 && attr.Value[0] != 0.0)).ToList();

    Logger.InfoFormat("Matches: {0}\tUnmatched: {1}", matches.Count, unmatched.Count);

    // First half of the matches (plus the same number of unmatched pairs) is used for training;
    // the remaining matches plus an equally sized slice of the following unmatched pairs for testing.
    var trainingSetSize = matches.Count / 2;
    var trainingData = matches.Take(trainingSetSize)
        .Concat(unmatched.Take(trainingSetSize))
        .ToArray();
    var testData = matches.Skip(trainingSetSize)
        .Concat(unmatched.Skip(trainingSetSize).Take(matches.Count - trainingSetSize))
        .ToArray();

    // Materialize the distinct attribute keys once. As a deferred query this was re-run over
    // every pair and attribute for each metadata entry checked below.
    var usedAttributes = matches.Concat(unmatched)
        .SelectMany(pair => pair.FindingAttributes.Concat(pair.LossAttributes).Select(a => a.Key))
        .Distinct()
        .ToList();

    var usedMetadata = deserializedData.AttributeMetadata.Where(attr => usedAttributes.Contains(attr.Attribute.ID));
    var actualMetadata = usedMetadata.Where(data => data.Attribute.ID != null).ToDictionary(data => data.Attribute.ID);

    stopWatch.Stop();
    Logger.DebugFormat("Data manipulation took {0}", stopWatch.Elapsed);

    var learningData = new LearningData(actualMetadata, testData, trainingData);

    if (ExportSerializedAsJson)
    {
        SerializeDataAsJson(learningData);
    }

    if (ExportSerializedAsBinary)
    {
        SerializeDataAsBinary(learningData);
    }

    return learningData;
}