Пример #1
0
        /// <summary>
        /// Creates a fresh <see cref="ItemMapperSettings"/> instance and fills it from the given
        /// deserialized data: first from its category hierarchy, then from its items.
        /// </summary>
        /// <param name="deserializedData">Source of the category hierarchy and the items.</param>
        /// <returns>The newly created and populated settings object.</returns>
        public static ItemMapperSettings FromDeserialized(IDeserializedData deserializedData)
        {
            var result = new ItemMapperSettings();

            // Category-related settings are filled in before the item-related ones.
            var hierarchy = deserializedData.CategoryHierarchy;
            GetSettingsFromCatgoryHierarchy(result, hierarchy);

            var allItems = deserializedData.Items;
            GetSettingsFromItems(result, allItems);

            return result;
        }
Пример #2
0
        /// <summary>
        /// Fills the category-related settings (<c>CategoryCount</c>, <c>SubCategoryIndex</c> and
        /// <c>CategoryIndex</c>) from the category hierarchy.
        /// </summary>
        /// <remarks>
        /// <c>CategoryIndex</c> maps each category to a running offset that starts at 0 and grows by
        /// the category's sub-category count plus a fixed padding of 10 for every category.
        /// <c>CategoryCount</c> is the sum of all sub-category counts plus 10 per category, i.e. the
        /// value the running offset reaches after the last category.
        /// <c>SubCategoryIndex</c> maps each category to a dictionary from sub-category name to its
        /// zero-based position within that category.
        /// </remarks>
        /// <param name="settings">Settings object that receives the computed values.</param>
        /// <param name="categoryHierarchy">Maps each category to its sub-categories.</param>
        /// <exception cref="ArgumentException">
        /// Thrown by <c>ToDictionary</c> when a category lists the same sub-category twice.
        /// </exception>
        private static void GetSettingsFromCatgoryHierarchy(ItemMapperSettings settings, IDictionary<string, IEnumerable<string>> categoryHierarchy)
        {
            // Extra offset added per category on top of its sub-category count.
            // NOTE(review): the reason for the value 10 is not visible here - confirm before changing it.
            const int categoryPadding = 10;

            settings.CategoryCount = categoryHierarchy.Keys.Count * categoryPadding
                                     + categoryHierarchy.Values.SelectMany(subCategories => subCategories).Count();

            // The indexed Select overload yields each sub-category's position directly, instead of
            // copying the sequence to a list and scanning it with IndexOf for every single element.
            settings.SubCategoryIndex = categoryHierarchy.ToDictionary(
                pair => pair.Key,
                pair => pair.Value
                            .Select((subCategory, position) => new { subCategory, position })
                            .ToDictionary(entry => entry.subCategory, entry => entry.position));

            var categoryIndex = new Dictionary<string, int>();
            var index = 0;

            foreach (var pair in categoryHierarchy)
            {
                categoryIndex.Add(pair.Key, index);
                index += pair.Value.Count() + categoryPadding;
            }
            settings.CategoryIndex = categoryIndex;
        }
Пример #3
0
        // TODO: For colors, money etc.: use only one range for all attributes
        /// <summary>
        /// Scans all items and records value ranges in <paramref name="settings"/>: the oldest and
        /// newest incident date, the Lab component ranges of every color attribute, and the numeric
        /// range and occurrence count of every other attribute.
        /// </summary>
        /// <remarks>
        /// Ranges are only ever widened, so the outcome depends on the initial Min/Max and date
        /// values the settings objects are created with.
        /// Color attributes are tracked in <c>settings.ColorAttributes</c> and do not increment a
        /// data count. All other attributes are tracked in <c>settings.Attributes</c>; their
        /// <c>DataCount</c> is incremented for every occurrence, including string values and
        /// values that cannot be converted to a number, which do not affect the range.
        /// </remarks>
        /// <param name="settings">Settings object that is updated in place.</param>
        /// <param name="items">Items whose incident dates and attributes are inspected.</param>
        private static void GetSettingsFromItems(ItemMapperSettings settings, IEnumerable<LostAndFoundIndexedItem> items)
        {
            var colorConverter = new ColorConverter();

            foreach (var item in items)
            {
                if (settings.OldestDate > item.DateOfIncident)
                {
                    settings.OldestDate = item.DateOfIncident;
                }
                if (settings.NewestDate < item.DateOfIncident)
                {
                    // Must update NewestDate; assigning OldestDate here would corrupt the oldest
                    // date and leave NewestDate (and therefore DateRange) at its initial value.
                    settings.NewestDate = item.DateOfIncident;
                }

                foreach (var attribute in item.Attributes)
                {
                    if (attribute is ColorValueAttribute)
                    {
                        // A color attribute without a value carries no range information; skip it
                        // instead of failing on ToString().
                        var rawColor = attribute.GetValue();
                        if (rawColor == null)
                        {
                            continue;
                        }

                        var parsedColor = colorConverter.ConvertFromInvariantString(rawColor.ToString()) as Color?;
                        if (parsedColor == null)
                        {
                            continue;
                        }

                        var colorObject = parsedColor.Value;
                        var rgb = new Rgb
                        {
                            R = colorObject.R, G = colorObject.G, B = colorObject.B
                        };
                        var lab = rgb.To<Lab>();

                        IColorAttributeMapperSettings colorAttributeSettings;
                        if (!settings.ColorAttributes.TryGetValue(attribute.ID, out colorAttributeSettings))
                        {
                            colorAttributeSettings = new ColorAttributeMapperSettings();
                            settings.ColorAttributes.Add(attribute.ID, colorAttributeSettings);
                        }

                        if (lab.L < colorAttributeSettings.LuminescenceSettings.MinValue)
                        {
                            colorAttributeSettings.LuminescenceSettings.MinValue = lab.L;
                        }
                        if (lab.L > colorAttributeSettings.LuminescenceSettings.MaxValue)
                        {
                            colorAttributeSettings.LuminescenceSettings.MaxValue = lab.L;
                        }
                        if (lab.A < colorAttributeSettings.ASettings.MinValue)
                        {
                            colorAttributeSettings.ASettings.MinValue = lab.A;
                        }
                        if (lab.A > colorAttributeSettings.ASettings.MaxValue)
                        {
                            colorAttributeSettings.ASettings.MaxValue = lab.A;
                        }
                        if (lab.B < colorAttributeSettings.BSettings.MinValue)
                        {
                            colorAttributeSettings.BSettings.MinValue = lab.B;
                        }
                        if (lab.B > colorAttributeSettings.BSettings.MaxValue)
                        {
                            colorAttributeSettings.BSettings.MaxValue = lab.B;
                        }

                        continue;
                    }

                    IAttributeMapperSettings attributeSettings;
                    if (!settings.Attributes.TryGetValue(attribute.ID, out attributeSettings))
                    {
                        attributeSettings = new AttributeMapperSettings();
                        settings.Attributes.Add(attribute.ID, attributeSettings);
                    }
                    attributeSettings.DataCount++;

                    var value = attribute.GetValue();

                    // Strings are counted but have no numeric range.
                    if (value is string)
                    {
                        continue;
                    }

                    // Money values are checked first and converted through their Value member;
                    // anything else must be IConvertible to contribute to the range.
                    double numericValue;
                    var moneyValue = value as MoneyValue;
                    if (moneyValue != null)
                    {
                        numericValue = Convert.ToDouble(moneyValue.Value);
                    }
                    else
                    {
                        var convertible = value as IConvertible;
                        if (convertible == null)
                        {
                            continue;
                        }

                        numericValue = convertible.ToDouble(CultureInfo.InvariantCulture);
                    }

                    if (numericValue < attributeSettings.MinValue)
                    {
                        attributeSettings.MinValue = numericValue;
                    }
                    if (numericValue > attributeSettings.MaxValue)
                    {
                        attributeSettings.MaxValue = numericValue;
                    }
                }
            }

            settings.DateRange = settings.NewestDate - settings.OldestDate;
        }
        /// <summary>
        /// Deserializes the data in <c>Filename</c>, builds matching and non-matching item pairs and
        /// splits them into training and test data.
        /// </summary>
        /// <remarks>
        /// Legacy items are dropped from the pairing input unless <c>UseLegacyData</c> is set.
        /// The training set consists of the first half of the matches plus the same number of
        /// non-matching pairs; the test set consists of the remaining matches plus at most as many
        /// of the following non-matching pairs. Only attribute metadata with a non-null ID that is
        /// used by at least one generated pair is passed on.
        /// Depending on <c>ExportSerializedAsJson</c> and <c>ExportSerializedAsBinary</c> the result
        /// is additionally handed to the corresponding serialization method.
        /// </remarks>
        /// <returns>The learning data built from the used metadata, the test set and the training set.</returns>
        public LearningData Load()
        {
            var stopWatch = Stopwatch.StartNew();

            var deserializer = new ReindexDeserializer(Filename);
            var deserializedData = deserializer.Deserialize();
            var items = deserializedData.Items;

            if (!UseLegacyData)
            {
                items = items.Where(item => !item.IsLegacyObject).ToList();
            }

            stopWatch.Stop();
            Logger.DebugFormat("JSON deserialization took {0}", stopWatch.Elapsed);

            stopWatch.Restart();

            // NOTE(review): the settings are derived from the complete deserialized data, i.e. they
            // still include legacy items even when those were filtered out above - confirm intended.
            var mapperSettings = ItemMapperSettings.FromDeserialized(deserializedData);
            var mapper = new MatchedItemsMapper(mapperSettings);

            var matcher = new MatchingItemMatcher(mapper);
            var matches = matcher.GetMatchingPairs(items);

            var unmatcher = new RandomNotMatchingItemMatcher(mapper, 15);
            // TODO: Generate the non-matching pairs based on the matches?
            var unmatched = unmatcher.GetMatchingPairs(items);

            Logger.InfoFormat("Matches: {0}\tUnmatched: {1}", matches.Count, unmatched.Count);

            var trainingSetSize = matches.Count / 2;
            var testSetSize = matches.Count - trainingSetSize;
            var trainingData = matches.Take(trainingSetSize).Concat(unmatched.Take(trainingSetSize)).ToArray();
            var testData = matches.Skip(trainingSetSize).Concat(unmatched.Skip(trainingSetSize).Take(testSetSize)).ToArray();

            // Materialize the distinct attribute keys once. Left as a deferred query, the whole
            // SelectMany over all pairs would be re-run for every metadata entry checked below.
            var usedAttributes = matches.Concat(unmatched)
                                        .SelectMany(pair => pair.FindingAttributes.Concat(pair.LossAttributes).Select(a => a.Key))
                                        .Distinct()
                                        .ToArray();
            var actualMetadata = deserializedData.AttributeMetadata
                                                 .Where(data => data.Attribute.ID != null && usedAttributes.Contains(data.Attribute.ID))
                                                 .ToDictionary(data => data.Attribute.ID);

            stopWatch.Stop();
            Logger.DebugFormat("Data manipulation took {0}", stopWatch.Elapsed);

            var learningData = new LearningData(actualMetadata, testData, trainingData);

            if (ExportSerializedAsJson)
            {
                SerializeDataAsJson(learningData);
            }
            if (ExportSerializedAsBinary)
            {
                SerializeDataAsBinary(learningData);
            }

            return learningData;
        }