Example #1
 public DecisionTreeLearner(LearningData learningData)
 {
     LearningData     = learningData;
     Metadata         = LearningData.ActualMetadata;
     PropertiesToSkip = new List <string>();
     Name             = "DecisionTree";
 }
 public NeuralNetworkLearner(LearningData learningData, int hiddenLayerNeuronCountMin, int hiddenLayerNeuronCountMax)
 {
     LearningData = learningData;
     Metadata     = learningData.ActualMetadata;
     Range        = new IntRange(hiddenLayerNeuronCountMin, hiddenLayerNeuronCountMax);
     Name         = "NeuralNetwork";
     BestError    = double.MaxValue;
 }
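 // Usage sketch (not part of the original examples): both learners take the same
 // LearningData instance; how that instance is obtained, and the variable names below,
 // are assumptions.
 //     var learningData = loader.Load();
 //     var treeLearner  = new DecisionTreeLearner(learningData);
 //     var nnLearner    = new NeuralNetworkLearner(learningData, 2, 10);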
        protected virtual void SerializeDataAsBinary(LearningData data)
        {
            var serializer = new BinaryFormatter();

            using (var filestream = File.Open("training.dat", FileMode.Create, FileAccess.Write))
                serializer.Serialize(filestream, data.TrainingData);
            using (var filestream = File.Open("test.dat", FileMode.Create, FileAccess.Write))
                serializer.Serialize(filestream, data.TestData);
        }
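        // A possible read-back counterpart to SerializeDataAsBinary (not part of the original
        // example); "MatchingPair" is a placeholder for the element type of TrainingData and
        // TestData, which these snippets do not show.
        protected virtual MatchingPair[] DeserializeDataFromBinary(string path)
        {
            var serializer = new BinaryFormatter();

            using (var filestream = File.Open(path, FileMode.Open, FileAccess.Read))
                return (MatchingPair[])serializer.Deserialize(filestream);
        }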
        protected virtual void SerializeDataAsJson(LearningData data)
        {
            var serializer = new JsonSerializer();

            using (var filestream = File.Open("training.json", FileMode.Create, FileAccess.Write))
                using (var writer = new StreamWriter(filestream))
                    serializer.Serialize(writer, data.TrainingData);
            using (var filestream = File.Open("test.json", FileMode.Create, FileAccess.Write))
                using (var writer = new StreamWriter(filestream))
                    serializer.Serialize(writer, data.TestData);
        }
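        // A possible read-back counterpart to SerializeDataAsJson, assuming Newtonsoft.Json
        // (the JsonSerializer used above) and the same "MatchingPair" placeholder as in the
        // binary sketch.
        protected virtual MatchingPair[] DeserializeDataFromJson(string path)
        {
            var serializer = new JsonSerializer();

            using (var filestream = File.Open(path, FileMode.Open, FileAccess.Read))
                using (var reader = new StreamReader(filestream))
                    using (var jsonReader = new JsonTextReader(reader))
                        return serializer.Deserialize<MatchingPair[]>(jsonReader);
        }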
        public LearningData Load()
        {
            var stopWatch = new Stopwatch();

            stopWatch.Start();
            var deserializer     = new ReindexDeserializer(Filename);
            var deserializedData = deserializer.Deserialize();
            var items            = deserializedData.Items;

            if (!UseLegacyData)
            {
                items = items.Where(item => !item.IsLegacyObject).ToList();
            }

            stopWatch.Stop();
            Logger.DebugFormat("JSON deserialization took {0}", stopWatch.Elapsed);

            stopWatch.Restart();

//      Logger.DebugFormat("Public attributes: " + string.Join(", ", items.SelectMany(item => item.PublicAttributes.Select(attr => attr.Name.DE)).Distinct()));
//      Logger.DebugFormat("Attributes: {0}", string.Join(", ", items.SelectMany(item => item.Attributes.Select(attr => attr.ID).Distinct())));
//      Logger.DebugFormat("Categories: {0}", string.Join(", ", items.Select(item => item.CategoryID)));
//      Logger.DebugFormat("Attributes: {0}", string.Join(", ", deserializedData.AttributeMetadata.Select(metadata => metadata.Attribute.GetType().FullName).Distinct()));
//      var enumValues = items.SelectMany(item => item.Attributes.OfType<EnumValueAttribute>());
//      Logger.DebugFormat("EnumValues: {0}", string.Join(", ", enumValues.Select(enumValue => $"ID: {enumValue.ID} Value: {enumValue.Value}")));
//      Logger.DebugFormat("Dates: {0}", string.Join(", ", items.GroupBy(item => item.DateOfIncident.ToShortDateString()).OrderByDescending(group => group.Count()).Select(group => $"{group.Key}: {group.Count()}")));
//      Logger.DebugFormat("Attributes with ID null: {0}",
//        string.Join(", ", deserializedData.AttributeMetadata.Where(attr => attr.Attribute.ID == null).Select(attr => attr.Attribute.GetType().FullName)));
//      Logger.DebugFormat("MoneyValue with ID: {0} Without: {1}",
//        deserializedData.AttributeMetadata.Count(attr => attr.Attribute is MoneyValueAttribute && attr.Attribute.ID != null),
//        deserializedData.AttributeMetadata.Count(attr => attr.Attribute is MoneyValueAttribute && attr.Attribute.ID == null));
//      Logger.DebugFormat("Items with more than one color: {0}", items.Count(item => item.Attributes.OfType<ColorValueAttribute>()
//                          .Count(color => !string.IsNullOrEmpty(color.Value?.Trim()) && color.Value != "#000000" && color.Value != "#ffffff") > 1));
//      Logger.DebugFormat("Items with more than one money: {0}", items.Count(item => item.Attributes.OfType<MoneyValueAttribute>().Count(color => color.Value != null && color.Value.Value != 0) > 1));
//      Logger.DebugFormat("Items with more than one color: {0}", string.Join(", ",
//          items.Where(item => item.Attributes.OfType<ColorValueAttribute>().Count(color => !string.IsNullOrEmpty(color.Value?.Trim()) && color.Value != "#000000" && color.Value != "#ffffff") > 1)
//            .Take(10).Select(i => i.Description)));
//      File.WriteAllLines("/tmp/text.txt", items.SelectMany(item => new[] {item.Description, item.PublicDescription}.Where(s => !string.IsNullOrEmpty(s))));
//      Logger.DebugFormat("Legacy: {0} Not legacy: {1}", items.Count(item => item.IsLegacyObject), items.Count(item => !item.IsLegacyObject));
//      return null;

            stopWatch.Restart();

            var mapperSettings = ItemMapperSettings.FromDeserialized(deserializedData);
            var mapper         = new MatchedItemsMapper(mapperSettings);

            var matcher = new MatchingItemMatcher(mapper);
            var matches = matcher.GetMatchingPairs(items);

            var unmatcher = new RandomNotMatchingItemMatcher(mapper, 15);
            // TODO: Generate the unmatches based on the matches?
            var unmatched = unmatcher.GetMatchingPairs(items);

//      unmatched = unmatched.Where(pair => pair.LossAttributes.Any(attr => attr.Value.Length >= 1 && attr.Value[0] != 0.0) &&
//                                      pair.FindingAttributes.Any(attr => attr.Value.Length >= 1 && attr.Value[0] != 0.0)).ToList();

            Logger.InfoFormat("Matches: {0}\tUnmatched: {1}", matches.Count, unmatched.Count);

            // Split: half of the matches, plus an equal number of unmatched pairs, go into the
            // training set; the remaining pairs form the test set.
            var trainingSetSize = matches.Count / 2;
            var trainingData    = matches.Take(trainingSetSize).Concat(unmatched.Take(trainingSetSize)).ToArray();
            var testData        = matches.Skip(trainingSetSize).Concat(unmatched.Skip(trainingSetSize).Take(matches.Count - trainingSetSize)).ToArray();

            // Restrict the metadata to attributes that actually occur in the generated pairs
            // and that have a non-null attribute ID.
            var usedAttributes = matches.Concat(unmatched).SelectMany(pair => pair.FindingAttributes.Concat(pair.LossAttributes).Select(a => a.Key));
            var usedMetadata   = deserializedData.AttributeMetadata.Where(attr => usedAttributes.Contains(attr.Attribute.ID));
            var actualMetadata = usedMetadata.Where(data => data.Attribute.ID != null).ToDictionary(data => data.Attribute.ID);

            stopWatch.Stop();
            Logger.DebugFormat("Data manipulation took {0}", stopWatch.Elapsed);

            var learningData = new LearningData(actualMetadata, testData, trainingData);

            if (ExportSerializedAsJson)
            {
                SerializeDataAsJson(learningData);
            }
            if (ExportSerializedAsBinary)
            {
                SerializeDataAsBinary(learningData);
            }

            return learningData;
        }
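        // Usage sketch (not part of the original example): the enclosing loader class name is an
        // assumption, but Filename, UseLegacyData and the two export flags are the members used
        // by Load() above.
        //     var loader = new LearningDataLoader
        //     {
        //         Filename                 = "items.json",
        //         UseLegacyData            = false,
        //         ExportSerializedAsJson   = true,
        //         ExportSerializedAsBinary = false,
        //     };
        //     var learningData = loader.Load();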
Example #6
        private static void NeuralNetworkLearningSingleAttributes(LearningData learningData)
        {
            var stopWatch = new Stopwatch();

            stopWatch.Start();

            var testMatcher     = new LoggingNeuralNetworkMatcher(learningData.TestData);
            var trainingMatcher = new LoggingNeuralNetworkMatcher(learningData.TrainingData);

            // Train one network per attribute: each iteration isolates a single metadata entry,
            // so the predictive value of that attribute can be evaluated on its own.
            Parallel.ForEach(learningData.ActualMetadata.Keys, metadataKey =>
            {
                var metadata = new Dictionary <string, IndexableAttributeMetadata> {
                    { metadataKey, learningData.ActualMetadata[metadataKey] }
                };
                var trainingInputs  = learningData.TrainingData.Select(data => data.ToVectorArray(metadata)).ToArray();
                var trainingOutputs = learningData.TrainingData.Select(data => new[] { data.PercentMatch }).ToArray();
                var testInputs      = learningData.TestData.Select(data => data.ToVectorArray(metadata)).ToArray();
                var testOutputs     = learningData.TestData.Select(data => new[] { data.PercentMatch }).ToArray();

                if (testInputs.Length != testOutputs.Length || trainingInputs.Length != trainingOutputs.Length)
                {
                    throw new ArgumentException("Input and output data are not the same size");
                }
                var vectorSize = trainingInputs.First().Length;
                if (trainingInputs.Any(input => input.Length != vectorSize))
                {
                    throw new ArgumentException("Not all trainingInputs have the same vector size");
                }
                if (testInputs.Any(input => input.Length != vectorSize))
                {
                    throw new ArgumentException("Not all test inputs have the same vector size");
                }

                var results = new List <Tuple <int[], double, double> >();

                // Sweep hidden-layer sizes 0 through 15; the output layer always has one neuron.
                Parallel.For(0, 16, i =>
                {
                    var parameters = new[] { i, 1 };

                    var network =
                        new ActivationNetwork(new BipolarSigmoidFunction(), trainingInputs[0].Length,
                                              parameters); //new DeepBeliefNetwork();
                    var teacher = new ParallelResilientBackpropagationLearning(network);
                    var random  = new Random();

                    var error     = double.MaxValue;
                    var iteration = 0;
                    while (error > 0.0005 && iteration < 200)
                    {
                        iteration++;
                        //for (var i = 0; i < 10; i++)
                        {
                            //*
                            // Random.Next's upper bound is exclusive, so use the full length
                            // to keep every training sample selectable.
                            var pair = random.Next(0, trainingInputs.Length);
                            error    = teacher.Run(trainingInputs[pair], trainingOutputs[pair]);
                            //*/

                            /*
                             * error = teacher.RunEpoch(trainingInputs, trainingOutputs);
                             * //*/
                            var accuracyRecallPrecision = trainingMatcher.MatchCount(network, metadata, new List <string>());
                            error = 3 - accuracyRecallPrecision.Item1 - accuracyRecallPrecision.Item2 - accuracyRecallPrecision.Item3;
                        }

                        if (iteration % 100 == 0)
                        {
                            Logger.DebugFormat("NeuralNetwork: Iteration {0} Error {1}", iteration, error);
                        }
                    }

                    var inSampleError    = teacher.ComputeError(trainingInputs, trainingOutputs);
                    var outOfSampleError = teacher.ComputeError(testInputs, testOutputs);
                    lock (results)
                    {
                        results.Add(new Tuple <int[], double, double>(parameters, inSampleError, outOfSampleError));
                    }
                    testMatcher.LogMatchCount(string.Format("{0} ({1})", metadataKey, learningData.ActualMetadata[metadataKey].Attribute.GetType().FullName), network,
                                              metadata, new List <string>());
                });

                Logger.InfoFormat("Results for {1} ({2}):\n{0}",
                                  string.Join(", ", results.Select(result => $"{string.Join("-", result.Item1)}: In: {result.Item2} Out: {result.Item3}")), metadataKey,
                                  learningData.ActualMetadata[metadataKey].Attribute.GetType().FullName);
            });

            stopWatch.Stop();
            Logger.InfoFormat("Neural Network learning (single attribute) took {0}", stopWatch.Elapsed);
        }
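        // Usage sketch (not part of the original example): the method is private static, so it
        // would be invoked from the enclosing class once a LearningData instance is available,
        // e.g.:
        //     NeuralNetworkLearningSingleAttributes(learningData);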