Beispiel #1
0
        /// <summary>
        /// Trains a single <c>RandomClassifier</c> on each fixture data set in turn and checks
        /// that the error rate measured on that same data set matches the recorded value.
        /// </summary>
        public void TestTrain()
        {
            var classifier        = new RandomClassifier();
            var trainingParameter = new Parameter(1);

            // Data sets paired with the error rate (in percent) expected after training on them.
            // The order matters: the same classifier instance is re-trained from one entry to the next.
            var expectations = new[]
            {
                new { Data = iris,        ErrorPercent = 69.33 },
                new { Data = bupa,        ErrorPercent = 51.59 },
                new { Data = dermatology, ErrorPercent = 84.97 },
                new { Data = car,         ErrorPercent = 75.58 },
                new { Data = tictactoe,   ErrorPercent = 46.35 },
                new { Data = nursery,     ErrorPercent = 80.39 },
                new { Data = chess,       ErrorPercent = 94.53 },
            };

            foreach (var expectation in expectations)
            {
                classifier.Train(expectation.Data.GetInstanceList(), trainingParameter);

                var measuredPercent = 100 * classifier.Test(expectation.Data.GetInstanceList()).GetErrorRate();

                // Tolerance of 0.01 percentage points, as the expected values are rounded to two decimals.
                Assert.AreEqual(expectation.ErrorPercent, measuredPercent, 0.01);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Runs the complete image-classification experiment: loads the tagged images found in the
        /// sub-directories of the configured path, generates a feature vector per image, trains and
        /// tests the configured classifier once per fold, prints the accuracies, runs the
        /// fold-organizer self test and finally displays the confusion matrix in a window.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <exception cref="NotSupportedException">
        /// The configured feature generator method or classifier method is not one of the known values.
        /// </exception>
        static void Main(string[] args)
        {
            ImageUnderstandingConfig config = ImageUnderstandingConfig.ImportSettings();

            Console.WriteLine("Feature Generation: [" + config.featureGeneratorMethod + "]");
            Console.WriteLine("Classification:     [" + config.classifierMethod + "]\n");
            Console.WriteLine("SoftNormalization:  [" + config.UseSoftNormalization + "]\n");

            // some parameters
            string path          = config.path;
            int    foldCount     = config.FoldCount;
            int    testFoldCount = config.TestFoldCount;

            string[] restrictTo = config.RestrictTo;
            string[] ignoreTags = config.IgnoreTags;

            // get all images
            List<TaggedImage>       images     = new List<TaggedImage>();
            Dictionary<string, int> tagIndices = new Dictionary<string, int>();

            foreach (string folderPath in Directory.GetDirectories(path))
            {
                foreach (string imagePath in Directory.GetFiles(folderPath))
                {
                    TaggedImage image = new TaggedImage(imagePath);

                    // Skip ignored tags; when a restriction list is given, skip everything not on it.
                    if (ignoreTags.Contains(image.Tag) || (restrictTo.Length != 0 && !restrictTo.Contains(image.Tag)))
                    {
                        continue;
                    }

                    images.Add(image);

                    // Each distinct tag gets the next free row/column index of the confusion matrix.
                    if (!tagIndices.ContainsKey(image.Tag))
                    {
                        tagIndices.Add(image.Tag, tagIndices.Count);
                    }
                }
            }

            Console.WriteLine("starting feature vector generation [" + config.featureGeneratorMethod + "]\n");

            ////////////////////////////////////////////////////////////////////////////////////////////////////
            // FEATURE GENERATOR                                                                              //
            ////////////////////////////////////////////////////////////////////////////////////////////////////

            FeatureGenerator<TaggedImage, float> featureGenerator = CreateFeatureGenerator(config);

            featureGenerator.InitializeViaConfig(config);

            try
            {
                string tag = "";

                foreach (TaggedImage image in images)
                {
                    // Log once per run of images that share the same tag.
                    if (tag != image.Tag)
                    {
                        Console.WriteLine("generating feature vector: [" + image.Tag + "]");
                        tag = image.Tag;
                    }
                    image.FeatureVector = featureGenerator.GetFeatureVector(image);
                }
            }
            finally
            {
                // Release the generator even when feature extraction fails part-way through.
                featureGenerator.Dispose();
            }

            if (config.UseSoftNormalization)
            {
                SoftNormalizeFeatureVectors(images);
            }

            ////////////////////////////////////////////////////////////////////////////////////////////////////
            // FOLD ORGANIZER                                                                                 //
            ////////////////////////////////////////////////////////////////////////////////////////////////////
            FoldOrganizer<TaggedImage, string> foldOrganizer = new FoldOrganizer<TaggedImage, string>(images, foldCount, testFoldCount);

            ////////////////////////////////////////////////////////////////////////////////////////////////////
            // CONFUSION MATRIX                                                                               //
            ////////////////////////////////////////////////////////////////////////////////////////////////////
            using (Mat confusionMatrix = new Mat(tagIndices.Count, tagIndices.Count, DepthType.Cv32F, 1))
            {
                // initialize with zeros
                for (int y = 0; y < confusionMatrix.Rows; ++y)
                {
                    for (int x = 0; x < confusionMatrix.Cols; ++x)
                    {
                        confusionMatrix.SetValue(x, y, 0F);
                    }
                }

                Console.WriteLine("\nstarting Classification. [" + config.classifierMethod + "]");

                for (int iteration = 0; iteration < foldCount; ++iteration)
                {
                    Console.WriteLine("\ncurrent iteration: " + iteration);

                    Console.WriteLine("train classifier (" + iteration + ")");

                    ////////////////////////////////////////////////////////////////////////////////////////////////////
                    // CLASSIFIER                                                                                     //
                    ////////////////////////////////////////////////////////////////////////////////////////////////////
                    Classifier.Classifier<TaggedImage, string, float> classifier = CreateClassifier(config);

                    classifier.InitializeViaConfig(config);

                    try
                    {
                        classifier.Train(foldOrganizer.GetTrainingData(iteration));

                        // evaluate Test set
                        Console.WriteLine("testing (" + iteration + ")");

                        List<TaggedImage> testSet = foldOrganizer.GetTestData(iteration);

                        foreach (TaggedImage testDataSample in testSet)
                        {
                            string evaluatedTag = classifier.Evaluate(testDataSample);

                            int indexOfRealTag      = tagIndices[testDataSample.Tag];
                            int indexOfEvaluatedTag = tagIndices[evaluatedTag];

                            // Every sample adds 1 / (total number of samples with its real tag) to its cell.
                            float value = confusionMatrix.GetValue(indexOfRealTag, indexOfEvaluatedTag);
                            value += 1F / (float)foldOrganizer.GetTotalDataCount(testDataSample.Tag);

                            confusionMatrix.SetValue(indexOfRealTag, indexOfEvaluatedTag, value);
                        }
                    }
                    finally
                    {
                        // Release the classifier even when training or evaluation fails.
                        classifier.Dispose();
                    }

                    // The diagonal holds the values accumulated over all iterations so far.
                    foreach (KeyValuePair<string, int> tagIndexPair in tagIndices)
                    {
                        float accuracy = confusionMatrix.GetValue(tagIndexPair.Value, tagIndexPair.Value);
                        Console.WriteLine("accuracy = " + string.Format("{0,12:#.0000}%", (accuracy * 100)) + ",\t " + tagIndexPair.Key);
                    }
                }

                float       totalAccuracy = 0F;
                List<float> accuracies    = new List<float>();

                // Total accuracy is the mean of the per-tag diagonal entries.
                foreach (KeyValuePair<string, int> tagIndexPair in tagIndices)
                {
                    accuracies.Add(confusionMatrix.GetValue(tagIndexPair.Value, tagIndexPair.Value));
                    totalAccuracy += confusionMatrix.GetValue(tagIndexPair.Value, tagIndexPair.Value) / tagIndices.Count;
                }
                Console.WriteLine("total accuracy = " + string.Format("{0,12:#.0000}%", (totalAccuracy * 100)));
                Console.WriteLine("stdDeviation over all total accuracies = " + accuracies.ToArray().StdDeviation());

                // Replace every cell by its fourth root before it is displayed
                // (presumably to make small values visible in the image - TODO confirm).
                for (int x = 0; x < tagIndices.Count; ++x)
                {
                    for (int y = 0; y < tagIndices.Count; ++y)
                    {
                        confusionMatrix.SetValue(x, y, (float)Math.Sqrt(Math.Sqrt(confusionMatrix.GetValue(x, y))));
                    }
                }

                // perform Tests
                string s;

                Test.Test t = new Test.FoldOrganizer_Test();
                t.PerformTest(out s);
                Console.WriteLine(s);

                //visualize accuracy
                ShowConfusionMatrix(confusionMatrix);
            }
        }

        /// <summary>Creates the feature generator selected by <c>config.featureGeneratorMethod</c>.</summary>
        /// <param name="config">Configuration that names the feature generator method.</param>
        /// <returns>A new feature generator on which <c>InitializeViaConfig</c> has not yet been called.</returns>
        /// <exception cref="NotSupportedException">The configured method is not one of the known values.</exception>
        private static FeatureGenerator<TaggedImage, float> CreateFeatureGenerator(ImageUnderstandingConfig config)
        {
            switch (config.featureGeneratorMethod)
            {
            case ImageUnderstandingConfig.FeatureGeneratorMethod.HOG:
                return new HOGFeatureGenerator();

            case ImageUnderstandingConfig.FeatureGeneratorMethod.SIFT:
                return new SIFTFeatureGenerator();

            case ImageUnderstandingConfig.FeatureGeneratorMethod.Random:
                return new RandomFeatureGenerator();

            case ImageUnderstandingConfig.FeatureGeneratorMethod.Unity:
                return new UnityFeatureGenerator();

            default:
                throw new NotSupportedException("Unknown Feature Generator Method");
            }
        }

        /// <summary>Creates the classifier selected by <c>config.classifierMethod</c>.</summary>
        /// <param name="config">Configuration that names the classifier method.</param>
        /// <returns>A new classifier on which <c>InitializeViaConfig</c> has not yet been called.</returns>
        /// <exception cref="NotSupportedException">The configured method is not one of the known values.</exception>
        private static Classifier.Classifier<TaggedImage, string, float> CreateClassifier(ImageUnderstandingConfig config)
        {
            switch (config.classifierMethod)
            {
            case ImageUnderstandingConfig.ClassifierMethod.KNearest:
                return new KNearestClassifier();

            case ImageUnderstandingConfig.ClassifierMethod.Random:
                return new RandomClassifier<TaggedImage, string, float>();

            case ImageUnderstandingConfig.ClassifierMethod.SingleResult:
                return new SingleResultClassifier();

            case ImageUnderstandingConfig.ClassifierMethod.SVM:
                return new SVMClassifier();

            default:
                throw new NotSupportedException("Unknown Classifier Method");
            }
        }

        /// <summary>
        /// Shows an enlarged copy of the confusion matrix in a window and blocks until a key is pressed.
        /// </summary>
        /// <param name="confusionMatrix">The matrix to display; it is only read.</param>
        private static void ShowConfusionMatrix(Mat confusionMatrix)
        {
            const string windowName = "Confusion Matrix"; //The name of the window
            const int    scale      = 5;                  //Every cell becomes a scale x scale square

            CvInvoke.NamedWindow(windowName);             //Create the window using the specific name

            // Mat's constructor takes (rows, cols, ...). The matrix is square, so passing them in
            // this order does not change the size of the result.
            using (Mat scaledConfusionMatrix = new Mat(confusionMatrix.Rows * scale, confusionMatrix.Cols * scale, confusionMatrix.Depth, confusionMatrix.NumberOfChannels))
            {
                for (int y = 0; y < scaledConfusionMatrix.Rows; ++y)
                {
                    for (int x = 0; x < scaledConfusionMatrix.Cols; ++x)
                    {
                        // Integer division maps each scaled pixel back to its source cell.
                        scaledConfusionMatrix.SetValue(x, y, (float)confusionMatrix.GetValue(x / scale, y / scale));
                    }
                }

                CvInvoke.Imshow(windowName, scaledConfusionMatrix); //Show the image
                CvInvoke.WaitKey(0);                                //Wait for the key pressing event
                CvInvoke.DestroyWindow(windowName);                 //Destroy the window if key is pressed
            }
        }
Beispiel #3
0
        } = src_path.Substring(0, (src_path.Length) - 4);                                      // USE THIS IF RUNNING FROM VISUAL STUDIO!
        //public static string src_dir { get; } = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); // USE THIS IF RUNNING FROM EXE!
        /// <summary>
        /// Program entry point: parses the command line, builds the evaluator for the requested
        /// classifier, runs either the comparison or the prediction pipeline (depending on
        /// <c>setting.compare</c>), flushes the output and stops the Python server if one was started.
        /// </summary>
        /// <param name="args">Command-line arguments, parsed into <c>Options</c>.</param>
        static void Main(string[] args)
        {
            var setting = JsonHandler.DeserializeSettingsFromFile(src_dir + "/Data/settings.json");
            string method   = null;
            string filename = null;

            Parser.Default.ParseArguments<Options>(args)
            .WithParsed<Options>(o =>
            {
                switch (o.Classifier)
                {
                case "svm":
                    Console.WriteLine("Support Vector Machine on: " + o.InputFile);
                    break;

                case "rb":
                    Console.WriteLine("Rule Based Classifier on: " + o.InputFile);
                    break;

                case "rf":
                    Console.WriteLine("Random Forest on: " + o.InputFile);
                    break;

                case "lstm":
                    Console.WriteLine("LSTM on: " + o.InputFile);
                    break;

                case "random":
                    Console.WriteLine("Random Values on : " + o.InputFile);
                    break;

                case "bl":
                    Console.WriteLine("Baseline Classifier on: " + o.InputFile);
                    break;

                case "mml":
                    Console.WriteLine("Microsoft ML on: " + o.InputFile);
                    break;

                case "data":
                    Console.WriteLine("Creating DataSet");
                    break;

                default:
                    Console.WriteLine("Please provide a valid classifier");
                    System.Environment.Exit(1);
                    break;
                }

                // Only reached for a recognised classifier; the default case exits the process.
                method   = o.Classifier;
                filename = o.InputFile;
            });

            if (method == null)
            {
                // The callback above never ran, i.e. the arguments could not be parsed.
                // Continuing would run the baseline classifier against a null file name.
                Console.WriteLine("Please provide a valid classifier");
                Environment.Exit(1);
            }

            var watch = Stopwatch.StartNew();

            // NOTE(review): the tokenizer is not used below; the construction is kept in case the
            // constructor has side effects - confirm and remove.
            var tokenizer = new Tokenizer(setting);
            int serverProcessId = -1;

            Interfaces.IEvaluator evalr = new BaselineClassifer();

            try
            {
                switch (method)
                {
                // NOTE: "prediction", "nb" and "svr" are rejected by the argument switch above,
                // so those three labels cannot currently be reached.
                case "prediction":
                case "nb":
                case "svr":
                case "svm":
                case "lstm":
                case "rf":
                    Console.WriteLine("Runnning clasifier: " + method);
                    setting.threaded = false;              //program cannot run threaded if connected to server (yet)
                    serverProcessId  = InitServer(method); // runs the PythonServer with argument parsed
                    evalr            = new PythonClassifier(Connect());
                    break;

                case "rb":
                    Console.WriteLine("Runnning clasifier: " + method);
                    var lex  = CSVReader.ReadLexicon(src_dir + "\\Data\\AgreedLexicon.csv");   // lex should be created like this first and then parsed into a pipe, so that we can evaluate on the fly (like we're tokenizing)
                    var tlex = CSVReader.ReadTriggerLex(src_dir + "\\Data\\triggerwords.csv");
                    evalr = new RuleBasedClassifier(lex, setting, tlex);
                    break;

                case "random":
                    evalr = new RandomClassifier();
                    break;

                case "bl":
                    evalr = new BaselineClassifer();
                    break;

                case "mml":
                    evalr = new SentimentClassifier();
                    break;

                case "data":
                    var list = new List<(string, string)>();
                    foreach (var s in CSVReader.ReadComparrisonsYield(src_dir + "\\" + filename))
                    {
                        list.Add(s);
                    }
                    // NOTE(review): the two sizes are truncated independently, so they can add up
                    // to one less than list.Count - confirm that this is intended.
                    double count = list.Count * 0.1;
                    FileWriter.CreateRandomTrainingTest(list, (int)count, (int)(list.Count - count));
                    FileWriter.flushPrinter(setting);
                    Environment.Exit(0);
                    break;
                }

                if (setting.compare)
                {
                    Console.WriteLine("Comparing...");
                    List<Result> comparissons = ConvertTestData(CSVReader.ReadComparrisonsYield, evalr, src_dir + filename).ToList();
                    var results   = Pipe_AccuracyTest(comparissons, evalr).ToList();
                    var formatted = from result in results select result.ToString();

                    addToOutput(formatted);
                    Console.WriteLine(Environment.NewLine + Analyzer.MatrixWithNeutralToString(Analyzer.GetConfusion(results)));
                    Console.WriteLine(Analyzer.GetAccuracy(results) + "% Accuracy");
                }
                else
                {
                    Console.WriteLine("Predicting...");

                    if (setting.threaded)
                    {
                        //threaded evaluation
                        var result    = ThreadedSentenceLevelPipe(CSVReader.ReadCommentsYield, filename, evalr).ToList();
                        var formatted = from val in result select val.Value.ToString();

                        addToOutput(formatted);
                    }
                    else
                    {
                        //Procedual evaluation
                        var results   = ProcedualSentenceLevelPipe(CSVReader.ReadCommentsYield, filename, evalr).ToList();
                        var formatted = from result in results select result.Value.ToString();

                        addToOutput(formatted);
                    }
                }

                FileWriter.flushPrinter(setting);

                watch.Stop();
                var elapsed = watch.ElapsedMilliseconds;

                Console.WriteLine("Completed in: " + elapsed + "Ms with " + method);

                Console.WriteLine("Go to PATH/out/your_time_stamp for the output");
            }
            finally
            {
                // Stop the Python server on the failure path as well, so it is not left running.
                if (serverProcessId != -1)
                {
                    try
                    {
                        using (Process server = Process.GetProcessById(serverProcessId))
                        {
                            server.Kill();
                        }
                    }
                    catch (Exception ex) when (ex is ArgumentException
                                               || ex is InvalidOperationException
                                               || ex is System.ComponentModel.Win32Exception)
                    {
                        // The server has already exited or is already terminating, so there is
                        // nothing left to stop. Win32Exception also covers the SocketException
                        // that was caught here before, because SocketException derives from it.
                    }
                }
            }

            Console.ReadKey();
            Environment.Exit(0);
        }