예제 #1
0
        /// <summary>
        /// Trains the base recognizers on every sketch found in a directory, then
        /// trains per-user recognizers and feeds their results into a combo
        /// recognizer that is saved as "Combo.cru".
        ///
        /// Arguments
        ///    0: the directory to find files
        ///    1: a string to filter filenames (e.g., "*.xml")
        /// </summary>
        /// <param name="args">Command-line arguments as listed above. When fewer than
        /// two are supplied, a usage message is written to standard error and the
        /// method returns without doing any work.</param>
        /// <exception cref="InvalidOperationException">Thrown when the per-user pass
        /// produces no training instances for the combo recognizer.</exception>
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.Error.WriteLine("Expected 2 arguments: <sketch directory> <filename filter, e.g. *.xml>");
                return;
            }

            // Get the list of files
            Console.WriteLine("Finding sketch files...");
            List<string> allSketches = new List<string>(System.IO.Directory.GetFiles(args[0], args[1]));
            Console.WriteLine("    found " + allSketches.Count + " sketches");

            // Load all the shapes in all the sketches
            Console.WriteLine("Loading full data set...");
            List<Shape> shapeData = GetShapeData(allSketches);
            Console.WriteLine("    found " + shapeData.Count + " gates");

            // Print classes found
            HashSet<ShapeType> types = new HashSet<ShapeType>();
            foreach (Shape shape in shapeData)
            {
                types.Add(shape.Type);
            }
            Console.WriteLine("Found " + types.Count + " types:");
            foreach (ShapeType type in types)
            {
                Console.WriteLine("    " + type);
            }

            // Save all the shapes to images in the "sketches" folder
            string outputPath = @"sketches\";

            Console.WriteLine("Saving gates to '" + outputPath + "'...");
            // CreateDirectory is a no-op when the directory already exists, so no
            // separate existence check is needed.
            System.IO.Directory.CreateDirectory(outputPath);
            foreach (Shape shape in shapeData)
            {
                System.Drawing.Bitmap b = ToBitmap.createFromShape(shape, 30, 30, true);
                // The shape keeps a reference to the bitmap, so it is intentionally
                // not disposed here.
                shape.bitmap = b;

                // The path and type name are passed as arguments rather than spliced
                // into the format string, so a '{' or '}' in either cannot be
                // misread as a format item.
                string filename = String.Format("{0}{1}-{2:x}.png", outputPath, shape.Type, shape.GetHashCode());
                b.Save(filename);
            }
            Console.WriteLine("    finished saving gates");

            // Train the base recognizers on all the data
            Console.WriteLine("Training recognizers on all data...");

            // Training Rubine and Dollar on the data is currently disabled; the
            // active branch saves recognizers built with their default constructors.
#if false
            Console.WriteLine("    rubine");
            RubineRecognizerUpdateable rubine = new RubineRecognizerUpdateable(shapeData);
            rubine.Save("Rubine.rru");
            rubine.LiteRecognizer.Save("RubineLite.rr");

            Console.WriteLine("    dollar");
            DollarRecognizer dollar = new DollarRecognizer(shapeData);
            dollar.Save("Dollar.dr");
#else
            RubineRecognizerUpdateable rubine = new RubineRecognizerUpdateable();
            rubine.Save("Rubine.rru");
            rubine.LiteRecognizer.Save("RubineLite.rr");

            DollarRecognizer dollar = new DollarRecognizer();
            dollar.Save("Dollar.dr");
#endif

            Console.WriteLine("    zernike");
            ZernikeMomentRecognizerUpdateable zernike = new ZernikeMomentRecognizerUpdateable(shapeData);
            zernike.Save("Zernike.zru");
            zernike.LiteRecognizer.Save("ZernikeLite.zr");

            Console.WriteLine("    image");
            ImageRecognizer image = new ImageRecognizer(shapeData);
            image.Save("Image.ir");

            Console.WriteLine("    finished training recognizers");

            // Keep references to the all-data recognizers; the variables above are
            // reassigned for every user in the loop below.
            RubineRecognizer fullRubine = rubine.LiteRecognizer;
            DollarRecognizer fullDollar = dollar;
            ZernikeMomentRecognizer fullZernike = zernike.LiteRecognizer;
            ImageRecognizer fullImage = image;

            // Split the data up per-user
            Console.WriteLine("Loading per-user data...");
            Dictionary<string, List<Shape>[]> user2data = GetSketchesPerUser(allSketches);
            Console.WriteLine("    found " + user2data.Count + " users");

            // Foreach user: train each of the recognizers and accumulate training data
            // for the combo recognizer
            List<KeyValuePair<ShapeType, Dictionary<string, object>>> data = new List<KeyValuePair<ShapeType, Dictionary<string, object>>>();
            foreach (KeyValuePair<string, List<Shape>[]> pair in user2data)
            {
                string user = pair.Key;

                ////////////////////////////////////////
                ////////////   Train   /////////////////
                ////////////////////////////////////////

                Console.WriteLine("User: " + user);

                // NOTE(review): this declaration and the "#if false" below were
                // reconstructed after the original lines were mangled. Index 0 is
                // assumed to hold the training shapes because index 1 is read as the
                // testing set further down -- confirm against GetSketchesPerUser.
                List<Shape> trainingSet = pair.Value[0];

#if false
                Console.WriteLine("    rubine");
                rubine = new RubineRecognizerUpdateable(trainingSet);
                rubine.Save("Rubine" + user + ".rru");
                rubine.LiteRecognizer.Save("RubineLite" + user + ".rr");

                Console.WriteLine("    dollar");
                dollar = new DollarRecognizer(trainingSet);
                dollar.Save("Dollar" + user + ".dr");
#else
                rubine = new RubineRecognizerUpdateable();
                rubine.Save("Rubine" + user + ".rru");
                rubine.LiteRecognizer.Save("RubineLite" + user + ".rr");

                dollar = new DollarRecognizer();
                dollar.Save("Dollar" + user + ".dr");
#endif

                Console.WriteLine("    zernike");
                zernike = new ZernikeMomentRecognizerUpdateable(trainingSet);
                zernike.Save("Zernike" + user + ".zru");
                zernike.LiteRecognizer.Save("ZernikeLite" + user + ".zr");

                Console.WriteLine("    image");
                image = new ImageRecognizer(trainingSet);
                image.Save("Image" + user + ".ir");
                // NOTE(review): this replaces the all-data image recognizer, so the
                // combo recognizer built after the loop receives the image
                // recognizer of the last user processed while fullRubine, fullDollar
                // and fullZernike still refer to the all-data recognizers. Confirm
                // this asymmetry is intended.
                fullImage = image;

                ////////////////////////////////////////
                //////////// Evaluate //////////////////
                ////////////////////////////////////////

                List<Shape> testingSet = pair.Value[1];

                // Create the training data for the combo recognizer
                List<KeyValuePair<ShapeType, Dictionary<string, object>>> comboTrainingData = TrainingDataCombo(testingSet, rubine, dollar, zernike, image);
                data.AddRange(comboTrainingData);
            }

            if (data.Count == 0)
            {
                throw new InvalidOperationException("no data!");
            }

            // Collect the distinct feature names in first-seen order. The set makes
            // the membership test constant-time; the list preserves the order.
            List<string> features = new List<string>();
            HashSet<string> seenFeatures = new HashSet<string>();
            foreach (KeyValuePair<ShapeType, Dictionary<string, object>> instance in data)
            {
                foreach (string feature in instance.Value.Keys)
                {
                    if (seenFeatures.Add(feature))
                    {
                        features.Add(feature);
                    }
                }
            }

            Console.WriteLine("Found " + data.Count + " data points and " + features.Count + " features.");

            ComboRecognizer combo = new ComboRecognizer(fullRubine, fullDollar, fullZernike, fullImage);
            combo.TrainCombo(features, data);
            combo.Save("Combo.cru");

            Console.WriteLine("Naive bayes updatable has " + combo.ComboClassifier.Examples.Count + " examples.");
            Console.WriteLine("Naive bayes updatable has " + combo.ComboClassifier.Classifier.Classes.Count + " classes:");
            foreach (ShapeType cls in combo.ComboClassifier.Classifier.Classes)
            {
                Console.WriteLine("    " + cls);
            }

            Console.WriteLine("Press ENTER to continue...");
            Console.ReadLine();
        }
예제 #2
0
        /// <summary>
        /// Trains the adaptive-image and image recognizers on every "*.xml" sketch in
        /// a directory, collects correctly and incorrectly grouped gates from
        /// real-world sketches (cached on disk after the first run), then trains,
        /// saves and evaluates a neural image recognizer on those gates.
        ///
        /// Arguments
        ///    0: the directory to find files
        ///    1: directory to find real-world data (recursive)
        /// </summary>
        /// <param name="args">Command-line arguments as listed above. When fewer than
        /// two are supplied, a usage message is written to standard error and the
        /// method returns without doing any work.</param>
        /// <exception cref="InvalidOperationException">Thrown when a substroke of a
        /// recognized shape has no geometric match in the original sketch.</exception>
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.Error.WriteLine("Expected 2 arguments: <sketch directory> <real-world data directory>");
                return;
            }

            // Get the list of files
            Console.WriteLine("Finding sketch files...");
            List<string> allSketches = new List<string>(System.IO.Directory.GetFiles(args[0], "*.xml"));
            Console.WriteLine("    found " + allSketches.Count + " sketches");

            // Load all the shapes in all the sketches
            Console.WriteLine("Loading full data set...");
            List<Shape> shapeData = GetShapeData(allSketches);
            Console.WriteLine("    found " + shapeData.Count + " gates");

            // Print classes found
            HashSet<ShapeType> types = new HashSet<ShapeType>();
            foreach (Shape shape in shapeData)
            {
                types.Add(shape.Type);
            }
            Console.WriteLine("Found " + types.Count + " types:");
            foreach (ShapeType type in types)
            {
                Console.WriteLine("    " + type);
            }

            // Save all the shapes to images in the "shapes" folder
            string outputPath = @"shapes\";

            Console.WriteLine("Saving gates to '" + outputPath + "'...");
            // CreateDirectory is a no-op when the directory already exists, so no
            // separate existence check is needed.
            System.IO.Directory.CreateDirectory(outputPath);
            foreach (Shape shape in shapeData)
            {
                System.Drawing.Bitmap b = shape.createBitmap(100, 100, true);
                // The shape keeps a reference to the bitmap, so it is intentionally
                // not disposed here.
                shape.TemplateDrawing = b;

                // The path and type name are passed as arguments rather than spliced
                // into the format string, so a '{' or '}' in either cannot be
                // misread as a format item.
                string filename = String.Format("{0}{1}-{2:x}.png", outputPath, shape.Type, shape.GetHashCode());
                b.Save(filename);
            }
            Console.WriteLine("    finished saving gates");

            // Train the base recognizers on all the data
            Console.WriteLine("Training recognizers on all data...");

            // Rubine, Dollar and Zernike training is disabled. NOTE(review): the
            // disabled section declares rubine and dollar twice, so it would need an
            // "#else" between the two variants before it could be re-enabled.
#if false
            Console.WriteLine("    rubine");
            RubineRecognizerUpdateable rubine = new RubineRecognizerUpdateable(shapeData);
            rubine.Save("Rubine.rru");
            rubine.LiteRecognizer.Save("RubineLite.rr");

            Console.WriteLine("    dollar");
            DollarRecognizer dollar = new DollarRecognizer(shapeData);
            dollar.Save("Dollar.dr");

            RubineRecognizerUpdateable rubine = new RubineRecognizerUpdateable();
            rubine.Save("Rubine.rru");
            rubine.LiteRecognizer.Save("RubineLite.rr");

            DollarRecognizer dollar = new DollarRecognizer();
            dollar.Save("Dollar.dr");

            Console.WriteLine("    zernike");
            ZernikeMomentRecognizerUpdateable zernike = new ZernikeMomentRecognizerUpdateable(shapeData);
            zernike.Save("Zernike.zru");
            zernike.LiteRecognizer.Save("ZernikeLite.zr");
#endif

            Console.WriteLine("    adaptive image");
            AdaptiveImageRecognizer adaptiveimage = new AdaptiveImageRecognizer(shapeData);
            adaptiveimage.Save("AdaptiveImage.air");

            Console.WriteLine("    image");
            ImageRecognizer image = new ImageRecognizer(shapeData);
            image.Save("Image.ir");

            Console.WriteLine("    finished training recognizers");

            // Per-user training and the combo recognizer are disabled. The nested
            // "#if false" inside this region was reconstructed after the original
            // line was mangled; without it the "#else"/"#endif" pairs do not balance.
#if false
            RubineRecognizer        fullRubine  = rubine.LiteRecognizer;
            DollarRecognizer        fullDollar  = dollar;
            ZernikeMomentRecognizer fullZernike = zernike.LiteRecognizer;

            ImageRecognizer fullImage = image;

            // Split the data up per-user
            Console.WriteLine("Loading per-user data...");
            Dictionary <string, List <Shape>[]> user2data = GetSketchesPerUser(allSketches);
            Console.WriteLine("    found " + user2data.Count + " users");


            // Foreach user: train each of the recognizers and accumulate training data
            // for the combo recognizer
            List <KeyValuePair <ShapeType, Dictionary <string, object> > > data = new List <KeyValuePair <ShapeType, Dictionary <string, object> > >();
            foreach (KeyValuePair <string, List <Shape>[]> pair in user2data)
            {
                string user = pair.Key;

                ////////////////////////////////////////
                ////////////   Train   /////////////////
                ////////////////////////////////////////

                Console.WriteLine("User: " + user);

                // NOTE(review): reconstructed; index 0 is assumed to be the training
                // set because index 1 is read as the testing set below.
                List <Shape> trainingSet = pair.Value[0];

#if false
                Console.WriteLine("    rubine");
                rubine = new RubineRecognizerUpdateable(trainingSet);
                rubine.Save("Rubine" + user + ".rru");
                rubine.LiteRecognizer.Save("RubineLite" + user + ".rr");

                Console.WriteLine("    dollar");
                dollar = new DollarRecognizer(trainingSet);
                dollar.Save("Dollar" + user + ".dr");
#else
                rubine = new RubineRecognizerUpdateable();
                rubine.Save("Rubine" + user + ".rru");
                rubine.LiteRecognizer.Save("RubineLite" + user + ".rr");

                dollar = new DollarRecognizer();
                dollar.Save("Dollar" + user + ".dr");
#endif

                Console.WriteLine("    zernike");
                zernike = new ZernikeMomentRecognizerUpdateable(trainingSet);
                zernike.Save("Zernike" + user + ".zru");
                zernike.LiteRecognizer.Save("ZernikeLite" + user + ".zr");

                Console.WriteLine("    image");
                image = new ImageRecognizer(trainingSet);
                image.Save("Image" + user + ".ir");
                fullImage = image;

                ////////////////////////////////////////
                //////////// Evaluate //////////////////
                ////////////////////////////////////////


                List <Shape> testingSet = pair.Value[1];

                // Create the training data for the combo recognizer
                List <KeyValuePair <ShapeType, Dictionary <string, object> > > comboTrainingData = TrainingDataCombo(testingSet, rubine, dollar, zernike, image);
                foreach (KeyValuePair <ShapeType, Dictionary <string, object> > pair2 in comboTrainingData)
                {
                    data.Add(pair2);
                }
            }

            if (data.Count == 0)
            {
                throw new Exception("no data!");
            }

            List <string> features = new List <string>();
            foreach (KeyValuePair <ShapeType, Dictionary <string, object> > instance in data)
            {
                foreach (string feature in instance.Value.Keys)
                {
                    if (!features.Contains(feature))
                    {
                        features.Add(feature);
                    }
                }
            }

            Console.WriteLine("Found " + data.Count + " data points and " + features.Count + " features.");

            ComboRecognizer combo = new ComboRecognizer(fullRubine, fullDollar, fullZernike, fullImage);
            combo.TrainCombo(features, data);
            combo.Save("Combo.cru");

            Console.WriteLine("Naive bayes updatable has " + combo.ComboClassifier.Examples.Count + " examples.");
            Console.WriteLine("Naive bayes updatable has " + combo.ComboClassifier.Classifier.Classes.Count + " classes:");
            foreach (ShapeType cls in combo.ComboClassifier.Classifier.Classes)
            {
                Console.WriteLine("    " + cls);
            }
#endif

            Console.WriteLine("Training neural image recognizer on real-world data...");
            List<Shape> goodGates;                  // list of correctly-identified gates
            List<Shape> badGates;                   // list of shapes grouped as gates that aren't
            Dictionary<Shape, string> shapeSources; // map of shapes to source filename

            // NOTE(review): BinaryFormatter is unsafe on untrusted input and is
            // obsolete in current .NET. It is kept so existing cache files still
            // load; the cache is written and read only by this program.
            string cacheFile = outputPath + "goodAndBadGates.data";
            if (!System.IO.File.Exists(cacheFile))
            {
                goodGates = new List<Shape>();
                badGates = new List<Shape>();
                shapeSources = new Dictionary<Shape, string>();

                Grouper grouper = RecognitionPipeline.createDefaultGrouper();
                Classifier classifier = RecognitionPipeline.createDefaultClassifier();
                RecognitionPipeline pipeline = new RecognitionPipeline(classifier, grouper);

                // Materialize once so the file search is not repeated every time the
                // total is printed inside the loop.
                var files = Files.FUtil.AllFiles(args[1], Files.Filetype.XML, true).ToList();
                Console.WriteLine("    Found " + files.Count + " real-world sketches");
                int i = 1;
                foreach (string file in files)
                {
                    Console.WriteLine("    Sketch " + i + " / " + files.Count);
                    i++;

                    Sketch.Sketch sketch = new ReadXML(file).Sketch;
                    Sketch.Sketch original = sketch.Clone();

                    // Strip the labelling and let the pipeline regroup and
                    // reclassify; the untouched clone serves as the reference.
                    sketch.RemoveLabels();
                    sketch.resetShapes();

                    pipeline.process(sketch);

                    foreach (Sketch.Shape shape in sketch.Shapes)
                    {
                        if (shape.Classification != LogicDomain.GATE_CLASS)
                        {
                            continue;
                        }

                        Shape originalGate = original.ShapesL.Find(s => s.GeometricEquals(shape));

                        if (originalGate != null && originalGate.Classification == LogicDomain.GATE_CLASS)
                        {
                            goodGates.Add(shape);
                        }
                        else
                        {
                            // We can't just say "this is a bad gate." If it wasn't found,
                            // the shape might be an XOR gate missing the back, or a NAND
                            // gate missing a bubble. We will apply the following heuristic:
                            //    if all the strokes in the shape are part of the same
                            //    shape in the original sketch and that shape in the
                            //    original sketch is a gate, this is not a bad gate.

                            // a shape consists of one or more substrokes from shapes in the
                            // original, correct sketch
                            HashSet<Shape> originalShapes = new HashSet<Shape>();
                            foreach (Substroke substroke in shape.Substrokes)
                            {
                                Substroke originalSubstroke = original.SubstrokesL.Find(s => s.GeometricEquals(substroke));
                                if (originalSubstroke == null)
                                {
                                    throw new InvalidOperationException("A substroke is missing in the original sketch???");
                                }
                                if (originalSubstroke.ParentShape != null)
                                {
                                    originalShapes.Add(originalSubstroke.ParentShape);
                                }
                            }

                            List<Shape> originalShapesL = originalShapes.ToList();
                            if (originalShapesL.Count != 1 || originalShapesL[0].Classification != LogicDomain.GATE_CLASS)
                            {
                                badGates.Add(shape);
                            }
                        }

                        // Recorded for every shape classified as a gate, including
                        // those that end up in neither list.
                        shapeSources.Add(shape, file);
                    }
                }

                Console.WriteLine("Saving found gates to " + cacheFile);
                // "using" closes the file even when serialization throws.
                using (var stream = System.IO.File.Open(cacheFile, System.IO.FileMode.Create))
                {
                    var bformatter = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
                    bformatter.Serialize(stream, Tuple.Create(goodGates, badGates, shapeSources));
                }
            }
            else
            {
                Console.WriteLine("Loading good and bad gates from " + cacheFile);
                Tuple<List<Shape>, List<Shape>, Dictionary<Shape, string>> cached;
                using (var stream = System.IO.File.Open(cacheFile, System.IO.FileMode.Open))
                {
                    var bformatter = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
                    cached = (Tuple<List<Shape>, List<Shape>, Dictionary<Shape, string>>)bformatter.Deserialize(stream);
                }

                goodGates = cached.Item1;
                badGates = cached.Item2;
                shapeSources = cached.Item3;
            }
            Console.WriteLine("    Found " + goodGates.Count + " good gates, " + badGates.Count + " bad gates");

            ImageRecognizer innerNeuralRecognizer = image;
            string neuralPath = @"neuralResults\";
            string arffFilename = "data.arff";
            System.IO.Directory.CreateDirectory(neuralPath);

            Console.WriteLine("    Writing ARFF file '" + neuralPath + arffFilename + "'...");
            using (TextWriter arffWriter = new StreamWriter(neuralPath + arffFilename))
            {
                NeuralImageRecognizer.WriteARFF(arffWriter, innerNeuralRecognizer, goodGates, badGates);
            }

            Console.WriteLine("    Training the network...");

            // Network settings -- determined empirically
            NeuralNetworkInfo info = new NeuralNetworkInfo();
            info.Layers = new int[] { 8, 1 };
            info.NumTrainingEpochs = 1000;
            info.LearningRate = 0.05;
            info.Momentum = 0.2;
            NeuralImageRecognizer neuralImage = new NeuralImageRecognizer(innerNeuralRecognizer, goodGates, badGates, info);

            neuralImage.Save("NeuralImage.nir");

            Console.WriteLine("    Testing the network (results in " + neuralPath + ")...");
            // The evaluation runs against the recognizer reloaded from the file just
            // written (presumably to exercise the save/load round trip).
            neuralImage = NeuralImageRecognizer.Load("NeuralImage.nir");

            using (TextWriter writer = new StreamWriter(neuralPath + "info.csv"))
            {
                writer.WriteLine("Sketch File, Shape Bitmap, Good?, Tanimoto, Yule, Partial Hausdorff, Modified Hausdorff, Output Confidence");

                int falseNegatives = EvaluateGatesToCsv(neuralImage, goodGates, true, shapeSources, neuralPath, writer);
                Console.WriteLine("    Good gates with low confidence: " + falseNegatives + "/" + (goodGates.Count));

                int falsePositives = EvaluateGatesToCsv(neuralImage, badGates, false, shapeSources, neuralPath, writer);
                Console.WriteLine("    Bad gates with high confidence: " + falsePositives + "/" + (badGates.Count));
            }

            Console.WriteLine("Press ENTER to continue...");
            Console.ReadLine();
        }

        /// <summary>
        /// Runs the recognizer on each gate, saves a bitmap of the gate into
        /// <paramref name="outputDirectory"/>, appends one CSV row per gate to
        /// <paramref name="writer"/>, and counts the gates whose confidence falls on
        /// the wrong side of 0.5.
        /// </summary>
        /// <param name="recognizer">Recognizer under evaluation.</param>
        /// <param name="gates">Gates to evaluate.</param>
        /// <param name="expectedGood">True when <paramref name="gates"/> are real
        /// gates (confidence below 0.5 counts as an error); false when they are not
        /// (confidence above 0.5 counts as an error).</param>
        /// <param name="shapeSources">Maps each gate to the sketch file it came from;
        /// must contain every gate in <paramref name="gates"/>.</param>
        /// <param name="outputDirectory">Directory prefix, including its trailing
        /// separator, under which the bitmaps are saved.</param>
        /// <param name="writer">Destination for the CSV rows.</param>
        /// <returns>The number of gates on the wrong side of the 0.5 threshold.</returns>
        private static int EvaluateGatesToCsv(
            NeuralImageRecognizer recognizer,
            List<Shape> gates,
            bool expectedGood,
            Dictionary<Shape, string> shapeSources,
            string outputDirectory,
            TextWriter writer)
        {
            string prefix = expectedGood ? "good-" : "bad-";
            string goodFlag = expectedGood ? "1" : "0";
            int misclassified = 0;

            foreach (Shape gate in gates)
            {
                ImageRecognitionResult result = (ImageRecognitionResult)recognizer.recognize(gate, null);

                // A confidence of exactly 0.5 counts as an error for neither group.
                bool wrongSide = expectedGood ? result.Confidence < 0.5 : result.Confidence > 0.5;
                if (wrongSide)
                {
                    misclassified++;
                }

                // The doubled hyphen ("good--1a2b.png") matches the file names this
                // tool has always produced.
                string filename = prefix + "-" + gate.GetHashCode().ToString("x") + ".png";

                // The bitmap is only needed long enough to write the PNG, so release
                // it right away instead of waiting for finalization.
                using (System.Drawing.Bitmap b = gate.createBitmap(100, 100, true))
                {
                    b.Save(outputDirectory + filename);
                }

                // Invariant culture keeps '.' as the decimal separator so numeric
                // values cannot introduce extra commas into the CSV row.
                writer.WriteLine(String.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0},{1}, {2}, {3}, {4}, {5}, {6}, {7}",
                    shapeSources[gate],
                    filename,
                    goodFlag,
                    result.Tanimoto,
                    result.Yule,
                    result.PartialHausdorff,
                    result.ModifiedHausdorff,
                    result.Confidence));
            }

            return misclassified;
        }