/// <summary>
        /// Builds theory data for token-matching tests. Every randomly chosen dataset entry is
        /// embedded in a longer sentence, and the expected result points back at the chosen entry
        /// (its text and its index in the dataset).
        /// </summary>
        /// <param name="testDataPoints">How many random data points to generate (defaults to 1).</param>
        /// <returns>
        /// When exactly one data point is requested: one row of (dataset, sentence, expected match).
        /// For any other count: one row of (dataset, list of sentences, list of expected matches);
        /// the two lists are empty when the count is zero or negative.
        /// </returns>
        public static TheoryData TokenMatchingTestData(int testDataPoints = 1)
        {
            // source entries: both the sentences and the expectations are derived from these
            var dataset = DatasetReader.ReadDatasetFromCSV(Constants.CompanyNamesDatasetLocation);

            var sentences    = new List<string>();
            var expectations = new List<MatchResult>();

            for (var n = 0; n < testDataPoints; ++n)
            {
                // pick an entry at random and remember where it came from
                var pickedIndex = RandomGenerator.GetRandomIndex(dataset.Count);
                var entityName  = dataset[pickedIndex];

                // the matcher is expected to find the original entry and its index
                var expectation = new MatchResult
                {
                    DatabaseMatchInfo = new DatabaseMatchInfo
                    {
                        MatchText  = dataset[pickedIndex],
                        MatchIndex = pickedIndex
                    }
                };

                // wrap the entity name in surrounding words to make the sentence harder to match
                sentences.Add(string.Concat("Sentence that includes ", entityName, " as an entity name"));
                expectations.Add(expectation);
            }

            // a single data point is handed over unwrapped (plain string / MatchResult)
            if (testDataPoints == 1)
            {
                var singleRow = new TheoryData<List<string>, string, MatchResult>();
                singleRow.Add(dataset, sentences[0], expectations[0]);
                return singleRow;
            }

            // any other count is handed over as whole lists
            var listRow = new TheoryData<List<string>, List<string>, List<MatchResult>>();
            listRow.Add(dataset, sentences, expectations);
            return listRow;
        }
        /// <summary>
        /// Builds theory data for the fuzzy-matching client performance test: the dataset read
        /// from <c>Constants.NewsHeadlinesDatasetLocation</c>, a set of data sizes to run against,
        /// and the fixed sentence to match.
        /// </summary>
        /// <returns>One row of (dataset, test data sizes, sentence to match).</returns>
        public static TheoryData FuzzyMatchingClientPerfomanceTestData()
        {
            // sizes handed to the performance test, smallest first
            var sizes = new[] { 10, 100, 1000, 10000, 25000, 50000 };

            // fixed query used for every size
            const string query = "take record";

            var headlines = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);

            var row = new TheoryData<List<string>, int[], string>();
            row.Add(headlines, sizes, query);
            return row;
        }
Exemple #3
0
        /// <summary>
        /// Builds theory data for multi-sentence matching: nine randomly chosen entries of the
        /// dataset read from <c>Constants.NewsHeadlinesDatasetLocation</c>, each paired with the
        /// match result (text and index) that is expected for it.
        /// </summary>
        /// <returns>One row of (dataset, list of sentences, list of expected matches).</returns>
        public static TheoryData MultipleMatchTestData()
        {
            // number of random entries to draw
            const int sampleCount = 9;

            var dataset = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);

            var sentences    = new List<string>();
            var expectations = new List<MatchResult>();

            for (var n = 0; n < sampleCount; ++n)
            {
                // draw a random entry; the entry itself is the sentence to match
                var pickedIndex = RandomGenerator.GetRandomIndex(dataset.Count);
                var sentence    = dataset[pickedIndex];

                // the expected match is that same entry at that same index
                var expectation = new MatchResult
                {
                    DatabaseMatchInfo = new DatabaseMatchInfo
                    {
                        MatchText  = dataset[pickedIndex],
                        MatchIndex = pickedIndex
                    }
                };

                sentences.Add(sentence);
                expectations.Add(expectation);
            }

            var row = new TheoryData<List<string>, List<string>, List<MatchResult>>();
            row.Add(dataset, sentences, expectations);
            return row;
        }
Exemple #4
0
        /// <summary>
        /// Builds theory data for single-sentence matching: one randomly chosen entry of the
        /// dataset read from <c>Constants.NewsHeadlinesDatasetLocation</c>, together with the
        /// match result (text and index) expected for it.
        /// </summary>
        /// <returns>One row of (dataset, sentence to match, expected match).</returns>
        public static TheoryData SingleMatchTestData()
        {
            var dataset = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);

            // draw a random entry; it serves as both the query and the expected match text
            var pickedIndex = RandomGenerator.GetRandomIndex(dataset.Count);
            var sentence    = dataset[pickedIndex];

            var expectation = new MatchResult
            {
                DatabaseMatchInfo = new DatabaseMatchInfo
                {
                    MatchText  = sentence,
                    MatchIndex = pickedIndex
                }
            };

            var row = new TheoryData<List<string>, string, MatchResult>();
            row.Add(dataset, sentence, expectation);
            return row;
        }
Exemple #5
0
        /// <summary>
        /// Trains a spatial pooler plus KNN classifier on the images read from the training
        /// dataset, then tests it on the same images presented in reverse order. The number of
        /// training cycles and the test accuracy are written to the debug output, and the
        /// permanences/connections are saved to an image file.
        /// </summary>
        /// <remarks>
        /// The test attribute below is commented out, so this method is not picked up by the
        /// test runner as written.
        /// </remarks>
        //[TestMethod, DeploymentItem("Resources/DataSets/OCR/characters/cmr_all.xml")]
        public void TestVisionBench()
        {
            // Dataset file and training limits.
            // NOTE(review): the testing pass below reads this same file. A separate, never-used
            // path "Resources/DataSets/OCR/characters/cmr_all.xml" used to be declared for
            // testing - confirm which file the testing pass is meant to load.
            string trainingDataset   = "cmr_all.xml";
            double minAccuracy       = 100.0; // force max training cycles
            int    maxTrainingCycles = 5;

            // Create spatial parameters
            Parameters p = Parameters.Empty();

            p.SetParameterByKey(Parameters.KEY.INPUT_DIMENSIONS, new[] { 32, 32 }); // Size of image patch
            p.SetParameterByKey(Parameters.KEY.COLUMN_DIMENSIONS, new[] { 32, 32 });
            p.SetParameterByKey(Parameters.KEY.POTENTIAL_RADIUS, 10000);            // Ensures 100% potential pool
            p.SetParameterByKey(Parameters.KEY.POTENTIAL_PCT, 0.8);
            p.SetParameterByKey(Parameters.KEY.GLOBAL_INHIBITION, true);
            p.SetParameterByKey(Parameters.KEY.LOCAL_AREA_DENSITY, -1.0); // Using numActiveColumnsPerInhArea
            p.SetParameterByKey(Parameters.KEY.NUM_ACTIVE_COLUMNS_PER_INH_AREA, 64.0);
            // All input activity can contribute to feature output
            p.SetParameterByKey(Parameters.KEY.STIMULUS_THRESHOLD, 0.0);
            p.SetParameterByKey(Parameters.KEY.SYN_PERM_INACTIVE_DEC, 0.001);
            p.SetParameterByKey(Parameters.KEY.SYN_PERM_ACTIVE_INC, 0.001);
            p.SetParameterByKey(Parameters.KEY.SYN_PERM_CONNECTED, 0.3);
            p.SetParameterByKey(Parameters.KEY.MIN_PCT_OVERLAP_DUTY_CYCLES, 0.001);
            p.SetParameterByKey(Parameters.KEY.MIN_PCT_ACTIVE_DUTY_CYCLES, 0.001);
            p.SetParameterByKey(Parameters.KEY.DUTY_CYCLE_PERIOD, 1000);
            p.SetParameterByKey(Parameters.KEY.MAX_BOOST, 1.0);
            p.SetParameterByKey(Parameters.KEY.SEED, 1956);                       // The seed that Grok uses
            p.SetParameterByKey(Parameters.KEY.RANDOM, new XorshiftRandom(1956)); // The seed that Grok uses
            p.SetParameterByKey(Parameters.KEY.SP_VERBOSITY, 1);
            p.SetParameterByKey(Parameters.KEY.SP_PARALLELMODE, true);

            Connections cn = new Connections();

            p.Apply(cn);

            // Instantiate our spatial pooler
            SpatialPooler sp = new SpatialPooler();

            sp.Init(cn);

            // Instantiate the spatial pooler test bench.
            VisionTestBench tb = new VisionTestBench(cn, sp);

            // Instantiate the classifier
            KNNClassifier clf = KNNClassifier.GetBuilder().Apply(p);

            // Get training images and convert them to vectors keyed by their position.
            // Direct casts (rather than 'as') so an unexpected element type fails here, at the
            // cast, instead of surfacing later as a null reference.
            var tupleTraining   = DatasetReader.GetImagesAndTags(trainingDataset);
            var trainingImages  = (List<Bitmap>)tupleTraining.Get(0);
            var trainingTags    = (List<string>)tupleTraining.Get(1);
            var trainingVectors = trainingImages.Select((i, index) => new { index, vector = i.ToVector() })
                                  .ToDictionary(k => k.index, v => v.vector);

            // Train the spatial pooler on trainingVectors.
            int numcycles = tb.Train(trainingVectors, trainingTags, clf, maxTrainingCycles, minAccuracy);

            // Get testing images and tags.
            var tupleTesting  = DatasetReader.GetImagesAndTags(trainingDataset);
            var testingImages = (List<Bitmap>)tupleTesting.Get(0);
            var testingTags   = (List<string>)tupleTesting.Get(1);

            // Reverse the order of the images and tags for testing. Both are List<T>, so
            // Reverse() works in place on each. This has to happen before the vectors are keyed
            // by position: calling Reverse() on the dictionary resolves to LINQ's
            // Enumerable.Reverse, whose result would be discarded, leaving the vectors in their
            // original order while the tags are reversed.
            testingImages.Reverse();
            testingTags.Reverse();

            // Convert the (now reversed) testing images to vectors keyed by their position, so
            // that position i in the vectors corresponds to position i in the tags.
            var testingVectors = testingImages.Select((i, index) => new { index, vector = i.ToVector() })
                                 .ToDictionary(k => k.index, v => v.vector);

            // Test the spatial pooler on testingVectors.
            var accuracy = tb.Test(testingVectors, testingTags, clf, learn: true);

            Debug.WriteLine("Number of training cycles : " + numcycles);
            Debug.WriteLine("Accurancy : " + accuracy);

            // NOTE(review): hard-coded Windows path; the directory has to exist on the machine
            // running this - confirm before enabling the test elsewhere.
            tb.SavePermsAndConns("C:\\temp\\permsAndConns.jpg");
        }