/// <summary>
/// Builds theory data for token matching from the company names dataset.
/// Each generated sentence embeds one dataset entry inside a longer sentence,
/// and the expected result records that entry's text and index.
/// </summary>
/// <param name="testDataPoints">Number of data points to generate (defaults to 1).</param>
/// <returns>
/// When <paramref name="testDataPoints"/> is exactly 1: a single row of
/// (dataset, sentence, expected match). Otherwise: a single row of
/// (dataset, list of sentences, list of expected matches).
/// </returns>
public static TheoryData TokenMatchingTestData(int testDataPoints = 1)
{
    // Load the entity names that sentences will be matched against.
    var companyNames = DatasetReader.ReadDatasetFromCSV(Constants.CompanyNamesDatasetLocation);

    var sentences = new List<string>();
    var expectations = new List<MatchResult>();

    for (var remaining = testDataPoints; remaining > 0; remaining--)
    {
        // Pick one entry via the project's random index helper.
        var pickedIndex = RandomGenerator.GetRandomIndex(companyNames.Count);
        var entityName = companyNames[pickedIndex];

        // Wrap the entity name in a longer sentence so the matcher has to find it.
        sentences.Add($"Sentence that includes {entityName} as an entity name");

        // The match is expected to point back at the picked dataset entry.
        var matchInfo = new DatabaseMatchInfo()
        {
            MatchText = entityName,
            MatchIndex = pickedIndex
        };
        expectations.Add(new MatchResult { DatabaseMatchInfo = matchInfo });
    }

    // Any count other than 1 yields the list-shaped row.
    if (testDataPoints != 1)
    {
        return new TheoryData<List<string>, List<string>, List<MatchResult>>
        {
            { companyNames, sentences, expectations }
        };
    }

    // Exactly one data point: unwrap the lists into scalar arguments.
    return new TheoryData<List<string>, string, MatchResult>
    {
        { companyNames, sentences[0], expectations[0] }
    };
}
/// <summary>
/// Builds theory data for the fuzzy matching client performance test:
/// the news headlines dataset, a set of dataset sizes, and a fixed query sentence.
/// </summary>
/// <returns>A single row of (dataset, test data sizes, sentence to match).</returns>
public static TheoryData FuzzyMatchingClientPerfomanceTestData()
{
    var headlines = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);

    // Dataset sizes passed to the test, in increasing order.
    int[] sizes = { 10, 100, 1000, 10000, 25000, 50000 };

    // Fixed query used for every size.
    const string query = "take record";

    var rows = new TheoryData<List<string>, int[], string>();
    rows.Add(headlines, sizes, query);
    return rows;
}
/// <summary>
/// Builds theory data for matching several sentences at once. Nine entries are
/// picked from the news headlines dataset; each is used verbatim as the sentence
/// to match, and the expected result records its text and index.
/// </summary>
/// <returns>A single row of (dataset, list of sentences, list of expected matches).</returns>
public static TheoryData MultipleMatchTestData()
{
    const int sampleCount = 9;

    var headlines = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);
    var sentences = new List<string>();
    var expectations = new List<MatchResult>();

    var produced = 0;
    while (produced < sampleCount)
    {
        // Pick one entry via the project's random index helper.
        var pickedIndex = RandomGenerator.GetRandomIndex(headlines.Count);
        var headline = headlines[pickedIndex];

        // The sentence is the dataset entry itself, so the expected match is that entry.
        sentences.Add(headline);
        expectations.Add(new MatchResult
        {
            DatabaseMatchInfo = new DatabaseMatchInfo()
            {
                MatchText = headline,
                MatchIndex = pickedIndex
            }
        });

        produced++;
    }

    var rows = new TheoryData<List<string>, List<string>, List<MatchResult>>();
    rows.Add(headlines, sentences, expectations);
    return rows;
}
/// <summary>
/// Builds theory data for matching a single sentence: one entry is picked from
/// the news headlines dataset and used verbatim as the sentence to match.
/// </summary>
/// <returns>A single row of (dataset, sentence to match, expected match).</returns>
public static TheoryData SingleMatchTestData()
{
    var headlines = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);

    // Pick one entry via the project's random index helper.
    var pickedIndex = RandomGenerator.GetRandomIndex(headlines.Count);
    var headline = headlines[pickedIndex];

    // Expected outcome: the match points back at the picked entry.
    var matchInfo = new DatabaseMatchInfo()
    {
        MatchText = headline,
        MatchIndex = pickedIndex
    };
    var expectedMatch = new MatchResult { DatabaseMatchInfo = matchInfo };

    var rows = new TheoryData<List<string>, string, MatchResult>();
    rows.Add(headlines, headline, expectedMatch);
    return rows;
}
/// <summary>
/// Trains a spatial pooler plus KNN classifier on the character image dataset
/// through <c>VisionTestBench</c>, then tests it on the same images presented in
/// reverse order, writes the cycle count and accuracy to the debug output, and
/// saves the permanences/connections image to disk.
/// </summary>
/// <remarks>
/// The test attribute below is commented out, so this method is not picked up
/// by the test runner as written.
/// </remarks>
//[TestMethod, DeploymentItem("Resources/DataSets/OCR/characters/cmr_all.xml")]
public void TestVisionBench()
{
    // Set some training paths
    string trainingDataset = "cmr_all.xml";
    // NOTE(review): testingDataset is never read; the testing images below are
    // loaded from trainingDataset. Confirm which path is intended before changing it.
    string testingDataset = "Resources/DataSets/OCR/characters/cmr_all.xml";
    double minAccuracy = 100.0; // force max training cycles
    int maxTrainingCycles = 5;

    // Create spatial parameters
    Parameters p = Parameters.Empty();
    p.SetParameterByKey(Parameters.KEY.INPUT_DIMENSIONS, new[] { 32, 32 }); // Size of image patch
    p.SetParameterByKey(Parameters.KEY.COLUMN_DIMENSIONS, new[] { 32, 32 });
    p.SetParameterByKey(Parameters.KEY.POTENTIAL_RADIUS, 10000); // Ensures 100% potential pool
    p.SetParameterByKey(Parameters.KEY.POTENTIAL_PCT, 0.8);
    p.SetParameterByKey(Parameters.KEY.GLOBAL_INHIBITION, true);
    p.SetParameterByKey(Parameters.KEY.LOCAL_AREA_DENSITY, -1.0); // Using numActiveColumnsPerInhArea
    p.SetParameterByKey(Parameters.KEY.NUM_ACTIVE_COLUMNS_PER_INH_AREA, 64.0);
    // All input activity can contribute to feature output
    p.SetParameterByKey(Parameters.KEY.STIMULUS_THRESHOLD, 0.0);
    p.SetParameterByKey(Parameters.KEY.SYN_PERM_INACTIVE_DEC, 0.001);
    p.SetParameterByKey(Parameters.KEY.SYN_PERM_ACTIVE_INC, 0.001);
    p.SetParameterByKey(Parameters.KEY.SYN_PERM_CONNECTED, 0.3);
    p.SetParameterByKey(Parameters.KEY.MIN_PCT_OVERLAP_DUTY_CYCLES, 0.001);
    p.SetParameterByKey(Parameters.KEY.MIN_PCT_ACTIVE_DUTY_CYCLES, 0.001);
    p.SetParameterByKey(Parameters.KEY.DUTY_CYCLE_PERIOD, 1000);
    p.SetParameterByKey(Parameters.KEY.MAX_BOOST, 1.0);
    p.SetParameterByKey(Parameters.KEY.SEED, 1956); // The seed that Grok uses
    p.SetParameterByKey(Parameters.KEY.RANDOM, new XorshiftRandom(1956)); // The seed that Grok uses
    p.SetParameterByKey(Parameters.KEY.SP_VERBOSITY, 1);
    p.SetParameterByKey(Parameters.KEY.SP_PARALLELMODE, true);

    Connections cn = new Connections();
    p.Apply(cn);

    // Instantiate our spatial pooler
    SpatialPooler sp = new SpatialPooler();
    sp.Init(cn);

    // Instantiate the spatial pooler test bench.
    VisionTestBench tb = new VisionTestBench(cn, sp);

    // Instantiate the classifier
    KNNClassifier clf = KNNClassifier.GetBuilder().Apply(p);

    // Get training images and convert them to vectors, keyed by image position.
    var tupleTraining = DatasetReader.GetImagesAndTags(trainingDataset);
    var trainingImages = (List<Bitmap>)tupleTraining.Get(0);
    // Direct cast (not 'as') so a type mismatch fails here instead of as a later null dereference.
    var trainingTags = (List<string>)tupleTraining.Get(1);
    var trainingVectors = trainingImages
        .Select((image, index) => new { index, vector = image.ToVector() })
        .ToDictionary(entry => entry.index, entry => entry.vector);

    // Train the spatial pooler on trainingVectors.
    int numCycles = tb.Train(trainingVectors, trainingTags, clf, maxTrainingCycles, minAccuracy);

    // Get testing images and their tags.
    var tupleTesting = DatasetReader.GetImagesAndTags(trainingDataset);
    var testingImages = (List<System.Drawing.Bitmap>)tupleTesting.Get(0);
    var testingTags = (List<string>)tupleTesting.Get(1);

    // Reverse the order of the images and tags for testing. Both lists are reversed
    // in place BEFORE the vectors are indexed, so tag i still describes vector i.
    // Calling Reverse() on the dictionary would bind to LINQ's Enumerable.Reverse,
    // whose result would be discarded, leaving the vectors out of step with the tags.
    testingImages.Reverse();
    testingTags.Reverse();
    var testingVectors = testingImages
        .Select((image, index) => new { index, vector = image.ToVector() })
        .ToDictionary(entry => entry.index, entry => entry.vector);

    // Test the spatial pooler on testingVectors.
    var accuracy = tb.Test(testingVectors, testingTags, clf, learn: true);

    Debug.WriteLine("Number of training cycles : " + numCycles);
    Debug.WriteLine("Accurancy : " + accuracy);

    tb.SavePermsAndConns("C:\\temp\\permsAndConns.jpg");
}