// Builds a reader around five already-constructed networks, loads the table,
// description and item configuration plus the general word list, and derives
// the description word list from the loaded description entries.
internal OcrReader(NeuralNet descriptions, NeuralNet tables, NeuralNet numbers, NeuralNet headlines, NeuralNet delimiters)
{
    // Keep the networks handed in by the caller.
    nnDescriptions = descriptions;
    nnTables = tables;
    nnNumbers = numbers;
    nnHeadlines = headlines;
    nnDelimiters = delimiters;

    // Configuration and word list are read from the paths PathHelpers builds.
    tableConfig = TableItem.Load(PathHelpers.BuildConfigFilename("TableItems"));
    descriptionConfig = DescriptionItem.Load(PathHelpers.BuildConfigFilename("Descriptions"));
    itemConfig = ItemValues.Load(PathHelpers.BuildConfigFilename("ItemValues"));
    wordList = File.ReadAllLines(PathHelpers.BuildWordFilename("words"));

    // Collect every distinct space-separated token that occurs in any description.
    // The dictionary is used purely as a set; the stored value carries no meaning.
    char[] blank = new char[] { ' ' };
    Dictionary<string, int> distinctWords = new Dictionary<string, int>();
    foreach (DescriptionItem item in descriptionConfig)
    {
        string[] tokens = item.Description.Split(blank);
        for (int t = 0; t < tokens.Length; t++)
        {
            distinctWords[tokens[t]] = 1;
        }
    }

    // Copy the keys straight into the array that becomes the description word list.
    string[] words = new string[distinctWords.Count];
    distinctWords.Keys.CopyTo(words, 0);
    wordListDescription = words;
}
// Train the neural networks and return an OcrReader built on the trained networks.
// Note: This depends on the way your knowledge directory is set up and may need to be
// changed whenever retraining! The character strings below list exactly the characters
// for which a knowledge file is requested; letters absent from a string are not trained
// for that network.
//
// ocrReader: receives a new OcrReader constructed from the five trained networks.
public static void TrainNN(out OcrReader ocrReader)
{
    int dimensionX = Properties.Settings.Default.DimensionX;
    int dimensionY = Properties.Settings.Default.DimensionY;

    // Each network gets its own knowledge/key lists. The lists are handed to the
    // NeuralNet constructor, so reusing (and clearing) one shared pair would change
    // an already-built network's data if NeuralNet keeps the references.

    // --- Descriptions: lower-case letters plus delimiters ---
    List<string> descriptionKnowledge = new List<string>();
    List<char> descriptionKeys = new List<char>();
    foreach (char letter in "abcdefghijklmnopqrstuvwy")
    {
        descriptionKnowledge.Add(PathHelpers.BuildKnowledgeFilename(DescriptionsNetwork, letter.ToString() + "_lower"));
        descriptionKeys.Add(letter);
    }
    // NOTE(review): one "delimiter" knowledge file is paired with two keys here;
    // presumably the file holds samples for both characters - confirm against NeuralNet.
    descriptionKnowledge.Add(PathHelpers.BuildKnowledgeFilename(DescriptionsNetwork, "delimiter"));
    descriptionKeys.Add('.');
    descriptionKeys.Add('#');
    NeuralNet nnDescriptions = new NeuralNet(dimensionX, dimensionY, descriptionKeys, descriptionKnowledge);
    nnDescriptions.SaveFile = PathHelpers.BuildNetworkFilename(DescriptionsNetwork);
    nnDescriptions.Train(Properties.Settings.Default.SamplesDescriptions);

    // --- Tables: upper-case letters, delimiters and minus ---
    List<string> tableKnowledge = new List<string>();
    List<char> tableKeys = new List<char>();
    foreach (char letter in "ABCDEFGHIJKLMNOPRSTUVWXY")
    {
        tableKnowledge.Add(PathHelpers.BuildKnowledgeFilename(TablesNetwork, letter.ToString() + "_upper"));
        tableKeys.Add(letter);
    }
    // NOTE(review): as above, a single "delimiter" file is paired with three keys.
    tableKnowledge.Add(PathHelpers.BuildKnowledgeFilename(TablesNetwork, "delimiter"));
    tableKeys.Add(':');
    tableKeys.Add('(');
    tableKeys.Add(')');
    tableKnowledge.Add(PathHelpers.BuildKnowledgeFilename(TablesNetwork, "minus"));
    tableKeys.Add('-');
    NeuralNet nnTables = new NeuralNet(dimensionX, dimensionY, tableKeys, tableKnowledge);
    nnTables.SaveFile = PathHelpers.BuildNetworkFilename(TablesNetwork);
    nnTables.Train(Properties.Settings.Default.SamplesTables);

    // --- Delimiters: comma (keyed as '#'), dot and minus ---
    List<string> delimiterKnowledge = new List<string>();
    List<char> delimiterKeys = new List<char>();
    delimiterKnowledge.Add(PathHelpers.BuildKnowledgeFilename(DelimitersNetwork, "comma"));
    delimiterKeys.Add('#');
    delimiterKnowledge.Add(PathHelpers.BuildKnowledgeFilename(DelimitersNetwork, "dot"));
    delimiterKeys.Add('.');
    delimiterKnowledge.Add(PathHelpers.BuildKnowledgeFilename(DelimitersNetwork, "minus"));
    delimiterKeys.Add('-');
    NeuralNet nnDelimiters = new NeuralNet(dimensionX, dimensionY, delimiterKeys, delimiterKnowledge);
    nnDelimiters.SaveFile = PathHelpers.BuildNetworkFilename(DelimitersNetwork);
    // NOTE(review): the delimiter network is trained with the SamplesNumbers setting;
    // kept as-is - confirm this is intended rather than a dedicated setting.
    nnDelimiters.Train(Properties.Settings.Default.SamplesNumbers);

    // --- Numbers: digits only ---
    List<string> numberKnowledge = new List<string>();
    List<char> numberKeys = new List<char>();
    foreach (char digit in "0123456789")
    {
        numberKnowledge.Add(PathHelpers.BuildKnowledgeFilename(NumbersNetwork, digit.ToString()));
        numberKeys.Add(digit);
    }
    NeuralNet nnNumbers = new NeuralNet(dimensionX, dimensionY, numberKeys, numberKnowledge);
    nnNumbers.SaveFile = PathHelpers.BuildNetworkFilename(NumbersNetwork);
    nnNumbers.Train(Properties.Settings.Default.SamplesNumbers);

    // --- Headlines: digits, upper-case, lower-case and minus ---
    List<string> headlineKnowledge = new List<string>();
    List<char> headlineKeys = new List<char>();
    foreach (char digit in "0123456789")
    {
        headlineKnowledge.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, digit.ToString()));
        headlineKeys.Add(digit);
    }
    foreach (char letter in "ABCDEFGHIJKLMNOPQRSTUVXYZ")
    {
        headlineKnowledge.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, letter.ToString() + "_upper"));
        headlineKeys.Add(letter);
    }
    foreach (char letter in "abcdefghijklmnopqrstuvwyz")
    {
        headlineKnowledge.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, letter.ToString() + "_lower"));
        headlineKeys.Add(letter);
    }
    headlineKnowledge.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, "minus"));
    headlineKeys.Add('-');
    // The headline network uses fixed 15x22 dimensions instead of the configured
    // DimensionX/DimensionY, and sets Factor to 2.
    // NOTE(review): presumably headline glyphs are larger - confirm.
    NeuralNet nnHeadlines = new NeuralNet(15, 22, headlineKeys, headlineKnowledge);
    nnHeadlines.SaveFile = PathHelpers.BuildNetworkFilename(HeadlinesNetwork);
    nnHeadlines.Factor = 2;
    nnHeadlines.Train(Properties.Settings.Default.SamplesHeadlines);

    ocrReader = new OcrReader(nnDescriptions, nnTables, nnNumbers, nnHeadlines, nnDelimiters);
}