Esempio n. 1
0
        /// <summary>
        /// Creates a reader around five already-constructed networks, loads the table,
        /// description and item configuration files, reads the general word list, and
        /// derives the list of distinct words that occur in the configured descriptions.
        /// </summary>
        /// <param name="descriptions">Network stored in <c>nnDescriptions</c>.</param>
        /// <param name="tables">Network stored in <c>nnTables</c>.</param>
        /// <param name="numbers">Network stored in <c>nnNumbers</c>.</param>
        /// <param name="headlines">Network stored in <c>nnHeadlines</c>.</param>
        /// <param name="delimiters">Network stored in <c>nnDelimiters</c>.</param>
        internal OcrReader(NeuralNet descriptions, NeuralNet tables, NeuralNet numbers, NeuralNet headlines, NeuralNet delimiters)
        {
            nnDescriptions = descriptions;
            nnTables = tables;
            nnNumbers = numbers;
            nnHeadlines = headlines;
            nnDelimiters = delimiters;

            // Configuration is loaded in the same order as before: tables, descriptions, item values.
            tableConfig = TableItem.Load(PathHelpers.BuildConfigFilename("TableItems"));
            descriptionConfig = DescriptionItem.Load(PathHelpers.BuildConfigFilename("Descriptions"));
            itemConfig = ItemValues.Load(PathHelpers.BuildConfigFilename("ItemValues"));

            wordList = File.ReadAllLines(PathHelpers.BuildWordFilename("words"));

            // Collect every distinct space-separated token of every description, keeping
            // the order of first occurrence. Splitting on a single space means consecutive
            // spaces produce an empty token, which is kept like any other word.
            HashSet<string> seen = new HashSet<string>();
            List<string> distinctWords = new List<string>();
            foreach (DescriptionItem item in descriptionConfig)
            {
                foreach (string word in item.Description.Split(' '))
                {
                    if (seen.Add(word))
                    {
                        distinctWords.Add(word);
                    }
                }
            }
            wordListDescription = distinctWords.ToArray();
        }
Esempio n. 2
0
        // Train the neural networks.
        // Note: This depends on the way your knowledge directory is set up and may need to be changed whenever retraining!
        /// <summary>
        /// Builds and trains five networks in this order: descriptions, tables, delimiters,
        /// numbers, headlines. Each network gets a list of knowledge file names and a list
        /// of key characters, has its save file assigned, and is trained with a sample
        /// count taken from the application settings.
        /// </summary>
        /// <param name="ocrReader">Receives a new <see cref="OcrReader"/> built from the trained networks.</param>
        public static void TrainNN(out OcrReader ocrReader)
        {
            var settings = Properties.Settings.Default;
            int width = settings.DimensionX;
            int height = settings.DimensionY;

            // NOTE(review): the same two lists are passed to every NeuralNet constructor and
            // then cleared and refilled for the next network. This assumes NeuralNet does not
            // keep a live reference to them after construction/training - confirm.
            List<string> sampleFiles = new List<string>();
            List<char> keys = new List<char>();

            // ---- Descriptions: lower-case letters plus delimiter keys ----
            // The letter sets below are reproduced verbatim; some letters are absent
            // (presumably matching the knowledge files that exist - verify when retraining).
            foreach (char letter in "abcdefghijklmnopqrstuvwy")
            {
                sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(DescriptionsNetwork, letter + "_lower"));
                keys.Add(letter);
            }
            // NOTE(review): one "delimiter" file is paired with two keys here - confirm intended.
            sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(DescriptionsNetwork, "delimiter"));
            keys.Add('.');
            keys.Add('#');

            NeuralNet descriptionsNet = new NeuralNet(width, height, keys, sampleFiles)
            {
                SaveFile = PathHelpers.BuildNetworkFilename(DescriptionsNetwork)
            };
            descriptionsNet.Train(settings.SamplesDescriptions);

            sampleFiles.Clear();
            keys.Clear();

            // ---- Tables: upper-case letters, delimiter keys and minus ----
            foreach (char letter in "ABCDEFGHIJKLMNOPRSTUVWXY")
            {
                sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(TablesNetwork, letter + "_upper"));
                keys.Add(letter);
            }
            sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(TablesNetwork, "delimiter"));
            keys.Add(':');
            keys.Add('(');
            keys.Add(')');
            sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(TablesNetwork, "minus"));
            keys.Add('-');

            NeuralNet tablesNet = new NeuralNet(width, height, keys, sampleFiles)
            {
                SaveFile = PathHelpers.BuildNetworkFilename(TablesNetwork)
            };
            tablesNet.Train(settings.SamplesTables);

            sampleFiles.Clear();
            keys.Clear();

            // ---- Delimiters: comma (keyed as '#'), dot and minus ----
            sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(DelimitersNetwork, "comma"));
            keys.Add('#');
            sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(DelimitersNetwork, "dot"));
            keys.Add('.');
            sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(DelimitersNetwork, "minus"));
            keys.Add('-');

            NeuralNet delimitersNet = new NeuralNet(width, height, keys, sampleFiles)
            {
                SaveFile = PathHelpers.BuildNetworkFilename(DelimitersNetwork)
            };
            // NOTE(review): trained with the SamplesNumbers setting, same as the numbers
            // network below - confirm this is intended rather than a copy/paste slip.
            delimitersNet.Train(settings.SamplesNumbers);

            sampleFiles.Clear();
            keys.Clear();

            // ---- Numbers: the ten digits ----
            foreach (char digit in "0123456789")
            {
                sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(NumbersNetwork, digit.ToString()));
                keys.Add(digit);
            }

            NeuralNet numbersNet = new NeuralNet(width, height, keys, sampleFiles)
            {
                SaveFile = PathHelpers.BuildNetworkFilename(NumbersNetwork)
            };
            numbersNet.Train(settings.SamplesNumbers);

            sampleFiles.Clear();
            keys.Clear();

            // ---- Headlines: digits, upper-case, lower-case and minus ----
            foreach (char digit in "0123456789")
            {
                sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, digit.ToString()));
                keys.Add(digit);
            }
            foreach (char letter in "ABCDEFGHIJKLMNOPQRSTUVXYZ")
            {
                sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, letter + "_upper"));
                keys.Add(letter);
            }
            foreach (char letter in "abcdefghijklmnopqrstuvwyz")
            {
                sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, letter + "_lower"));
                keys.Add(letter);
            }
            sampleFiles.Add(PathHelpers.BuildKnowledgeFilename(HeadlinesNetwork, "minus"));
            keys.Add('-');

            // The headlines network uses fixed 15x22 dimensions instead of the configured
            // ones, and is the only network whose Factor is set (to 2).
            NeuralNet headlinesNet = new NeuralNet(15, 22, keys, sampleFiles)
            {
                SaveFile = PathHelpers.BuildNetworkFilename(HeadlinesNetwork),
                Factor = 2
            };
            headlinesNet.Train(settings.SamplesHeadlines);

            ocrReader = new OcrReader(descriptionsNet, tablesNet, numbersNet, headlinesNet, delimitersNet);
        }