/// <summary>
        ///     Learns a single region-extraction program from the supplied examples and returns it in serialized form.
        ///     For each example, a string region is created from its text and the first selection (if any)
        ///     is turned into a <c>RegionExample</c> constraint for the learning session.
        /// </summary>
        /// <param name="textExtractExamples">
        ///     The examples to learn from. Only the first selection of each example is used;
        ///     examples without a selection are skipped.
        /// </param>
        /// <returns>The result of calling <c>Serialize()</c> on the learned program.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="textExtractExamples"/> is null.</exception>
        /// <exception cref="System.InvalidOperationException">The session did not produce a program.</exception>
        public string LearnSingle(List <TextExtractExample> textExtractExamples)
        {
            if (textExtractExamples == null)
            {
                throw new System.ArgumentNullException(nameof(textExtractExamples));
            }

            var session        = new RegionSession();
            var regionExamples = new List <RegionExample>();

            foreach (var textExtractExample in textExtractExamples)
            {
                // At most one example is added per string region. FirstOrDefault (rather than First)
                // lets an example with no selections be skipped instead of throwing.
                var textExtractSelection = textExtractExample.selections?.FirstOrDefault();
                if (textExtractSelection == null)
                {
                    continue;
                }

                var inputRegion   = RegionSession.CreateStringRegion(textExtractExample.text);
                var exampleRegion = inputRegion.Slice((uint)textExtractSelection.startPos, (uint)textExtractSelection.endPos);
                regionExamples.Add(new RegionExample(inputRegion, exampleRegion));
            }

            session.AddConstraints(regionExamples);
            var program = session.Learn();

            // Learn() can come back empty-handed; surface that explicitly instead of
            // failing with a NullReferenceException on Serialize().
            if (program == null)
            {
                throw new System.InvalidOperationException("Learning failed: no program could be learned from the supplied examples.");
            }

            return program.Serialize();
        }
Esempio n. 2
0
        /// <summary>
        ///     Learns programs to extract multiple regions, using examples taken from two different files.
        ///     Learning multiple regions is similar to learning a single region: the same task is repeated
        ///     once per field. The first two files are used for training; the learned programs are then
        ///     run on every remaining file and the extracted values are written to <c>..\..\output.txt</c>.
        /// </summary>
        /// <param name="paths">
        ///     Paths of the input files. The first two are the training documents; any further files are
        ///     the inputs the learned programs are run on.
        /// </param>
        /// <param name="regionsToLearn">
        ///     One dictionary per training document, mapping a field name to the text of that field in
        ///     the corresponding document. The set of fields is taken from the first dictionary.
        /// </param>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        /// <exception cref="ArgumentException">Fewer than two paths or two example dictionaries were supplied.</exception>
        /// <exception cref="InvalidOperationException">An example text does not occur in its training document.</exception>
        private static void LearnMultipleRegionsUsingMultipleFiles(List <string> paths, List <Dictionary <string, string> > regionsToLearn)
        {
            const int trainingDocumentCount = 2;

            if (paths == null)
            {
                throw new ArgumentNullException(nameof(paths));
            }
            if (regionsToLearn == null)
            {
                throw new ArgumentNullException(nameof(regionsToLearn));
            }
            if (paths.Count < trainingDocumentCount)
            {
                throw new ArgumentException("At least " + trainingDocumentCount + " input files are required for training.", nameof(paths));
            }
            if (regionsToLearn.Count < trainingDocumentCount)
            {
                throw new ArgumentException("Examples are required for each of the " + trainingDocumentCount + " training files.", nameof(regionsToLearn));
            }

            // Load every file up front; inputs[i] corresponds to paths[i].
            List <StringRegion> inputs = new List <StringRegion>();
            foreach (string path in paths)
            {
                inputs.Add(RegionSession.CreateStringRegion(File.ReadAllText(path)));
            }

            List <string> fieldsToLearn = regionsToLearn[0].Keys.ToList();
            Dictionary <string, RegionSession> sessionPerField = new Dictionary <string, RegionSession>();

            // Build one session per field, each constrained by one example from every training document.
            foreach (string field in fieldsToLearn)
            {
                RegionSession session = new RegionSession();
                for (int i = 0; i < trainingDocumentCount; i++)
                {
                    string output = regionsToLearn[i][field];

                    // The example is located by searching for its text in the document; report a
                    // missing example clearly instead of failing on an empty nullable.
                    var start = inputs[i].IndexOfRelative(output);
                    if (start == null)
                    {
                        throw new InvalidOperationException("Example text for field '" + field + "' was not found in file '" + paths[i] + "'.");
                    }
                    uint end = (uint)(start.Value + output.Length);

                    RegionExample example = new RegionExample(inputs[i], inputs[i].Slice(start.Value, end));
                    session.AddConstraints(example);
                }
                sessionPerField.Add(field, session);
            }

            // Learn a program per field; fields whose learning fails are reported and left out of the output.
            Dictionary <string, RegionProgram> programPerField = new Dictionary <string, RegionProgram>();
            foreach (var fieldSessionPair in sessionPerField)
            {
                RegionProgram program = fieldSessionPair.Value.Learn();
                if (program == null)
                {
                    Console.Error.WriteLine("Error: Learning fails for Field : " + fieldSessionPair.Key);
                }
                else
                {
                    programPerField.Add(fieldSessionPair.Key, program);
                }
            }

            // Testing: run the learned programs on the non-training files.
            // The using block guarantees the writer is flushed and closed even if a run or write throws.
            using (StreamWriter outputWriter = new StreamWriter(@"..\..\output.txt"))
            {
                outputWriter.WriteLine(string.Join("\t|\t", programPerField.Keys));
                for (int i = trainingDocumentCount; i < inputs.Count; i++)
                {
                    List <string> values = new List <string>();
                    foreach (var fieldProgramPair in programPerField)
                    {
                        // Run may yield no region for this input; a null value is then recorded.
                        string value = fieldProgramPair.Value.Run(inputs[i])?.Value;
                        values.Add(value);
                    }
                    outputWriter.WriteLine(string.Join("\t|\t\t", values));
                }
            }
        }