Exemple #1
0
        /// <summary>
        /// Learns a fresh program from the session's current example set and applies it to the file at
        /// <paramref name="inputPath"/>, recording timings and the outcome in <paramref name="result"/>.
        /// </summary>
        /// <param name="inputPath">Path of the file to analyze.</param>
        /// <param name="result">Result object that is filled in and handed back to the caller.</param>
        /// <param name="verbose">Forwarded to the <c>ConsolePrinter</c> used for progress messages.</param>
        /// <returns>The same <paramref name="result"/> instance, populated with the extraction result.</returns>
        public override AnalysisResult <string> Analyze(string inputPath, AnalysisResult <string> result, bool verbose)
        {
            var printer    = new ConsolePrinter(verbose);
            var fileRegion = AnalysisUtil.RegionFromFile(inputPath);
            result.TestInputLineCount = AnalysisUtil.BuildlogLineCount(inputPath);

            // Phase 1: learn a program and time how long learning takes.
            printer.WriteLine("Starting to learn program");
            var           learnTimer = Stopwatch.StartNew();
            RegionProgram learned    = session.Learn();
            learnTimer.Stop();

            printer.WriteLine("Learning took " + learnTimer.Elapsed);
            result.LearningDuration = learnTimer.Elapsed;
            result.TestInputPath    = inputPath;

            if (learned == null)
            {
                // Without a program there is nothing to apply; report the failure and stop.
                const string noProgram = "no program found";
                printer.WriteLine(noProgram);
                result.Successful = false;
                result.Output     = noProgram;
                return result;
            }

            printer.WriteLine("Learned Program:");
            printer.WriteLine(learned);
            printer.WriteLine("");
            result.LearnedProgram = learned.ToString();

            // Phase 2: run the learned program on the input region and time the run.
            printer.WriteLine("Starting to apply program");
            var          applyTimer = Stopwatch.StartNew();
            StringRegion extracted  = learned.Run(fileRegion);
            applyTimer.Stop();

            printer.WriteLine("Applying took " + applyTimer.Elapsed);
            result.ApplicationDuration = applyTimer.Elapsed;

            // A null region means the program produced no extraction for this input.
            bool found = extracted != null;
            result.Successful = found;
            result.Output     = found ? extracted.Value : "no extraction found for this input";
            return result;
        }
        /// <summary>
        ///     Learns an extraction program from one example and applies it to a new input,
        ///     printing the extracted region (or an error message) to the console.
        /// </summary>
        private static void LearnRegion()
        {
            var          session = new RegionSession();
            StringRegion source  = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // A single example suffices here: one region is extracted from one input.
            // Positions sit between characters, starting at 0 for the beginning of the input,
            // and a region is identified by its start and end positions.
            StringRegion  lastName = source.Slice(7, 13); // "Dodson"
            RegionExample example  = new RegionExample(source, lastName); // "Carrie Dodson 100" => "Dodson"
            session.AddConstraints(example);

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Apply the learned program to an unseen input; "Robledo" is expected.
            StringRegion unseen    = RegionSession.CreateStringRegion("Leonard Robledo 75");
            StringRegion extracted = learned.Run(unseen);
            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
                return;
            }

            Console.WriteLine("\"{0}\" => \"{1}\"", unseen, extracted);
        }
        /// <summary>
        ///     Learns an Extraction.Text program, serializes it, loads it back, and runs the
        ///     deserialized copy on a new input.
        /// </summary>
        private static void SerializeProgram()
        {
            var          session = new RegionSession();
            StringRegion source  = RegionSession.CreateStringRegion("Carrie Dodson 100");

            RegionExample example = new RegionExample(source, source.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
            session.AddConstraints(example);

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Round-trip the program through its serialized text form.
            string        programText = learned.Serialize();
            RegionProgram reloaded    = Loader.Instance.Region.Load(programText);

            // The reloaded program is the one that gets executed; "Robledo" is expected.
            StringRegion unseen    = RegionSession.CreateStringRegion("Leonard Robledo 75");
            StringRegion extracted = reloaded.Run(unseen);
            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
                return;
            }

            Console.WriteLine("\"{0}\" => \"{1}\"", unseen, extracted);
        }
        /// <summary>
        ///     Learns a program that extracts a region relative to another region appearing after it
        ///     (a succeeding sibling region), then runs it against every number region.
        ///     Demonstrates how sibling referencing works.
        /// </summary>
        private static void LearnRegionReferencingSucceedingSibling()
        {
            var          session = new RegionSession();
            StringRegion input   =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

            var records = new[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };
            var numbers = new[] { input.Slice(14, 17), input.Slice(34, 36), input.Slice(51, 54) };

            // The number is the reference region; the first name is what should be extracted from it.
            RegionExample firstExample  = new RegionExample(numbers[0], records[0].Slice(0, 6));   // "100" => "Carrie"
            RegionExample secondExample = new RegionExample(numbers[1], records[1].Slice(18, 25)); // "75" => "Leonard"
            session.AddConstraints(firstExample, secondExample);

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Run the program on each number, printing "null" where nothing is extracted.
            for (int i = 0; i < numbers.Length; i++)
            {
                StringRegion reference = numbers[i];
                StringRegion extracted = learned.Run(reference);
                string       shown     = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", reference, shown);
            }
        }
        /// <summary>
        ///     Learns a program from one example while supplying the remaining records as extra inputs,
        ///     so that the learner can favour a program that also applies to them.
        ///     Demonstrates the use of additional references.
        /// </summary>
        private static void LearnRegionWithAdditionalReferences()
        {
            var          session = new RegionSession();
            StringRegion input   =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

            var records = new[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };

            // Goal: extract "100", "75", and "***". Only the first record is given as an example.
            StringRegion  firstRecord = records[0];
            RegionExample example     = new RegionExample(firstRecord, firstRecord.Slice(14, 17)); // "Carrie Dodson 100" => "100"
            session.AddConstraints(example);

            // The other records are passed as additional references so Extraction.Text can observe how
            // candidate programs behave on unseen data. Without them it may learn "extract the first number",
            // which does not apply to the third record "Margaret Cook ***"; with them a more applicable
            // program is promoted.
            session.AddInputs(records.Skip(1));

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Run the program on every record, printing "null" where nothing is extracted.
            for (int i = 0; i < records.Length; i++)
            {
                StringRegion record    = records[i];
                StringRegion extracted = learned.Run(record);
                string       shown     = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", record, shown);
            }
        }
        /// <summary>
        ///     Learns a program from one positive and one negative example, then runs it on every record.
        ///     Demonstrates the use of negative examples.
        /// </summary>
        private static void LearnRegionWithNegativeExamples()
        {
            var          session = new RegionSession();
            StringRegion input   =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

            var records = new[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };

            // Goal: extract "100" and "320", and nothing from the "NA" record.
            RegionExample         wanted   = new RegionExample(records[0], records[0].Slice(14, 17)); // "Carrie Dodson 100" => "100"
            RegionNegativeExample unwanted = new RegionNegativeExample(records[1], records[1]);       // no extraction in "Leonard Robledo NA"
            session.AddConstraints(wanted, unwanted);

            // The learned program's output must not OVERLAP with any negative example.
            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Run the program on every record, printing "null" where nothing is extracted.
            for (int i = 0; i < records.Length; i++)
            {
                StringRegion record    = records[i];
                StringRegion extracted = learned.Run(record);
                string       shown     = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", record, shown);
            }
        }
        /// <summary>
        ///     Learns a single-region program from two examples that come from two different inputs.
        ///     Learning across files works like learning from several examples in one file.
        ///     Demonstrates how to learn with examples from different files.
        /// </summary>
        private static void LearnRegionUsingMultipleFiles()
        {
            var          session    = new RegionSession();
            StringRegion firstFile  = RegionSession.CreateStringRegion("Carrie Dodson 100");
            StringRegion secondFile = RegionSession.CreateStringRegion("Leonard Robledo 75");

            // One example per input, each selecting the last name.
            RegionExample firstExample  = new RegionExample(firstFile, firstFile.Slice(7, 13));   // "Carrie Dodson 100" => "Dodson"
            RegionExample secondExample = new RegionExample(secondFile, secondFile.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
            session.AddConstraints(firstExample, secondExample);

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Apply the learned program to a third, unseen input; "Cook" is expected.
            StringRegion unseen    = RegionSession.CreateStringRegion("Margaret Cook 320");
            StringRegion extracted = learned.Run(unseen);
            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
                return;
            }

            Console.WriteLine("\"{0}\" => \"{1}\"", unseen, extracted);
        }
Exemple #8
0
            public override void Run()
            {
                if (!Directory.Exists(ReportDirPath))
                {
                    Directory.CreateDirectory(ReportDirPath);
                }
                Console.WriteLine($"Learning Extraction.Text region program for {SubBenchmarkName}");

                Success = false;
                while (!Success && TryGetNextExample(out RegionExample nextConstraint))
                {
                    var session = new RegionSession();
                    UsedExamples.Add(nextConstraint);
                    session.Constraints.Add(UsedExamples);
                    Program = session.Learn();
                    Success = Program != null && AllExamples.All(e => Valid(e, Program));
                }
                RecordResult();
            }
        /// <summary>
        ///     Learns a single-region extraction program from the given examples and returns it in serialized form.
        /// </summary>
        /// <param name="textExtractExamples">
        ///     Examples to learn from. Only the first selection of each example is used;
        ///     examples without any selection are skipped.
        /// </param>
        /// <returns>
        ///     The serialized program, or <c>null</c> when no program could be learned.
        /// </returns>
        public string LearnSingle(List <TextExtractExample> textExtractExamples)
        {
            var session        = new RegionSession();
            var regionExamples = new List <RegionExample>();

            foreach (var textExtractExample in textExtractExamples)
            {
                // At most one example is added per string region. FirstOrDefault (rather than First) yields
                // null for an empty selection list instead of throwing, so the skip below can take effect.
                var textExtractSelection = textExtractExample.selections.FirstOrDefault();
                if (textExtractSelection == null)
                {
                    continue;
                }

                var inputRegion   = RegionSession.CreateStringRegion(textExtractExample.text);
                var exampleRegion = inputRegion.Slice((uint)textExtractSelection.startPos, (uint)textExtractSelection.endPos);
                regionExamples.Add(new RegionExample(inputRegion, exampleRegion));
            }

            session.AddConstraints(regionExamples);
            var program = session.Learn();

            // Learn() can return null when no program is found; propagate that as a null result
            // instead of failing with a NullReferenceException.
            return program?.Serialize();
        }