示例#1
0
 /// <summary>
 ///     Returns the part of <paramref name="v"/> delimited by a pair of positions.
 /// </summary>
 /// <param name="v">Region to slice.</param>
 /// <param name="posPair">Start position (Item1) and end position (Item2); either may be null.</param>
 /// <returns>
 ///     The slice between the two positions, or null when either position is missing or lies outside the
 ///     range from <c>v.Start</c> to <c>v.End</c> (both inclusive).
 /// </returns>
 public static StringRegion SubStr(StringRegion v, Tuple<uint?, uint?> posPair)
 {
     uint? from = posPair.Item1;
     uint? to   = posPair.Item2;

     // A missing position means there is nothing to slice.
     if (from == null || to == null)
     {
         return null;
     }

     // Both positions must fall within the bounds of the region.
     bool fromOutside = from < v.Start || from > v.End;
     bool toOutside   = to < v.Start || to > v.End;
     if (fromOutside || toOutside)
     {
         return null;
     }

     return v.Slice((uint) from, (uint) to);
 }
示例#2
0
        /// <summary>
        ///     Learns all region programs that satisfy the examples (advanced feature) and prints what the three
        ///     top-scoring programs produce for a few inputs.
        ///     Demonstrates access to the entire program set.
        /// </summary>
        private static void LearnAllRegionPrograms()
        {
            var input = StringRegion.Create("Carrie Dodson 100");

            var positiveExamples = new[] {
                new ExtractionExample <StringRegion>(input, input.Slice(14, 17)) // "Carrie Dodson 100" => "100"
            };
            var negativeExamples = Enumerable.Empty <ExtractionExample <StringRegion> >();

            ProgramSet allPrograms = Learner.Instance.LearnAllRegion(positiveExamples, negativeExamples);
            IEnumerable <ProgramNode> topKPrograms = allPrograms.TopK("Score", 3); // "Score" is the ranking feature

            var i = 0;

            // The training input plus two inputs that were not part of the examples.
            StringRegion[] otherInputs = { input, StringRegion.Create("Leonard Robledo NA"), StringRegion.Create("Margaret Cook 320") };
            foreach (var prog in topKPrograms)
            {
                Console.WriteLine("Program {0}:", ++i);
                foreach (var str in otherInputs)
                {
                    State  inputState = State.Create(Language.Grammar.InputSymbol, str); // Create Microsoft.ProgramSynthesis input state
                    object r          = prog.Invoke(inputState);                         // Invoke Microsoft.ProgramSynthesis program node on the input state

                    // Direct cast rather than `as`: an unexpected result type now fails with a descriptive
                    // InvalidCastException instead of a NullReferenceException on the dereference.
                    Console.WriteLine(r != null ? ((StringRegion) r).Value : "null");
                }
            }
        }
示例#3
0
        /// <summary>
        ///     Finds the k-th position in <paramref name="v"/> where a match of the first regex ends and the second
        ///     regex matches.
        /// </summary>
        /// <param name="v">Region to search.</param>
        /// <param name="rr">Pair of regexes: Item1 is run over the region, Item2 is tested at the right end of each match.</param>
        /// <param name="k">
        ///     Which qualifying match to pick: a positive value counts from the first match (1 is the first);
        ///     otherwise the value is added to the match count, so -1 is the last match.
        /// </param>
        /// <returns>The right end of the selected match, or null when the selection falls outside the list of matches.</returns>
        public static uint? RegPos(StringRegion v, Tuple<RegularExpression, RegularExpression> rr, int k)
        {
            RegularExpression leftRegex  = rr.Item1;
            RegularExpression rightRegex = rr.Item2;

            // Keep only the matches of the first regex whose right end satisfies the second regex.
            List<PositionMatch> candidates = leftRegex.Run(v)
                                                      .Where(match => rightRegex.MatchesAt(v, match.Right))
                                                      .ToList();

            int selected;
            if (k > 0)
            {
                selected = k - 1;
            }
            else
            {
                selected = candidates.Count + k;
            }

            if (selected < 0 || selected >= candidates.Count)
            {
                return null;
            }

            return (uint?) candidates[selected].Right;
        }
示例#4
0
        /// <summary>
        ///     Learns a program that extracts a single region relative to another region that appears after it
        ///     (i.e., a succeeding sibling region), then runs the program on every reference.
        ///     Demonstrates how sibling referencing works.
        /// </summary>
        private static void LearnRegionReferencingSucceedingSibling()
        {
            var document = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

            // One region per line, and the trailing number of each line.
            StringRegion[] lines   = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };
            StringRegion[] amounts = { document.Slice(14, 17), document.Slice(34, 36), document.Slice(51, 54) };

            // Each example pairs a number (the reference) with the first name found on the same line (the output).
            var carrieExample    = new ExtractionExample<StringRegion>(amounts[0], lines[0].Slice(0, 6));   // "100" => "Carrie"
            var leonardExample   = new ExtractionExample<StringRegion>(amounts[1], lines[1].Slice(18, 25)); // "75" => "Leonard"
            var positiveExamples = new[] { carrieExample, leonardExample };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (var result in learned.Run(amounts))
            {
                string text;
                if (result.Output != null)
                {
                    text = result.Output.Value;
                }
                else
                {
                    text = "null";
                }
                Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, text);
            }
        }
        /// <summary>
        ///     Learns the three top-ranked region programs and prints what each of them extracts from several inputs.
        ///     Demonstrates access to lower-ranked programs.
        /// </summary>
        private static void LearnTop3RegionPrograms()
        {
            var          session  = new RegionSession();
            StringRegion training = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // Positions 14..17 cover the trailing number: "Carrie Dodson 100" => "100"
            session.AddConstraints(new RegionExample(training, training.Slice(14, 17)));

            IEnumerable<RegionProgram> ranked = session.LearnTopK(3);

            // The training input plus two inputs that were not part of the example.
            StringRegion[] probes =
            {
                training,
                RegionSession.CreateStringRegion("Leonard Robledo NA"),
                RegionSession.CreateStringRegion("Margaret Cook 320")
            };

            int rank = 0;
            foreach (RegionProgram candidate in ranked)
            {
                rank++;
                Console.WriteLine("Program {0}:", rank);
                foreach (StringRegion probe in probes)
                {
                    var extracted = candidate.Run(probe);
                    if (extracted != null)
                    {
                        Console.WriteLine(extracted.Value);
                    }
                    else
                    {
                        Console.WriteLine("null");
                    }
                }
            }
        }
示例#6
0
        /// <summary>
        ///     Learns a program to extract a region from one positive example plus regular expressions that constrain
        ///     the match, then runs it on every record.
        ///     Demonstrates that other constraints (regexes) can be given to Extraction.Text.
        ///     This is an advanced feature.
        /// </summary>
        private static void LearnRegionWithRegexes()
        {
            var document = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

            // One region per line of the document.
            StringRegion[] lines = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };

            // The goal is to extract the number from a record: "Carrie Dodson 100" => "100"
            var numberExample    = new ExtractionExample<StringRegion>(lines[0], lines[0].Slice(14, 17));
            var positiveExamples = new[] { numberExample };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            // Regexes handed to the learner: look-behind, look-ahead (none), and the match itself.
            Regex lookBehind = new Regex("\\s");
            Regex lookAhead  = null;
            Regex match      = new Regex("\\d+");

            // Ask for the single best program (k = 1).
            IEnumerable<Program> learnedPrograms =
                Learner.Instance.LearnTopKRegion(positiveExamples, negativeExamples, null, 1, lookBehind, match, lookAhead);
            Program learned = learnedPrograms.FirstOrDefault();

            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (var result in learned.Run(lines))
            {
                string text;
                if (result.Output != null)
                {
                    text = result.Output.Value;
                }
                else
                {
                    text = "null";
                }
                Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, text);
            }
        }
        /// <summary>
        ///     Learns an Extraction.Text region program, serializes it, loads it back, and runs the loaded program
        ///     on a new input.
        /// </summary>
        private static void SerializeProgram()
        {
            var          session  = new RegionSession();
            StringRegion training = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // "Carrie Dodson 100" => "Dodson"
            session.AddConstraints(new RegionExample(training, training.Slice(7, 13)));

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Round-trip the program through its serialized form.
            string        serialized = learned.Serialize();
            RegionProgram restored   = Loader.Instance.Region.Load(serialized);

            StringRegion probe     = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
            StringRegion extracted = restored.Run(probe);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
        /// <summary>
        ///     Learns all region programs that satisfy the example (advanced feature) and prints what the three
        ///     top-scoring programs extract from several inputs.
        ///     Demonstrates access to the entire program set.
        /// </summary>
        private static void LearnAllRegionPrograms()
        {
            var          session  = new RegionSession();
            StringRegion training = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // Positions 14..17 cover the trailing number: "Carrie Dodson 100" => "100"
            session.AddConstraints(new RegionExample(training, training.Slice(14, 17)));

            // Take the complete program set, then rank it by the learner's score feature.
            ProgramSet               everyProgram = session.LearnAll().ProgramSet;
            IEnumerable<ProgramNode> bestNodes    = everyProgram.TopK(RegionLearner.Instance.ScoreFeature, 3);

            StringRegion[] probes =
            {
                training,
                RegionSession.CreateStringRegion("Leonard Robledo NA"),
                RegionSession.CreateStringRegion("Margaret Cook 320")
            };

            int rank = 0;
            foreach (ProgramNode node in bestNodes)
            {
                rank++;
                Console.WriteLine("Program {0}:", rank);

                // Wrap the raw node so it can be run directly on a region.
                var runnable = new RegionProgram(node, ReferenceKind.Parent);
                foreach (StringRegion probe in probes)
                {
                    StringRegion extracted = runnable.Run(probe);
                    if (extracted == null)
                    {
                        Console.WriteLine("null");
                    }
                    else
                    {
                        Console.WriteLine(extracted.Value);
                    }
                }
            }
        }
        /// <summary>
        ///     Learns a region program from labeled examples and wraps it in a <see cref="StructureExtractor"/>.
        /// </summary>
        /// <param name="examples">
        ///     Labeled examples. Item1 is the input text; Item2 and Item3 are the two positions passed to
        ///     <c>StringRegion.Slice</c> to mark the field to extract. Must not be null or empty.
        /// </param>
        /// <param name="noneLabeledExamples">
        ///     Optional input texts without labels; when any are present they are added to the session as extra inputs.
        /// </param>
        /// <returns>A task producing the extractor built from the learned program.</returns>
        /// <exception cref="AggregateException"><paramref name="examples"/> is null or empty.</exception>
        /// <exception cref="InvalidOperationException">Learning produced no program.</exception>
        public static async Task <StructureExtractor> TrainExtractorAsync(IEnumerable <Tuple <string, uint, uint> > examples, IEnumerable <string> noneLabeledExamples = null)
        {
            // Materialize once: the sequence is both checked for emptiness and iterated, and a lazy source
            // would otherwise be evaluated twice.
            var labeledExamples = examples?.ToList();
            if (null == labeledExamples || labeledExamples.Count == 0)
            {
                // NOTE(review): ArgumentException would be the conventional type for this check; AggregateException
                // is kept so that existing callers catching it keep working.
                throw new AggregateException($"{nameof(examples)} must not be null or empty");
            }

            var regionSession = new RegionSession();

            foreach (var example in labeledExamples)
            {
                var stringRegion = new StringRegion(example.Item1, Semantics.Tokens);
                var field        = stringRegion.Slice(example.Item2, example.Item3);
                regionSession.AddConstraints(new RegionExample(stringRegion, field));
            }

            // Same single-enumeration rule for the optional unlabeled inputs.
            var unlabeledTexts = noneLabeledExamples?.ToList();
            if (null != unlabeledTexts && unlabeledTexts.Count > 0)
            {
                regionSession.AddInputs(unlabeledTexts.Select(e => new StringRegion(e, Semantics.Tokens)));
            }

            var program = await regionSession.LearnAsync();

            if (null == program)
            {
                // More specific than a bare Exception, and still caught by callers that catch Exception.
                throw new InvalidOperationException("No program found");
            }

            return(new StructureExtractor(program));
        }
        /// <summary>
        ///     Learns a program to extract a single region from two examples that come from two different files,
        ///     then runs it on a third input.
        ///     Learning from different files works like learning with several examples from a single file.
        /// </summary>
        private static void LearnRegionUsingMultipleFiles()
        {
            var          session    = new RegionSession();
            StringRegion firstFile  = RegionSession.CreateStringRegion("Carrie Dodson 100");
            StringRegion secondFile = RegionSession.CreateStringRegion("Leonard Robledo 75");

            // One example per file, each marking the middle word.
            var dodsonExample  = new RegionExample(firstFile, firstFile.Slice(7, 13));   // "Carrie Dodson 100" => "Dodson"
            var robledoExample = new RegionExample(secondFile, secondFile.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
            session.AddConstraints(dodsonExample, robledoExample);

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            StringRegion probe     = RegionSession.CreateStringRegion("Margaret Cook 320"); // expect "Cook"
            StringRegion extracted = learned.Run(probe);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
示例#11
0
        /// <summary>
        ///     Learns a program to extract a region while supplying extra, unlabeled references to help the learner
        ///     pick the intended program, then runs it on every record.
        ///     Demonstrates the use of additional references.
        /// </summary>
        private static void LearnRegionWithAdditionalReferences()
        {
            var document = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

            // One region per line of the document.
            StringRegion[] lines = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };

            // The goal is to extract "100", "75", and "***"; only the first is given as an example.
            var firstLineExample = new ExtractionExample<StringRegion>(lines[0], lines[0].Slice(14, 17)); // "Carrie Dodson 100" => "100"
            var positiveExamples = new[] { firstLineExample };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            // Additional references let Extraction.Text observe how candidate programs behave on unseen data.
            // Without them it may learn a program that extracts the first number. With them it can tell that such a
            // program does not apply to the third record "Margaret Cook ***" and promote a more applicable one.
            var unlabeledLines = lines.Skip(1);
            Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples, unlabeledLines);

            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (var result in learned.Run(lines))
            {
                string text;
                if (result.Output != null)
                {
                    text = result.Output.Value;
                }
                else
                {
                    text = "null";
                }
                Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, text);
            }
        }
        /// <summary>
        ///     Learns a program to extract a single region from a file and runs it on a second input.
        /// </summary>
        private static void LearnRegion()
        {
            var          session  = new RegionSession();
            StringRegion training = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // A single example suffices because one region is extracted from one file.
            // A position denotes the location between two characters of the file, starting at 0 (the beginning).
            // The example is identified by its start and end positions.
            var example = new RegionExample(training, training.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
            session.AddConstraints(example);

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            StringRegion probe     = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
            StringRegion extracted = learned.Run(probe);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
示例#13
0
        /// <summary>
        ///     Learns a program to extract a single region from a file and runs it on a second input.
        /// </summary>
        private static void LearnRegion()
        {
            var training = StringRegion.Create("Carrie Dodson 100");

            // A single example suffices because one region is extracted from one file.
            // A position denotes the location between two characters of the file, starting at 0 (the beginning).
            // The example is identified by its start and end positions.
            var dodsonExample    = new ExtractionExample<StringRegion>(training, training.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
            var positiveExamples = new[] { dodsonExample };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            var probe = StringRegion.Create("Leonard Robledo 75"); // expect "Robledo"

            // Running yields a sequence; a region program's result is its first element.
            IEnumerable<StringRegion> results = learned.Run(probe);
            var extracted = results.FirstOrDefault();

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
示例#14
0
        /// <summary>
        ///     Learns a program to extract a single region from a file and runs it on a second input.
        /// </summary>
        private static void LearnRegion()
        {
            var training = RegionLearner.CreateStringRegion("Carrie Dodson 100");

            // A single example suffices because one region is extracted from one file.
            // A position denotes the location between two characters of the file, starting at 0 (the beginning).
            // The example is identified by its start and end positions.
            var dodsonExample =
                new CorrespondingMemberEquals<StringRegion, StringRegion>(training, training.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
            var examples = new[] { dodsonExample };

            RegionProgram learned = RegionLearner.Instance.Learn(examples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            var          probe     = RegionLearner.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
            StringRegion extracted = learned.Run(probe);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
示例#15
0
        /// <summary>
        ///     Learns a program to extract a single region from two examples that come from two different files,
        ///     then runs it on a third input.
        ///     Learning from different files works like learning with several examples from a single file.
        /// </summary>
        private static void LearnRegionUsingMultipleFiles()
        {
            var firstFile  = RegionLearner.CreateStringRegion("Carrie Dodson 100");
            var secondFile = RegionLearner.CreateStringRegion("Leonard Robledo 75");

            // One example per file, each marking the middle word.
            var dodsonExample =
                new CorrespondingMemberEquals<StringRegion, StringRegion>(firstFile, firstFile.Slice(7, 13));   // "Carrie Dodson 100" => "Dodson"
            var robledoExample =
                new CorrespondingMemberEquals<StringRegion, StringRegion>(secondFile, secondFile.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
            var examples = new[] { dodsonExample, robledoExample };

            RegionProgram learned = RegionLearner.Instance.Learn(examples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            var          probe     = RegionLearner.CreateStringRegion("Margaret Cook 320"); // expect "Cook"
            StringRegion extracted = learned.Run(probe);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
示例#16
0
        /// <summary>
        ///     Learns an Extraction.Text region program, serializes it, loads it back, and runs the loaded program
        ///     on a new input.
        /// </summary>
        private static void SerializeProgram()
        {
            var training = RegionLearner.CreateStringRegion("Carrie Dodson 100");

            var dodsonExample =
                new CorrespondingMemberEquals<StringRegion, StringRegion>(training, training.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
            var examples = new[] { dodsonExample };

            RegionProgram learned = RegionLearner.Instance.Learn(examples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Round-trip the program through its serialized form.
            string        serialized = learned.Serialize();
            RegionProgram restored   = Loader.Instance.Region.Load(serialized);

            var          probe     = RegionLearner.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
            StringRegion extracted = restored.Run(probe);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
        /// <summary>
        ///     Learns a program to extract a region while supplying extra, unlabeled inputs to help the learner pick
        ///     the intended program, then runs it on every record.
        ///     Demonstrates the use of additional references.
        /// </summary>
        private static void LearnRegionWithAdditionalReferences()
        {
            var          session  = new RegionSession();
            StringRegion document =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

            // One region per line of the document.
            StringRegion[] lines = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };

            // The goal is to extract "100", "75", and "***"; only the first is given as an example.
            // "Carrie Dodson 100" => "100"
            var firstLineExample = new RegionExample(lines[0], lines[0].Slice(14, 17));
            session.AddConstraints(firstLineExample);

            // Additional references let Extraction.Text observe how candidate programs behave on unseen data.
            // Without them it may learn a program that extracts the first number. With them it can tell that such a
            // program does not apply to the third record "Margaret Cook ***" and promote a more applicable one.
            session.AddInputs(lines.Skip(1));

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (StringRegion line in lines)
            {
                StringRegion extracted = learned.Run(line);
                string       text      = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", line, text);
            }
        }
        /// <summary>
        ///     Learns a program that extracts a single region relative to another region that appears after it
        ///     (i.e., a succeeding sibling region), then runs the program on every reference.
        ///     Demonstrates how sibling referencing works.
        /// </summary>
        private static void LearnRegionReferencingSucceedingSibling()
        {
            var          session  = new RegionSession();
            StringRegion document =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

            // One region per line, and the trailing number of each line.
            StringRegion[] lines   = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };
            StringRegion[] amounts = { document.Slice(14, 17), document.Slice(34, 36), document.Slice(51, 54) };

            // Each example pairs a number (the reference) with the first name found on the same line (the output).
            var carrieExample  = new RegionExample(amounts[0], lines[0].Slice(0, 6));   // "100" => "Carrie"
            var leonardExample = new RegionExample(amounts[1], lines[1].Slice(18, 25)); // "75" => "Leonard"
            session.AddConstraints(carrieExample, leonardExample);

            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (StringRegion amount in amounts)
            {
                StringRegion extracted = learned.Run(amount);
                string       text      = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", amount, text);
            }
        }
示例#19
0
        /// <summary>
        ///     Learns a program to extract a sequence of regions, using the whole input as the reference, and prints
        ///     every region it extracts.
        /// </summary>
        private static void LearnSequence()
        {
            // Learning a sequence is best done with at least 2 examples, since generalizing from one element is hard.
            // Positive examples must also be contiguous (i.e., no example may be skipped).
            var document = StringRegion.Create("United States\nCarrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320\n" +
                                               "Canada\nConcetta Beck 350\nNicholas Sayers 90\nFrancis Terrill 2430\n" +
                                               "Great Britain\nNettie Pope 50\nMack Beeson 1070");

            // The goal is to extract all first names from the input string.
            var carrieExample    = new ExtractionExample<StringRegion>(document, document.Slice(14, 20)); // input => "Carrie"
            var leonardExample   = new ExtractionExample<StringRegion>(document, document.Slice(32, 39)); // input => "Leonard"
            var positiveExamples = new[] { carrieExample, leonardExample };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            Program learned = Learner.Instance.LearnSequence(positiveExamples, negativeExamples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (var region in learned.Run(document))
            {
                if (region != null)
                {
                    Console.WriteLine(region.Value);
                }
                else
                {
                    Console.WriteLine("null");
                }
            }
        }
        /// <summary>
        ///     Learns a program to extract a region from one positive and one negative example, then runs it on
        ///     every record.
        ///     Demonstrates the use of negative examples.
        /// </summary>
        private static void LearnRegionWithNegativeExamples()
        {
            var          session  = new RegionSession();
            StringRegion document =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

            // One region per line of the document.
            StringRegion[] lines = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };

            // The goal is to extract "100" and "320", and nothing from the second record.
            var wanted   = new RegionExample(lines[0], lines[0].Slice(14, 17)); // "Carrie Dodson 100" => "100"
            var unwanted = new RegionNegativeExample(lines[1], lines[1]);       // no extraction in "Leonard Robledo NA"
            session.AddConstraints(wanted, unwanted);

            // Extraction.Text will find a program whose output does not OVERLAP with any of the negative examples.
            RegionProgram learned = session.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (StringRegion line in lines)
            {
                StringRegion extracted = learned.Run(line);
                string       text      = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", line, text);
            }
        }
示例#21
0
        /// <summary>
        ///     Learns a program to extract a sequence of regions relative to a region that precedes them (the
        ///     country name), then runs it on every country region.
        /// </summary>
        private static void LearnSequenceReferencingSibling()
        {
            var document = StringRegion.Create("United States\nCarrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320\n" +
                                               "Canada\nConcetta Beck 350\nNicholas Sayers 90\nFrancis Terrill 2430\n" +
                                               "Great Britain\nNettie Pope 50\nMack Beeson 1070");

            // The three country header lines, used as references.
            StringRegion[] headers = { document.Slice(0, 13), document.Slice(69, 75), document.Slice(134, 147) };

            // The goal is to extract all first names; both examples refer to the first country.
            var carrieExample    = new ExtractionExample<StringRegion>(headers[0], document.Slice(14, 20)); // "United States" => "Carrie"
            var leonardExample   = new ExtractionExample<StringRegion>(headers[0], document.Slice(32, 39)); // "United States" => "Leonard"
            var positiveExamples = new[] { carrieExample, leonardExample };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            Program learned = Learner.Instance.LearnSequence(positiveExamples, negativeExamples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (var result in learned.Run(headers))
            {
                string text;
                if (result.Output != null)
                {
                    text = result.Output.Value;
                }
                else
                {
                    text = "null";
                }
                Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, text);
            }
        }
示例#22
0
        /// <summary>
        ///     Learns a program to extract a region from one positive and one negative example, then runs it on
        ///     every record.
        ///     Demonstrates the use of negative examples.
        /// </summary>
        private static void LearnRegionWithNegativeExamples()
        {
            var document = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

            // One region per line of the document.
            StringRegion[] lines = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };

            // The goal is to extract "100" and "320", and nothing from the second record.
            var wanted           = new ExtractionExample<StringRegion>(lines[0], lines[0].Slice(14, 17)); // "Carrie Dodson 100" => "100"
            var positiveExamples = new[] { wanted };
            var unwanted         = new ExtractionExample<StringRegion>(lines[1], lines[1]); // no extraction in "Leonard Robledo NA"
            var negativeExamples = new[] { unwanted };

            // Extraction.Text will find a program whose output does not OVERLAP with any of the negative examples.
            Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (var result in learned.Run(lines))
            {
                string text;
                if (result.Output != null)
                {
                    text = result.Output.Value;
                }
                else
                {
                    text = "null";
                }
                Console.WriteLine("\"{0}\" => \"{1}\"", result.Reference, text);
            }
        }
示例#23
0
        /// <summary>
        ///     Learns a program to extract a single region from two examples that come from two different files,
        ///     then runs it on a third input.
        ///     Learning from different files works like learning with several examples from a single file.
        /// </summary>
        private static void LearnRegionUsingMultipleFiles()
        {
            var firstFile  = StringRegion.Create("Carrie Dodson 100");
            var secondFile = StringRegion.Create("Leonard Robledo 75");

            // One example per file, each marking the middle word.
            var dodsonExample    = new ExtractionExample<StringRegion>(firstFile, firstFile.Slice(7, 13));   // "Carrie Dodson 100" => "Dodson"
            var robledoExample   = new ExtractionExample<StringRegion>(secondFile, secondFile.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
            var positiveExamples = new[] { dodsonExample, robledoExample };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            var probe = StringRegion.Create("Margaret Cook 320"); // expect "Cook"

            // Running yields a sequence; a region program's result is its first element.
            IEnumerable<StringRegion> results = learned.Run(probe);
            var extracted = results.FirstOrDefault();

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", probe, extracted);
            }
        }
示例#24
0
        /// <summary>
        ///     Learns a region program from one example, serializes it to a string, loads it back
        ///     with <c>Program.Load</c>, and runs the reloaded program on a new input.
        /// </summary>
        private static void SerializeProgram()
        {
            var trainingInput = StringRegion.Create("Carrie Dodson 100");

            var positiveExamples = new[]
            {
                new ExtractionExample<StringRegion>(trainingInput, trainingInput.Slice(7, 13)) // "Carrie Dodson 100" => "Dodson"
            };
            var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

            Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Round trip: program -> string -> program.
            string  serialized = learned.Serialize();
            Program restored   = Program.Load(serialized);

            var testInput = StringRegion.Create("Leonard Robledo 75"); // expect "Robledo"
            IEnumerable<StringRegion> extracted = restored.Run(testInput);

            // Only the first element of the sequence returned by Run is used here.
            var output = extracted.FirstOrDefault();
            if (output != null)
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", testInput, output);
            }
            else
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
        }
示例#25
0
 /// <summary>
 ///     Turns the signed offset <paramref name="k"/> into a position relative to
 ///     <paramref name="v"/>. A positive offset is counted from <c>v.Start</c> (1 maps to
 ///     <c>v.Start</c>); any other offset is counted back from <c>v.End</c> (-1 maps to <c>v.End</c>).
 /// </summary>
 /// <param name="v">The region the offset refers to.</param>
 /// <param name="k">The signed offset.</param>
 /// <returns>
 ///     The computed position, or <c>null</c> when the magnitude of <paramref name="k"/> is
 ///     greater than <c>v.Length + 1</c>.
 /// </returns>
 public static uint? AbsPos(StringRegion v, int k)
 {
     bool tooFar = Math.Abs(k) > v.Length + 1;
     if (tooFar)
     {
         return null;
     }

     if (k > 0)
     {
         // Counted forward from the left edge.
         return (uint)(v.Start + k - 1);
     }

     // Counted backward from the right edge.
     return (uint)(v.End + k + 1);
 }
示例#26
0
        /// <summary>
        ///     Extracts the part of <paramref name="v"/> delimited by a pair of positions.
        /// </summary>
        /// <param name="v">The region to slice.</param>
        /// <param name="posPair">Start position (<c>Item1</c>) and end position (<c>Item2</c>).</param>
        /// <returns>
        ///     <c>v.Slice(start, end)</c>, or <c>null</c> when either position is missing or lies
        ///     outside the range <c>v.Start</c>..<c>v.End</c>.
        /// </returns>
        public static StringRegion SubStr(StringRegion v, Tuple<uint?, uint?> posPair)
        {
            uint? left  = posPair.Item1;
            uint? right = posPair.Item2;

            // Both positions must be present before any range check makes sense.
            if (left == null || right == null)
            {
                return null;
            }

            bool leftInside  = left >= v.Start && left <= v.End;
            bool rightInside = right >= v.Start && right <= v.End;
            if (!(leftInside && rightInside))
            {
                return null;
            }

            return v.Slice((uint)left, (uint)right);
        }
示例#27
0
        /// <summary>
        ///     Invokes the program node on a single input string.
        /// </summary>
        /// <param name="input">The text to run the program on.</param>
        /// <returns>
        ///     The <c>Value</c> of the region produced by the program, or <c>null</c> when the
        ///     program produced no region.
        /// </returns>
        public string Run(string input)
        {
            Grammar grammar = FlashFillGrammar.Grammar;

            // Route 1: build the input state through the FlashFillInput helper class.
            State inputState = new FlashFillInput(input).AsState();

            // Route 2: build it directly from the grammar's input symbol.
            // This assignment replaces the state from route 1; it is the one passed to Invoke.
            StringRegion[] inputs = new[] { StringRegion.Create(input) };
            inputState = State.Create(grammar.InputSymbol, inputs);

            var region = (StringRegion)ProgramNode.Invoke(inputState);
            if (region == null)
            {
                return null;
            }
            return region.Value;
        }
示例#28
0
        /// <summary>
        ///     Extracts the part of <paramref name="v"/> delimited by a pair of positions.
        /// </summary>
        /// <param name="v">The region to slice.</param>
        /// <param name="posPair">
        ///     Start position (<c>Item1</c>) and end position (<c>Item2</c>); may be <c>null</c>.
        /// </param>
        /// <returns>
        ///     <c>v.Slice(start, end)</c>, or <c>null</c> when the pair itself is absent, either
        ///     position is missing, or a position lies outside the range <c>v.Start</c>..<c>v.End</c>.
        /// </returns>
        public static StringRegion SubStr(StringRegion v, Record<uint?, uint?>? posPair)
        {
            // The parameter is declared nullable: treat an absent pair like any other
            // undefined input instead of failing on the .Value access below.
            if (posPair == null)
            {
                return null;
            }

            uint? start = posPair.Value.Item1;
            uint? end   = posPair.Value.Item2;

            if (start == null || end == null || start < v.Start || start > v.End || end < v.Start || end > v.End)
            {
                return null;
            }
            return v.Slice((uint)start, (uint)end);
        }
示例#29
0
        /// <summary>
        ///     Returns the <c>StringRegion</c> for the file at <paramref name="path"/>, creating and
        ///     caching it on first use.
        /// </summary>
        /// <param name="path">Path of the file to load; also used as the cache key.</param>
        /// <returns>
        ///     The cached region for <paramref name="path"/> if one exists; otherwise a new region
        ///     built from the file's text after <c>Util.NormalizeBuildLogString</c> has been applied.
        /// </returns>
        public static StringRegion RegionFromFile(string path)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            StringRegion cached;
            if (fileCache.TryGetValue(path, out cached))
            {
                return cached;
            }

            string       text   = Util.NormalizeBuildLogString(File.ReadAllText(path));
            StringRegion region = RegionSession.CreateStringRegion(text);

            fileCache[path] = region;
            return region;
        }
示例#30
0
        /// <summary>
        ///     Learns a new program via <c>session.Learn()</c> and applies it to the file at
        ///     <paramref name="inputPath"/>, recording timings and the outcome in
        ///     <paramref name="result"/>.
        /// </summary>
        /// <param name="inputPath">Path of the file to extract from.</param>
        /// <param name="result">Result object that is filled in and returned.</param>
        /// <param name="verbose">Passed to the <c>ConsolePrinter</c> that writes progress messages.</param>
        /// <returns>
        ///     The same <paramref name="result"/> instance. <c>Successful</c> is <c>false</c> when no
        ///     program could be learned or the program extracted nothing; otherwise <c>Output</c>
        ///     holds the extracted text.
        /// </returns>
        public override AnalysisResult<string> Analyze(string inputPath, AnalysisResult<string> result, bool verbose)
        {
            ConsolePrinter printer = new ConsolePrinter(verbose);

            var region = AnalysisUtil.RegionFromFile(inputPath);
            result.TestInputLineCount = AnalysisUtil.BuildlogLineCount(inputPath);

            // ---- Learning phase (timed) ----
            printer.WriteLine("Starting to learn program");
            Stopwatch learnTimer = Stopwatch.StartNew();
            RegionProgram program = session.Learn();
            learnTimer.Stop();

            printer.WriteLine("Learning took " + learnTimer.Elapsed);
            result.LearningDuration = learnTimer.Elapsed;
            result.TestInputPath    = inputPath;

            if (program == null)
            {
                printer.WriteLine("no program found");
                result.Successful = false;
                result.Output     = "no program found";
                return result;
            }

            printer.WriteLine("Learned Program:");
            printer.WriteLine(program);
            printer.WriteLine("");
            result.LearnedProgram = program.ToString();

            // ---- Application phase (timed) ----
            printer.WriteLine("Starting to apply program");
            Stopwatch applyTimer = Stopwatch.StartNew();
            StringRegion extracted = program.Run(region);
            applyTimer.Stop();

            printer.WriteLine("Applying took " + applyTimer.Elapsed);
            result.ApplicationDuration = applyTimer.Elapsed;

            if (extracted == null)
            {
                result.Successful = false;
                result.Output     = "no extraction found for this input";
                return result;
            }

            result.Successful = true;
            result.Output     = extracted?.Value;
            return result;
        }
示例#31
0
 /// <summary>
 ///     Splits <paramref name="document"/> into regions lying between consecutive matches of the
 ///     line-separator token.
 /// </summary>
 /// <param name="document">The region to split.</param>
 /// <returns>
 ///     A single-element array containing <paramref name="document"/> when its cache has no match
 ///     positions for the line-separator token. Otherwise, for every pair of consecutive matches,
 ///     the slice from the earlier match's <c>Right</c> to the later match's <c>Position</c>;
 ///     pairs whose later match has <c>Length</c> 0 are skipped.
 /// </returns>
 public static IEnumerable<StringRegion> SplitLines(StringRegion document)
 {
     Token separator = StringLearningCache.GetStaticTokenByName(Token.LineSeparatorName);

     CachedList breaks;
     bool found = document.Cache.TryGetMatchPositionsFor(separator, out breaks);
     if (!found)
     {
         return new[] { document };
     }

     var result = new List<StringRegion>();
     // Walk the matches pairwise: (previous, current).
     for (int current = 1; current < breaks.Count; current++)
     {
         if (breaks[current].Length != 0)
         {
             int previous = current - 1;
             result.Add(document.Slice(breaks[previous].Right, breaks[current].Position));
         }
     }
     return result;
 }
        /// <summary>
        ///     Loads a benchmark file, builds the document region from its text with all
        ///     <c>{</c> and <c>}</c> characters removed, and returns one region per
        ///     <c>ExampleRegex</c> match.
        /// </summary>
        /// <param name="filename">Path of the benchmark file.</param>
        /// <param name="document">Receives the region created from the brace-free text.</param>
        /// <returns>The example regions, in match order, as slices of <paramref name="document"/>.</returns>
        public static List<StringRegion> LoadBenchmark(string filename, out StringRegion document)
        {
            string raw = File.ReadAllText(filename);
            Match[] marked = ExampleRegex.Matches(raw).Cast<Match>().ToArray();

            string braceFree = raw.Replace("}", "").Replace("{", "");
            document = RegionLearner.CreateStringRegion(braceFree);

            var regions = new List<StringRegion>();

            // The offset starts at -1 and drops by 2 per example, mapping match indices in the
            // raw text to indices in the brace-free text.
            // NOTE(review): presumably one '{' precedes each match and a "{}" pair is removed per
            // earlier example; this depends on ExampleRegex, which is not visible here.
            int shift = -1;
            foreach (Match example in marked)
            {
                int start = shift + example.Index;
                int end   = start + example.Length;
                regions.Add(document.Slice((uint)start, (uint)end));
                shift -= 2;
            }
            return regions;
        }
示例#33
0
 /// <summary>
 ///     Resolves the signed offset <paramref name="k"/> against <paramref name="v"/>:
 ///     positive values count forward from <c>v.Start</c> (1 yields <c>v.Start</c>), all other
 ///     values count backward from <c>v.End</c> (-1 yields <c>v.End</c>).
 /// </summary>
 /// <param name="v">The region the offset refers to.</param>
 /// <param name="k">The signed offset.</param>
 /// <returns>
 ///     The resolved position, or <c>null</c> if <c>Math.Abs(k)</c> is greater than
 ///     <c>v.Length + 1</c>.
 /// </returns>
 public static uint? AbsPos(StringRegion v, int k)
 {
     if (Math.Abs(k) > v.Length + 1)
     {
         return null;
     }

     bool fromLeft = k > 0;
     if (fromLeft)
     {
         return (uint) (v.Start + k - 1);
     }
     else
     {
         return (uint) (v.End + k + 1);
     }
 }
示例#34
0
 /// <summary>
 ///     Reads a benchmark file, creates the document region from its text with every
 ///     <c>{</c> and <c>}</c> removed, and returns one slice of that document per
 ///     <c>ExampleRegex</c> match.
 /// </summary>
 /// <param name="filename">Path of the benchmark file.</param>
 /// <param name="document">Receives the region created from the brace-free text.</param>
 /// <returns>The example regions in match order.</returns>
 public static List<StringRegion> LoadBenchmark(string filename, out StringRegion document)
 {
     string fileText = File.ReadAllText(filename);
     Match[] found = ExampleRegex.Matches(fileText).Cast<Match>().ToArray();

     string withoutBraces = fileText.Replace("}", "").Replace("{", "");
     document = StringRegion.Create(withoutBraces);

     var slices = new List<StringRegion>();
     int offset = -1;
     int index = 0;
     while (index < found.Length)
     {
         // offset is -1 for the first match and decreases by 2 for each later one.
         // NOTE(review): presumably this accounts for the braces stripped from the text;
         // confirm against the definition of ExampleRegex.
         Match current = found[index];
         int start = offset + current.Index;
         int end = start + current.Length;
         slices.Add(document.Slice((uint) start, (uint) end));

         index++;
         offset -= 2;
     }
     return slices;
 }
示例#35
0
 /// <summary>
 ///     Finds a position in <paramref name="v"/> selected by a pair of regular expressions:
 ///     the candidates are the matches of <c>rr.Item1</c> at whose <c>Right</c> position
 ///     <c>rr.Item2</c> also matches.
 /// </summary>
 /// <param name="v">The region to search.</param>
 /// <param name="rr">The pair of regular expressions (<c>Item1</c> is run, <c>Item2</c> filters).</param>
 /// <param name="k">
 ///     Which candidate to pick: a positive value selects the k-th candidate from the front
 ///     (1-based); any other value is added to the candidate count, so -1 selects the last one.
 /// </param>
 /// <returns>
 ///     The <c>Right</c> position of the selected candidate, or <c>null</c> when the resulting
 ///     index falls outside the candidate list.
 /// </returns>
 public static uint? RegPos(StringRegion v, Tuple<RegularExpression, RegularExpression> rr, int k)
 {
     List<PositionMatch> candidates = rr.Item1.Run(v)
         .Where(m => rr.Item2.MatchesAt(v, m.Right))
         .ToList();

     int chosen;
     if (k > 0)
     {
         chosen = k - 1;
     }
     else
     {
         chosen = candidates.Count + k;
     }

     bool inRange = chosen >= 0 && chosen < candidates.Count;
     if (!inRange)
     {
         return null;
     }
     return (uint?) candidates[chosen].Right;
 }