Beispiel #1
0
        /// <summary>
        ///     Learns a single-region extraction program from two examples that come from two separate inputs.
        ///     Each input contributes one example; the learner receives both at once, exactly as it would
        ///     receive several examples taken from one input.
        ///     The top-ranked program is then run on a third, unseen input and the result is printed.
        /// </summary>
        private static void LearnRegionUsingMultipleFiles()
        {
            StringRegion firstDocument  = RegionLearner.CreateStringRegion("Carrie Dodson 100");
            StringRegion secondDocument = RegionLearner.CreateStringRegion("Leonard Robledo 75");

            // The region to extract from each document is the last name.
            StringRegion firstLastName  = firstDocument.Slice(7, 13);  // "Dodson"
            StringRegion secondLastName = secondDocument.Slice(8, 15); // "Robledo"

            CorrespondingMemberEquals<StringRegion, StringRegion>[] examples =
            {
                new CorrespondingMemberEquals<StringRegion, StringRegion>(firstDocument, firstLastName),
                new CorrespondingMemberEquals<StringRegion, StringRegion>(secondDocument, secondLastName)
            };

            RegionProgram program = RegionLearner.Instance.Learn(examples);
            if (program == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Try the learned program on an input that was not part of the examples; "Cook" is expected.
            StringRegion unseenDocument = RegionLearner.CreateStringRegion("Margaret Cook 320");
            StringRegion extracted      = program.Run(unseenDocument);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", unseenDocument, extracted);
            }
        }
Beispiel #2
0
        /// <summary>
        ///     Learns an Extraction.Text region program, serializes it to a string, loads it back,
        ///     and runs the deserialized program on a new input.
        /// </summary>
        private static void SerializeProgram()
        {
            StringRegion document = RegionLearner.CreateStringRegion("Carrie Dodson 100");
            StringRegion lastName = document.Slice(7, 13); // "Dodson"

            CorrespondingMemberEquals<StringRegion, StringRegion>[] examples =
            {
                new CorrespondingMemberEquals<StringRegion, StringRegion>(document, lastName)
            };

            RegionProgram learned = RegionLearner.Instance.Learn(examples);
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Round-trip the program through its serialized form.
            string        serialized = learned.Serialize();
            RegionProgram restored   = Loader.Instance.Region.Load(serialized);

            // Run the restored program on an unseen input; "Robledo" is expected.
            StringRegion unseenDocument = RegionLearner.CreateStringRegion("Leonard Robledo 75");
            StringRegion extracted      = restored.Run(unseenDocument);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", unseenDocument, extracted);
            }
        }
Beispiel #3
0
        /// <summary>
        ///     Learns a program that extracts a single region from one input, using one example,
        ///     then runs the top-ranked program on a second input and prints the result.
        /// </summary>
        private static void LearnRegion()
        {
            StringRegion document = RegionLearner.CreateStringRegion("Carrie Dodson 100");

            // A single example suffices here: one region is extracted from one input.
            // A position denotes the gap between two characters, counted from 0 at the start of the input,
            // and an example region is given by its start and end positions.
            StringRegion lastName = document.Slice(7, 13); // "Dodson"

            CorrespondingMemberEquals<StringRegion, StringRegion>[] examples =
            {
                new CorrespondingMemberEquals<StringRegion, StringRegion>(document, lastName)
            };

            RegionProgram program = RegionLearner.Instance.Learn(examples);
            if (program == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Run on an unseen input; "Robledo" is expected.
            StringRegion unseenDocument = RegionLearner.CreateStringRegion("Leonard Robledo 75");
            StringRegion extracted      = program.Run(unseenDocument);

            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
            }
            else
            {
                Console.WriteLine("\"{0}\" => \"{1}\"", unseenDocument, extracted);
            }
        }
Beispiel #4
0
        /// <summary>
        ///     Requests the three top-ranked region programs for one example and prints what each of them
        ///     extracts from several inputs, showing how lower-ranked programs can be accessed.
        /// </summary>
        private static void LearnTop3RegionPrograms()
        {
            StringRegion document = RegionLearner.CreateStringRegion("Carrie Dodson 100");
            StringRegion number   = document.Slice(14, 17); // "Carrie Dodson 100" => "100"

            CorrespondingMemberEquals<StringRegion, StringRegion>[] examples =
            {
                new CorrespondingMemberEquals<StringRegion, StringRegion>(document, number)
            };

            IEnumerable<RegionProgram> rankedPrograms = RegionLearner.Instance.LearnTopK(examples, 3);

            int rank = 0;

            // The training input plus two further inputs to compare the programs on.
            StringRegion[] probes =
            {
                document,
                RegionLearner.CreateStringRegion("Leonard Robledo NA"),
                RegionLearner.CreateStringRegion("Margaret Cook 320")
            };

            foreach (RegionProgram candidate in rankedPrograms)
            {
                rank++;
                Console.WriteLine("Program {0}:", rank);

                foreach (StringRegion probe in probes)
                {
                    StringRegion extracted = candidate.Run(probe);

                    // A program that extracts nothing from a probe is reported as the literal text "null".
                    if (extracted != null)
                    {
                        Console.WriteLine(extracted.Value);
                    }
                    else
                    {
                        Console.WriteLine("null");
                    }
                }
            }
        }
Beispiel #5
0
        /// <summary>
        ///     Learns the whole set of region programs that satisfy the example (advanced feature),
        ///     takes the top three by the learner's score feature, and prints what each of them
        ///     produces on several inputs.
        ///     Demonstrates access to the entire program set.
        /// </summary>
        private static void LearnAllRegionPrograms()
        {
            var input = RegionLearner.CreateStringRegion("Carrie Dodson 100");

            var examples = new[] {
                new CorrespondingMemberEquals<StringRegion, StringRegion>(input, input.Slice(14, 17)) // "Carrie Dodson 100" => "100"
            };

            ProgramSet allPrograms = RegionLearner.Instance.LearnAll(examples);
            IEnumerable<ProgramNode> topKPrograms =
                allPrograms.TopK(RegionLearner.Instance.ScoreFeature, 3); // "Score" is the ranking feature

            var i = 0;

            StringRegion[] otherInputs =
            {
                input, RegionLearner.CreateStringRegion("Leonard Robledo NA"),
                RegionLearner.CreateStringRegion("Margaret Cook 320")
            };
            foreach (var prog in topKPrograms)
            {
                Console.WriteLine("Program {0}:", ++i);
                foreach (var str in otherInputs)
                {
                    State  inputState = State.Create(Language.Grammar.InputSymbol, str); // Create Microsoft.ProgramSynthesis input state
                    object r          = prog.Invoke(inputState);                         // Invoke Microsoft.ProgramSynthesis program node on the input state

                    // Invoke returns a plain object, so check the cast before dereferencing it:
                    // a non-null result that is not a StringRegion must not cause a NullReferenceException.
                    var    region = r as StringRegion;
                    string text   = region != null ? region.Value : (r?.ToString() ?? "null");
                    Console.WriteLine(text);
                }
            }
        }
Beispiel #6
0
        /// <summary>
        ///     Learns a program that extracts a region by referring to another region that appears after it
        ///     (a succeeding sibling): the number at the end of each record is the reference, and the
        ///     first name at the start of the record is the region to extract.
        ///     Demonstrates how sibling referencing works.
        /// </summary>
        private static void LearnRegionReferencingSucceedingSibling()
        {
            StringRegion document =
                RegionLearner.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

            // One region per line, and the trailing number of each line.
            StringRegion[] records =
            {
                document.Slice(0, 17),
                document.Slice(18, 36),
                document.Slice(37, 54)
            };
            StringRegion[] numbers =
            {
                document.Slice(14, 17),
                document.Slice(34, 36),
                document.Slice(51, 54)
            };

            // The first name is extracted relative to the number.
            StringRegion carrie  = records[0].Slice(0, 6);   // reference "100" => "Carrie"
            StringRegion leonard = records[1].Slice(18, 25); // reference "75"  => "Leonard"

            CorrespondingMemberEquals<StringRegion, StringRegion>[] examples =
            {
                new CorrespondingMemberEquals<StringRegion, StringRegion>(numbers[0], carrie),
                new CorrespondingMemberEquals<StringRegion, StringRegion>(numbers[1], leonard)
            };

            RegionProgram program = RegionLearner.Instance.Learn(examples);
            if (program == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Run the program against every number, including the one that had no example.
            foreach (StringRegion reference in numbers)
            {
                StringRegion extracted = program.Run(reference);
                string       text      = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", reference, text);
            }
        }
Beispiel #7
0
        /// <summary>
        ///     Learns a region program from one example while also handing the learner the remaining,
        ///     unlabeled records as additional references.
        ///     Demonstrates the use of additional references.
        /// </summary>
        private static void LearnRegionWithAdditionalReferences()
        {
            StringRegion document =
                RegionLearner.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

            StringRegion[] records =
            {
                document.Slice(0, 17),
                document.Slice(18, 36),
                document.Slice(37, 54)
            };

            // Goal: extract "100", "75", and "***". Only the first record is given as an example.
            StringRegion firstValue = records[0].Slice(14, 17); // "Carrie Dodson 100" => "100"

            CorrespondingMemberEquals<StringRegion, StringRegion>[] examples =
            {
                new CorrespondingMemberEquals<StringRegion, StringRegion>(records[0], firstValue)
            };

            // Additional references let Extraction.Text see how candidate programs behave on data without examples.
            // Without them it may settle on a program that extracts the first number; with them it can tell that
            // such a program does not apply to the third record "Margaret Cook ***" and prefer a more applicable one.
            IEnumerable<StringRegion> unlabeledRecords = records.Skip(1);
            RegionProgram program = RegionLearner.Instance.Learn(examples, new[] { unlabeledRecords });

            if (program == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (StringRegion record in records)
            {
                StringRegion extracted = program.Run(record);
                string       text      = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", record, text);
            }
        }
Beispiel #8
0
        /// <summary>
        ///     Learns a region program from a positive example combined with a negative example
        ///     (a record from which nothing should be extracted).
        ///     Demonstrates the use of negative examples.
        /// </summary>
        private static void LearnRegionWithNegativeExamples()
        {
            StringRegion document =
                RegionLearner.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

            StringRegion[] records =
            {
                document.Slice(0, 17),
                document.Slice(18, 36),
                document.Slice(37, 54)
            };

            // Goal: extract "100" and "320", and nothing from the "NA" record.
            StringRegion firstNumber = records[0].Slice(14, 17); // "Carrie Dodson 100" => "100"

            var positive = new CorrespondingMemberEquals<StringRegion, StringRegion>(records[0], firstNumber);
            // The whole second record is marked as off limits: no extraction in "Leonard Robledo NA".
            var negative = new CorrespondingMemberDoesNotIntersect<StringRegion>(records[1], records[1]);

            Constraint<IEnumerable<StringRegion>, IEnumerable<StringRegion>>[] constraints = { positive, negative };

            // Extraction.Text will find a program whose output does not OVERLAP with any of the negative examples.
            RegionProgram program = RegionLearner.Instance.Learn(constraints);

            if (program == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (StringRegion record in records)
            {
                StringRegion extracted = program.Run(record);
                string       text      = extracted?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", record, text);
            }
        }
Beispiel #9
0
        /// <summary>
        ///     Runs a tab-separated benchmark file through the given grammar: the first
        ///     <paramref name="exampleCount" /> rows are used as input/output examples for learning,
        ///     and the learned program is then invoked on the input column of every remaining row.
        /// </summary>
        /// <param name="grammar">Grammar whose input symbol is bound to each input region.</param>
        /// <param name="benchmark">Benchmark name; the file read is <c>benchmarks/{benchmark}.tsv</c>.</param>
        /// <param name="exampleCount">Number of leading rows used as learning examples.</param>
        private static void TestTextTransformationBenchmark(Grammar grammar, string benchmark, int exampleCount = 2)
        {
            string[] lines = File.ReadAllLines($"benchmarks/{benchmark}.tsv");

            // Rows need at least an input and an output column. Blank or single-column lines
            // (for example a trailing empty line) are skipped instead of failing on parts[1].
            Tuple<string, string>[] data = lines
                .Select(l => l.Split(new[] { "\t" }, StringSplitOptions.RemoveEmptyEntries))
                .Where(parts => parts.Length >= 2)
                .Select(parts => Tuple.Create(parts[0], parts[1]))
                .ToArray();

            var examples =
                data.Take(exampleCount)
                .ToDictionary(
                    t => State.Create(grammar.InputSymbol, RegionLearner.CreateStringRegion(t.Item1)),
                    t => (object)RegionLearner.CreateStringRegion(t.Item2));
            var         spec    = new ExampleSpec(examples);
            ProgramNode program = Learn(grammar, spec,
                                        new Substrings.RankingScore(grammar),
                                        new Substrings.WitnessFunctions(grammar));

            // NOTE(review): Learn is defined elsewhere; it is assumed it can return null when no
            // program is found. Guard here, as the other samples do, rather than dereference null.
            if (program == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (Tuple<string, string> row in data.Skip(exampleCount))
            {
                State input = State.Create(grammar.InputSymbol,
                                           RegionLearner.CreateStringRegion(row.Item1));
                var output = program.Invoke(input);
                WriteColored(ConsoleColor.DarkCyan, $"{row.Item1} => {output}");
            }
        }
        /// <summary>
        ///     Reads a benchmark file, strips every '{' and '}' from its text to form the document region,
        ///     and returns one sub-region of that document per match of <c>ExampleRegex</c>.
        /// </summary>
        /// <param name="filename">Path of the benchmark file to read.</param>
        /// <param name="document">Receives the region created from the text with all braces removed.</param>
        /// <returns>The example regions, in match order, as slices of <paramref name="document" />.</returns>
        public static List<StringRegion> LoadBenchmark(string filename, out StringRegion document)
        {
            string markedUpText = File.ReadAllText(filename);

            List<Match> matches = ExampleRegex.Matches(markedUpText).Cast<Match>().ToList();
            string plainText = markedUpText.Replace("}", "").Replace("{", "");
            document = RegionLearner.CreateStringRegion(plainText);

            var regions = new List<StringRegion>();

            // Match indices refer to the marked-up text; convert them to positions in the brace-free text.
            // The correction is 1 for the first match and grows by 2 for every later one.
            // NOTE(review): this presumably relies on each match lying just inside one "{...}" pair —
            // confirm against the definition of ExampleRegex.
            int removedBefore = 1;
            foreach (Match match in matches)
            {
                int start = match.Index - removedBefore;
                int end   = start + match.Length;
                regions.Add(document.Slice((uint)start, (uint)end));
                removedBefore += 2;
            }

            return regions;
        }
Beispiel #11
0
        /// <summary>
        ///     Loads the Substrings sample grammar, parses and runs a hand-written program on a sample string,
        ///     learns from a single input/output example, and finally runs the "emails" benchmark.
        ///     Returns without doing anything if the grammar could not be loaded.
        /// </summary>
        private static void LoadAndTestSubstrings()
        {
            var grammar = LoadGrammar("ProseSample.Substrings.grammar");
            if (grammar == null)
            {
                return;
            }

            // Parse a program written in human-readable form and run it on a sample input.
            ProgramNode parsedProgram = ProgramNode.Parse(
                @"SubStr(v, PosPair(AbsPos(v, -4), AbsPos(v, -1)))",
                grammar,
                ASTSerializationFormat.HumanReadable);
            StringRegion document   = RegionLearner.CreateStringRegion("Microsoft PROSE SDK");
            State        inputState = State.Create(grammar.InputSymbol, document);

            object parsedResult = parsedProgram.Invoke(inputState);
            Console.WriteLine(parsedResult);

            // Learn from one example: the whole document maps to its last three characters.
            StringRegion lastThree = document.Slice(document.End - 3, document.End);
            Spec         spec      = ShouldConvert.Given(grammar).To(document, lastThree);

            Learn(grammar, spec,
                  new Substrings.RankingScore(grammar),
                  new Substrings.WitnessFunctions(grammar));

            TestTextTransformationBenchmark(grammar, "emails");
        }