/// <summary>
/// Learns a program that extracts a single region, using two examples that come from two different files.
/// Supplying examples from different files works the same way as supplying several examples from one file.
/// </summary>
private static void LearnRegionUsingMultipleFiles()
{
    var firstFile = RegionLearner.CreateStringRegion("Carrie Dodson 100");
    var secondFile = RegionLearner.CreateStringRegion("Leonard Robledo 75");

    // One example per file: the whole file is the input, the sliced surname is the expected output.
    var firstExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        firstFile, firstFile.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
    var secondExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        secondFile, secondFile.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
    var examples = new[] { firstExample, secondExample };

    RegionProgram learnedProgram = RegionLearner.Instance.Learn(examples);
    if (learnedProgram == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Apply the learned program to a third, unseen input; the expected extraction is "Cook".
    var testInput = RegionLearner.CreateStringRegion("Margaret Cook 320");
    StringRegion extracted = learnedProgram.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns an Extraction.Text region program, serializes it to a string, loads it back,
/// and runs the deserialized program on a new input.
/// </summary>
private static void SerializeProgram()
{
    var trainingInput = RegionLearner.CreateStringRegion("Carrie Dodson 100");
    var surnameExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        trainingInput, trainingInput.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
    var examples = new[] { surnameExample };

    RegionProgram learnedProgram = RegionLearner.Instance.Learn(examples);
    if (learnedProgram == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Round-trip the program through its serialized form before using it.
    string programText = learnedProgram.Serialize();
    RegionProgram restoredProgram = Loader.Instance.Region.Load(programText);

    // The restored program is expected to extract "Robledo" from this input.
    var testInput = RegionLearner.CreateStringRegion("Leonard Robledo 75");
    StringRegion extracted = restoredProgram.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns a program that extracts a single region from a file, then runs it on a second input.
/// </summary>
private static void LearnRegion()
{
    var trainingInput = RegionLearner.CreateStringRegion("Carrie Dodson 100");

    // A single example suffices here because one region is extracted from one file.
    // A position denotes the location between two characters of the file, counted from 0
    // (the beginning of the file); an example is given by a start position and an end position.
    var surnameExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        trainingInput, trainingInput.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
    var examples = new[] { surnameExample };

    RegionProgram learnedProgram = RegionLearner.Instance.Learn(examples);
    if (learnedProgram == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // The learned program is expected to extract "Robledo" from this input.
    var testInput = RegionLearner.CreateStringRegion("Leonard Robledo 75");
    StringRegion extracted = learnedProgram.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns the three top-ranked region programs for one example and prints what each of them
/// extracts from a few inputs. Demonstrates access to lower-ranked programs.
/// </summary>
private static void LearnTop3RegionPrograms()
{
    var input = RegionLearner.CreateStringRegion("Carrie Dodson 100");
    var numberExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        input, input.Slice(14, 17)); // "Carrie Dodson 100" => "100"
    var examples = new[] { numberExample };

    IEnumerable<StringRegion> unused = null; // placeholder removed below; see note
    IEnumerable<RegionProgram> rankedPrograms = RegionLearner.Instance.LearnTopK(examples, 3);

    var programNumber = 0;
    // Inputs every program is run on: the training input plus two unseen ones.
    StringRegion[] probeInputs =
    {
        input,
        RegionLearner.CreateStringRegion("Leonard Robledo NA"),
        RegionLearner.CreateStringRegion("Margaret Cook 320")
    };

    foreach (var program in rankedPrograms)
    {
        Console.WriteLine("Program {0}:", ++programNumber);
        foreach (var probe in probeInputs)
        {
            var extracted = program.Run(probe);

            // Print the extracted text, or the literal "null" when nothing was extracted.
            string text;
            if (extracted != null)
            {
                text = extracted.Value;
            }
            else
            {
                text = "null";
            }

            Console.WriteLine(text);
        }
    }
}
/// <summary>
/// Learns all region programs that satisfy the examples (advanced feature), takes the three
/// top-ranked ones from the resulting program set, and prints what each extracts from a few inputs.
/// Demonstrates access to the entire program set.
/// </summary>
private static void LearnAllRegionPrograms()
{
    var input = RegionLearner.CreateStringRegion("Carrie Dodson 100");
    var examples = new[]
    {
        new CorrespondingMemberEquals<StringRegion, StringRegion>(input, input.Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };

    ProgramSet allPrograms = RegionLearner.Instance.LearnAll(examples);
    // "Score" is the ranking feature.
    IEnumerable<ProgramNode> topKPrograms = allPrograms.TopK(RegionLearner.Instance.ScoreFeature, 3);

    var i = 0;
    StringRegion[] otherInputs =
    {
        input,
        RegionLearner.CreateStringRegion("Leonard Robledo NA"),
        RegionLearner.CreateStringRegion("Margaret Cook 320")
    };

    foreach (var prog in topKPrograms)
    {
        Console.WriteLine("Program {0}:", ++i);
        foreach (var str in otherInputs)
        {
            // Create a Microsoft.ProgramSynthesis input state for this region.
            State inputState = State.Create(Language.Grammar.InputSymbol, str);
            // Invoke the Microsoft.ProgramSynthesis program node on the input state.
            object r = prog.Invoke(inputState);

            // A direct cast (rather than an unchecked "as") makes an unexpected result type fail with
            // a descriptive InvalidCastException instead of an opaque NullReferenceException.
            Console.WriteLine(r != null ? ((StringRegion)r).Value : "null");
        }
    }
}
/// <summary>
/// Learns a program that extracts a region by referring to another region that appears after it
/// (a succeeding sibling region). Demonstrates how sibling referencing works.
/// </summary>
private static void LearnRegionReferencingSucceedingSibling()
{
    var document = RegionLearner.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

    // One region per line of the document.
    StringRegion[] records =
    {
        document.Slice(0, 17),
        document.Slice(18, 36),
        document.Slice(37, 54)
    };

    // The trailing number of each line; these are the reference (input) regions.
    StringRegion[] numbers =
    {
        document.Slice(14, 17),
        document.Slice(34, 36),
        document.Slice(51, 54)
    };

    // Suppose we want to extract the first name w.r.t. the number: the number is the input and
    // the first name is the expected output.
    var carrieExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        numbers[0], records[0].Slice(0, 6)); // "100" => "Carrie"
    var leonardExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        numbers[1], records[1].Slice(18, 25)); // "75" => "Leonard"
    var examples = new[] { carrieExample, leonardExample };

    RegionProgram learnedProgram = RegionLearner.Instance.Learn(examples);
    if (learnedProgram == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Run the program on every number, printing "null" where nothing is extracted.
    foreach (StringRegion number in numbers)
    {
        StringRegion extracted = learnedProgram.Run(number);
        string output = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", number, output);
    }
}
/// <summary>
/// Learns a program to extract a region while supplying extra reference regions that help the
/// learner pick the intended program. Demonstrates the use of additional references.
/// </summary>
private static void LearnRegionWithAdditionalReferences()
{
    var document = RegionLearner.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

    // One region per line of the document.
    StringRegion[] records =
    {
        document.Slice(0, 17),
        document.Slice(18, 36),
        document.Slice(37, 54)
    };

    // Suppose we want to extract "100", "75", and "***".
    var firstRecordExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        records[0], records[0].Slice(14, 17)); // "Carrie Dodson 100" => "100"
    var examples = new[] { firstRecordExample };

    // Additional references let Extraction.Text observe how learnt programs behave on unseen data.
    // Without them, Extraction.Text may learn a program that extracts the first number. With them,
    // it can tell that such a program is not applicable to the third record "Margaret Cook ***"
    // and promotes a more applicable program instead.
    var additionalReferences = new[] { records.Skip(1) };
    RegionProgram learnedProgram = RegionLearner.Instance.Learn(examples, additionalReferences);
    if (learnedProgram == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Run the program on every record, printing "null" where nothing is extracted.
    foreach (StringRegion record in records)
    {
        StringRegion extracted = learnedProgram.Run(record);
        string output = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", record, output);
    }
}
/// <summary>
/// Learns a program to extract a region from both a positive and a negative example.
/// Demonstrates the use of negative examples.
/// </summary>
private static void LearnRegionWithNegativeExamples()
{
    var document = RegionLearner.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

    // One region per line of the document.
    StringRegion[] records =
    {
        document.Slice(0, 17),
        document.Slice(18, 36),
        document.Slice(37, 54)
    };

    // Suppose we want to extract "100", "320".
    // Positive example: "Carrie Dodson 100" => "100".
    var positive = new CorrespondingMemberEquals<StringRegion, StringRegion>(records[0], records[0].Slice(14, 17));
    // Negative example: no extraction in "Leonard Robledo NA".
    var negative = new CorrespondingMemberDoesNotIntersect<StringRegion>(records[1], records[1]);
    var constraints = new Constraint<IEnumerable<StringRegion>, IEnumerable<StringRegion>>[]
    {
        positive,
        negative
    };

    // Extraction.Text will find a program whose output does not OVERLAP with any of the negative examples.
    RegionProgram learnedProgram = RegionLearner.Instance.Learn(constraints);
    if (learnedProgram == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Run the program on every record, printing "null" where nothing is extracted.
    foreach (StringRegion record in records)
    {
        StringRegion extracted = learnedProgram.Run(record);
        string output = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", record, output);
    }
}
/// <summary>
/// Learns a program from the first rows of a tab-separated benchmark file and prints the learned
/// program's output for every remaining row.
/// </summary>
/// <param name="grammar">Grammar whose input symbol is used to build states; also passed to the learner.</param>
/// <param name="benchmark">Benchmark name; rows are read from <c>benchmarks/{benchmark}.tsv</c>.</param>
/// <param name="exampleCount">Number of leading rows used as input/output examples.</param>
private static void TestTextTransformationBenchmark(Grammar grammar, string benchmark, int exampleCount = 2)
{
    string[] lines = File.ReadAllLines($"benchmarks/{benchmark}.tsv");

    // Each usable row is "<input>\t<output>". Rows with fewer than two non-empty columns (for example
    // blank lines) are skipped rather than failing with an IndexOutOfRangeException on parts[1].
    Tuple<string, string>[] data = lines
        .Select(l => l.Split(new[] { "\t" }, StringSplitOptions.RemoveEmptyEntries))
        .Where(parts => parts.Length >= 2)
        .Select(parts => Tuple.Create(parts[0], parts[1]))
        .ToArray();

    // The first exampleCount rows become the specification: input state => expected output region.
    var examples = data.Take(exampleCount)
                       .ToDictionary(
                           t => State.Create(grammar.InputSymbol, RegionLearner.CreateStringRegion(t.Item1)),
                           t => (object)RegionLearner.CreateStringRegion(t.Item2));
    var spec = new ExampleSpec(examples);

    ProgramNode program = Learn(grammar, spec,
                                new Substrings.RankingScore(grammar),
                                new Substrings.WitnessFunctions(grammar));

    // NOTE(review): Learn is defined elsewhere; guard against it yielding no program, in the same
    // way the region-learning samples check their learned program before running it.
    if (program == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Run the learned program on the rows that were not used as examples.
    foreach (Tuple<string, string> row in data.Skip(exampleCount))
    {
        State input = State.Create(grammar.InputSymbol, RegionLearner.CreateStringRegion(row.Item1));
        var output = program.Invoke(input);
        WriteColored(ConsoleColor.DarkCyan, $"{row.Item1} => {output}");
    }
}
/// <summary>
/// Reads a benchmark file, strips every curly brace from its content, and returns one region of the
/// stripped document for each match of ExampleRegex in the original content.
/// </summary>
/// <param name="filename">Path of the benchmark file to read.</param>
/// <param name="document">Receives the file content, with all '{' and '}' removed, as a region.</param>
/// <returns>The example regions, in the order their matches occur in the file.</returns>
public static List<StringRegion> LoadBenchmark(string filename, out StringRegion document)
{
    string rawText = File.ReadAllText(filename);
    Match[] markedExamples = ExampleRegex.Matches(rawText).Cast<Match>().ToArray();

    string strippedText = rawText.Replace("}", "").Replace("{", "");
    document = RegionLearner.CreateStringRegion(strippedText);

    var regions = new List<StringRegion>();

    // Match offsets refer to rawText, so they are moved left by the number of braces assumed to
    // precede each match in the raw text: 1 for the first match, then 2 more for each later one.
    // NOTE(review): this presumably relies on ExampleRegex matching only the text between a '{' and
    // a '}', and on the file containing no other braces - confirm against ExampleRegex.
    int bracesBefore = 1;
    foreach (Match example in markedExamples)
    {
        int start = example.Index - bracesBefore;
        int end = start + example.Length;
        regions.Add(document.Slice((uint)start, (uint)end));
        bracesBefore += 2;
    }

    return regions;
}
/// <summary>
/// Loads the Substrings grammar, parses and runs a hand-written program on a sample string, learns a
/// program from a single input/output example, and finally runs the "emails" benchmark.
/// Does nothing if the grammar cannot be loaded.
/// </summary>
private static void LoadAndTestSubstrings()
{
    var grammar = LoadGrammar("ProseSample.Substrings.grammar");
    if (grammar == null)
    {
        return;
    }

    // Parse a program from its human-readable form and print its result on the sample input.
    ProgramNode parsedProgram = ProgramNode.Parse(
        @"SubStr(v, PosPair(AbsPos(v, -4), AbsPos(v, -1)))",
        grammar,
        ASTSerializationFormat.HumanReadable);
    StringRegion sample = RegionLearner.CreateStringRegion("Microsoft PROSE SDK");
    State sampleState = State.Create(grammar.InputSymbol, sample);
    var parsedResult = parsedProgram.Invoke(sampleState);
    Console.WriteLine(parsedResult);

    // Learn from one example: the sample maps to its own slice [End - 3, End).
    var sliceStart = sample.End - 3;
    StringRegion expectedOutput = sample.Slice(sliceStart, sample.End);
    Spec spec = ShouldConvert.Given(grammar).To(sample, expectedOutput);
    Learn(grammar, spec, new Substrings.RankingScore(grammar), new Substrings.WitnessFunctions(grammar));

    TestTextTransformationBenchmark(grammar, "emails");
}