/// <summary>
/// Extracts the part of <paramref name="v"/> delimited by a pair of positions.
/// </summary>
/// <param name="v">The region to slice.</param>
/// <param name="posPair">Start (Item1) and end (Item2) positions; either one may be null.</param>
/// <returns>
/// The slice between the two positions, or null when a position is missing or
/// lies outside the range [v.Start, v.End].
/// </returns>
public static StringRegion SubStr(StringRegion v, Tuple<uint?, uint?> posPair)
{
    uint? from = posPair.Item1;
    uint? to = posPair.Item2;

    if (!from.HasValue || !to.HasValue)
    {
        return null;
    }

    // Both positions must fall inside the region being sliced.
    bool fromInside = from.Value >= v.Start && from.Value <= v.End;
    bool toInside = to.Value >= v.Start && to.Value <= v.End;
    if (!fromInside || !toInside)
    {
        return null;
    }

    return v.Slice(from.Value, to.Value);
}
/// <summary>
/// Learns all region programs that satisfy the examples (advanced feature).
/// Demonstrates access to the entire program set: the three best-scoring programs
/// are invoked on a few inputs and their results are printed.
/// </summary>
private static void LearnAllRegionPrograms()
{
    var input = StringRegion.Create("Carrie Dodson 100");
    var positiveExamples = new[]
    {
        new ExtractionExample<StringRegion>(input, input.Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    ProgramSet allPrograms = Learner.Instance.LearnAllRegion(positiveExamples, negativeExamples);
    IEnumerable<ProgramNode> topKPrograms = allPrograms.TopK("Score", 3); // "Score" is the ranking feature

    var i = 0;
    StringRegion[] otherInputs =
    {
        input,
        StringRegion.Create("Leonard Robledo NA"),
        StringRegion.Create("Margaret Cook 320")
    };

    foreach (var prog in topKPrograms)
    {
        Console.WriteLine("Program {0}:", ++i);
        foreach (var str in otherInputs)
        {
            // Create Microsoft.ProgramSynthesis input state
            State inputState = State.Create(Language.Grammar.InputSymbol, str);

            // Invoke Microsoft.ProgramSynthesis program node on the input state
            object r = prog.Invoke(inputState);

            // Check the cast result instead of dereferencing it blindly: a non-null result
            // that is not a StringRegion would otherwise raise a NullReferenceException.
            var region = r as StringRegion;
            Console.WriteLine(region != null ? region.Value : "null");
        }
    }
}
/// <summary>
/// Returns the k-th position in <paramref name="v"/> selected by a pair of regular expressions.
/// </summary>
/// <param name="v">The region to search.</param>
/// <param name="rr">
/// Item1 is run over the region; a match is kept only when Item2 matches at that match's right end.
/// </param>
/// <param name="k">
/// A positive value selects the k-th kept match counting from the first (1-based);
/// any other value is added to the match count, so -1 selects the last kept match.
/// </param>
/// <returns>The right end of the selected match, or null when the index is out of range.</returns>
public static uint? RegPos(StringRegion v, Tuple<RegularExpression, RegularExpression> rr, int k)
{
    var candidates = new List<PositionMatch>();
    foreach (PositionMatch match in rr.Item1.Run(v))
    {
        if (rr.Item2.MatchesAt(v, match.Right))
        {
            candidates.Add(match);
        }
    }

    int index;
    if (k > 0)
    {
        index = k - 1;
    }
    else
    {
        index = candidates.Count + k;
    }

    if (index < 0 || index >= candidates.Count)
    {
        return null;
    }

    return (uint?)candidates[index].Right;
}
/// <summary>
/// Learns a program that extracts a single region relative to another region appearing
/// after it (i.e., a succeeding sibling region).
/// Demonstrates how sibling referencing works.
/// </summary>
private static void LearnRegionReferencingSucceedingSibling()
{
    var input = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");
    var records = new StringRegion[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };
    var numbers = new StringRegion[] { input.Slice(14, 17), input.Slice(34, 36), input.Slice(51, 54) };

    // The number is the reference; the first name of the same record is the expected output.
    var positiveExamples = new[]
    {
        new ExtractionExample<StringRegion>(numbers[0], records[0].Slice(0, 6)),   // "100" => "Carrie"
        new ExtractionExample<StringRegion>(numbers[1], records[1].Slice(18, 25))  // "75" => "Leonard"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in learned.Run(numbers))
    {
        string output;
        if (r.Output != null)
        {
            output = r.Output.Value;
        }
        else
        {
            output = "null";
        }

        Console.WriteLine("\"{0}\" => \"{1}\"", r.Reference, output);
    }
}
/// <summary>
/// Learns the three top-ranked region programs.
/// Demonstrates access to lower-ranked programs by running each of them on several inputs.
/// </summary>
private static void LearnTop3RegionPrograms()
{
    var session = new RegionSession();
    StringRegion input = RegionSession.CreateStringRegion("Carrie Dodson 100");

    // "Carrie Dodson 100" => "100"
    var example = new RegionExample(input, input.Slice(14, 17));
    session.AddConstraints(example);

    IEnumerable<RegionProgram> topKPrograms = session.LearnTopK(3);

    var otherInputs = new StringRegion[]
    {
        input,
        RegionSession.CreateStringRegion("Leonard Robledo NA"),
        RegionSession.CreateStringRegion("Margaret Cook 320")
    };

    int rank = 0;
    foreach (RegionProgram candidate in topKPrograms)
    {
        rank++;
        Console.WriteLine("Program {0}:", rank);

        foreach (StringRegion str in otherInputs)
        {
            var r = candidate.Run(str);
            if (r != null)
            {
                Console.WriteLine(r.Value);
            }
            else
            {
                Console.WriteLine("null");
            }
        }
    }
}
/// <summary>
/// Learns a program to extract a region from positive examples plus regular-expression constraints.
/// Demonstrates that other constraints (regexes) can be given to Extraction.Text.
/// This is an advanced feature.
/// </summary>
private static void LearnRegionWithRegexes()
{
    var input = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");
    var records = new StringRegion[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };

    // Goal: extract the number out of a record.
    var positiveExamples = new[]
    {
        new ExtractionExample<StringRegion>(records[0], records[0].Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    // Regex constraints handed to the learner; no look-ahead constraint is supplied.
    Regex lookBehindRegex = new Regex("\\s");
    Regex lookAheadRegex = null;
    Regex matchingRegex = new Regex("\\d+");

    // NOTE(review): the third and fourth arguments (null, 1) are presumably the additional
    // references and the number of programs requested - confirm against LearnTopKRegion.
    IEnumerable<Program> candidates = Learner.Instance.LearnTopKRegion(
        positiveExamples, negativeExamples, null, 1, lookBehindRegex, matchingRegex, lookAheadRegex);

    Program best = candidates.FirstOrDefault();
    if (best == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in best.Run(records))
    {
        string output;
        if (r.Output != null)
        {
            output = r.Output.Value;
        }
        else
        {
            output = "null";
        }

        Console.WriteLine("\"{0}\" => \"{1}\"", r.Reference, output);
    }
}
/// <summary>
/// Learns an Extraction.Text program, serializes it, loads it back and runs the
/// deserialized program on a new input.
/// </summary>
private static void SerializeProgram()
{
    var session = new RegionSession();
    StringRegion input = RegionSession.CreateStringRegion("Carrie Dodson 100");

    // "Carrie Dodson 100" => "Dodson"
    var example = new RegionExample(input, input.Slice(7, 13));
    session.AddConstraints(example);

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Round-trip the program through its serialized form.
    string serialized = learned.Serialize();
    RegionProgram restored = Loader.Instance.Region.Load(serialized);

    StringRegion testInput = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
    StringRegion extracted = restored.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns all region programs that satisfy the examples (advanced feature).
/// Demonstrates access to the entire program set: the three best programs according to
/// the learner's score feature are wrapped as region programs and run on several inputs.
/// </summary>
private static void LearnAllRegionPrograms()
{
    var session = new RegionSession();
    StringRegion input = RegionSession.CreateStringRegion("Carrie Dodson 100");

    // "Carrie Dodson 100" => "100"
    var example = new RegionExample(input, input.Slice(14, 17));
    session.AddConstraints(example);

    ProgramSet allPrograms = session.LearnAll().ProgramSet;
    IEnumerable<ProgramNode> best = allPrograms.TopK(RegionLearner.Instance.ScoreFeature, 3);

    var otherInputs = new StringRegion[]
    {
        input,
        RegionSession.CreateStringRegion("Leonard Robledo NA"),
        RegionSession.CreateStringRegion("Margaret Cook 320")
    };

    int rank = 0;
    foreach (ProgramNode node in best)
    {
        rank++;
        Console.WriteLine("Program {0}:", rank);

        // Wrap the raw program node so that it can be run directly on a region.
        var program = new RegionProgram(node, ReferenceKind.Parent);
        foreach (StringRegion str in otherInputs)
        {
            StringRegion r = program.Run(str);
            if (r == null)
            {
                Console.WriteLine("null");
            }
            else
            {
                Console.WriteLine(r.Value);
            }
        }
    }
}
/// <summary>
/// Learns a region-extraction program from labeled examples and wraps it in a
/// <c>StructureExtractor</c>.
/// </summary>
/// <param name="examples">
/// Labeled examples: Item1 is the input text, Item2 and Item3 are the start and end
/// positions passed to <c>Slice</c> to mark the field to extract. Must not be null or empty.
/// </param>
/// <param name="noneLabeledExamples">
/// Optional unlabeled input texts that are added to the session as extra inputs.
/// </param>
/// <returns>An extractor built around the learned program.</returns>
/// <exception cref="AggregateException"><paramref name="examples"/> is null or empty.</exception>
/// <exception cref="InvalidOperationException">No program could be learned.</exception>
public static async Task<StructureExtractor> TrainExtractorAsync(
    IEnumerable<Tuple<string, uint, uint>> examples,
    IEnumerable<string> noneLabeledExamples = null)
{
    // Materialize once: the sequence is checked for emptiness and then iterated, and a
    // lazy or one-shot enumerable must not be evaluated twice.
    List<Tuple<string, uint, uint>> labeled = examples?.ToList();
    if (labeled == null || labeled.Count == 0)
    {
        // AggregateException is kept (rather than ArgumentException) so that existing
        // callers catching it keep working.
        throw new AggregateException($"{nameof(examples)} must not be null or empty");
    }

    var regionSession = new RegionSession();
    foreach (var example in labeled)
    {
        var stringRegion = new StringRegion(example.Item1, Semantics.Tokens);
        var field = stringRegion.Slice(example.Item2, example.Item3);
        regionSession.AddConstraints(new RegionExample(stringRegion, field));
    }

    // Same single-enumeration treatment for the optional unlabeled inputs.
    List<string> unlabeled = noneLabeledExamples?.ToList();
    if (unlabeled != null && unlabeled.Count > 0)
    {
        regionSession.AddInputs(unlabeled.Select(e => new StringRegion(e, Semantics.Tokens)));
    }

    var program = await regionSession.LearnAsync();
    if (null == program)
    {
        // More specific than a bare Exception, and still caught by `catch (Exception)`.
        throw new InvalidOperationException("No program found");
    }

    return new StructureExtractor(program);
}
/// <summary>
/// Learns a program to extract a single region from two examples that come from two different files.
/// Learning from different files works like learning from multiple examples in a single file.
/// Demonstrates how to learn with examples from different files.
/// </summary>
private static void LearnRegionUsingMultipleFiles()
{
    var session = new RegionSession();
    StringRegion input1 = RegionSession.CreateStringRegion("Carrie Dodson 100");
    StringRegion input2 = RegionSession.CreateStringRegion("Leonard Robledo 75");

    var firstExample = new RegionExample(input1, input1.Slice(7, 13));  // "Carrie Dodson 100" => "Dodson"
    var secondExample = new RegionExample(input2, input2.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
    session.AddConstraints(firstExample, secondExample);

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    StringRegion testInput = RegionSession.CreateStringRegion("Margaret Cook 320"); // expect "Cook"
    StringRegion extracted = learned.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns a program to extract a region while supplying other references that help the
/// learner find the intended program.
/// Demonstrates the use of additional references.
/// </summary>
private static void LearnRegionWithAdditionalReferences()
{
    var input = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");
    var records = new StringRegion[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };

    // Goal: extract "100", "75", and "***".
    var positiveExamples = new[]
    {
        new ExtractionExample<StringRegion>(records[0], records[0].Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    // Additional references let Extraction.Text observe how learnt programs behave on unseen data.
    // Without them it may learn a program that extracts the first number. With the other records
    // present, it can see that such a program does not apply to the third record
    // "Margaret Cook ***" and promote a more applicable program instead.
    IEnumerable<StringRegion> additionalReferences = records.Skip(1);
    Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples, additionalReferences);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in learned.Run(records))
    {
        string output;
        if (r.Output != null)
        {
            output = r.Output.Value;
        }
        else
        {
            output = "null";
        }

        Console.WriteLine("\"{0}\" => \"{1}\"", r.Reference, output);
    }
}
/// <summary>
/// Learns a program that extracts a single region from a file and runs it on a new input.
/// </summary>
private static void LearnRegion()
{
    var session = new RegionSession();
    StringRegion input = RegionSession.CreateStringRegion("Carrie Dodson 100");

    // A single example suffices because one region is extracted from one file.
    // A position denotes the location between two characters of the file, starting at 0
    // (the beginning of the file); an example is a pair of start and end positions.
    var example = new RegionExample(input, input.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
    session.AddConstraints(example);

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    StringRegion testInput = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
    StringRegion extracted = learned.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns a program that extracts a single region from a file and runs it on a new input.
/// </summary>
private static void LearnRegion()
{
    var input = StringRegion.Create("Carrie Dodson 100");

    // A single example suffices because one region is extracted from one file.
    // A position denotes the location between two characters of the file, starting at 0
    // (the beginning of the file); an example is a pair of start and end positions.
    var positiveExamples = new[]
    {
        new ExtractionExample<StringRegion>(input, input.Slice(7, 13)) // "Carrie Dodson 100" => "Dodson"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    var testInput = StringRegion.Create("Leonard Robledo 75"); // expect "Robledo"

    // A region program produces a sequence; only its first element is of interest here.
    StringRegion extracted = learned.Run(testInput).FirstOrDefault();
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns a program that extracts a single region from a file and runs it on a new input.
/// </summary>
private static void LearnRegion()
{
    var input = RegionLearner.CreateStringRegion("Carrie Dodson 100");

    // A single example suffices because one region is extracted from one file.
    // A position denotes the location between two characters of the file, starting at 0
    // (the beginning of the file); an example is a pair of start and end positions.
    var example = new CorrespondingMemberEquals<StringRegion, StringRegion>(
        input, input.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
    var examples = new[] { example };

    RegionProgram learned = RegionLearner.Instance.Learn(examples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    var testInput = RegionLearner.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
    StringRegion extracted = learned.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns a program to extract a single region from two examples that come from two different files.
/// Learning from different files works like learning from multiple examples in a single file.
/// Demonstrates how to learn with examples from different files.
/// </summary>
private static void LearnRegionUsingMultipleFiles()
{
    var input1 = RegionLearner.CreateStringRegion("Carrie Dodson 100");
    var input2 = RegionLearner.CreateStringRegion("Leonard Robledo 75");

    // "Carrie Dodson 100" => "Dodson"
    var firstExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(input1, input1.Slice(7, 13));
    // "Leonard Robledo 75" => "Robledo"
    var secondExample = new CorrespondingMemberEquals<StringRegion, StringRegion>(input2, input2.Slice(8, 15));
    var examples = new[] { firstExample, secondExample };

    RegionProgram learned = RegionLearner.Instance.Learn(examples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    var testInput = RegionLearner.CreateStringRegion("Margaret Cook 320"); // expect "Cook"
    StringRegion extracted = learned.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns an Extraction.Text program, serializes it, loads it back and runs the
/// deserialized program on a new input.
/// </summary>
private static void SerializeProgram()
{
    var input = RegionLearner.CreateStringRegion("Carrie Dodson 100");

    // "Carrie Dodson 100" => "Dodson"
    var example = new CorrespondingMemberEquals<StringRegion, StringRegion>(input, input.Slice(7, 13));
    var examples = new[] { example };

    RegionProgram learned = RegionLearner.Instance.Learn(examples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Round-trip the program through its serialized form.
    string serialized = learned.Serialize();
    RegionProgram restored = Loader.Instance.Region.Load(serialized);

    var testInput = RegionLearner.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
    StringRegion extracted = restored.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns a program to extract a region while supplying other references that help the
/// learner find the intended program.
/// Demonstrates the use of additional references.
/// </summary>
private static void LearnRegionWithAdditionalReferences()
{
    var session = new RegionSession();
    StringRegion input = RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");
    var records = new StringRegion[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };

    // Goal: extract "100", "75", and "***".
    var example = new RegionExample(records[0], records[0].Slice(14, 17)); // "Carrie Dodson 100" => "100"
    session.AddConstraints(example);

    // Additional references let Extraction.Text observe how learnt programs behave on unseen data.
    // Without them it may learn a program that extracts the first number. With the other records
    // present, it can see that such a program does not apply to the third record
    // "Margaret Cook ***" and promote a more applicable program instead.
    IEnumerable<StringRegion> remainingRecords = records.Skip(1);
    session.AddInputs(remainingRecords);

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (StringRegion record in records)
    {
        StringRegion extracted = learned.Run(record);
        string output = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", record, output);
    }
}
/// <summary>
/// Learns a program that extracts a single region relative to another region appearing
/// after it (i.e., a succeeding sibling region).
/// Demonstrates how sibling referencing works.
/// </summary>
private static void LearnRegionReferencingSucceedingSibling()
{
    var session = new RegionSession();
    StringRegion input = RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");
    var records = new StringRegion[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };
    var numbers = new StringRegion[] { input.Slice(14, 17), input.Slice(34, 36), input.Slice(51, 54) };

    // The number is the reference; the first name of the same record is the expected output.
    var firstExample = new RegionExample(numbers[0], records[0].Slice(0, 6));    // "100" => "Carrie"
    var secondExample = new RegionExample(numbers[1], records[1].Slice(18, 25)); // "75" => "Leonard"
    session.AddConstraints(firstExample, secondExample);

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (StringRegion number in numbers)
    {
        StringRegion extracted = learned.Run(number);
        string output = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", number, output);
    }
}
/// <summary>
/// Learns a program to extract a sequence of regions from a file, using the whole input
/// as the reference of every example.
/// </summary>
private static void LearnSequence()
{
    // Learning a sequence should use at least 2 examples, because generalizing a sequence from
    // a single element is hard. Positive examples must also be given contiguously
    // (i.e., no example may be skipped).
    var input = StringRegion.Create("United States\nCarrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320\n" +
                                    "Canada\nConcetta Beck 350\nNicholas Sayers 90\nFrancis Terrill 2430\n" +
                                    "Great Britain\nNettie Pope 50\nMack Beeson 1070");

    // Goal: extract all first names from the input string.
    var firstExample = new ExtractionExample<StringRegion>(input, input.Slice(14, 20));  // input => "Carrie"
    var secondExample = new ExtractionExample<StringRegion>(input, input.Slice(32, 39)); // input => "Leonard"
    var positiveExamples = new[] { firstExample, secondExample };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnSequence(positiveExamples, negativeExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in learned.Run(input))
    {
        string line;
        if (r != null)
        {
            line = r.Value;
        }
        else
        {
            line = "null";
        }

        Console.WriteLine(line);
    }
}
/// <summary>
/// Learns a program to extract a region from both positive and negative examples.
/// Demonstrates the use of negative examples.
/// </summary>
private static void LearnRegionWithNegativeExamples()
{
    var session = new RegionSession();
    StringRegion input = RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");
    var records = new StringRegion[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };

    // Goal: extract "100" and "320".
    var positive = new RegionExample(records[0], records[0].Slice(14, 17)); // "Carrie Dodson 100" => "100"
    var negative = new RegionNegativeExample(records[1], records[1]);       // no extraction in "Leonard Robledo NA"
    session.AddConstraints(positive, negative);

    // Extraction.Text will find a program whose output does not OVERLAP with any of the negative examples.
    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (StringRegion record in records)
    {
        StringRegion extracted = learned.Run(record);
        string output = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", record, output);
    }
}
/// <summary>
/// Learns a program to extract a sequence of regions using another region (here, the
/// country name that precedes them) as the reference.
/// </summary>
private static void LearnSequenceReferencingSibling()
{
    var input = StringRegion.Create("United States\nCarrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320\n" +
                                    "Canada\nConcetta Beck 350\nNicholas Sayers 90\nFrancis Terrill 2430\n" +
                                    "Great Britain\nNettie Pope 50\nMack Beeson 1070");
    var countries = new StringRegion[] { input.Slice(0, 13), input.Slice(69, 75), input.Slice(134, 147) };

    // Goal: extract all first names from the input string, relative to their country.
    var firstExample = new ExtractionExample<StringRegion>(countries[0], input.Slice(14, 20));  // "United States" => "Carrie"
    var secondExample = new ExtractionExample<StringRegion>(countries[0], input.Slice(32, 39)); // "United States" => "Leonard"
    var positiveExamples = new[] { firstExample, secondExample };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnSequence(positiveExamples, negativeExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in learned.Run(countries))
    {
        string output;
        if (r.Output != null)
        {
            output = r.Output.Value;
        }
        else
        {
            output = "null";
        }

        Console.WriteLine("\"{0}\" => \"{1}\"", r.Reference, output);
    }
}
/// <summary>
/// Learns a program to extract a region from both positive and negative examples.
/// Demonstrates the use of negative examples.
/// </summary>
private static void LearnRegionWithNegativeExamples()
{
    var input = StringRegion.Create("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");
    var records = new StringRegion[] { input.Slice(0, 17), input.Slice(18, 36), input.Slice(37, 54) };

    // Goal: extract "100" and "320".
    var positiveExamples = new[]
    {
        new ExtractionExample<StringRegion>(records[0], records[0].Slice(14, 17)) // "Carrie Dodson 100" => "100"
    };
    var negativeExamples = new[]
    {
        new ExtractionExample<StringRegion>(records[1], records[1]) // no extraction in "Leonard Robledo NA"
    };

    // Extraction.Text will find a program whose output does not OVERLAP with any of the negative examples.
    Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (var r in learned.Run(records))
    {
        string output;
        if (r.Output != null)
        {
            output = r.Output.Value;
        }
        else
        {
            output = "null";
        }

        Console.WriteLine("\"{0}\" => \"{1}\"", r.Reference, output);
    }
}
/// <summary>
/// Learns a program to extract a single region from two examples that come from two different files.
/// Learning from different files works like learning from multiple examples in a single file.
/// Demonstrates how to learn with examples from different files.
/// </summary>
private static void LearnRegionUsingMultipleFiles()
{
    var input1 = StringRegion.Create("Carrie Dodson 100");
    var input2 = StringRegion.Create("Leonard Robledo 75");

    var firstExample = new ExtractionExample<StringRegion>(input1, input1.Slice(7, 13));  // "Carrie Dodson 100" => "Dodson"
    var secondExample = new ExtractionExample<StringRegion>(input2, input2.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
    var positiveExamples = new[] { firstExample, secondExample };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    var testInput = StringRegion.Create("Margaret Cook 320"); // expect "Cook"

    // A region program produces a sequence; only its first element is of interest here.
    StringRegion extracted = learned.Run(testInput).FirstOrDefault();
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Learns an Extraction.Text program, serializes it, loads it back and runs the
/// deserialized program on a new input.
/// </summary>
private static void SerializeProgram()
{
    var input = StringRegion.Create("Carrie Dodson 100");
    var positiveExamples = new[]
    {
        new ExtractionExample<StringRegion>(input, input.Slice(7, 13)) // "Carrie Dodson 100" => "Dodson"
    };
    var negativeExamples = Enumerable.Empty<ExtractionExample<StringRegion>>();

    Program learned = Learner.Instance.LearnRegion(positiveExamples, negativeExamples);
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Round-trip the program through its serialized form.
    string serialized = learned.Serialize();
    Program restored = Program.Load(serialized);

    var testInput = StringRegion.Create("Leonard Robledo 75"); // expect "Robledo"

    // A region program produces a sequence; only its first element is of interest here.
    StringRegion extracted = restored.Run(testInput).FirstOrDefault();
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
        return;
    }

    Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
}
/// <summary>
/// Converts a relative index <paramref name="k"/> into an absolute position inside <paramref name="v"/>.
/// </summary>
/// <param name="v">The region the position refers to.</param>
/// <param name="k">
/// A positive value counts from the start of the region (1 maps to v.Start);
/// a negative value counts from the end (-1 maps to v.End).
/// </param>
/// <returns>
/// The absolute position, or null when <paramref name="k"/> is 0 or its magnitude
/// exceeds v.Length + 1.
/// </returns>
public static uint? AbsPos(StringRegion v, int k)
{
    // Index 0 belongs to neither counting direction; the formula below would map it to
    // v.End + 1, which lies outside the region.
    if (k == 0)
    {
        return null;
    }

    // Widen before taking the absolute value: Math.Abs(int.MinValue) throws OverflowException.
    long magnitude = Math.Abs((long)k);
    if (magnitude > v.Length + 1)
    {
        return null;
    }

    return (uint)(k > 0 ? (v.Start + k - 1) : (v.End + k + 1));
}
/// <summary>
/// Extracts the part of <paramref name="v"/> delimited by a pair of positions.
/// </summary>
/// <param name="v">The region to slice.</param>
/// <param name="posPair">Start (Item1) and end (Item2) positions; either one may be null.</param>
/// <returns>
/// The slice between the two positions, or null when a position is missing or
/// lies outside the range [v.Start, v.End].
/// </returns>
public static StringRegion SubStr(StringRegion v, Tuple<uint?, uint?> posPair)
{
    uint? from = posPair.Item1;
    uint? to = posPair.Item2;

    if (!from.HasValue || !to.HasValue)
    {
        return null;
    }

    // Both positions must fall inside the region being sliced.
    bool fromInside = from.Value >= v.Start && from.Value <= v.End;
    bool toInside = to.Value >= v.Start && to.Value <= v.End;
    if (!fromInside || !toInside)
    {
        return null;
    }

    return v.Slice(from.Value, to.Value);
}
/// <summary>
/// Runs the program on a given input string.
/// </summary>
/// <param name="input">The input text.</param>
/// <returns>The text of the resulting region, or null when the program yields no region.</returns>
public string Run(string input)
{
    Grammar grammar = FlashFillGrammar.Grammar;
    State inputState = new FlashFillInput(input).AsState();

    // Same as above without using the FlashFillInput class:
    StringRegion[] regions = { StringRegion.Create(input) };
    inputState = State.Create(grammar.InputSymbol, regions);

    var result = (StringRegion)ProgramNode.Invoke(inputState);
    if (result == null)
    {
        return null;
    }

    return result.Value;
}
/// <summary>
/// Extracts the part of <paramref name="v"/> delimited by a pair of positions.
/// </summary>
/// <param name="v">The region to slice.</param>
/// <param name="posPair">
/// Optional pair of start (Item1) and end (Item2) positions; the pair itself and each
/// of its items may be null.
/// </param>
/// <returns>
/// The slice between the two positions, or null when the pair or one of its positions is
/// missing, or when a position lies outside the range [v.Start, v.End].
/// </returns>
public static StringRegion SubStr(StringRegion v, Record<uint?, uint?>? posPair)
{
    // A missing pair is treated like a missing position; reading .Value on an empty
    // nullable would otherwise throw InvalidOperationException.
    if (!posPair.HasValue)
    {
        return null;
    }

    uint? start = posPair.Value.Item1;
    uint? end = posPair.Value.Item2;
    if (start == null || end == null ||
        start < v.Start || start > v.End ||
        end < v.Start || end > v.End)
    {
        return null;
    }

    return v.Slice((uint)start, (uint)end);
}
/// <summary>
/// Returns the <c>StringRegion</c> for the file at <paramref name="path"/>, reading the file
/// only the first time a given path is requested.
/// </summary>
/// <param name="path">Path of the file to load; also used as the cache key.</param>
/// <returns>The cached region for the path, or a newly created and cached one.</returns>
public static StringRegion RegionFromFile(string path)
{
    // One lookup instead of ContainsKey followed by the indexer.
    StringRegion cached;
    if (fileCache.TryGetValue(path, out cached))
    {
        return cached;
    }

    // The file text is passed through Util.NormalizeBuildLogString before the region is created.
    string text = Util.NormalizeBuildLogString(File.ReadAllText(path));
    StringRegion region = RegionSession.CreateStringRegion(text);
    fileCache[path] = region;
    return region;
}
/// <summary>
/// Analyzes the file at <paramref name="inputPath"/> using a program that is newly learned
/// from the session's current example set.
/// </summary>
/// <param name="inputPath">Path of the file to analyze.</param>
/// <param name="result">Result object that is filled in and returned.</param>
/// <param name="verbose">Passed to the <c>ConsolePrinter</c> used for progress output.</param>
/// <returns>The extraction result.</returns>
public override AnalysisResult<string> Analyze(string inputPath, AnalysisResult<string> result, bool verbose)
{
    var printer = new ConsolePrinter(verbose);

    var inputRegion = AnalysisUtil.RegionFromFile(inputPath);
    result.TestInputLineCount = AnalysisUtil.BuildlogLineCount(inputPath);

    // Learning phase (timed).
    printer.WriteLine("Starting to learn program");
    Stopwatch learnTimer = Stopwatch.StartNew();
    RegionProgram learned = session.Learn();
    learnTimer.Stop();
    printer.WriteLine("Learning took " + learnTimer.Elapsed);

    result.LearningDuration = learnTimer.Elapsed;
    result.TestInputPath = inputPath;

    if (learned == null)
    {
        printer.WriteLine("no program found");
        result.Successful = false;
        result.Output = "no program found";
        return result;
    }

    printer.WriteLine("Learned Program:");
    printer.WriteLine(learned);
    printer.WriteLine("");
    result.LearnedProgram = learned.ToString();

    // Application phase (timed).
    printer.WriteLine("Starting to apply program");
    Stopwatch applyTimer = Stopwatch.StartNew();
    StringRegion output = learned.Run(inputRegion);
    applyTimer.Stop();
    printer.WriteLine("Applying took " + applyTimer.Elapsed);
    result.ApplicationDuration = applyTimer.Elapsed;

    if (output != null)
    {
        result.Successful = true;
        result.Output = output.Value;
    }
    else
    {
        result.Successful = false;
        result.Output = "no extraction found for this input";
    }

    return result;
}
/// <summary>
/// Splits <paramref name="document"/> into the regions lying between consecutive matches
/// of the line-separator token.
/// </summary>
/// <param name="document">The region to split.</param>
/// <returns>
/// One region per pair of consecutive separator matches whose second match has non-zero
/// length, or a single-element array holding the document when the document cache has no
/// match positions for the separator token.
/// </returns>
public static IEnumerable<StringRegion> SplitLines(StringRegion document)
{
    Token separator = StringLearningCache.GetStaticTokenByName(Token.LineSeparatorName);

    CachedList breaks;
    bool found = document.Cache.TryGetMatchPositionsFor(separator, out breaks);
    if (!found)
    {
        return new[] { document };
    }

    var lines = new List<StringRegion>();
    for (int i = 0; i < breaks.Count - 1; i++)
    {
        // A pair is skipped when its closing separator match is empty.
        // NOTE(review): presumably such matches mark a document boundary - confirm.
        if (breaks[i + 1].Length != 0)
        {
            // The line spans from the right end of one separator to the start of the next.
            lines.Add(document.Slice(breaks[i].Right, breaks[i + 1].Position));
        }
    }

    return lines;
}
/// <summary>
/// Loads a benchmark file: the text with all '{' and '}' characters removed becomes the
/// document, and every match of <c>ExampleRegex</c> in the raw text becomes an example region.
/// </summary>
/// <param name="filename">Path of the benchmark file.</param>
/// <param name="document">Receives the region created from the brace-free text.</param>
/// <returns>The example regions, in match order, as slices of <paramref name="document"/>.</returns>
public static List<StringRegion> LoadBenchmark(string filename, out StringRegion document)
{
    string raw = File.ReadAllText(filename);
    Match[] marked = ExampleRegex.Matches(raw).Cast<Match>().ToArray();

    string stripped = raw.Replace("}", "").Replace("{", "");
    document = RegionLearner.CreateStringRegion(stripped);

    var regions = new List<StringRegion>();

    // Offset that maps a match index in the raw text to the brace-free document: it starts
    // at -1 and drops by 2 after every example.
    // NOTE(review): presumably each example is wrapped in a single "{...}" pair - confirm
    // against ExampleRegex.
    int shift = -1;
    foreach (Match match in marked)
    {
        int start = match.Index + shift;
        int end = start + match.Length;
        regions.Add(document.Slice((uint)start, (uint)end));
        shift -= 2;
    }

    return regions;
}
/// <summary>
/// Converts a relative index <paramref name="k"/> into an absolute position inside <paramref name="v"/>.
/// </summary>
/// <param name="v">The region the position refers to.</param>
/// <param name="k">
/// A positive value counts from the start of the region (1 maps to v.Start);
/// a negative value counts from the end (-1 maps to v.End).
/// </param>
/// <returns>
/// The absolute position, or null when <paramref name="k"/> is 0 or its magnitude
/// exceeds v.Length + 1.
/// </returns>
public static uint? AbsPos(StringRegion v, int k)
{
    // Index 0 belongs to neither counting direction; the formula below would map it to
    // v.End + 1, which lies outside the region.
    if (k == 0)
    {
        return null;
    }

    // Widen before taking the absolute value: Math.Abs(int.MinValue) throws OverflowException.
    long magnitude = Math.Abs((long)k);
    if (magnitude > v.Length + 1)
    {
        return null;
    }

    return (uint)(k > 0 ? (v.Start + k - 1) : (v.End + k + 1));
}
/// <summary>
/// Loads a benchmark file: the text with all '{' and '}' characters removed becomes the
/// document, and every match of <c>ExampleRegex</c> in the raw text becomes an example region.
/// </summary>
/// <param name="filename">Path of the benchmark file.</param>
/// <param name="document">Receives the region created from the brace-free text.</param>
/// <returns>The example regions, in match order, as slices of <paramref name="document"/>.</returns>
public static List<StringRegion> LoadBenchmark(string filename, out StringRegion document)
{
    string raw = File.ReadAllText(filename);
    Match[] marked = ExampleRegex.Matches(raw).Cast<Match>().ToArray();

    string stripped = raw.Replace("}", "").Replace("{", "");
    document = StringRegion.Create(stripped);

    var regions = new List<StringRegion>();

    // Offset that maps a match index in the raw text to the brace-free document: it starts
    // at -1 and drops by 2 after every example.
    // NOTE(review): presumably each example is wrapped in a single "{...}" pair - confirm
    // against ExampleRegex.
    int shift = -1;
    foreach (Match match in marked)
    {
        int start = match.Index + shift;
        int end = start + match.Length;
        regions.Add(document.Slice((uint)start, (uint)end));
        shift -= 2;
    }

    return regions;
}
/// <summary>
/// Returns the k-th position in <paramref name="v"/> selected by a pair of regular expressions.
/// </summary>
/// <param name="v">The region to search.</param>
/// <param name="rr">
/// Item1 is run over the region; a match is kept only when Item2 matches at that match's right end.
/// </param>
/// <param name="k">
/// A positive value selects the k-th kept match counting from the first (1-based);
/// any other value is added to the match count, so -1 selects the last kept match.
/// </param>
/// <returns>The right end of the selected match, or null when the index is out of range.</returns>
public static uint? RegPos(StringRegion v, Tuple<RegularExpression, RegularExpression> rr, int k)
{
    var candidates = new List<PositionMatch>();
    foreach (PositionMatch match in rr.Item1.Run(v))
    {
        if (rr.Item2.MatchesAt(v, match.Right))
        {
            candidates.Add(match);
        }
    }

    int index;
    if (k > 0)
    {
        index = k - 1;
    }
    else
    {
        index = candidates.Count + k;
    }

    if (index < 0 || index >= candidates.Count)
    {
        return null;
    }

    return (uint?)candidates[index].Right;
}