/// <summary>
/// Analyzes the file at <paramref name="inputPath"/> with a program that is learned afresh
/// from the example set currently held by the session.
/// </summary>
/// <param name="inputPath">Path of the file the learned program is applied to.</param>
/// <param name="result">Result object that is filled in with timings, the learned program and the output.</param>
/// <param name="verbose">Forwarded to the <c>ConsolePrinter</c> that emits the progress messages.</param>
/// <returns>The extraction result (the same instance as <paramref name="result"/>).</returns>
public override AnalysisResult<string> Analyze(string inputPath, AnalysisResult<string> result, bool verbose)
{
    var printer = new ConsolePrinter(verbose);

    // Load the test input and record its size before any learning happens.
    var fileRegion = AnalysisUtil.RegionFromFile(inputPath);
    result.TestInputLineCount = AnalysisUtil.BuildlogLineCount(inputPath);

    // Phase 1: learn a program from the session's examples and time it.
    printer.WriteLine("Starting to learn program");
    Stopwatch learnTimer = Stopwatch.StartNew();
    RegionProgram learned = session.Learn();
    learnTimer.Stop();
    printer.WriteLine("Learning took " + learnTimer.Elapsed);

    result.LearningDuration = learnTimer.Elapsed;
    result.TestInputPath = inputPath;

    if (learned == null)
    {
        // Nothing was learned: report the failure through the result object.
        printer.WriteLine("no program found");
        result.Successful = false;
        result.Output = "no program found";
        return result;
    }

    printer.WriteLine("Learned Program:");
    printer.WriteLine(learned);
    printer.WriteLine("");
    result.LearnedProgram = learned.ToString();

    // Phase 2: apply the learned program to the test input and time it.
    printer.WriteLine("Starting to apply program");
    Stopwatch applyTimer = Stopwatch.StartNew();
    StringRegion extracted = learned.Run(fileRegion);
    applyTimer.Stop();
    printer.WriteLine("Applying took " + applyTimer.Elapsed);
    result.ApplicationDuration = applyTimer.Elapsed;

    if (extracted == null)
    {
        result.Successful = false;
        result.Output = "no extraction found for this input";
        return result;
    }

    result.Successful = true;
    result.Output = extracted.Value;
    return result;
}
/// <summary>
/// Learns a program that extracts one region from a file and applies it to a second input.
/// </summary>
private static void LearnRegion()
{
    RegionSession session = new RegionSession();
    StringRegion trainingInput = RegionSession.CreateStringRegion("Carrie Dodson 100");

    // A single example is enough here: one region is extracted from one file.
    // A position names the gap between two characters, counted from 0 at the start of the file,
    // and an example is given by a (start, end) pair of such positions.
    session.AddConstraints(
        new RegionExample(trainingInput, trainingInput.Slice(7, 13))); // "Carrie Dodson 100" => "Dodson"

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    StringRegion testInput = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
    StringRegion extracted = learned.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
    }
    else
    {
        Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
    }
}
/// <summary>
/// Learns an Extraction.Text program, serializes it, loads it back and runs the reloaded program.
/// </summary>
private static void SerializeProgram()
{
    RegionSession session = new RegionSession();
    StringRegion trainingInput = RegionSession.CreateStringRegion("Carrie Dodson 100");
    session.AddConstraints(
        new RegionExample(trainingInput, trainingInput.Slice(7, 13))); // "Carrie Dodson 100" => "Dodson"

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    // Round-trip the program through its serialized form.
    string serialized = learned.Serialize();
    RegionProgram reloaded = Loader.Instance.Region.Load(serialized);

    StringRegion testInput = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
    StringRegion extracted = reloaded.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
    }
    else
    {
        Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
    }
}
/// <summary>
/// Learns a program that extracts a region relative to another region that appears after it
/// (a succeeding sibling region).
/// Demonstrates how sibling referencing works.
/// </summary>
private static void LearnRegionReferencingSucceedingSibling()
{
    RegionSession session = new RegionSession();
    StringRegion document =
        RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

    // One record per line, and the trailing number of each record.
    StringRegion[] records =
    {
        document.Slice(0, 17),
        document.Slice(18, 36),
        document.Slice(37, 54)
    };
    StringRegion[] numbers =
    {
        document.Slice(14, 17),
        document.Slice(34, 36),
        document.Slice(51, 54)
    };

    // Goal: given the number as the reference region, extract the first name of the same record.
    session.AddConstraints(
        new RegionExample(numbers[0], records[0].Slice(0, 6)),   // reference "100" => "Carrie"
        new RegionExample(numbers[1], records[1].Slice(18, 25))  // reference "75" => "Leonard"
    );

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (StringRegion number in numbers)
    {
        StringRegion extracted = learned.Run(number);
        string shown = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", number, shown);
    }
}
/// <summary>
/// Learns a program to extract a region while supplying extra, unlabeled references that help
/// select the intended program.
/// Demonstrates the use of additional references.
/// </summary>
private static void LearnRegionWithAdditionalReferences()
{
    RegionSession session = new RegionSession();
    StringRegion document =
        RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");
    StringRegion[] records =
    {
        document.Slice(0, 17),
        document.Slice(18, 36),
        document.Slice(37, 54)
    };

    // Goal: extract "100", "75", and "***".
    session.AddConstraints(
        new RegionExample(records[0], records[0].Slice(14, 17))); // "Carrie Dodson 100" => "100"

    // Additional references let Extraction.Text see how candidate programs behave on unseen data.
    // Without them it may learn a program that extracts the first number. With them it can tell
    // that such a program does not apply to the third record "Margaret Cook ***" and promote a
    // more widely applicable program instead.
    session.AddInputs(records.Skip(1));

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (StringRegion record in records)
    {
        StringRegion extracted = learned.Run(record);
        string shown = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", record, shown);
    }
}
/// <summary>
/// Learns a program to extract a region from a mix of positive and negative examples.
/// Demonstrates the use of negative examples.
/// </summary>
private static void LearnRegionWithNegativeExamples()
{
    RegionSession session = new RegionSession();
    StringRegion document =
        RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");
    StringRegion[] records =
    {
        document.Slice(0, 17),
        document.Slice(18, 36),
        document.Slice(37, 54)
    };

    // Goal: extract "100" and "320", and nothing from the middle record.
    session.AddConstraints(
        new RegionExample(records[0], records[0].Slice(14, 17)), // "Carrie Dodson 100" => "100"
        new RegionNegativeExample(records[1], records[1])        // no extraction in "Leonard Robledo NA"
    );

    // Extraction.Text looks for a program whose output does not OVERLAP with any negative example.
    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    foreach (StringRegion record in records)
    {
        StringRegion extracted = learned.Run(record);
        string shown = extracted?.Value ?? "null";
        Console.WriteLine("\"{0}\" => \"{1}\"", record, shown);
    }
}
/// <summary>
/// Learns a program to extract a single region from two examples that live in two different files.
/// Learning across files works like learning from several examples within one file.
/// Demonstrates how to learn with examples from different files.
/// </summary>
private static void LearnRegionUsingMultipleFiles()
{
    RegionSession session = new RegionSession();

    // Each string region stands in for the content of a separate file.
    StringRegion firstFile = RegionSession.CreateStringRegion("Carrie Dodson 100");
    StringRegion secondFile = RegionSession.CreateStringRegion("Leonard Robledo 75");

    session.AddConstraints(
        new RegionExample(firstFile, firstFile.Slice(7, 13)),   // "Carrie Dodson 100" => "Dodson"
        new RegionExample(secondFile, secondFile.Slice(8, 15))  // "Leonard Robledo 75" => "Robledo"
    );

    RegionProgram learned = session.Learn();
    if (learned == null)
    {
        Console.Error.WriteLine("Error: Learning fails!");
        return;
    }

    StringRegion testInput = RegionSession.CreateStringRegion("Margaret Cook 320"); // expect "Cook"
    StringRegion extracted = learned.Run(testInput);
    if (extracted == null)
    {
        Console.Error.WriteLine("Error: Extracting fails!");
    }
    else
    {
        Console.WriteLine("\"{0}\" => \"{1}\"", testInput, extracted);
    }
}
/// <summary>
/// Runs the benchmark: feeds examples in one at a time, learning a region program from all
/// examples used so far after each addition, and stops as soon as the learned program is valid
/// on every example or no further example is available. The result is recorded afterwards.
/// </summary>
public override void Run()
{
    // Make sure the report directory is there before anything is recorded.
    if (!Directory.Exists(ReportDirPath))
    {
        Directory.CreateDirectory(ReportDirPath);
    }

    Console.WriteLine($"Learning Extraction.Text region program for {SubBenchmarkName}");

    Success = false;
    while (!Success)
    {
        if (!TryGetNextExample(out RegionExample candidate))
        {
            // Examples exhausted without finding a valid program.
            break;
        }

        // A fresh session is used for every attempt, seeded with all examples used so far.
        var attempt = new RegionSession();
        UsedExamples.Add(candidate);
        attempt.Constraints.Add(UsedExamples);

        Program = attempt.Learn();
        Success = Program != null && AllExamples.All(example => Valid(example, Program));
    }

    RecordResult();
}
/// <summary>
/// Learns a single region-extraction program from the given examples and returns it serialized.
/// </summary>
/// <param name="textExtractExamples">
/// Examples to learn from. Only the first selection of each example is used; examples that
/// carry no selection are skipped.
/// </param>
/// <returns>The serialized form of the learned program.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="textExtractExamples"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.InvalidOperationException">
/// No program could be learned from the supplied examples.
/// </exception>
public string LearnSingle(List<TextExtractExample> textExtractExamples)
{
    if (textExtractExamples == null)
    {
        throw new System.ArgumentNullException(nameof(textExtractExamples));
    }

    var session = new RegionSession();
    var regionExamples = new List<RegionExample>();

    foreach (var textExtractExample in textExtractExamples)
    {
        var inputRegion = RegionSession.CreateStringRegion(textExtractExample.text);

        // At most one example is added per string region. FirstOrDefault (rather than First)
        // makes the null check below meaningful: an example without any selection is skipped
        // instead of aborting the whole call with an exception.
        var textExtractSelection = textExtractExample.selections?.FirstOrDefault();
        if (textExtractSelection == null)
        {
            continue;
        }

        var exampleRegion = inputRegion.Slice(
            (uint)textExtractSelection.startPos,
            (uint)textExtractSelection.endPos);
        regionExamples.Add(new RegionExample(inputRegion, exampleRegion));
    }

    session.AddConstraints(regionExamples);

    // Learn() yields null when no program fits the constraints; surface that as a descriptive
    // error instead of a NullReferenceException from the Serialize() call.
    var program = session.Learn();
    if (program == null)
    {
        throw new System.InvalidOperationException(
            "Learning failed: no program could be learned from the given examples.");
    }

    return program.Serialize();
}