/// <summary>
/// Learns a region-extraction program from a single labeled record while also
/// supplying the remaining records as unlabeled inputs, then prints what the
/// learnt program extracts from every record.
/// Demonstrates the use of additional references.
/// </summary>
private static void LearnRegionWithAdditionalReferences()
{
    var learningSession = new RegionSession();
    StringRegion document =
        RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

    // One region per line of the document; the newline separators are excluded.
    StringRegion[] lines =
    {
        document.Slice(0, 17),
        document.Slice(18, 36),
        document.Slice(37, 54)
    };

    // The goal is to extract "100", "75", and "***".
    // Only the first record is labeled: "Carrie Dodson 100" => "100".
    StringRegion labeledRecord = lines[0];
    StringRegion expectedField = labeledRecord.Slice(14, 17);
    learningSession.AddConstraints(new RegionExample(labeledRecord, expectedField));

    // The other records are passed as additional references so that Extraction.Text can
    // observe how candidate programs behave on unseen data. Without them it may learn a
    // program that extracts the first number; with them it can tell that such a program
    // is not applicable to the third record "Margaret Cook ***" and promote a more
    // applicable program instead.
    learningSession.AddInputs(lines.Skip(1));

    RegionProgram learntProgram = learningSession.Learn();
    if (learntProgram != null)
    {
        foreach (StringRegion line in lines)
        {
            // A record the program cannot handle is reported as the text "null".
            var extracted = learntProgram.Run(line);
            string shown = extracted?.Value ?? "null";
            Console.WriteLine("\"{0}\" => \"{1}\"", line, shown);
        }
    }
    else
    {
        Console.Error.WriteLine("Error: Learning fails!");
    }
}
/// <summary>
/// Learns a region-extraction program from labeled examples and wraps it in a
/// <see cref="StructureExtractor"/>.
/// </summary>
/// <param name="examples">
/// Labeled examples. <c>Item1</c> is the input text; <c>Item2</c> and <c>Item3</c> are the
/// two positions passed to <c>StringRegion.Slice</c> to mark the field that should be
/// extracted from that text. Must not be null or empty.
/// </param>
/// <param name="noneLabeledExamples">
/// Optional unlabeled input texts that are registered with the session as additional
/// inputs. Ignored when null or empty.
/// </param>
/// <returns>A <see cref="StructureExtractor"/> built from the learnt program.</returns>
/// <exception cref="AggregateException">
/// <paramref name="examples"/> is null or contains no elements.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The session did not produce a program for the given examples.
/// </exception>
public static async Task<StructureExtractor> TrainExtractorAsync(
    IEnumerable<Tuple<string, uint, uint>> examples,
    IEnumerable<string> noneLabeledExamples = null)
{
    // Snapshot the sequence once: checking for emptiness and then iterating would
    // otherwise enumerate a lazy source twice (repeating its work, or observing
    // different data on the second pass).
    List<Tuple<string, uint, uint>> labeledExamples = examples?.ToList();
    if (labeledExamples == null || labeledExamples.Count == 0)
    {
        // The exception type is kept as-is so existing callers that catch it keep working.
        throw new AggregateException($"{nameof(examples)} must not be null or empty");
    }

    var regionSession = new RegionSession();
    foreach (var example in labeledExamples)
    {
        var stringRegion = new StringRegion(example.Item1, Semantics.Tokens);
        var field = stringRegion.Slice(example.Item2, example.Item3);
        regionSession.AddConstraints(new RegionExample(stringRegion, field));
    }

    // Same single-enumeration guarantee for the optional unlabeled inputs.
    List<string> unlabeledInputs = noneLabeledExamples?.ToList();
    if (unlabeledInputs != null && unlabeledInputs.Count > 0)
    {
        regionSession.AddInputs(unlabeledInputs.Select(text => new StringRegion(text, Semantics.Tokens)));
    }

    var program = await regionSession.LearnAsync();
    if (program == null)
    {
        // More specific than a bare Exception, yet still caught by catch (Exception).
        throw new InvalidOperationException("No program found");
    }

    return new StructureExtractor(program);
}