/// <summary>
        ///     Learns a single-region extraction program from two examples taken from two different inputs
        ///     and then runs the learned program on a third, unseen input.
        ///     Examples spread over several files are supplied exactly like several examples from one file:
        ///     each one is just another constraint on the same session.
        /// </summary>
        private static void LearnRegionUsingMultipleFiles()
        {
            var          learner    = new RegionSession();
            StringRegion firstFile  = RegionSession.CreateStringRegion("Carrie Dodson 100");
            StringRegion secondFile = RegionSession.CreateStringRegion("Leonard Robledo 75");

            // One example per file; both select the middle word of the line.
            var firstExample  = new RegionExample(firstFile, firstFile.Slice(7, 13));   // "Carrie Dodson 100" => "Dodson"
            var secondExample = new RegionExample(secondFile, secondFile.Slice(8, 15)); // "Leonard Robledo 75" => "Robledo"
            learner.AddConstraints(firstExample, secondExample);

            RegionProgram learned = learner.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Try the learned program on text it has not seen before.
            StringRegion unseen    = RegionSession.CreateStringRegion("Margaret Cook 320"); // expect "Cook"
            StringRegion extracted = learned.Run(unseen);
            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
                return;
            }

            Console.WriteLine("\"{0}\" => \"{1}\"", unseen, extracted);
        }
        /// <summary>
        ///     Learns a region program from one positive and one negative example, then runs it on every record.
        ///     Shows how a negative example marks text the learned program must stay away from.
        /// </summary>
        private static void LearnRegionWithNegativeExamples()
        {
            var          learner  = new RegionSession();
            StringRegion document =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

            // One region per line of the document.
            StringRegion   carrie   = document.Slice(0, 17);
            StringRegion   leonard  = document.Slice(18, 36);
            StringRegion   margaret = document.Slice(37, 54);
            StringRegion[] lines    = { carrie, leonard, margaret };

            // Goal: extract "100" and "320".
            var wanted   = new RegionExample(carrie, carrie.Slice(14, 17)); // "Carrie Dodson 100" => "100"
            var unwanted = new RegionNegativeExample(leonard, leonard);     // no extraction in "Leonard Robledo NA"
            learner.AddConstraints(wanted, unwanted);

            // Extraction.Text looks for a program whose output does not OVERLAP with any negative example.
            RegionProgram learned = learner.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            for (int index = 0; index < lines.Length; index++)
            {
                StringRegion line  = lines[index];
                StringRegion found = learned.Run(line);
                string       shown = found?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", line, shown);
            }
        }
        /// <summary>
        ///     Learns every region program consistent with the example (advanced feature), takes the top three
        ///     by the learner's score feature and prints what each of them extracts from a few inputs.
        ///     Shows how to work with the whole program set instead of only the top-ranked program.
        /// </summary>
        private static void LearnAllRegionPrograms()
        {
            var          learner = new RegionSession();
            StringRegion sample  = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // Positions 14..17 cover the trailing number: "Carrie Dodson 100" => "100"
            learner.AddConstraints(new RegionExample(sample, sample.Slice(14, 17)));

            ProgramSet everything = learner.LearnAll().ProgramSet;
            IEnumerable <ProgramNode> best = everything.TopK(RegionLearner.Instance.ScoreFeature, 3);

            // The training input plus two inputs the learner never saw.
            StringRegion[] probes =
            {
                sample,
                RegionSession.CreateStringRegion("Leonard Robledo NA"),
                RegionSession.CreateStringRegion("Margaret Cook 320")
            };

            int rank = 1;
            foreach (ProgramNode node in best)
            {
                Console.WriteLine("Program {0}:", rank);
                rank++;

                // Wrap the raw program node so it can be run like any other region program.
                var candidate = new RegionProgram(node, ReferenceKind.Parent);
                foreach (StringRegion probe in probes)
                {
                    StringRegion hit = candidate.Run(probe);
                    if (hit == null)
                    {
                        Console.WriteLine("null");
                    }
                    else
                    {
                        Console.WriteLine(hit.Value);
                    }
                }
            }
        }
        /// <summary>
        ///     Learns the three top-ranked region programs for one example and prints what each of them
        ///     extracts from a few inputs.
        ///     Shows how to reach programs ranked below the best one.
        /// </summary>
        private static void LearnTop3RegionPrograms()
        {
            var          learner = new RegionSession();
            StringRegion sample  = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // Positions 14..17 cover the trailing number: "Carrie Dodson 100" => "100"
            var numberExample = new RegionExample(sample, sample.Slice(14, 17));
            learner.AddConstraints(numberExample);

            IEnumerable <RegionProgram> ranked = learner.LearnTopK(3);

            // The training input plus two inputs the learner never saw.
            StringRegion[] probes =
            {
                sample,
                RegionSession.CreateStringRegion("Leonard Robledo NA"),
                RegionSession.CreateStringRegion("Margaret Cook 320")
            };

            int ordinal = 0;
            foreach (RegionProgram candidate in ranked)
            {
                ordinal += 1;
                Console.WriteLine("Program {0}:", ordinal);

                foreach (StringRegion probe in probes)
                {
                    var    hit  = candidate.Run(probe);
                    string text = hit != null ? hit.Value : "null";
                    Console.WriteLine(text);
                }
            }
        }
        /// <summary>
        ///     Learns a program, serializes it to a string, loads it back and runs the reloaded program.
        ///     Demonstrates the serialization round trip of an Extraction.Text program.
        /// </summary>
        private static void SerializeProgram()
        {
            var          learner = new RegionSession();
            StringRegion sample  = RegionSession.CreateStringRegion("Carrie Dodson 100");

            var lastName = new RegionExample(sample, sample.Slice(7, 13)); // "Carrie Dodson 100" => "Dodson"
            learner.AddConstraints(lastName);

            RegionProgram learned = learner.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            // Round trip: program -> text -> program.
            string        asText   = learned.Serialize();
            RegionProgram reloaded = Loader.Instance.Region.Load(asText);

            // The reloaded program is the one being exercised here, not the freshly learned one.
            StringRegion unseen    = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
            StringRegion extracted = reloaded.Run(unseen);
            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
                return;
            }

            Console.WriteLine("\"{0}\" => \"{1}\"", unseen, extracted);
        }
        /// <summary>
        ///     Learns a program that extracts one region from a file, then runs it on a second input.
        ///     This is the smallest end-to-end use of a region session: one example in, one program out.
        /// </summary>
        private static void LearnRegion()
        {
            var          learner = new RegionSession();
            StringRegion sample  = RegionSession.CreateStringRegion("Carrie Dodson 100");

            // A single example suffices: one region is extracted from one file.
            // A position names the gap between two characters and counts from 0 (the start of the file),
            // so an example is simply a (start, end) pair of positions.
            StringRegion  lastName = sample.Slice(7, 13); // "Carrie Dodson 100" => "Dodson"
            RegionExample example  = new RegionExample(sample, lastName);
            learner.AddConstraints(example);

            RegionProgram learned = learner.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            StringRegion unseen    = RegionSession.CreateStringRegion("Leonard Robledo 75"); // expect "Robledo"
            StringRegion extracted = learned.Run(unseen);
            if (extracted == null)
            {
                Console.Error.WriteLine("Error: Extracting fails!");
                return;
            }

            Console.WriteLine("\"{0}\" => \"{1}\"", unseen, extracted);
        }
        /// <summary>
        ///     Learns a region program from labeled examples and wraps it in a <see cref="StructureExtractor" />.
        /// </summary>
        /// <param name="examples">
        ///     Labeled examples. <c>Item1</c> is the input text; <c>Item2</c> and <c>Item3</c> are the start and end
        ///     positions handed to <c>Slice</c> to mark the field to extract.
        /// </param>
        /// <param name="noneLabeledExamples">
        ///     Optional unlabeled input texts. When any are present they are registered with the session as
        ///     additional inputs.
        /// </param>
        /// <returns>An extractor built around the learned program.</returns>
        /// <exception cref="AggregateException"><paramref name="examples" /> is null or empty.</exception>
        /// <exception cref="InvalidOperationException">Learning produced no program.</exception>
        public static async Task <StructureExtractor> TrainExtractorAsync(IEnumerable <Tuple <string, uint, uint> > examples, IEnumerable <string> noneLabeledExamples = null)
        {
            // Materialize once: the argument may be a lazy or one-shot sequence, and it was previously
            // enumerated twice (once for the emptiness check, once for the loop).
            List <Tuple <string, uint, uint> > labeled = examples?.ToList();

            if (labeled == null || labeled.Count == 0)
            {
                // The exception type is kept as-is so that existing callers' catch blocks keep working.
                throw new AggregateException($"{nameof(examples)} must not be null or empty");
            }

            var regionSession = new RegionSession();

            foreach (var example in labeled)
            {
                var stringRegion = new StringRegion(example.Item1, Semantics.Tokens);
                var field        = stringRegion.Slice(example.Item2, example.Item3);
                regionSession.AddConstraints(new RegionExample(stringRegion, field));
            }

            // Same single-enumeration treatment for the optional unlabeled inputs.
            List <StringRegion> unlabeled = noneLabeledExamples?.Select(e => new StringRegion(e, Semantics.Tokens)).ToList();

            if (unlabeled != null && unlabeled.Count > 0)
            {
                regionSession.AddInputs(unlabeled);
            }

            var program = await regionSession.LearnAsync();

            if (null == program)
            {
                // More specific than a bare Exception, and still caught by catch (Exception).
                throw new InvalidOperationException("No program found");
            }

            return(new StructureExtractor(program));
        }
        /// <summary>
        ///     Learns a program that extracts a region relative to another region that appears after it
        ///     (a succeeding sibling): the first name of a line is found starting from the number ending that line.
        ///     Demonstrates how sibling referencing works.
        /// </summary>
        private static void LearnRegionReferencingSucceedingSibling()
        {
            var          learner  = new RegionSession();
            StringRegion document =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook 320");

            StringRegion[] lines   = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };
            StringRegion[] amounts = { document.Slice(14, 17), document.Slice(34, 36), document.Slice(51, 54) };

            // The number is the reference (input); the first name on the same line is the expected output.
            var carrieFromAmount  = new RegionExample(amounts[0], lines[0].Slice(0, 6));   // "100" => "Carrie"
            var leonardFromAmount = new RegionExample(amounts[1], lines[1].Slice(18, 25)); // "75" => "Leonard"
            learner.AddConstraints(carrieFromAmount, leonardFromAmount);

            RegionProgram learned = learner.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            for (int index = 0; index < amounts.Length; index++)
            {
                StringRegion amount = amounts[index];
                StringRegion found  = learned.Run(amount);
                string       shown  = found?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", amount, shown);
            }
        }
        /// <summary>
        ///     Learns a region program from one example while also handing the session extra, unlabeled inputs
        ///     that steer it towards the intended program.
        ///     Demonstrates the use of additional references.
        /// </summary>
        private static void LearnRegionWithAdditionalReferences()
        {
            var          learner  = new RegionSession();
            StringRegion document =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo 75\nMargaret Cook ***");

            StringRegion[] lines = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };

            // Goal: extract "100", "75", and "***".
            StringRegion firstLine = lines[0];
            var          example   = new RegionExample(firstLine, firstLine.Slice(14, 17)); // "Carrie Dodson 100" => "100"
            learner.AddConstraints(example);

            // Additional references let Extraction.Text see how candidate programs behave on unseen data.
            // Without them it may settle on a program that extracts the first number. With the other records
            // present it can tell that such a program does not apply to the third record "Margaret Cook ***"
            // and promotes a more widely applicable program instead.
            IEnumerable <StringRegion> unlabeled = lines.Skip(1);
            learner.AddInputs(unlabeled);

            RegionProgram learned = learner.Learn();
            if (learned == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (StringRegion line in lines)
            {
                StringRegion found = learned.Run(line);
                string       shown = found?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", line, shown);
            }
        }
        /// <summary>
        ///     Loads a serialized region program and runs it on a single input string.
        /// </summary>
        /// <param name="programAsString">Serialized program text, handed to <c>Loader.Instance.Region.Load</c>.</param>
        /// <param name="input">Text the program is run on.</param>
        /// <returns>
        ///     The string form of the extracted region, or <c>null</c> when the program extracts nothing
        ///     from <paramref name="input" />.
        /// </returns>
        public string RunSingle(string programAsString, string input)
        {
            var program     = Loader.Instance.Region.Load(programAsString);
            var inputRegion = RegionSession.CreateStringRegion(input);

            var result = program.Run(inputRegion);

            // Run yields null when nothing can be extracted; report that as null rather than
            // failing with a NullReferenceException on ToString().
            return(result?.ToString());
        }
        // Example #11
        /// <summary>
        ///     Returns the string region for the file at <paramref name="path" />, reading the file only the
        ///     first time a given path is requested and serving later requests from <c>fileCache</c>.
        /// </summary>
        /// <param name="path">Path of the file to load; also used as the cache key.</param>
        /// <returns>A region over the file's text after <c>Util.NormalizeBuildLogString</c> has been applied.</returns>
        public static StringRegion RegionFromFile(string path)
        {
            // One lookup instead of ContainsKey followed by the indexer.
            if (fileCache.TryGetValue(path, out var cached))
            {
                return(cached);
            }

            string       text   = Util.NormalizeBuildLogString(File.ReadAllText(path));
            StringRegion region = RegionSession.CreateStringRegion(text);

            fileCache[path] = region;
            return(region);
        }
        // Example #12
        /// <summary>
        ///     Reads a benchmark file, produces the document with every '{' and '}' removed, and returns the
        ///     regions of that document corresponding to the matches of <c>ExampleRegex</c> in the raw text.
        /// </summary>
        /// <param name="filename">Path of the benchmark file.</param>
        /// <param name="document">Receives the region over the file content with all braces stripped.</param>
        /// <returns>One region of <paramref name="document" /> per regex match, in match order.</returns>
        public static List <StringRegion> LoadBenchmark(string filename, out StringRegion document)
        {
            string       raw    = File.ReadAllText(filename);
            List <Match> marked = ExampleRegex.Matches(raw).Cast <Match>().ToList();

            // An out parameter cannot be captured by a lambda, so the stripped document lives in a local too.
            StringRegion stripped = RegionSession.CreateStringRegion(raw.Replace("}", "").Replace("{", ""));
            document = stripped;

            // Map raw-text offsets to stripped-text offsets: the i-th match is moved left by 1 + 2 * i.
            // NOTE(review): presumably each match sits just after its opening brace and every earlier example
            // contributed one '{' and one '}' - confirm against the definition of ExampleRegex.
            return marked
                   .Select((match, ordinal) =>
                   {
                       int          start = match.Index - 1 - 2 * ordinal;
                       int          end   = start + match.Length;
                       StringRegion piece = stripped.Slice((uint)start, (uint)end);
                       return piece;
                   })
                   .ToList();
        }
            // Example #13
            /// <summary>
            ///     Learns an Extraction.Text region program for this sub-benchmark by feeding in one more example
            ///     per attempt, stopping once the learned program is valid on all examples or no further example
            ///     is available, and then records the result.
            /// </summary>
            public override void Run()
            {
                // Create the report directory when it is missing.
                if (!Directory.Exists(ReportDirPath))
                {
                    Directory.CreateDirectory(ReportDirPath);
                }
                Console.WriteLine($"Learning Extraction.Text region program for {SubBenchmarkName}");

                Success = false;
                for (;;)
                {
                    if (Success)
                    {
                        break;
                    }
                    if (!TryGetNextExample(out RegionExample candidate))
                    {
                        break;
                    }

                    // A fresh session per attempt, seeded with every example used so far.
                    var attempt = new RegionSession();
                    UsedExamples.Add(candidate);
                    attempt.Constraints.Add(UsedExamples);

                    Program = attempt.Learn();
                    // Success requires a program that is valid on all examples, not only on the used ones.
                    Success = Program != null && AllExamples.All(example => Valid(example, Program));
                }
                RecordResult();
            }
        // Example #14
        /// <summary>
        ///     Learns a region program from a positive example combined with regular-expression constraints
        ///     (look-behind, matching and look-ahead regexes) and runs it on every record.
        ///     Demonstrates that Extraction.Text accepts constraints other than examples.
        ///     This is an advanced feature.
        /// </summary>
        private static void LearnRegionWithRegexes()
        {
            StringRegion document =
                RegionSession.CreateStringRegion("Carrie Dodson 100\nLeonard Robledo NA\nMargaret Cook 320");

            StringRegion[] lines = { document.Slice(0, 17), document.Slice(18, 36), document.Slice(37, 54) };

            // Goal: pull the number out of a record.
            var positives = new[]
            {
                new RegionExample(lines[0], lines[0].Slice(14, 17)), // "Carrie Dodson 100" => "100"
            };

            // Regex constraints for the learner; the look-ahead one is deliberately left unset.
            Regex lookBehind = new Regex("\\s");
            Regex lookAhead  = null;
            Regex matching   = new Regex("\\d+");

            // The regexes are passed in the order look-behind, matching, look-ahead.
            var learned = RegionLearner.Instance.LearnTopK(
                positives,
                RegionLearner.Instance.ScoreFeature,
                1,
                null,
                default(ProgramSamplingStrategy),
                null,
                lookBehind,
                matching,
                lookAhead);

            IEnumerable <RegionProgram> ranked = learned.TopPrograms;
            RegionProgram               best   = ranked.FirstOrDefault();

            if (best == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (StringRegion line in lines)
            {
                StringRegion found = best.Run(line);
                string       shown = found?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", line, shown);
            }
        }
        /// <summary>
        ///     Learns a region program from text/selection pairs and returns it in serialized form.
        /// </summary>
        /// <param name="textExtractExamples">
        ///     Inputs to learn from. For each input only its first selection is turned into an example;
        ///     inputs without a selection contribute nothing.
        /// </param>
        /// <returns>The serialized program, or <c>null</c> when no program could be learned.</returns>
        public string LearnSingle(List <TextExtractExample> textExtractExamples)
        {
            var session        = new RegionSession();
            var regionExamples = new List <RegionExample>();

            foreach (var textExtractExample in textExtractExamples)
            {
                var inputRegion = RegionSession.CreateStringRegion(textExtractExample.text);

                // At most one example is added per string region. FirstOrDefault (rather than First) makes an
                // input with no selections fall through the null check below instead of throwing.
                var textExtractSelection = textExtractExample.selections.FirstOrDefault();
                if (textExtractSelection != null)
                {
                    var exampleRegion = inputRegion.Slice((uint)textExtractSelection.startPos, (uint)textExtractSelection.endPos);
                    var regionExample = new RegionExample(inputRegion, exampleRegion);
                    regionExamples.Add(regionExample);
                }
            }

            session.AddConstraints(regionExamples);
            var program = session.Learn();

            // Learn yields null when no program fits the examples; report that as null instead of
            // failing with a NullReferenceException on Serialize().
            return(program?.Serialize());
        }
        // Example #16
        /// <summary>
        ///     Learns one extraction program per field from the first two files, then writes what each program
        ///     extracts from the remaining files to <c>..\..\output.txt</c>.
        ///     Learning multiple regions is the same as learning a single region, repeated once per field.
        /// </summary>
        /// <param name="paths">Files to read. The first two are used for training, the rest for testing.</param>
        /// <param name="regionsToLearn">
        ///     One dictionary per training file, in the same order as <paramref name="paths" />, mapping a field
        ///     name to the exact text of that field in the file. Field names are taken from the first dictionary.
        /// </param>
        /// <exception cref="InvalidOperationException">A field's text does not occur in its training file.</exception>
        private static void LearnMultipleRegionsUsingMultipleFiles(List <string> paths, List <Dictionary <string, string> > regionsToLearn)
        {
            List <StringRegion> inputs = paths
                                         .Select(path => RegionSession.CreateStringRegion(File.ReadAllText(path)))
                                         .ToList();

            // Number of leading documents that carry examples; everything after them is test data.
            const int trainingDocumentCount = 2;

            List <string> fieldsToLearn = regionsToLearn[0].Keys.ToList();
            Dictionary <string, RegionSession> sessionPerField = new Dictionary <string, RegionSession>();

            // Phase 1: one session per field, each holding one example per training document.
            foreach (string field in fieldsToLearn)
            {
                RegionSession session = new RegionSession();
                for (int i = 0; i < trainingDocumentCount; i++)
                {
                    string output = regionsToLearn[i][field];
                    var    found  = inputs[i].IndexOfRelative(output);
                    if (!found.HasValue)
                    {
                        // Fail with a message that names the field and file instead of an opaque
                        // "no value" error from reading .Value.
                        throw new InvalidOperationException(
                                  "Text for field '" + field + "' was not found in training file: " + paths[i]);
                    }

                    uint start = found.Value;
                    uint end   = (uint)(start + output.Length);

                    RegionExample example = new RegionExample(inputs[i], inputs[i].Slice(start, end));
                    session.AddConstraints(example);
                }
                sessionPerField.Add(field, session);
            }

            // Phase 2: learn a program per field; fields that cannot be learned are reported and left out.
            Dictionary <string, RegionProgram> programPerField = new Dictionary <string, RegionProgram>();

            foreach (var fieldSessionPair in sessionPerField)
            {
                RegionProgram program = fieldSessionPair.Value.Learn();
                if (program == null)
                {
                    Console.Error.WriteLine("Error: Learning fails for Field : " + fieldSessionPair.Key);
                }
                else
                {
                    programPerField.Add(fieldSessionPair.Key, program);
                }
            }

            // Phase 3 (testing): run every learned program on every non-training document.
            // The using block flushes and closes the writer even when an extraction throws.
            using (StreamWriter outputWriter = new StreamWriter(@"..\..\output.txt"))
            {
                outputWriter.WriteLine(string.Join("\t|\t", programPerField.Keys));
                for (int i = trainingDocumentCount; i < inputs.Count; i++)
                {
                    List <string> values = new List <string>();
                    foreach (var fieldProgramPair in programPerField)
                    {
                        // A null entry (nothing extracted) is written as an empty cell by string.Join.
                        string value = fieldProgramPair.Value.Run(inputs[i])?.Value;
                        values.Add(value);
                    }
                    outputWriter.WriteLine(string.Join("\t|\t\t", values));
                }
            }
        }
        public static void extract(List <string> inputlistString)
        {
            RegionSession RegionSession = new RegionSession();
            string        regexQuantity = @"[\d*(\.|\,)?\d*]+\s?(tons|mts|mt|ton|kg|kgs|dwt|mtons)";

            string [] records = { "- QUANTITY : 25,000 MTS (BAGS 25KGS)", //0
                                  "15,000 mts +/- 10 % CAN (Axan)",       //1
                                  "16,000 mts +/- 10 % CAN-27",           //2
                                  "16,000 mts CAN-27",                    //3
                                  "5,100 mts min/max Nitrabor",           //4
                                  "5,100 mts Nitrabor",                   //5
                                  "6.5 tons",                             //6
                                  "7 tons",                               //7
                                  "- Qty : 15,000MT (+/-5% MOLCO)",       //8
                                  "7.5 tons",
                                  "   6,600 mts +/- 10 % moloo NPK (19-04-19)",
                                  "   6,600 mts NPK",
                                  "   7,000 mts CAN (Axan)",
                                  "   8,000 mts CAN (Axan)",
                                  "  5000  TONS OPTION 4000 OPTION  2000 TONS DJIBOUTI",
                                  "  9000 TONS MOMBASA",
                                  "(2000mt for kwangyang, 3000mt for ulsan)",
                                  "- 2500 TONS LOAD",

                                  "- Quantity: 20,00mt - 2 pct moloo calcined petcoke",
                                  "- Q’ty: 2 lots x 41,000MT +/- 10% MOLOO",
                                  "- Unit Weight 30.5 Tons.",
                                  "-Q’TTY : ABT. 4,000 – 5,000MT",
                                  "10 m - UW 22 tons",
                                  "150 x 100 x   20 cm  = uw  150 kg  (x5)",
                                  "18.5 tons",
                                  "19 tons",
                                  "20,000 - 35,000 DWT .",
                                  "25.000 tons metcoke",
                                  "3.500 TONS WOODPULP",
                                  "30.000tons",
                                  "33,500 MT ON 10.65M SSW",
                                  "4.4 tons",
                                  "40-50,000 dwt",
                                  "40.000tons",
                                  "41,000 MTONS",
                                  "44/4570 mts profiles",
                                  "4500/5K TONS CORN",
                                  "5 HO / HA / TPC 44.5 MT",
                                  "5 tons",
                                  "50.000tons",
                                  "5000mtons wric",
                                  "570 x 130 x 130 cm =  3 mt",
                                  "5K TONS CORN",
                                  "6 tons",
                                  "7000MTS, (basis 4cr x 30mts  min) / 12000MTS.",
                                  "as p/c fr grd 25ts tng",
                                  "Cargo quantity : 10.000 tons.",
                                  "CARGO&amp;QTTY: ABT50,000MT COAL IN BULK 10PCT MOLCO",
                                  "DWCC: 2000-7000 ts",
                                  "grd sid min 4 x 25 ts cranes",
                                  "Max 20 years / min 25mt cranes",
                                  "Max. 28 M.Tons.",
                                  "No. 1     5,00 m    x    4,05 m    x    3,95 m  =     8.645 kg",
                                  "No. 2     6,30 m    x    4,70 m    x    3,16 m   =   11.995 kg",
                                  "one monthly shipment of 40.000 mt 5%moloo",
                                  "penang(4k mt) +port kelang (24k mt) to luoyuan",
                                  "QTY",
                                  "Qty :20000 MT",
                                  "QTY        : 42000 MTS +- 10% MOL CHARTER OPTION",
                                  "Qty (mt)",
                                  "qty - 30,000 mt +/- 2% moloo metalurgical calcined alumina powder in blk",
                                  "Qty 20 – max 40k Rockphosphate (as sole or part cargo)",
                                  "Qty 30K min max - BF Coke , (SF is 65 wog)",
                                  "QTY 5,500 CBM 10% MOLCO (NEED HOLD CAPA MIN 14,000 CBM)",
                                  "QTY :  20,000 MT +/- 10%",
                                  "QTY :  60,000 MT +/- 5%",
                                  "QTY :  MIN 27,000 MT - MAX 30,000 MT",
                                  "qty : 30000 +- 10% molco",
                                  "Qty : 50,000 MT +/- 10%",
                                  "Qty : 55000 mt +\\-10",
                                  "Qty above 25k - 40k for 2 port discharge Hazira + Kandla",
                                  "Qty break up - 25,000 mt at Pipavav and 30,000 mt at Porbandar",
                                  "Qty is 6,000GMT & needs 23,000CBM & above!",
                                  "Qty is 7,000GMT (est. 5,200 to 5,300DBMT) & needs 25,000CBM & above!",
                                  "Qty upto 25k for 1 port discharge Hazira",
                                  "Qty: 10,000BDMT +/-10% MOLOO (S/f: 5.4)",
                                  "Qty: 20000MT",
                                  "Qty: 26000  TO 33000  TONS at owners option",
                                  "Qty: 30 kt and 15kt ±10% Moloo",
                                  "Qty: 35,000mt",
                                  "Qty: 35,000mt=0A=",
                                  "QTY: 40,000-45000 MT LIMESTONE",
                                  "qty: 9,000 mt",
                                  "Qty: Firm Quantity-1X (45,000-60,000) MT MOLOO",
                                  "Qty:35,000mt",
                                  "Qty:35,000mt=0A=",
                                  "QTYS : 25,000 ~ 30,000 MT / SHIPMENT (IN BULK OR 1,000KG BIG BAG)",
                                  "Quantity",
                                  "Quantity : 7,000mt – 8,000mt (inchopt)",
                                  "Quantity :50-55,000MT in Bulk ( need geared vsl )",
                                  "quantity : 25000 mt (+/- 5% charter option ) , other details ",
                                  "QUANTITY : 30 – 40,000 M/TONS (ABOUT) TO BE FINALIZED",
                                  "quantity       : 25,000 mt +/-10% moloo sand",
                                  "Quantity   : 10,000 OR 15,000 Mts",
                                  "QUANTITY   : 25000 MT (+/- 5% Charter Option ) , OTHER DETAILS INCLUDING",
                                  "Quantity  30,000 mt +/-5%",
                                  "Quantity (MT)",
                                  "Quantity - 10,000mt +/-10% MOLOO",
                                  "Quantity - 100,000mt +/-10% MOLOO",
                                  "Quantity - 17,000 BDMT+/-10% MOLOO",
                                  "Quantity - 45,000mt +/-10% MOLOO",
                                  "Quantity - 5,000 MT +/-10% MOLOO",
                                  "Quantity - 55,000mt +/-10% MOLOO",
                                  "Quantity 12000 mt +/- 10 Pct choption",
                                  "quantity : (250.000 )mt .",
                                  "Quantity : 10,000 mt, Or 11,000 mt, Or 15,000 Mts, All in Choptn",
                                  "Quantity : 15000mt+/-5% molcopt,  Steel scrap for melting in bulk, SF 2.0 abt wog.",
                                  "Quantity : 200,000 Tons",
                                  "Quantity : 200,000 Tons of Aggregate",
                                  "quantity : 40000 +/-10% rock phosphate",
                                  "Quantity : 7000-8,000mt 2% MOLCO",
                                  "Quantity : About 10,000 mt, Or 11,000 mt, Or 15,000 Mts, All in Choptn ",
                                  "Quantity : About 11,000 mt, Or 15,000 Mts, in Choptn",
                                  "QUANTITY : LOADABLE QTY 50,000 MT + 10%  (supramax or ultramax only)",
                                  "QUANTITY AS DETERMINED BY THE DRAFT SURVEY WITH THE FLWG REMARKS ONLY:",
                                  "QUANTITY INCREASED",
                                  "Quantity is 42\'500 - 52\'000 MT in Charterer\'s option.",
                                  "quantity requested, if greater.",
                                  "QUANTITY TO BE LOADED AT STOCKTON AND LB IS CHOPT.",
                                  "Quantity:",
                                  "Quantity:                     40,000 MT +/- 10%  Moloo",
                                  "Quantity:                     50,000 MT +/- 10%  Moloo",
                                  "Quantity:     abt 27,000mt Bulk Harmless Fertilizers",
                                  "Quantity:     abt 28,000mt Bulk Harmless Fertilizers.",
                                  "QUANTITY: 1,000 MTS , 5 MTS / COIL",
                                  "Quantity: 1,641 pipes (97 bends) / 5,050 mt / 5,522 cbm 5% molchopt",
                                  "Quantity: 1,656 pipes (114 bends) / 5,069 mt / 5,654 cbm 5% molchopt",
                                  "Quantity: 20,000-40,000mt BHF",
                                  "Quantity: 20,000mts (+/-10%)",
                                  "Quantity: 21.000mt 10% more or less",
                                  "Quantity: 25,000 mts 10pct moloo",
                                  "QUANTITY: 25,000 MTS MIN/MAX",
                                  "Quantity: 25,000 MTS Min/Max (IDEALLY)",
                                  "Quantity: 25.000 MT",
                                  "Quantity: 25000 MT in Bulk",
                                  "Quantity: 25000-27500 MTs in Bulk",
                                  "quantity: 30,000 mt +/- 10%  chopt",
                                  "Quantity: 30000mt (ready in the port)",
                                  "Quantity: 30000mt (ready in the port)=0A=",
                                  "quantity: 35,000mt 10% moloo",
                                  "Quantity: 40,000 – 45,000 GMT",
                                  "Quantity: 50,000MT/10% owners option Thermal Coal",
                                  "Quantity: 55,000MT/10% owners option",
                                  "Quantity: 58,000MT/10% owners option",
                                  "Quantity: 8000 Mton  (+/ - 5 % molco)",
                                  "QUANTITY: 9,300MT, TRY LESS IF NEEDED",
                                  "QUANTITY: MIN 11,000MT +5% CHOPT",
                                  "Quantity: Min 25.000 - Max 30.000 MT in Charterers option",
                                  "quantity: min 30000 mt + 10pct  oo",
                                  "QUANTITY: MIN 41000/ MAX 44000 MT IN OO",
                                  "Quantity: Total 24000mt 10% molchopt Steel scrap",
                                  "TOTAL QUANTITY: ABOUT MIN 150,000 MTS (6 SHIPMENTS) - 350,000MTS +/- 10% PER YEAR IN CHOPT.",
                                  "TTL QUANTITY 250.000 MTS",
                                  "TTL W  900 MTONS",
                                  "TTL W  900MTONS",
                                  "VPT- 8000 MT/day",
                                  "Weight : 1.578 tons/bag",
                                  "Weight_1 : 41000",
                                  "Weight_2 : 41000" };

            StringRegion inputRegion   = RegionSession.CreateStringRegion(records[0]);
            StringRegion inputRegion2  = RegionSession.CreateStringRegion(records[1]);
            StringRegion inputRegion3  = RegionSession.CreateStringRegion(records[2]);
            StringRegion inputRegion4  = RegionSession.CreateStringRegion(records[3]);
            StringRegion inputRegion5  = RegionSession.CreateStringRegion(records[4]);
            StringRegion inputRegion6  = RegionSession.CreateStringRegion(records[5]);
            StringRegion inputRegion7  = RegionSession.CreateStringRegion(records[6]);
            StringRegion inputRegion8  = RegionSession.CreateStringRegion(records[7]);
            StringRegion inputRegion9  = RegionSession.CreateStringRegion(records[8]);
            StringRegion inputRegion10 = RegionSession.CreateStringRegion(records[9]);

            Console.WriteLine(inputRegion.Slice(13, 23));
            Console.WriteLine(inputRegion2.Slice(0, 11));
            Console.WriteLine(inputRegion3.Slice(0, 11));
            IEnumerable <Match> matches = Regex.Matches(@"- QUANTITY : 25,000 MTS (BAGS 25KGS)", regexQuantity, RegexOptions.IgnoreCase);

            foreach (var match in matches)
            {
                Console.WriteLine(match.Value);
            }
            var examples = new[] {
                new RegionExample(inputRegion, inputRegion.Slice(14, 23)),
                new RegionExample(inputRegion2, inputRegion2.Slice(1, 11)),
                new RegionExample(inputRegion3, inputRegion3.Slice(1, 11)),
            };
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion2, new[] {
            //                 inputRegion2.Slice(1, 6), // input => "25,000"
            //                 inputRegion2.Slice(8, 11), // input => "MTS"
            //             })
            // );
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion3, new[] {
            //                 inputRegion3.Slice(1, 6), // input => "25,000"
            //                 inputRegion3.Slice(8, 11), // input => "MTS"
            //             })
            // );
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion4, new[] {
            //                 inputRegion4.Slice(1, 6), // input => "25,000"
            //                 inputRegion4.Slice(8, 11), // input => "MTS"
            //             })
            // );
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion5, new[] {
            //                 inputRegion5.Slice(1, 5), // input => "25,000"
            //                 inputRegion5.Slice(7, 10), // input => "MTS"
            //             })
            // );
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion6, new[] {
            //                 inputRegion6.Slice(1, 5), // input => "25,000"
            //                 inputRegion6.Slice(7, 10), // input => "MTS"
            //             })
            // );
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion7, new[] {
            //                 inputRegion7.Slice(1, 3), // input => "25,000"
            //                 inputRegion7.Slice(5, 8), // input => "MTS"
            //             })
            // );
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion8, new[] {
            //                 inputRegion8.Slice(1, 1), // input => "25,000"
            //                 inputRegion8.Slice(3, 6), // input => "MTS"
            //             })
            // );
            // RegionSession.Constraints.Add(
            //     new RegionExample(inputRegion9, new[] {

            //                 inputRegion9.Slice(9, 14), // input => "25,000"
            //                 inputRegion9.Slice(15, 16), // input => "MTS"
            //             })
            // );
            //IEnumerable<RegionProgram> topRankedPrograms = RegionLearner.Instance.LearnTopK(RegionExample, null, 1, regexQuantity);;

            // if ( RegionSession.Learn()!= null)
            // {
            //     Console.WriteLine(RegionSession.Learn());
            // }

            // List <StringRegion> inputlist =new List<StringRegion> ();

            // foreach (var input in inputlistString)
            // {
            //     inputlist.Add(RegionSession.CreateStringRegion(input));
            // }

            // foreach (var result in topRankedProg.Run(inputlist))
            // {
            //     foreach (var item in result)
            //     {
            //         Console.WriteLine(item);
            //     }
            // }
            IEnumerable <RegionProgram> topRankedPrograms = RegionLearner.Instance.LearnTopK(examples,
                                                                                             RegionLearner.Instance.ScoreFeature,
                                                                                             1,
                                                                                             null,
                                                                                             default(ProgramSamplingStrategy),
                                                                                             null,
                                                                                             new Regex(regexQuantity)).TopPrograms;
            RegionProgram topRankedProg = topRankedPrograms.FirstOrDefault();

            if (topRankedProg == null)
            {
                Console.Error.WriteLine("Error: Learning fails!");
                return;
            }

            foreach (string input in inputlistString)
            {
                StringRegion inputStringRegion = RegionSession.CreateStringRegion(input);
                string       output            = topRankedProg.Run(inputStringRegion)?.Value ?? "null";
                Console.WriteLine("\"{0}\" => \"{1}\"", inputStringRegion, output);
            }
            return;
        }
// Example #18
// 0
            /// <summary>
            ///     Rebuilds <c>_stringRegion</c> from the supplied text. Every "\r\n" sequence is replaced
            ///     with "\n" before the region is created, and the result is then sliced with the
            ///     <c>Start</c> and <c>End</c> members of this instance.
            /// </summary>
            /// <param name="inputString">Text from which the string region is created.</param>
            internal void ReifyFromString(string inputString)
            {
                // Collapse CRLF to LF first.
                // NOTE(review): presumably Start/End were computed against LF-only text - confirm.
                var normalizedText = inputString.Replace("\r\n", "\n");
                var wholeRegion    = RegionSession.CreateStringRegion(normalizedText);

                _stringRegion = wholeRegion.Slice(Start, End);
            }