Exemplo n.º 1
0
        /// <summary>
        /// Unzips the file named by the given extraction result into <c>TempLocation</c>, passes the
        /// unzipped files to <c>Cleaner.CleanToOneMinute</c>, and yields one
        /// <c>OneMinuteTransformationResult</c> per key of the value it returns.
        /// </summary>
        /// <param name="extract">Extraction result; it is cast to <c>FileNameExtractionResult</c>.</param>
        /// <returns>A lazily produced sequence with one result per key.</returns>
        public IEnumerable <ITransformationResult> Transform(IExtractionResult <FormatObject> extract)
        {
            var source = (FileNameExtractionResult)extract;

            // Make sure the unzip target directory is there before extracting into it.
            if (TempLocation.Exists == false)
            {
                TempLocation.Create();
            }

            var archive  = new FileInfo(source.FileFullName);
            var unpacked = new List <FileInfo>(Compression.UnZip(archive, TempLocation));

            var binned = Cleaner.CleanToOneMinute(unpacked);

            foreach (var key in binned.Keys)
            {
                // Every yielded result shares the same list of unzipped files.
                var item = new OneMinuteTransformationResult
                {
                    OneMinuteData  = binned[key],
                    Pair           = key,
                    ExtractedFiles = unpacked,
                };

                yield return item;
            }
        }
        /// <summary>
        /// Runs the uploaded template against the uploaded file and returns the outcome as an
        /// <c>Ok</c> payload of the form <c>{ status, content }</c>.
        /// </summary>
        /// <param name="model">Form model carrying the <c>Template</c> and <c>File</c> uploads.</param>
        /// <returns>
        /// <c>status</c> is 1 when an extraction result was produced and 0 otherwise; <c>content</c> is
        /// the JSON string of the result, or an error description.
        /// </returns>
        public async Task <IActionResult> ExtractData([FromForm] EmailParserRequestModel model)
        {
            IExtractionResult result  = null;
            string            content = "";

            if (model.Template == null)
            {
                content = "No template found!";
            }
            try
            {
                if (model.Template != null)
                {
                    // Both upload streams are disposed when the block exits, including the case
                    // where opening the second stream or the extraction itself throws.
                    using (var templateStream = model.Template.OpenReadStream())
                    using (var sourceStream = model.File.OpenReadStream())
                    {
                        result = this._Extract(templateStream, sourceStream);
                        if (result != null)
                        {
                            // Serialize while the streams are still open.
                            content = result.ToJsonString();
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.StackTrace);
                content  = "Can not extract data \n";
                content += e.ToString();
            }

            return(Ok(new { status = result == null ? 0 : 1, content = content }));
        }
Exemplo n.º 3
0
        /// <summary>
        /// Console demo: extracts data from "input.txt" using the template in "template.xml", prints
        /// the template, the input and the JSON result, and writes the extracted users to
        /// "ExtractUsersInfo.csv".
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            C1.TextParser.LicenseManager.Key = License.Key;

            IExtractionResult extractedResult;

            // The using blocks release both files even when the extractor throws.
            using (FileStream fst = File.Open(@"template.xml", FileMode.Open))
            using (FileStream fss = File.Open(@"input.txt", FileMode.Open))
            {
                TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
                extractedResult = templateBasedExtractor.Extract(fss);
            }

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("GrapeCity, inc, all rights reserved");
            Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
            Console.WriteLine("Test case: From a file containing information about the users of a specific service");
            Console.WriteLine("           extract all the fields related to each user, such as its name, age");
            Console.WriteLine("           residency address, work address and contacts.");
            Console.WriteLine("Detail: A custom format is used to specify the information about an user. The custom data");
            Console.WriteLine("        format described by the xml template is presented below.");
            Console.WriteLine("        Also, the extraction result was exported to \"ExtractUsersInfo.csv\" at the");
            Console.WriteLine("        current working directory");
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("Template:");
            Console.WriteLine("------------------------------------------------------------------------------------------");
            // File.ReadAllText closes the file; the former StreamReader over File.Open was never disposed.
            Console.WriteLine(File.ReadAllText(@"template.xml"));
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("Input stream:");
            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine(File.ReadAllText(@"input.txt"));
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("JSon String result:");
            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine(extractedResult.ToJsonString());
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Users         t  = extractedResult.Get <Users>();
            StringBuilder sb = CsvExportHelper.ExportList(t.User);

            File.WriteAllText("ExtractUsersInfo.csv", sb.ToString());

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
        /// <summary>
        /// Console demo: extracts data from "input.txt" using the template in "template.xml", prints
        /// the input, the template and the JSON result, and writes the extracted error logs to
        /// "ExtractErrorLogs.csv".
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args)
        {
            C1.TextParser.LicenseManager.Key = License.Key;

            IExtractionResult extractedResult;

            // The using blocks release both files even when the extractor throws.
            using (FileStream fst = File.Open(@"template.xml", FileMode.Open))
            using (FileStream fss = File.Open(@"input.txt", FileMode.Open))
            {
                TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
                extractedResult = templateBasedExtractor.Extract(fss);
            }

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("GrapeCity, inc, all rights reserved");
            Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
            Console.WriteLine("Test case: From a server log file, extract all the ERROR logs");
            Console.WriteLine("Detail: Each log follows a predefined fixed structure, that consists in 4 major elements.");
            Console.WriteLine("        These are: The date, the time (up to ms), the log type and finally, ");
            Console.WriteLine("        the description of the log");
            Console.WriteLine("        The input stream content, the template and also the extracted result");
            Console.WriteLine("        (in Json format) are displayed down below. Also, the extracted result was");
            Console.WriteLine("        exported to \"ExtractErrorLogs.csv\" at the current working directory");
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("Input stream:");
            Console.WriteLine("------------------------------------------------------------------------------------------");
            // File.ReadAllText closes the file; the former StreamReader over File.Open was never disposed.
            Console.WriteLine(File.ReadAllText(@"input.txt"));
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("Template:");
            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine(File.ReadAllText(@"template.xml"));
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine("JSon String result:");
            Console.WriteLine("------------------------------------------------------------------------------------------");
            Console.WriteLine(extractedResult.ToJsonString());
            Console.WriteLine("------------------------------------------------------------------------------------------");

            Logs          t  = extractedResult.Get <Logs>();
            StringBuilder sb = CsvExportHelper.ExportList(t.ErrorLogs);

            File.WriteAllText("ExtractErrorLogs.csv", sb.ToString());

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
        /// <summary>
        /// Loads an <c>HtmlExtractor</c> from <paramref name="templateStream"/> and runs it over
        /// <paramref name="source"/>.
        /// </summary>
        /// <param name="templateStream">Stream the extractor is loaded from.</param>
        /// <param name="source">Stream the extractor is run against.</param>
        /// <returns>The extraction result, or <c>null</c> when loading or extracting throws.</returns>
        private IExtractionResult _Extract(Stream templateStream, Stream source)
        {
            try
            {
                HtmlExtractor extractor = HtmlExtractor.Load(templateStream);
                return(extractor.Extract(source));
            }
            catch (Exception e)
            {
                // Log the whole exception (type, message and stack trace); the stack trace alone
                // does not say what actually went wrong. Failure is reported to the caller as null.
                Console.WriteLine(e);
                return(null);
            }
        }
        /// <summary>
        /// Console demo: runs a <c>StartsAfterContinuesUntil</c> extractor (starts after "//",
        /// continues until "\r\n") over "input.txt", prints the input and the JSON result, and writes
        /// the extracted items to "ExtractComments.csv".
        /// </summary>
        public static void Main()
        {
            C1.TextParser.LicenseManager.Key = License.Key;

            StartsAfterContinuesUntil startsAfterContinuesUntil = new StartsAfterContinuesUntil(@"//", @"\r\n");
            IExtractionResult         extractedResult;

            // The using block releases the input file even when the extractor throws.
            using (Stream inputStream = File.Open(@"input.txt", FileMode.Open))
            {
                extractedResult = startsAfterContinuesUntil.Extract(inputStream);
            }

            Console.WriteLine("--------------------------------------------------------------------------------");
            Console.WriteLine("GrapeCity, inc, all rights reserved");
            Console.WriteLine("Demo of the C1TextParser library - StartsAfterContinuesUntil extractor sample");
            Console.WriteLine("Test case: Extract all the comments from a c++ source file");
            Console.WriteLine("Extractor specification: Starts After // Continues Until \\r\\n");
            Console.WriteLine("Detail: The input stream content, as well as the extracted");
            Console.WriteLine("        result (in Json format) are displayed down below");
            Console.WriteLine("        Also, the extracted result was exported to \"ExtractComments.csv\"");
            Console.WriteLine("        at the current working directory");
            Console.WriteLine("--------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("--------------------------------------------------------------------------------");
            Console.WriteLine("Input stream:");
            Console.WriteLine("--------------------------------------------------------------------------------");
            // File.ReadAllText closes the file; the former StreamReader over File.Open was never disposed.
            Console.WriteLine(File.ReadAllText(@"input.txt"));
            Console.WriteLine("--------------------------------------------------------------------------------");

            Console.WriteLine("");

            Console.WriteLine("--------------------------------------------------------------------------------");
            Console.WriteLine("JSon String result:");
            Console.WriteLine("--------------------------------------------------------------------------------");
            Console.WriteLine(extractedResult.ToJsonString());
            Console.WriteLine("--------------------------------------------------------------------------------");

            MyExtractionResultClass t  = extractedResult.Get <MyExtractionResultClass>();
            StringBuilder           sb = CsvExportHelper.ExportList(t.Result);

            File.WriteAllText("ExtractComments.csv", sb.ToString());

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Extracts the first video track of the multi-track MKV resource and checks that the result
        /// reports success and a duration (EndTime - StartTime) above one millisecond.
        /// </summary>
        public async Task Extract_ResultDetails_True()
        {
            IMediaInfo info = await MediaInfo.Get(Resources.MKV_Encoded_MultipleTracks);

            var video_track = info.VideoTracks.First();

            IExtractionResult result = await Extraction.New()
                                       .AddTrack(video_track)
                                       .Start()
                                       .ConfigureAwait(false);

            try
            {
                var duration = result.EndTime - result.StartTime;

                Assert.True(duration.TotalMilliseconds > 1);
                Assert.True(result.Success);
            }
            finally
            {
                // Delete the track file even when an assertion fails, so a failing run does not
                // leave it on disk. Only reached after Start() has completed, as before.
                File.Delete(video_track.FileInfo.FullName);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Sets the first video track's extension to MKV, extracts it into the current directory, and
        /// checks that the track's file has a ".mkv" extension and exists on disk.
        /// </summary>
        public async Task Extract_ChangeExtension_True()
        {
            IMediaInfo info = await MediaInfo.Get(Resources.MKV_Encoded_MultipleTracks);

            var video_track = info.VideoTracks.First();

            video_track.SetExtension(Extension.MKV);

            // The extraction result itself is not inspected by this test.
            await Extraction.New()
            .AddTrack(video_track)
            .SetOutputDirectory(Directory.GetCurrentDirectory())
            .Start()
            .ConfigureAwait(false);

            try
            {
                Assert.Equal(".mkv", Path.GetExtension(video_track.FileInfo.FullName));
                Assert.True(File.Exists(video_track.FileInfo.FullName));
            }
            finally
            {
                // Delete the track file even when an assertion fails, so a failing run does not
                // leave it on disk. Only reached after Start() has completed, as before.
                File.Delete(video_track.FileInfo.FullName);
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Click handler: builds a <c>StartsAfterContinuesUntil</c> extractor from textBox1/textBox2,
        /// runs it over the text of textBox4, and lists the extracted items in the grid.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Button2_Click(object sender, EventArgs e)
        {
            StartsAfterContinuesUntil extractor = null;

            try
            {
                extractor = C1TextParserWrapper.GetStartsAfterContinuesUntilExtractor(textBox1.Text, textBox2.Text);
            }
            catch (Exception ex1)
            {
                MessageBox.Show("Regular expression parsing error:\n" + ex1.Message, "C1TextParser Winforms Edition", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            MyExtractionResultClass results;

            // The using blocks dispose the writer and the stream even when extraction throws.
            using (var plainTextStream = new MemoryStream())
            using (var writer = new StreamWriter(plainTextStream))
            {
                writer.Write(textBox4.Text);
                writer.Flush();
                // Rewind so the extractor reads from the start of the text just written.
                plainTextStream.Position = 0;

                IExtractionResult extractedResult = extractor.Extract(plainTextStream);
                results = extractedResult.Get <MyExtractionResultClass>();

                // Remove all rows from index 1 onward before adding the new results.
                this.c1FlexGrid1.Rows.RemoveRange(1, this.c1FlexGrid1.Rows.Count - 1);
                foreach (var result in results.Result)
                {
                    this.c1FlexGrid1.AddItem(new string[2] {
                        result.Index.ToString(), result.Text
                    });
                }
            }

            MessageBox.Show(String.Format("{0} instance(s) extracted sucessfully from the input source!", results.Result.Count), "C1TextParser Winforms Edition", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Click handler: builds a template based extractor from the text of textBox2, runs it over
        /// the text of textBox4, and shows the JSON result in textBox5.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Button2_Click(object sender, EventArgs e)
        {
            // The using blocks dispose all four objects on every exit path.
            using (Stream templateStream = new MemoryStream())
            using (var templateWriter = new StreamWriter(templateStream))
            using (Stream plainTextStream = new MemoryStream())
            using (var sourceWriter = new StreamWriter(plainTextStream))
            {
                templateWriter.Write(textBox2.Text);
                templateWriter.Flush();
                // Rewind so the extractor reads the template from its start.
                templateStream.Position = 0;

                sourceWriter.Write(textBox4.Text);
                sourceWriter.Flush();
                plainTextStream.Position = 0;

                try
                {
                    var extractor = C1TextParserWrapper.GetTemplateBasedExtractor(templateStream);
                    IExtractionResult extractedResult = extractor.Extract(plainTextStream);
                    var results = extractedResult.ToJsonString();

                    textBox5.Text = results;
                    MessageBox.Show("Extraction of the input text acording to the xml template specified succeed!", "C1TextParser Winforms Edition", MessageBoxButtons.OK, MessageBoxIcon.Information);
                }
                catch (Exception ex)
                {
                    // A failure is reported with the error icon rather than the information icon.
                    MessageBox.Show("Template specification error:\n" + ex.Message, "C1TextParser Winforms Edition", MessageBoxButtons.OK, MessageBoxIcon.Error);
                }
            }
        }
Exemplo n.º 11
0
 /// <summary>
 /// Forwards the extraction result, viewed as an <c>IFileNameExtractionResult</c>, to
 /// <c>TransformFileExtract</c> and returns whatever it produces.
 /// </summary>
 /// <param name="extract">Extraction result to transform.</param>
 public IEnumerable <ITransformationResult> Transform(IExtractionResult <FormatObject> extract)
 {
     // The "as" conversion yields null when extract is not an IFileNameExtractionResult.
     var fileExtract = extract as IFileNameExtractionResult;

     return(TransformFileExtract(fileExtract));
 }
Exemplo n.º 12
0
 /// <summary>
 /// Runs <c>ClearPolygons</c> on the bitmap with the extraction result and test colors, then
 /// runs <c>CheckColors</c> on the test colors and the bitmap.
 /// </summary>
 /// <param name="b">Bitmap passed to both helpers.</param>
 /// <param name="result">Extraction result passed to <c>ClearPolygons</c>.</param>
 /// <param name="testColors">Color set passed to both helpers.</param>
 private void CheckAllExpected(Bitmap b, IExtractionResult result, HashSet<Color> testColors)
 {
     // NOTE(review): presumably ClearPolygons erases the extracted polygons from the bitmap and
     // may update testColors, so the order of these two calls matters - confirm against the helpers.
     ClearPolygons(b, result, testColors);
     CheckColors(testColors, b);
 }
Exemplo n.º 13
0
 /// <summary>
 /// Calls <c>ClearPolygon</c> once for every polygon in <c>result.Polygons</c>, in enumeration order.
 /// </summary>
 /// <param name="b">Bitmap handed to each <c>ClearPolygon</c> call.</param>
 /// <param name="result">Extraction result whose polygons are visited.</param>
 /// <param name="testColors">Color set handed to each <c>ClearPolygon</c> call.</param>
 private void ClearPolygons(Bitmap b, IExtractionResult result, HashSet<Color> testColors)
 {
     var polygons = result.Polygons;

     foreach (var shape in polygons)
     {
         ClearPolygon(b, shape, testColors);
     }
 }
Exemplo n.º 14
0
        /// <summary>
        /// Asserts that each polygon in <c>result.Polygons</c> maps, via <c>checkPolygonColor</c>, to
        /// a color that no earlier polygon mapped to and that is a member of <paramref name="testColor"/>.
        /// </summary>
        /// <param name="result">Extraction result whose polygons are checked.</param>
        /// <param name="expected">Bitmap handed to <c>checkPolygonColor</c>.</param>
        /// <param name="testColor">Set of colors a polygon is allowed to map to.</param>
        private void checkExtractedInExpected(IExtractionResult result, Bitmap expected, HashSet<Color> testColor)
        {
            var seenColors = new HashSet<Color>();

            foreach (IPolygon shape in result.Polygons)
            {
                Color shapeColor = checkPolygonColor(expected, shape);

                // HashSet.Add returns false when the color is already in the set.
                bool alreadySeen = !seenColors.Add(shapeColor);
                Assert.IsFalse(alreadySeen, "Many polygons for the same cell");

                bool isTestColor = testColor.Contains(shapeColor);
                Assert.IsTrue(isTestColor, "Polygon outside any test color");
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Console demo: builds an <c>HtmlExtractor</c> template from "vietjetairEmail1.html" with
        /// seven fixed placeholders, runs it over "vietjetairEmail2.html", prints the JSON result and
        /// writes it to "FlightETicket.csv".
        /// </summary>
        public static void Main()
        {
            C1.TextParser.LicenseManager.Key = License.Key;

            /***********************************************Vietjetair template********************************************/
            // Both file streams are disposed once the result has been printed and exported;
            // they were previously never closed.
            using (Stream vietjetairTemplateStream = File.Open(@"vietjetairEmail1.html", FileMode.Open))
            {
                HtmlExtractor vietjetairTemplate = new HtmlExtractor(vietjetairTemplateStream);

                //Fixed placeHolder for the passenger name
                String passengerNameXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[3]/td";

                vietjetairTemplate.AddPlaceHolder("passenger name", passengerNameXPath);

                //Fixed placeHolder for the booking number
                String bookingNumberXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[1]/tbody/tr/td[2]/span";

                vietjetairTemplate.AddPlaceHolder("booking number", bookingNumberXPath);

                //Fixed placeHolder for the booking status
                String bookingStatusXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[1]/td[1]";

                vietjetairTemplate.AddPlaceHolder("booking status", bookingStatusXPath);

                //Fixed placeHolder for the fare type
                String fareTypeXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[3]";

                vietjetairTemplate.AddPlaceHolder("fare type", fareTypeXPath);

                //Fixed placeHolder for total amount
                String totalAmountXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[6]/tbody/tr[2]/td/table[2]/tbody/tr[2]/td[3]";

                vietjetairTemplate.AddPlaceHolder("total amount", totalAmountXPath);

                //Fixed placeHolder for city of departure
                String cityOfDepartureXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[4]/text()";

                vietjetairTemplate.AddPlaceHolder("city of departure", cityOfDepartureXPath, 8, 12);

                //Fixed placeHolder for year of booking date
                String yearOfBookingXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[2]/td[1]";

                vietjetairTemplate.AddPlaceHolder("year of booking", yearOfBookingXPath, 6, 4);
                /***************************************************************************************************************/

                using (Stream source = File.Open(@"vietjetairEmail2.html", FileMode.Open))
                {
                    IExtractionResult extractedResult = vietjetairTemplate.Extract(source);

                    Console.WriteLine("------------------------------------------------------------------------------------------------------------------");
                    Console.WriteLine("GrapeCity, inc, all rights reserved");
                    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
                    Console.WriteLine("Test case: Test case: From a vietjetair e-ticket extract relevant information about the flight. Note that the");
                    Console.WriteLine("           email used as extraction source was modified on purpose (added random text at different locations)");
                    Console.WriteLine("           with the intent to show that html extractor is flexible enough to retrieve the intended text.");
                    Console.WriteLine("Detail: This consists on seven fixed place holders. These are: the passenger name, the booking number, the");
                    Console.WriteLine("        booking status, the fare type, the total amount, the city of departure and, finally, the year of booking");
                    Console.WriteLine("        The vietjetair email used as the extraction source is \"vietjetairEmail2.html\" and can be consulted");
                    Console.WriteLine("        in the current working directory. Also, \"FlightETicket.csv\" contains the parsing result");
                    Console.WriteLine("------------------------------------------------------------------------------------------------------------------");

                    Console.WriteLine("------------------------------------------------------------------------------------------------------------------");
                    Console.WriteLine("JSon String result:");
                    Console.WriteLine("------------------------------------------------------------------------------------------------------------------");
                    Console.WriteLine(extractedResult.ToJsonString());
                    Console.WriteLine("------------------------------------------------------------------------------------------------------------------");

                    FlightTicket  vietjetairResult = extractedResult.Get <FlightTicket>();
                    StringBuilder sb = CsvExportHelper.ExportList(new List <FlightTicket>()
                    {
                        vietjetairResult
                    });

                    File.WriteAllText("FlightETicket.csv", sb.ToString());
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
        /// <summary>
        /// Console demo: builds an <c>HtmlExtractor</c> template from "amazonEmail1.html" with three
        /// fixed placeholders and one repeated block ("ordered articles"), runs it over
        /// "amazonEmail2.html", prints the JSON result and writes it to "ECommerceOrder.csv".
        /// </summary>
        public static void Main()
        {
            C1.TextParser.LicenseManager.Key = License.Key;

            /**************************************************Amazon template*********************************************/
            // Both file streams are disposed once the result has been printed and exported;
            // they were previously never closed.
            using (Stream amazonTemplateStream = File.Open(@"amazonEmail1.html", FileMode.Open))
            {
                HtmlExtractor amazonTemplate = new HtmlExtractor(amazonTemplateStream);

                //Repeated block for each article in the order
                String articleNameXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/a";

                amazonTemplate.AddPlaceHolder("ordered articles", "article name", articleNameXPath);
                String articlePriceXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[3]/strong";

                amazonTemplate.AddPlaceHolder("ordered articles", "article price", articlePriceXPath);
                String articleSellerXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/span";

                amazonTemplate.AddPlaceHolder("ordered articles", "article seller", articleSellerXPath, 8, 18);

                //Fixed placeHolder for the expected delivery date
                String deliveryDateXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[3]/td/table/tbody/tr[1]/td[1]/p/strong";

                amazonTemplate.AddPlaceHolder("delivery date", deliveryDateXPath);

                //Fixed placeHolder for the total amount of the order
                String totalAmountXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[8]/td[2]/strong";

                amazonTemplate.AddPlaceHolder("total order amount", totalAmountXPath);

                //Fixed placeHolder for the customer name
                String customerNameXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[2]/td/p[1]";

                amazonTemplate.AddPlaceHolder("customer name", customerNameXPath, 6, 15);
                /***************************************************************************************************************/

                using (Stream source = File.Open(@"amazonEmail2.html", FileMode.Open))
                {
                    IExtractionResult extractedResult = amazonTemplate.Extract(source);

                    Console.WriteLine("------------------------------------------------------------------------------------------------------------");
                    Console.WriteLine("GrapeCity, inc, all rights reserved");
                    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
                    Console.WriteLine("Test case: From amazon order emails extract relevant information about the order itself.");
                    Console.WriteLine("           This sample pretends to demonstrate the repeated place holder extraction capabilities of");
                    Console.WriteLine("           C1TextParser - Html extractor");
                    Console.WriteLine("Detail: The sample consists on three fixed place holders and one repeated block. The fixed place holders are");
                    Console.WriteLine("        the customer name, the order delivery date and also the total amount of the order. The repeated ");
                    Console.WriteLine("        block is used to extract each article that appear in the ordered article list. It contains three");
                    Console.WriteLine("        repeated place holders. These are: the name, the price and the seller of the article.");
                    Console.WriteLine("        The amazon email used as the extraction source is \"amazonEmail2.html\" and can be consulted in the");
                    Console.WriteLine("        current working directory. Also, \"ECommerceOrder.csv\" contains the parsing result");
                    Console.WriteLine("------------------------------------------------------------------------------------------------------------");

                    Console.WriteLine("------------------------------------------------------------------------------------------------------------");
                    Console.WriteLine("JSon String result:");
                    Console.WriteLine("------------------------------------------------------------------------------------------------------------");
                    Console.WriteLine(extractedResult.ToJsonString());
                    Console.WriteLine("------------------------------------------------------------------------------------------------------------");

                    AmazonTemplateFixedPlaceHolders amazonTemplateFixedPlaceHolders = extractedResult.Get <AmazonTemplateFixedPlaceHolders>();
                    StringBuilder sb1 = CsvExportHelper.ExportList(new List <AmazonTemplateFixedPlaceHolders>()
                    {
                        amazonTemplateFixedPlaceHolders
                    });
                    var           amazonTemplateOrderedItems = extractedResult.Get <AmazonTemplateRepeatedBlocks>().OrderedItems;
                    StringBuilder sb2 = CsvExportHelper.ExportList(amazonTemplateOrderedItems);
                    // Fixed placeholders first, then the repeated items, separated by a newline.
                    var           sb3 = sb1 + "\n" + sb2;

                    File.WriteAllText("ECommerceOrder.csv", sb3);
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }