/// <summary>
/// Unzips the archive referenced by the extraction result into <c>TempLocation</c>,
/// passes the unzipped files to <c>Cleaner.CleanToOneMinute</c> and yields one
/// <c>OneMinuteTransformationResult</c> for every key of the binned data.
/// </summary>
/// <param name="extract">Extraction result; it is cast to <c>FileNameExtractionResult</c>.</param>
/// <returns>
/// A lazily evaluated sequence (this is an iterator, so nothing runs until it is enumerated).
/// Every yielded item shares the same list of extracted files.
/// </returns>
public IEnumerable<ITransformationResult> Transform(IExtractionResult<FormatObject> extract)
{
    var archiveExtract = (FileNameExtractionResult)extract;

    // Make sure the scratch directory is there before unzipping into it.
    if (!TempLocation.Exists)
    {
        TempLocation.Create();
    }

    var archive = new FileInfo(archiveExtract.FileFullName);
    var unzipped = new List<FileInfo>();
    foreach (var entry in Compression.UnZip(archive, TempLocation))
    {
        unzipped.Add(entry);
    }

    var oneMinuteBins = Cleaner.CleanToOneMinute(unzipped);
    foreach (var key in oneMinuteBins.Keys)
    {
        var binned = new OneMinuteTransformationResult
        {
            OneMinuteData = oneMinuteBins[key],
            Pair = key,
            ExtractedFiles = unzipped,
        };

        yield return binned;
    }
}
/// <summary>
/// Runs the HTML extractor over the uploaded template/file pair and returns the outcome
/// as <c>{ status, content }</c>.
/// </summary>
/// <param name="model">Form model carrying the uploaded <c>Template</c> and <c>File</c>.</param>
/// <returns>
/// Always an <c>Ok</c> result. <c>status</c> is 1 when an extraction result was produced and 0
/// otherwise; <c>content</c> is the JSON of the result, "No template found!" when no template
/// was uploaded, or an error description when an exception was caught.
/// </returns>
public async Task<IActionResult> ExtractData([FromForm] EmailParserRequestModel model)
{
    // NOTE(review): the method is declared async but contains no await; kept as-is so the
    // signature and exception behaviour seen by callers do not change.
    IExtractionResult result = null;
    string content = "";

    if (model.Template == null)
    {
        content = "No template found!";
    }
    else
    {
        try
        {
            // Dispose both upload streams as soon as the extraction is done; the original
            // code never released them.
            using (var templateStream = model.Template.OpenReadStream())
            using (var sourceStream = model.File.OpenReadStream())
            {
                result = this._Extract(templateStream, sourceStream);
            }

            if (result != null)
            {
                content = result.ToJsonString();
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e.StackTrace);
            content = "Can not extract data \n";
            content += e.ToString();
        }
    }

    return Ok(new { status = result == null ? 0 : 1, content = content });
}
/// <summary>
/// Console demo of the template-based extractor: extracts from "input.txt" using
/// "template.xml", prints a description banner, the template, the input and the JSON
/// result, then writes the extracted user list to "ExtractUsersInfo.csv".
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    const string separator = "------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    // The using statements guarantee both file handles are released even when the
    // extractor throws; they are disposed in the same order the original closed them.
    IExtractionResult extractedResult;
    using (FileStream fst = File.Open(@"template.xml", FileMode.Open))
    using (FileStream fss = File.Open(@"input.txt", FileMode.Open))
    {
        TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
        extractedResult = templateBasedExtractor.Extract(fss);
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
    Console.WriteLine("Test case: From a file containing information about the users of a specific service");
    Console.WriteLine(" extract all the fields related to each user, such as its name, age");
    Console.WriteLine(" residency address, work address and contacts.");
    Console.WriteLine("Detail: A custom format is used to specify the information about an user. The custom data");
    Console.WriteLine(" format described by the xml template is presented below.");
    Console.WriteLine(" Also, the extraction result was exported to \"ExtractUsersInfo.csv\" at the");
    Console.WriteLine(" current working directory");
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("Template:");
    Console.WriteLine(separator);
    // File.ReadAllText opens, reads and closes the file; the previous StreamReader over
    // File.Open was never disposed and kept the handle open.
    Console.WriteLine(File.ReadAllText(@"template.xml"));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(separator);
    Console.WriteLine(File.ReadAllText(@"input.txt"));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    Users t = extractedResult.Get<Users>();
    StringBuilder sb = CsvExportHelper.ExportList(t.User);
    File.WriteAllText("ExtractUsersInfo.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Console demo of the template-based extractor on a server log: extracts from "input.txt"
/// using "template.xml", prints a description banner, the input, the template and the JSON
/// result, then writes the extracted error logs to "ExtractErrorLogs.csv".
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    const string separator = "------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    // The using statements guarantee both file handles are released even when the
    // extractor throws; they are disposed in the same order the original closed them.
    IExtractionResult extractedResult;
    using (FileStream fst = File.Open(@"template.xml", FileMode.Open))
    using (FileStream fss = File.Open(@"input.txt", FileMode.Open))
    {
        TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
        extractedResult = templateBasedExtractor.Extract(fss);
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
    Console.WriteLine("Test case: From a server log file, extract all the ERROR logs");
    Console.WriteLine("Detail: Each log follows a predefined fixed structure, that consists in 4 major elements.");
    Console.WriteLine(" These are: The date, the time (up to ms), the log type and finally, ");
    Console.WriteLine(" the description of the log");
    Console.WriteLine(" The input stream content, the template and also the extracted result");
    Console.WriteLine(" (in Json format) are displayed down below. Also, the extracted result was");
    Console.WriteLine(" exported to \"ExtractErrorLogs.csv\" at the current working directory");
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(separator);
    // File.ReadAllText opens, reads and closes the file; the previous StreamReader over
    // File.Open was never disposed and kept the handle open.
    Console.WriteLine(File.ReadAllText(@"input.txt"));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("Template:");
    Console.WriteLine(separator);
    Console.WriteLine(File.ReadAllText(@"template.xml"));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    Logs t = extractedResult.Get<Logs>();
    StringBuilder sb = CsvExportHelper.ExportList(t.ErrorLogs);
    File.WriteAllText("ExtractErrorLogs.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Loads an <c>HtmlExtractor</c> from <paramref name="templateStream"/> and applies it to
/// <paramref name="source"/>.
/// </summary>
/// <param name="templateStream">Stream the extractor template is loaded from.</param>
/// <param name="source">Stream to extract from.</param>
/// <returns>
/// The extraction result, or <c>null</c> when loading or extracting threw; in that case the
/// exception's stack trace is written to the console.
/// </returns>
private IExtractionResult _Extract(Stream templateStream, Stream source)
{
    try
    {
        HtmlExtractor extractor = HtmlExtractor.Load(templateStream);
        return extractor.Extract(source);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and let the caller treat null as "no result".
        Console.WriteLine(ex.StackTrace);
        return null;
    }
}
/// <summary>
/// Console demo of the StartsAfterContinuesUntil extractor: extracts from "input.txt" every
/// span that starts after "//" and continues until "\r\n", prints a description banner, the
/// input and the JSON result, then writes the extracted items to "ExtractComments.csv".
/// </summary>
public static void Main()
{
    const string separator = "--------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;
    StartsAfterContinuesUntil startsAfterContinuesUntil = new StartsAfterContinuesUntil(@"//", @"\r\n");

    // The using statement guarantees the file handle is released even when the extractor throws.
    IExtractionResult extractedResult;
    using (Stream inputStream = File.Open(@"input.txt", FileMode.Open))
    {
        extractedResult = startsAfterContinuesUntil.Extract(inputStream);
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - StartsAfterContinuesUntil extractor sample");
    Console.WriteLine("Test case: Extract all the comments from a c++ source file");
    Console.WriteLine("Extractor specification: Starts After // Continues Until \\r\\n");
    Console.WriteLine("Detail: The input stream content, as well as the extracted");
    Console.WriteLine(" result (in Json format) are displayed down below");
    Console.WriteLine(" Also, the extracted result was exported to \"ExtractComments.csv\"");
    Console.WriteLine(" at the current working directory");
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(separator);
    // File.ReadAllText opens, reads and closes the file; the previous StreamReader over
    // File.Open was never disposed and kept the handle open.
    Console.WriteLine(File.ReadAllText(@"input.txt"));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    MyExtractionResultClass t = extractedResult.Get<MyExtractionResultClass>();
    StringBuilder sb = CsvExportHelper.ExportList(t.Result);
    File.WriteAllText("ExtractComments.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Extracts the first video track of the multi-track MKV resource and checks that the
/// result reports success and a duration (EndTime - StartTime) above one millisecond.
/// The file referenced by the track is deleted once both assertions have passed.
/// </summary>
public async Task Extract_ResultDetails_True()
{
    IMediaInfo mediaInfo = await MediaInfo.Get(Resources.MKV_Encoded_MultipleTracks);
    var firstVideoTrack = mediaInfo.VideoTracks.First();

    var extraction = Extraction.New().AddTrack(firstVideoTrack);
    IExtractionResult result = await extraction.Start().ConfigureAwait(false);

    var elapsedMilliseconds = (result.EndTime - result.StartTime).TotalMilliseconds;
    Assert.True(elapsedMilliseconds > 1);
    Assert.True(result.Success);

    File.Delete(firstVideoTrack.FileInfo.FullName);
}
/// <summary>
/// Sets the MKV extension on the first video track of the multi-track MKV resource, extracts
/// it into the current directory and checks that the track's file has a ".mkv" extension and
/// exists on disk. The file is deleted once both assertions have passed.
/// </summary>
public async Task Extract_ChangeExtension_True()
{
    IMediaInfo mediaInfo = await MediaInfo.Get(Resources.MKV_Encoded_MultipleTracks);
    var firstVideoTrack = mediaInfo.VideoTracks.First();
    firstVideoTrack.SetExtension(Extension.MKV);

    var extraction = Extraction.New()
        .AddTrack(firstVideoTrack)
        .SetOutputDirectory(Directory.GetCurrentDirectory());
    IExtractionResult result = await extraction.Start().ConfigureAwait(false);

    string outputPath = firstVideoTrack.FileInfo.FullName;
    Assert.Equal(".mkv", Path.GetExtension(outputPath));
    Assert.True(File.Exists(firstVideoTrack.FileInfo.FullName));

    File.Delete(firstVideoTrack.FileInfo.FullName);
}
/// <summary>
/// Builds a StartsAfterContinuesUntil extractor from textBox1/textBox2, runs it over the
/// text of textBox4, lists the extracted items in the grid and reports how many were found.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Button2_Click(object sender, EventArgs e)
{
    StartsAfterContinuesUntil extractor = null;
    try
    {
        extractor = C1TextParserWrapper.GetStartsAfterContinuesUntilExtractor(textBox1.Text, textBox2.Text);
    }
    catch (Exception ex1)
    {
        MessageBox.Show("Regular expression parsing error:\n" + ex1.Message, "C1TextParser Winforms Edition", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    MyExtractionResultClass results;

    // The using statements release the writer and the stream even when extraction throws;
    // previously they were disposed only on the success path.
    using (var plainTextStream = new MemoryStream())
    using (var writer = new StreamWriter(plainTextStream))
    {
        writer.Write(textBox4.Text);
        writer.Flush();
        // Rewind so the extractor reads the text that was just written.
        plainTextStream.Position = 0;

        IExtractionResult extractedResult = extractor.Extract(plainTextStream);
        results = extractedResult.Get<MyExtractionResultClass>();

        // Remove every row except row 0 before adding the new results.
        this.c1FlexGrid1.Rows.RemoveRange(1, this.c1FlexGrid1.Rows.Count - 1);
        foreach (var result in results.Result)
        {
            this.c1FlexGrid1.AddItem(new string[2] { result.Index.ToString(), result.Text });
        }
    }

    MessageBox.Show(String.Format("{0} instance(s) extracted sucessfully from the input source!", results.Result.Count), "C1TextParser Winforms Edition", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
/// <summary>
/// Runs a template-based extraction: textBox2 supplies the XML template, textBox4 the input
/// text. On success the JSON result is shown in textBox5 and a confirmation is displayed;
/// any exception is reported in a message box. Writers and streams are always disposed.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Button2_Click(object sender, EventArgs e)
{
    const string caption = "C1TextParser Winforms Edition";

    // Copy the template text into an in-memory stream and rewind it.
    Stream templateStream = new MemoryStream();
    var templateWriter = new StreamWriter(templateStream);
    templateWriter.Write(textBox2.Text);
    templateWriter.Flush();
    templateStream.Position = 0;

    // Same for the text to extract from.
    Stream plainTextStream = new MemoryStream();
    var sourceWriter = new StreamWriter(plainTextStream);
    sourceWriter.Write(textBox4.Text);
    sourceWriter.Flush();
    plainTextStream.Position = 0;

    try
    {
        var extractor = C1TextParserWrapper.GetTemplateBasedExtractor(templateStream);
        IExtractionResult extraction = extractor.Extract(plainTextStream);
        textBox5.Text = extraction.ToJsonString();

        MessageBox.Show(
            "Extraction of the input text acording to the xml template specified succeed!",
            caption,
            MessageBoxButtons.OK,
            MessageBoxIcon.Information);
    }
    catch (Exception failure)
    {
        MessageBox.Show(
            "Template specification error:\n" + failure.Message,
            caption,
            MessageBoxButtons.OK,
            MessageBoxIcon.Information);
    }
    finally
    {
        templateWriter.Dispose();
        sourceWriter.Dispose();
        templateStream.Dispose();
        plainTextStream.Dispose();
    }
}
/// <summary>
/// Forwards the extraction result to <c>TransformFileExtract</c>.
/// </summary>
/// <param name="extract">
/// Extraction result; it is converted with <c>as</c>, so <c>TransformFileExtract</c> receives
/// <c>null</c> when it is not an <c>IFileNameExtractionResult</c>.
/// </param>
/// <returns>Whatever <c>TransformFileExtract</c> returns.</returns>
public IEnumerable<ITransformationResult> Transform(IExtractionResult<FormatObject> extract)
{
    var fileExtract = extract as IFileNameExtractionResult;
    return TransformFileExtract(fileExtract);
}
/// <summary>
/// Calls <c>ClearPolygons</c> for the extraction result on the bitmap and then
/// <c>CheckColors</c> on the same bitmap with the test colors.
/// </summary>
/// <param name="b">Bitmap passed to both helpers.</param>
/// <param name="result">Extraction result whose polygons are cleared.</param>
/// <param name="testColors">Test colors passed to both helpers.</param>
private void CheckAllExpected(Bitmap b, IExtractionResult result, HashSet<Color> testColors)
{
    // Step 1: clear the polygons found by the extraction.
    ClearPolygons(b, result, testColors);

    // Step 2: run the color check on the bitmap afterwards.
    CheckColors(testColors, b);
}
/// <summary>
/// Calls <c>ClearPolygon</c> once for every polygon of the extraction result.
/// </summary>
/// <param name="b">Bitmap handed to <c>ClearPolygon</c>.</param>
/// <param name="result">Extraction result providing the polygons.</param>
/// <param name="testColors">Test colors handed to <c>ClearPolygon</c>.</param>
private void ClearPolygons(Bitmap b, IExtractionResult result, HashSet<Color> testColors)
{
    var polygons = result.Polygons;
    foreach (var shape in polygons)
    {
        ClearPolygon(b, shape, testColors);
    }
}
/// <summary>
/// Asserts that every extracted polygon maps (via <c>checkPolygonColor</c>) to a color that
/// is one of the test colors and that no two polygons map to the same color.
/// </summary>
/// <param name="result">Extraction result providing the polygons.</param>
/// <param name="expected">Bitmap passed to <c>checkPolygonColor</c>.</param>
/// <param name="testColor">Set of colors a polygon is allowed to map to.</param>
private void checkExtractedInExpected(IExtractionResult result, Bitmap expected, HashSet<Color> testColor)
{
    var seenColors = new HashSet<Color>();

    foreach (IPolygon shape in result.Polygons)
    {
        Color shapeColor = checkPolygonColor(expected, shape);

        // Duplicate check first, membership check second.
        bool alreadySeen = seenColors.Contains(shapeColor);
        Assert.IsFalse(alreadySeen, "Many polygons for the same cell");

        bool isTestColor = testColor.Contains(shapeColor);
        Assert.IsTrue(isTestColor, "Polygon outside any test color");

        seenColors.Add(shapeColor);
    }
}
/// <summary>
/// Console demo of the Html extractor on a flight e-ticket: builds a template from
/// "vietjetairEmail1.html" with seven fixed place holders, extracts from
/// "vietjetairEmail2.html", prints a description banner and the JSON result, then writes the
/// extracted ticket to "FlightETicket.csv".
/// </summary>
public static void Main()
{
    const string separator = "------------------------------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    IExtractionResult extractedResult;

    /***********************************************Vietjetair template********************************************/
    // Both files were previously opened and never closed. The template stream is kept open
    // until the extraction has finished, then both handles are released.
    using (Stream vietjetairTemplateStream = File.Open(@"vietjetairEmail1.html", FileMode.Open))
    {
        HtmlExtractor vietjetairTemplate = new HtmlExtractor(vietjetairTemplateStream);

        //Fixed placeHolder for the passenger name
        String passengerNameXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[3]/td";
        vietjetairTemplate.AddPlaceHolder("passenger name", passengerNameXPath);

        //Fixed placeHolder for the booking number
        String bookingNumberXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[1]/tbody/tr/td[2]/span";
        vietjetairTemplate.AddPlaceHolder("booking number", bookingNumberXPath);

        //Fixed placeHolder for the booking status
        String bookingStatusXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[1]/td[1]";
        vietjetairTemplate.AddPlaceHolder("booking status", bookingStatusXPath);

        //Fixed placeHolder for the fare type
        String fareTypeXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[3]";
        vietjetairTemplate.AddPlaceHolder("fare type", fareTypeXPath);

        //Fixed placeHolder for total amount
        String totalAmountXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[6]/tbody/tr[2]/td/table[2]/tbody/tr[2]/td[3]";
        vietjetairTemplate.AddPlaceHolder("total amount", totalAmountXPath);

        //Fixed placeHolder for city of departure
        String cityOfDepartureXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[4]/text()";
        vietjetairTemplate.AddPlaceHolder("city of departure", cityOfDepartureXPath, 8, 12);

        //Fixed placeHolder for year of booking date
        String yearOfBookingXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[2]/td[1]";
        vietjetairTemplate.AddPlaceHolder("year of booking", yearOfBookingXPath, 6, 4);
        /***************************************************************************************************************/

        using (Stream source = File.Open(@"vietjetairEmail2.html", FileMode.Open))
        {
            extractedResult = vietjetairTemplate.Extract(source);
        }
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
    Console.WriteLine("Test case: Test case: From a vietjetair e-ticket extract relevant information about the flight. Note that the");
    Console.WriteLine(" email used as extraction source was modified on purpose (added random text at different locations)");
    Console.WriteLine(" with the intent to show that html extractor is flexible enough to retrieve the intended text.");
    Console.WriteLine("Detail: This consists on seven fixed place holders. These are: the passenger name, the booking number, the");
    Console.WriteLine(" booking status, the fare type, the total amount, the city of departure and, finally, the year of booking");
    Console.WriteLine(" The vietjetair email used as the extraction source is \"vietjetairEmail2.html\" and can be consulted");
    Console.WriteLine(" in the current working directory. Also, \"FlightETicket.csv\" contains the parsing result");
    Console.WriteLine(separator);

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    FlightTicket vietjetairResult = extractedResult.Get<FlightTicket>();
    StringBuilder sb = CsvExportHelper.ExportList(new List<FlightTicket>() { vietjetairResult });
    File.WriteAllText("FlightETicket.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Console demo of the Html extractor on an order e-mail: builds a template from
/// "amazonEmail1.html" with one repeated block ("ordered articles") and three fixed place
/// holders, extracts from "amazonEmail2.html", prints a description banner and the JSON
/// result, then writes the fixed place holders followed by the ordered items to
/// "ECommerceOrder.csv".
/// </summary>
public static void Main()
{
    const string separator = "------------------------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    IExtractionResult extractedResult;

    /**************************************************Amazon template*********************************************/
    // Both files were previously opened and never closed. The template stream is kept open
    // until the extraction has finished, then both handles are released.
    using (Stream amazonTemplateStream = File.Open(@"amazonEmail1.html", FileMode.Open))
    {
        HtmlExtractor amazonTemplate = new HtmlExtractor(amazonTemplateStream);

        //Repeated block for each article in the order
        String articleNameXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/a";
        amazonTemplate.AddPlaceHolder("ordered articles", "article name", articleNameXPath);
        String articlePriceXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[3]/strong";
        amazonTemplate.AddPlaceHolder("ordered articles", "article price", articlePriceXPath);
        String articleSellerXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/span";
        amazonTemplate.AddPlaceHolder("ordered articles", "article seller", articleSellerXPath, 8, 18);

        //Fixed placeHolder for the expected delivery date
        String deliveryDateXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[3]/td/table/tbody/tr[1]/td[1]/p/strong";
        amazonTemplate.AddPlaceHolder("delivery date", deliveryDateXPath);

        //Fixed placeHolder for the total amount of the order
        String totalAmountXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[8]/td[2]/strong";
        amazonTemplate.AddPlaceHolder("total order amount", totalAmountXPath);

        //Fixed placeHolder for the customer name
        String customerNameXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[2]/td/p[1]";
        amazonTemplate.AddPlaceHolder("customer name", customerNameXPath, 6, 15);
        /***************************************************************************************************************/

        using (Stream source = File.Open(@"amazonEmail2.html", FileMode.Open))
        {
            extractedResult = amazonTemplate.Extract(source);
        }
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
    Console.WriteLine("Test case: From amazon order emails extract relevant information about the order itself.");
    Console.WriteLine(" This sample pretends to demonstrate the repeated place holder extraction capabilities of");
    Console.WriteLine(" C1TextParser - Html extractor");
    Console.WriteLine("Detail: The sample consists on three fixed place holders and one repeated block. The fixed place holders are");
    Console.WriteLine(" the customer name, the order delivery date and also the total amount of the order. The repeated ");
    Console.WriteLine(" block is used to extract each article that appear in the ordered article list. It contains three");
    Console.WriteLine(" repeated place holders. These are: the name, the price and the seller of the article.");
    Console.WriteLine(" The amazon email used as the extraction source is \"amazonEmail2.html\" and can be consulted in the");
    Console.WriteLine(" current working directory. Also, \"ECommerceOrder.csv\" contains the parsing result");
    Console.WriteLine(separator);

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    AmazonTemplateFixedPlaceHolders amazonTemplateFixedPlaceHolders = extractedResult.Get<AmazonTemplateFixedPlaceHolders>();
    StringBuilder sb1 = CsvExportHelper.ExportList(new List<AmazonTemplateFixedPlaceHolders>() { amazonTemplateFixedPlaceHolders });
    var amazonTemplateOrderedItems = extractedResult.Get<AmazonTemplateRepeatedBlocks>().OrderedItems;
    StringBuilder sb2 = CsvExportHelper.ExportList(amazonTemplateOrderedItems);

    // String concatenation turns both builders into text: fixed place holders first, then
    // a newline, then the ordered items.
    var sb3 = sb1 + "\n" + sb2;
    File.WriteAllText("ECommerceOrder.csv", sb3);

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}