/// <summary>
/// Applies the uploaded template to the uploaded file and reports the extraction outcome.
/// </summary>
/// <param name="model">Form payload carrying the template (<c>Template</c>) and the text to parse (<c>File</c>).</param>
/// <returns>
/// An OK response whose body holds <c>status</c> (1 when an extraction result was produced, 0 otherwise)
/// and <c>content</c> (the result as JSON text, or a diagnostic message).
/// </returns>
public Task<IActionResult> ExtractData([FromForm] EmailParserRequestModel model)
{
    IExtractionResult result = null;
    string content = "";

    // A missing payload is reported the same way as a missing template instead of
    // throwing a NullReferenceException before the try block is entered.
    if (model?.Template == null)
    {
        content = "No template found!";
    }
    else
    {
        try
        {
            // Both upload streams are disposed on every path, including failures.
            using (var templateStream = model.Template.OpenReadStream())
            using (var fileStream = model.File.OpenReadStream())
            {
                result = this._Extract(templateStream, fileStream);
                if (result != null)
                {
                    content = result.ToJsonString();
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e.StackTrace);

            // NOTE(review): the full exception text is sent back to the client; kept because
            // callers may rely on it, but consider returning a generic message instead.
            content = "Can not extract data \n";
            content += e.ToString();
        }
    }

    // Nothing is awaited here, so the method returns a completed task rather than
    // being declared async (which only produced a CS1998 warning).
    return Task.FromResult<IActionResult>(Ok(new { status = result == null ? 0 : 1, content = content }));
}
/// <summary>
/// TemplateBased extractor sample: parses "input.txt" with the template in "template.xml",
/// prints the template, the input and the JSON result to the console, and exports the
/// extracted users to "ExtractUsersInfo.csv" in the current working directory.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    const string separator = "------------------------------------------------------------------------------------------";
    const string templatePath = @"template.xml";
    const string inputPath = @"input.txt";

    C1.TextParser.LicenseManager.Key = License.Key;

    // The using blocks close both files even when the extractor throws.
    IExtractionResult extractedResult;
    using (FileStream fst = File.OpenRead(templatePath))
    using (FileStream fss = File.OpenRead(inputPath))
    {
        TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
        extractedResult = templateBasedExtractor.Extract(fss);
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
    Console.WriteLine("Test case: From a file containing information about the users of a specific service");
    Console.WriteLine(" extract all the fields related to each user, such as its name, age");
    Console.WriteLine(" residency address, work address and contacts.");
    Console.WriteLine("Detail: A custom format is used to specify the information about an user. The custom data");
    Console.WriteLine(" format described by the xml template is presented below.");
    Console.WriteLine(" Also, the extraction result was exported to \"ExtractUsersInfo.csv\" at the");
    Console.WriteLine(" current working directory");
    Console.WriteLine(separator);
    Console.WriteLine("");

    // File.ReadAllText opens, reads and closes the file in one call, so no handle is left open.
    Console.WriteLine(separator);
    Console.WriteLine("Template:");
    Console.WriteLine(separator);
    Console.WriteLine(File.ReadAllText(templatePath));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(separator);
    Console.WriteLine(File.ReadAllText(inputPath));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    Users t = extractedResult.Get<Users>();
    StringBuilder sb = CsvExportHelper.ExportList(t.User);
    File.WriteAllText("ExtractUsersInfo.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// TemplateBased extractor sample: parses the server log in "input.txt" with the template in
/// "template.xml", prints the input, the template and the JSON result to the console, and
/// exports the extracted error logs to "ExtractErrorLogs.csv" in the current working directory.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    const string separator = "------------------------------------------------------------------------------------------";
    const string templatePath = @"template.xml";
    const string inputPath = @"input.txt";

    C1.TextParser.LicenseManager.Key = License.Key;

    // The using blocks close both files even when the extractor throws.
    IExtractionResult extractedResult;
    using (FileStream fst = File.OpenRead(templatePath))
    using (FileStream fss = File.OpenRead(inputPath))
    {
        TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
        extractedResult = templateBasedExtractor.Extract(fss);
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
    Console.WriteLine("Test case: From a server log file, extract all the ERROR logs");
    Console.WriteLine("Detail: Each log follows a predefined fixed structure, that consists in 4 major elements.");
    Console.WriteLine(" These are: The date, the time (up to ms), the log type and finally, ");
    Console.WriteLine(" the description of the log");
    Console.WriteLine(" The input stream content, the template and also the extracted result");
    Console.WriteLine(" (in Json format) are displayed down below. Also, the extracted result was");
    Console.WriteLine(" exported to \"ExtractErrorLogs.csv\" at the current working directory");
    Console.WriteLine(separator);
    Console.WriteLine("");

    // File.ReadAllText opens, reads and closes the file in one call, so no handle is left open.
    Console.WriteLine(separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(separator);
    Console.WriteLine(File.ReadAllText(inputPath));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("Template:");
    Console.WriteLine(separator);
    Console.WriteLine(File.ReadAllText(templatePath));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    Logs t = extractedResult.Get<Logs>();
    StringBuilder sb = CsvExportHelper.ExportList(t.ErrorLogs);
    File.WriteAllText("ExtractErrorLogs.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// StartsAfterContinuesUntil extractor sample: extracts every span of "input.txt" that starts
/// after "//" and continues until the configured end pattern, prints the input and the JSON
/// result to the console, and exports the result to "ExtractComments.csv" in the current
/// working directory.
/// </summary>
public static void Main()
{
    const string separator = "--------------------------------------------------------------------------------";
    const string inputPath = @"input.txt";

    C1.TextParser.LicenseManager.Key = License.Key;

    StartsAfterContinuesUntil startsAfterContinuesUntil = new StartsAfterContinuesUntil(@"//", @"\r\n");

    // The using block closes the input file even when the extractor throws.
    IExtractionResult extractedResult;
    using (Stream inputStream = File.OpenRead(inputPath))
    {
        extractedResult = startsAfterContinuesUntil.Extract(inputStream);
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - StartsAfterContinuesUntil extractor sample");
    Console.WriteLine("Test case: Extract all the comments from a c++ source file");
    Console.WriteLine("Extractor specification: Starts After // Continues Until \\r\\n");
    Console.WriteLine("Detail: The input stream content, as well as the extracted");
    Console.WriteLine(" result (in Json format) are displayed down below");
    Console.WriteLine(" Also, the extracted result was exported to \"ExtractComments.csv\"");
    Console.WriteLine(" at the current working directory");
    Console.WriteLine(separator);
    Console.WriteLine("");

    // File.ReadAllText opens, reads and closes the file in one call, so no handle is left open.
    Console.WriteLine(separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(separator);
    Console.WriteLine(File.ReadAllText(inputPath));
    Console.WriteLine(separator);
    Console.WriteLine("");

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    MyExtractionResultClass t = extractedResult.Get<MyExtractionResultClass>();
    StringBuilder sb = CsvExportHelper.ExportList(t.Result);
    File.WriteAllText("ExtractComments.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Click handler: runs the template typed in <c>textBox2</c> against the text typed in
/// <c>textBox4</c>, shows the JSON result in <c>textBox5</c>, and reports the outcome in a
/// message box.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (not used).</param>
private void Button2_Click(object sender, EventArgs e)
{
    const string caption = "C1TextParser Winforms Edition";

    // Each text box is copied into an in-memory stream and rewound so it can be read from the start.
    using (Stream templateBuffer = new MemoryStream())
    using (var templateText = new StreamWriter(templateBuffer))
    using (Stream inputBuffer = new MemoryStream())
    using (var inputText = new StreamWriter(inputBuffer))
    {
        templateText.Write(textBox2.Text);
        templateText.Flush();
        templateBuffer.Position = 0;

        inputText.Write(textBox4.Text);
        inputText.Flush();
        inputBuffer.Position = 0;

        try
        {
            var extractor = C1TextParserWrapper.GetTemplateBasedExtractor(templateBuffer);
            IExtractionResult extraction = extractor.Extract(inputBuffer);
            textBox5.Text = extraction.ToJsonString();

            MessageBox.Show(
                "Extraction of the input text acording to the xml template specified succeed!",
                caption,
                MessageBoxButtons.OK,
                MessageBoxIcon.Information);
        }
        catch (Exception ex)
        {
            // Any failure while building the extractor or extracting is reported to the user here.
            MessageBox.Show(
                "Template specification error:\n" + ex.Message,
                caption,
                MessageBoxButtons.OK,
                MessageBoxIcon.Information);
        }
    }
}
/// <summary>
/// Html extractor sample: builds a template from "vietjetairEmail1.html" with seven fixed
/// place holders, extracts them from "vietjetairEmail2.html", prints the JSON result to the
/// console, and exports it to "FlightETicket.csv" in the current working directory.
/// </summary>
public static void Main()
{
    const string separator = "------------------------------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    IExtractionResult extractedResult;

    // Both files are now closed once extraction is done (they were previously never closed).
    // The template stream stays open until after Extract in case the extractor reads it lazily.
    using (Stream vietjetairTemplateStream = File.OpenRead(@"vietjetairEmail1.html"))
    {
        /***********************************************Vietjetair template********************************************/
        HtmlExtractor vietjetairTemplate = new HtmlExtractor(vietjetairTemplateStream);

        // Fixed place holder for the passenger name
        String passengerNameXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[3]/td";
        vietjetairTemplate.AddPlaceHolder("passenger name", passengerNameXPath);

        // Fixed place holder for the booking number
        String bookingNumberXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[1]/tbody/tr/td[2]/span";
        vietjetairTemplate.AddPlaceHolder("booking number", bookingNumberXPath);

        // Fixed place holder for the booking status
        String bookingStatusXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[1]/td[1]";
        vietjetairTemplate.AddPlaceHolder("booking status", bookingStatusXPath);

        // Fixed place holder for the fare type
        String fareTypeXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[3]";
        vietjetairTemplate.AddPlaceHolder("fare type", fareTypeXPath);

        // Fixed place holder for the total amount
        String totalAmountXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[6]/tbody/tr[2]/td/table[2]/tbody/tr[2]/td[3]";
        vietjetairTemplate.AddPlaceHolder("total amount", totalAmountXPath);

        // Fixed place holder for the city of departure
        // NOTE(review): the two numeric arguments presumably select part of the node text - confirm against the HtmlExtractor API.
        String cityOfDepartureXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[4]/text()";
        vietjetairTemplate.AddPlaceHolder("city of departure", cityOfDepartureXPath, 8, 12);

        // Fixed place holder for the year of the booking date
        String yearOfBookingXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[2]/td[1]";
        vietjetairTemplate.AddPlaceHolder("year of booking", yearOfBookingXPath, 6, 4);
        /***************************************************************************************************************/

        using (Stream source = File.OpenRead(@"vietjetairEmail2.html"))
        {
            extractedResult = vietjetairTemplate.Extract(source);
        }
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
    Console.WriteLine("Test case: Test case: From a vietjetair e-ticket extract relevant information about the flight. Note that the");
    Console.WriteLine(" email used as extraction source was modified on purpose (added random text at different locations)");
    Console.WriteLine(" with the intent to show that html extractor is flexible enough to retrieve the intended text.");
    Console.WriteLine("Detail: This consists on seven fixed place holders. These are: the passenger name, the booking number, the");
    Console.WriteLine(" booking status, the fare type, the total amount, the city of departure and, finally, the year of booking");
    Console.WriteLine(" The vietjetair email used as the extraction source is \"vietjetairEmail2.html\" and can be consulted");
    Console.WriteLine(" in the current working directory. Also, \"FlightETicket.csv\" contains the parsing result");
    Console.WriteLine(separator);

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    FlightTicket vietjetairResult = extractedResult.Get<FlightTicket>();
    StringBuilder sb = CsvExportHelper.ExportList(new List<FlightTicket>() { vietjetairResult });
    File.WriteAllText("FlightETicket.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Html extractor sample: builds a template from "amazonEmail1.html" with one repeated block
/// ("ordered articles") and three fixed place holders, extracts them from "amazonEmail2.html",
/// prints the JSON result to the console, and exports it to "ECommerceOrder.csv" in the
/// current working directory.
/// </summary>
public static void Main()
{
    const string separator = "------------------------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    IExtractionResult extractedResult;

    // Both files are now closed once extraction is done (they were previously never closed).
    // The template stream stays open until after Extract in case the extractor reads it lazily.
    using (Stream amazonTemplateStream = File.OpenRead(@"amazonEmail1.html"))
    {
        /**************************************************Amazon template*********************************************/
        HtmlExtractor amazonTemplate = new HtmlExtractor(amazonTemplateStream);

        // Repeated block for each article in the order
        String articleNameXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/a";
        amazonTemplate.AddPlaceHolder("ordered articles", "article name", articleNameXPath);
        String articlePriceXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[3]/strong";
        amazonTemplate.AddPlaceHolder("ordered articles", "article price", articlePriceXPath);
        // NOTE(review): the two numeric arguments presumably select part of the node text - confirm against the HtmlExtractor API.
        String articleSellerXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/span";
        amazonTemplate.AddPlaceHolder("ordered articles", "article seller", articleSellerXPath, 8, 18);

        // Fixed place holder for the expected delivery date
        String deliveryDateXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[3]/td/table/tbody/tr[1]/td[1]/p/strong";
        amazonTemplate.AddPlaceHolder("delivery date", deliveryDateXPath);

        // Fixed place holder for the total amount of the order
        String totalAmountXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[8]/td[2]/strong";
        amazonTemplate.AddPlaceHolder("total order amount", totalAmountXPath);

        // Fixed place holder for the customer name
        String customerNameXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[2]/td/p[1]";
        amazonTemplate.AddPlaceHolder("customer name", customerNameXPath, 6, 15);
        /***************************************************************************************************************/

        using (Stream source = File.OpenRead(@"amazonEmail2.html"))
        {
            extractedResult = amazonTemplate.Extract(source);
        }
    }

    Console.WriteLine(separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
    Console.WriteLine("Test case: From amazon order emails extract relevant information about the order itself.");
    Console.WriteLine(" This sample pretends to demonstrate the repeated place holder extraction capabilities of");
    Console.WriteLine(" C1TextParser - Html extractor");
    Console.WriteLine("Detail: The sample consists on three fixed place holders and one repeated block. The fixed place holders are");
    Console.WriteLine(" the customer name, the order delivery date and also the total amount of the order. The repeated ");
    Console.WriteLine(" block is used to extract each article that appear in the ordered article list. It contains three");
    Console.WriteLine(" repeated place holders. These are: the name, the price and the seller of the article.");
    Console.WriteLine(" The amazon email used as the extraction source is \"amazonEmail2.html\" and can be consulted in the");
    Console.WriteLine(" current working directory. Also, \"ECommerceOrder.csv\" contains the parsing result");
    Console.WriteLine(separator);

    Console.WriteLine(separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(separator);

    AmazonTemplateFixedPlaceHolders amazonTemplateFixedPlaceHolders = extractedResult.Get<AmazonTemplateFixedPlaceHolders>();
    StringBuilder fixedPlaceHoldersCsv = CsvExportHelper.ExportList(new List<AmazonTemplateFixedPlaceHolders>() { amazonTemplateFixedPlaceHolders });

    var amazonTemplateOrderedItems = extractedResult.Get<AmazonTemplateRepeatedBlocks>().OrderedItems;
    StringBuilder orderedItemsCsv = CsvExportHelper.ExportList(amazonTemplateOrderedItems);

    // The CSV file holds the fixed place holders first, then the repeated block, separated by a newline.
    string csv = $"{fixedPlaceHoldersCsv}\n{orderedItemsCsv}";
    File.WriteAllText("ECommerceOrder.csv", csv);

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}