/// <summary>
/// Entry point of the TemplateBased extractor sample that extracts user records.
/// Builds a <c>TemplateBasedExtractor</c> from "template.xml", runs it on "input.txt",
/// prints a description banner, the template, the input and the JSON result to the
/// console, writes the extracted users to "ExtractUsersInfo.csv" and then waits for
/// a line of console input before returning.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    const string Separator = "------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    // The using blocks guarantee both files are released even when the
    // extractor constructor or Extract throws.
    IExtractionResult extractedResult;
    using (FileStream fst = File.Open(@"template.xml", FileMode.Open))
    using (FileStream fss = File.Open(@"input.txt", FileMode.Open))
    {
        TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
        extractedResult = templateBasedExtractor.Extract(fss);
    }

    Console.WriteLine(Separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
    Console.WriteLine("Test case: From a file containing information about the users of a specific service");
    Console.WriteLine(" extract all the fields related to each user, such as its name, age");
    Console.WriteLine(" residency address, work address and contacts.");
    Console.WriteLine("Detail: A custom format is used to specify the information about an user. The custom data");
    Console.WriteLine(" format described by the xml template is presented below.");
    Console.WriteLine(" Also, the extraction result was exported to \"ExtractUsersInfo.csv\" at the");
    Console.WriteLine(" current working directory");
    Console.WriteLine(Separator);
    Console.WriteLine("");

    // File.ReadAllText opens, reads and closes the file in one call, so no
    // reader or file handle is left open after the content is displayed.
    Console.WriteLine(Separator);
    Console.WriteLine("Template:");
    Console.WriteLine(Separator);
    Console.WriteLine(File.ReadAllText(@"template.xml"));
    Console.WriteLine(Separator);
    Console.WriteLine("");

    Console.WriteLine(Separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(Separator);
    Console.WriteLine(File.ReadAllText(@"input.txt"));
    Console.WriteLine(Separator);
    Console.WriteLine("");

    Console.WriteLine(Separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(Separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(Separator);

    // Export the typed result to CSV in the current working directory.
    Users t = extractedResult.Get<Users>();
    StringBuilder sb = CsvExportHelper.ExportList(t.User);
    File.WriteAllText("ExtractUsersInfo.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Entry point of the TemplateBased extractor sample that extracts ERROR log entries.
/// Builds a <c>TemplateBasedExtractor</c> from "template.xml", runs it on "input.txt",
/// prints a description banner, the input, the template and the JSON result to the
/// console, writes the extracted error logs to "ExtractErrorLogs.csv" and then waits
/// for a line of console input before returning.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    const string Separator = "------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    // The using blocks guarantee both files are released even when the
    // extractor constructor or Extract throws.
    IExtractionResult extractedResult;
    using (FileStream fst = File.Open(@"template.xml", FileMode.Open))
    using (FileStream fss = File.Open(@"input.txt", FileMode.Open))
    {
        TemplateBasedExtractor templateBasedExtractor = new TemplateBasedExtractor(fst);
        extractedResult = templateBasedExtractor.Extract(fss);
    }

    Console.WriteLine(Separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - TemplateBased extractor sample");
    Console.WriteLine("Test case: From a server log file, extract all the ERROR logs");
    Console.WriteLine("Detail: Each log follows a predefined fixed structure, that consists in 4 major elements.");
    Console.WriteLine(" These are: The date, the time (up to ms), the log type and finally, ");
    Console.WriteLine(" the description of the log");
    Console.WriteLine(" The input stream content, the template and also the extracted result");
    Console.WriteLine(" (in Json format) are displayed down below. Also, the extracted result was");
    Console.WriteLine(" exported to \"ExtractErrorLogs.csv\" at the current working directory");
    Console.WriteLine(Separator);
    Console.WriteLine("");

    // File.ReadAllText opens, reads and closes the file in one call, so no
    // reader or file handle is left open after the content is displayed.
    Console.WriteLine(Separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(Separator);
    Console.WriteLine(File.ReadAllText(@"input.txt"));
    Console.WriteLine(Separator);
    Console.WriteLine("");

    Console.WriteLine(Separator);
    Console.WriteLine("Template:");
    Console.WriteLine(Separator);
    Console.WriteLine(File.ReadAllText(@"template.xml"));
    Console.WriteLine(Separator);
    Console.WriteLine("");

    Console.WriteLine(Separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(Separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(Separator);

    // Export the typed result to CSV in the current working directory.
    Logs t = extractedResult.Get<Logs>();
    StringBuilder sb = CsvExportHelper.ExportList(t.ErrorLogs);
    File.WriteAllText("ExtractErrorLogs.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Entry point of the StartsAfterContinuesUntil extractor sample.
/// Creates an extractor configured with the patterns <c>//</c> and <c>\r\n</c>,
/// runs it on "input.txt", prints a description banner, the input and the JSON
/// result to the console, writes the extracted items to "ExtractComments.csv" and
/// then waits for a line of console input before returning.
/// </summary>
public static void Main()
{
    const string Separator = "--------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    StartsAfterContinuesUntil startsAfterContinuesUntil = new StartsAfterContinuesUntil(@"//", @"\r\n");

    // The using block guarantees the input file is released even when Extract throws.
    IExtractionResult extractedResult;
    using (Stream inputStream = File.Open(@"input.txt", FileMode.Open))
    {
        extractedResult = startsAfterContinuesUntil.Extract(inputStream);
    }

    Console.WriteLine(Separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - StartsAfterContinuesUntil extractor sample");
    Console.WriteLine("Test case: Extract all the comments from a c++ source file");
    Console.WriteLine("Extractor specification: Starts After // Continues Until \\r\\n");
    Console.WriteLine("Detail: The input stream content, as well as the extracted");
    Console.WriteLine(" result (in Json format) are displayed down below");
    Console.WriteLine(" Also, the extracted result was exported to \"ExtractComments.csv\"");
    Console.WriteLine(" at the current working directory");
    Console.WriteLine(Separator);
    Console.WriteLine("");

    // File.ReadAllText opens, reads and closes the file in one call, so no
    // reader or file handle is left open after the content is displayed.
    Console.WriteLine(Separator);
    Console.WriteLine("Input stream:");
    Console.WriteLine(Separator);
    Console.WriteLine(File.ReadAllText(@"input.txt"));
    Console.WriteLine(Separator);
    Console.WriteLine("");

    Console.WriteLine(Separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(Separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(Separator);

    // Export the typed result to CSV in the current working directory.
    MyExtractionResultClass t = extractedResult.Get<MyExtractionResultClass>();
    StringBuilder sb = CsvExportHelper.ExportList(t.Result);
    File.WriteAllText("ExtractComments.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Click handler that runs a StartsAfterContinuesUntil extraction over the text in
/// <c>textBox4</c>, using the patterns entered in <c>textBox1</c> and <c>textBox2</c>,
/// and lists the extracted items in <c>c1FlexGrid1</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void Button2_Click(object sender, EventArgs e)
{
    StartsAfterContinuesUntil extractor = null;
    try
    {
        extractor = C1TextParserWrapper.GetStartsAfterContinuesUntilExtractor(textBox1.Text, textBox2.Text);
    }
    catch (Exception ex1)
    {
        // Building the extractor failed: report the error and abort the extraction.
        MessageBox.Show("Regular expression parsing error:\n" + ex1.Message,
                        "C1TextParser Winforms Edition",
                        MessageBoxButtons.OK,
                        MessageBoxIcon.Error);
        return;
    }

    // The using blocks release the writer and the in-memory stream even when
    // the extraction throws.
    MyExtractionResultClass results;
    using (var plainTextStream = new MemoryStream())
    using (var writer = new StreamWriter(plainTextStream))
    {
        writer.Write(textBox4.Text);
        // Push the buffered text into the stream, then rewind so the extractor
        // reads it from the beginning.
        writer.Flush();
        plainTextStream.Position = 0;

        IExtractionResult extractedResult = extractor.Extract(plainTextStream);
        results = extractedResult.Get<MyExtractionResultClass>();
    }

    // Remove every row from index 1 onwards (row 0 is kept), then add one row per extracted item.
    this.c1FlexGrid1.Rows.RemoveRange(1, this.c1FlexGrid1.Rows.Count - 1);
    foreach (var result in results.Result)
    {
        this.c1FlexGrid1.AddItem(new string[2] { result.Index.ToString(), result.Text });
    }

    MessageBox.Show(String.Format("{0} instance(s) extracted sucessfully from the input source!", results.Result.Count),
                    "C1TextParser Winforms Edition",
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Information);
}
/// <summary>
/// Entry point of the Html extractor sample for a vietjetair e-ticket.
/// Builds an <c>HtmlExtractor</c> template from "vietjetairEmail1.html" with seven
/// fixed place holders, runs it on "vietjetairEmail2.html", prints a description
/// banner and the JSON result to the console, writes the result to
/// "FlightETicket.csv" and then waits for a line of console input before returning.
/// </summary>
public static void Main()
{
    const string Separator = "------------------------------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    IExtractionResult extractedResult;

    /***********************************************Vietjetair template********************************************/
    // The template stream stays open until Extract has returned; the using blocks
    // then release both files, including when an exception is thrown.
    // NOTE(review): assumes the extraction result no longer reads from either
    // stream once Extract has returned - confirm against the C1TextParser docs.
    using (Stream vietjetairTemplateStream = File.Open(@"vietjetairEmail1.html", FileMode.Open))
    {
        HtmlExtractor vietjetairTemplate = new HtmlExtractor(vietjetairTemplateStream);

        //Fixed placeHolder for the passenger name
        String passengerNameXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[3]/td";
        vietjetairTemplate.AddPlaceHolder("passenger name", passengerNameXPath);

        //Fixed placeHolder for the booking number
        String bookingNumberXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[1]/tbody/tr/td[2]/span";
        vietjetairTemplate.AddPlaceHolder("booking number", bookingNumberXPath);

        //Fixed placeHolder for the booking status
        String bookingStatusXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[1]/td[1]";
        vietjetairTemplate.AddPlaceHolder("booking status", bookingStatusXPath);

        //Fixed placeHolder for the fare type
        String fareTypeXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[3]";
        vietjetairTemplate.AddPlaceHolder("fare type", fareTypeXPath);

        //Fixed placeHolder for total amount
        String totalAmountXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[6]/tbody/tr[2]/td/table[2]/tbody/tr[2]/td[3]";
        vietjetairTemplate.AddPlaceHolder("total amount", totalAmountXPath);

        //Fixed placeHolder for city of departure
        // NOTE(review): the two trailing integers presumably select a sub-range of the node text - confirm.
        String cityOfDepartureXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[4]/tbody/tr/td[4]/text()";
        vietjetairTemplate.AddPlaceHolder("city of departure", cityOfDepartureXPath, 8, 12);

        //Fixed placeHolder for year of booking date
        String yearOfBookingXPath = @"/html/body/div/div[4]/div[1]/div[2]/div[2]/table[2]/tbody/tr[2]/td[1]";
        vietjetairTemplate.AddPlaceHolder("year of booking", yearOfBookingXPath, 6, 4);
        /***************************************************************************************************************/

        using (Stream source = File.Open(@"vietjetairEmail2.html", FileMode.Open))
        {
            extractedResult = vietjetairTemplate.Extract(source);
        }
    }

    Console.WriteLine(Separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
    Console.WriteLine("Test case: Test case: From a vietjetair e-ticket extract relevant information about the flight. Note that the");
    Console.WriteLine(" email used as extraction source was modified on purpose (added random text at different locations)");
    Console.WriteLine(" with the intent to show that html extractor is flexible enough to retrieve the intended text.");
    Console.WriteLine("Detail: This consists on seven fixed place holders. These are: the passenger name, the booking number, the");
    Console.WriteLine(" booking status, the fare type, the total amount, the city of departure and, finally, the year of booking");
    Console.WriteLine(" The vietjetair email used as the extraction source is \"vietjetairEmail2.html\" and can be consulted");
    Console.WriteLine(" in the current working directory. Also, \"FlightETicket.csv\" contains the parsing result");
    Console.WriteLine(Separator);

    Console.WriteLine(Separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(Separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(Separator);

    // Export the single typed result as a one-element list to CSV.
    FlightTicket vietjetairResult = extractedResult.Get<FlightTicket>();
    StringBuilder sb = CsvExportHelper.ExportList(new List<FlightTicket>() { vietjetairResult });
    File.WriteAllText("FlightETicket.csv", sb.ToString());

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Entry point of the Html extractor sample for an amazon order email.
/// Builds an <c>HtmlExtractor</c> template from "amazonEmail1.html" with three place
/// holders in the "ordered articles" block and three fixed place holders, runs it on
/// "amazonEmail2.html", prints a description banner and the JSON result to the
/// console, writes both the fixed values and the ordered items to
/// "ECommerceOrder.csv" and then waits for a line of console input before returning.
/// </summary>
public static void Main()
{
    const string Separator = "------------------------------------------------------------------------------------------------------------";

    C1.TextParser.LicenseManager.Key = License.Key;

    IExtractionResult extractedResult;

    /**************************************************Amazon template*********************************************/
    // The template stream stays open until Extract has returned; the using blocks
    // then release both files, including when an exception is thrown.
    // NOTE(review): assumes the extraction result no longer reads from either
    // stream once Extract has returned - confirm against the C1TextParser docs.
    using (Stream amazonTemplateStream = File.Open(@"amazonEmail1.html", FileMode.Open))
    {
        HtmlExtractor amazonTemplate = new HtmlExtractor(amazonTemplateStream);

        //Repeated block for each article in the order
        String articleNameXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/a";
        amazonTemplate.AddPlaceHolder("ordered articles", "article name", articleNameXPath);

        String articlePriceXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[3]/strong";
        amazonTemplate.AddPlaceHolder("ordered articles", "article price", articlePriceXPath);

        // NOTE(review): the two trailing integers presumably select a sub-range of the node text - confirm.
        String articleSellerXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[1]/td[2]/p/span";
        amazonTemplate.AddPlaceHolder("ordered articles", "article seller", articleSellerXPath, 8, 18);

        //Fixed placeHolder for the expected delivery date
        String deliveryDateXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[3]/td/table/tbody/tr[1]/td[1]/p/strong";
        amazonTemplate.AddPlaceHolder("delivery date", deliveryDateXPath);

        //Fixed placeHolder for the total amount of the order
        String totalAmountXPath = @"//*[@id=""shipmentDetails""]/table/tbody/tr[8]/td[2]/strong";
        amazonTemplate.AddPlaceHolder("total order amount", totalAmountXPath);

        //Fixed placeHolder for the customer name
        String customerNameXPath = @"/html/body/div[2]/div/div/div/table/tbody/tr[2]/td/p[1]";
        amazonTemplate.AddPlaceHolder("customer name", customerNameXPath, 6, 15);
        /***************************************************************************************************************/

        using (Stream source = File.Open(@"amazonEmail2.html", FileMode.Open))
        {
            extractedResult = amazonTemplate.Extract(source);
        }
    }

    Console.WriteLine(Separator);
    Console.WriteLine("GrapeCity, inc, all rights reserved");
    Console.WriteLine("Demo of the C1TextParser library - Html extractor sample");
    Console.WriteLine("Test case: From amazon order emails extract relevant information about the order itself.");
    Console.WriteLine(" This sample pretends to demonstrate the repeated place holder extraction capabilities of");
    Console.WriteLine(" C1TextParser - Html extractor");
    Console.WriteLine("Detail: The sample consists on three fixed place holders and one repeated block. The fixed place holders are");
    Console.WriteLine(" the customer name, the order delivery date and also the total amount of the order. The repeated ");
    Console.WriteLine(" block is used to extract each article that appear in the ordered article list. It contains three");
    Console.WriteLine(" repeated place holders. These are: the name, the price and the seller of the article.");
    Console.WriteLine(" The amazon email used as the extraction source is \"amazonEmail2.html\" and can be consulted in the");
    Console.WriteLine(" current working directory. Also, \"ECommerceOrder.csv\" contains the parsing result");
    Console.WriteLine(Separator);

    Console.WriteLine(Separator);
    Console.WriteLine("JSon String result:");
    Console.WriteLine(Separator);
    Console.WriteLine(extractedResult.ToJsonString());
    Console.WriteLine(Separator);

    // The same extraction result is read through two result types: one for the
    // fixed place holders and one for the repeated "ordered articles" block.
    AmazonTemplateFixedPlaceHolders amazonTemplateFixedPlaceHolders = extractedResult.Get<AmazonTemplateFixedPlaceHolders>();
    StringBuilder sb1 = CsvExportHelper.ExportList(new List<AmazonTemplateFixedPlaceHolders>() { amazonTemplateFixedPlaceHolders });

    var amazonTemplateOrderedItems = extractedResult.Get<AmazonTemplateRepeatedBlocks>().OrderedItems;
    StringBuilder sb2 = CsvExportHelper.ExportList(amazonTemplateOrderedItems);

    // Both CSV sections go into one file, separated by a single newline.
    string csvContent = sb1.ToString() + "\n" + sb2.ToString();
    File.WriteAllText("ECommerceOrder.csv", csvContent);

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}