/// <summary>
/// Extracts SimpleNormal.xlsx and compares the full text against the expected
/// output, in which empty cells appear as consecutive tab characters.
/// </summary>
public void TestEmptyCells()
{
    XSSFExcelExtractor extractor = GetExtractor("SimpleNormal.xlsx");
    try
    {
        String text = extractor.Text;
        Assert.IsTrue(text.Length > 0);

        // This sheet demonstrates the preservation of empty cells, as
        // signified by sequential \t characters.
        Assert.AreEqual(
            // Sheet 1
            "Sheet1\n" +
            "test\t\t1\n" +
            "test 2\t\t2\n" +
            "\t\t3\n" +
            "\t\t4\n" +
            "\t\t5\n" +
            "\t\t6\n" +
            // Sheet 2
            "Sheet Number 2\n" +
            "This is sheet 2\n" +
            "Stuff\n" +
            "1\t2\t3\t4\t5\t6\n" +
            "1/1/90\n" +
            "10\t\t3\n",
            text);
    }
    finally
    {
        // Close the extractor even when one of the assertions above fails.
        extractor.Close();
    }
}
/// <summary>
/// Extracts sample.xlsx under several option combinations (sheet names on/off,
/// formulas instead of results) and compares the text against the expected output.
/// </summary>
public void TestGetSimpleText()
{
    // a very simple file
    XSSFExcelExtractor extractor = GetExtractor("sample.xlsx");
    try
    {
        String text = extractor.Text;
        Assert.IsTrue(text.Length > 0);

        // Check sheet names
        Assert.IsTrue(text.StartsWith("Sheet1"));
        Assert.IsTrue(text.EndsWith("Sheet3\n"));

        // Now without, will have text
        extractor.SetIncludeSheetNames(false);
        text = extractor.Text;

        String CHUNK1 =
            "Lorem\t111\n" +
            "ipsum\t222\n" +
            "dolor\t333\n" +
            "sit\t444\n" +
            "amet\t555\n" +
            "consectetuer\t666\n" +
            "adipiscing\t777\n" +
            "elit\t888\n" +
            "Nunc\t999\n";
        String CHUNK2 =
            "The quick brown fox jumps over the lazy dog\n" +
            "hello, xssf hello, xssf\n" +
            "hello, xssf hello, xssf\n" +
            "hello, xssf hello, xssf\n" +
            "hello, xssf hello, xssf\n";

        Assert.AreEqual(
            CHUNK1 +
            "at\t4995\n" +
            CHUNK2,
            text);

        // Now Get formulas not their values
        extractor.SetFormulasNotResults(true);
        text = extractor.Text;
        Assert.AreEqual(
            CHUNK1 +
            "at\tSUM(B1:B9)\n" +
            CHUNK2,
            text);

        // With sheet names too
        extractor.SetIncludeSheetNames(true);
        text = extractor.Text;
        Assert.AreEqual(
            "Sheet1\n" +
            CHUNK1 +
            "at\tSUM(B1:B9)\n" +
            "rich test\n" +
            CHUNK2 +
            "Sheet3\n",
            text);
    }
    finally
    {
        // Close the extractor even when one of the assertions above fails.
        extractor.Close();
    }
}
/// <summary>
/// Checks that the words "testdoc" and "test phrase" are absent from the text of
/// 45544.xlsx by default and present once cell-comment extraction is switched on.
/// </summary>
public void TestComments()
{
    XSSFExcelExtractor extractor = GetExtractor("45544.xlsx");
    try
    {
        String text = extractor.Text;

        // No comments there yet. These are negative checks, so a failure means
        // the word WAS found; the message says so.
        Assert.IsFalse(text.Contains("testdoc"), "Found unexpected word in text\n" + text);
        Assert.IsFalse(text.Contains("test phrase"), "Found unexpected word in text\n" + text);

        // Turn on comment extraction, the words will then be there
        extractor.SetIncludeCellComments(true);
        text = extractor.Text;
        Assert.IsTrue(text.Contains("testdoc"), "Unable to find expected word in text\n" + text);
        Assert.IsTrue(text.Contains("test phrase"), "Unable to find expected word in text\n" + text);
    }
    finally
    {
        // Close the extractor even when one of the assertions above fails.
        extractor.Close();
    }
}
/// <summary>
/// For each of the four 45540 header/footer sample workbooks, checks that the
/// extracted text contains both "testdoc" and "test phrase".
/// </summary>
public void TestHeaderFooter()
{
    String[] files = new String[]
    {
        "45540_classic_Header.xlsx",
        "45540_form_Header.xlsx",
        "45540_classic_Footer.xlsx",
        "45540_form_Footer.xlsx",
    };

    foreach (String sampleName in files)
    {
        XSSFExcelExtractor extractor = GetExtractor(sampleName);
        try
        {
            String text = extractor.Text;

            // Both messages name the sample so a failure identifies the file.
            Assert.IsTrue(text.Contains("testdoc"),
                "Unable to find expected word in text from " + sampleName + "\n" + text);
            Assert.IsTrue(text.Contains("test phrase"),
                "Unable to find expected word in text from " + sampleName + "\n" + text);
        }
        finally
        {
            // Close this sample's extractor before opening the next one,
            // even when an assertion fails.
            extractor.Close();
        }
    }
}
/// <summary>
/// Extracts AverageTaxRates.xlsx and checks that the text is non-empty and
/// starts with the expected sheet name and first heading.
/// </summary>
public void TestGetComplexText()
{
    // A fairly complex file
    XSSFExcelExtractor extractor = GetExtractor("AverageTaxRates.xlsx");
    try
    {
        String text = extractor.Text;
        Assert.IsTrue(text.Length > 0);

        // Might not have all formatting it should do!
        // TODO decide if we should really have the "null" in there
        Assert.IsTrue(text.StartsWith(
            "Avgtxfull\n" +
            "\t(iii) AVERAGE TAX RATES ON ANNUAL"
        ));
    }
    finally
    {
        // Close the extractor even when one of the assertions above fails.
        extractor.Close();
    }
}
/// <summary>
/// Opens the workbook at <c>Context.Path</c> and returns the text produced by the
/// matching extractor (<c>XSSFExcelExtractor</c> for an <c>XSSFWorkbook</c>,
/// <c>ExcelExtractor</c> otherwise).
/// </summary>
/// <remarks>
/// Optional switches are read from <c>Context.Properties</c>; when a key is absent
/// the default applies: IncludeHeaderFooter (false), ShowCalculatedResult (false),
/// IncludeSheetNames (true), IncludeComments (true).
/// </remarks>
/// <returns>The extractor's <c>Text</c>.</returns>
/// <exception cref="FileNotFoundException">No file exists at <c>Context.Path</c>.</exception>
public override string Parse()
{
    if (!File.Exists(Context.Path))
    {
        throw new FileNotFoundException("File " + Context.Path + " is not found");
    }

    IWorkbook workbook = WorkbookFactory.Create(Context.Path);

    // Options that default to false: the key must be present AND true.
    bool withHeaderFooter =
        Context.Properties.ContainsKey("IncludeHeaderFooter")
        && Utility.IsTrue(Context.Properties["IncludeHeaderFooter"]);
    bool calculatedResult =
        Context.Properties.ContainsKey("ShowCalculatedResult")
        && Utility.IsTrue(Context.Properties["ShowCalculatedResult"]);

    // Options that default to true: only an explicit non-true value disables them.
    bool withSheetNames =
        !Context.Properties.ContainsKey("IncludeSheetNames")
        || Utility.IsTrue(Context.Properties["IncludeSheetNames"]);
    bool withComments =
        !Context.Properties.ContainsKey("IncludeComments")
        || Utility.IsTrue(Context.Properties["IncludeComments"]);

    // The extractors take the inverse flag: formulas are emitted unless
    // calculated results were requested.
    bool formulasNotResults = !calculatedResult;

    XSSFWorkbook ooxmlWorkbook = workbook as XSSFWorkbook;
    if (ooxmlWorkbook != null)
    {
        XSSFExcelExtractor ooxmlExtractor = new XSSFExcelExtractor(ooxmlWorkbook);
        ooxmlExtractor.SetIncludeHeadersFooters(withHeaderFooter);
        ooxmlExtractor.SetIncludeCellComments(withComments);
        ooxmlExtractor.SetIncludeSheetNames(withSheetNames);
        ooxmlExtractor.SetFormulasNotResults(formulasNotResults);
        return ooxmlExtractor.Text;
    }

    // Anything that is not an XSSFWorkbook is treated as an HSSFWorkbook.
    ExcelExtractor binaryExtractor = new ExcelExtractor((HSSFWorkbook)workbook);
    binaryExtractor.IncludeHeaderFooter = withHeaderFooter;
    binaryExtractor.IncludeCellComments = withComments;
    binaryExtractor.IncludeSheetNames = withSheetNames;
    binaryExtractor.FormulasNotResults = formulasNotResults;
    return binaryExtractor.Text;
}
/// <summary>
/// Runs the OOXML extractor on SampleSS.xlsx and the OLE2 extractor on SampleSS.xls
/// and applies the same checks to both outputs after stripping carriage returns
/// and tabs.
/// </summary>
public void TestComparedToOLE2()
{
    // A fairly simple file - ooxml
    XSSFExcelExtractor ooxmlExtractor = GetExtractor("SampleSS.xlsx");

    ExcelExtractor ole2Extractor =
        new ExcelExtractor(HSSFTestDataSamples.OpenSampleWorkbook("SampleSS.xls"));

    POITextExtractor[] extractors =
        new POITextExtractor[] { ooxmlExtractor, ole2Extractor };

    // Built once: the pattern does not depend on the extractor, and constructing
    // a RegexOptions.Compiled regex on every iteration is wasted work.
    // The optional "(\.0+)?" accepts the number rendered as either 13 or 13.0.
    Regex pattern = new Regex(".*13(\\.0+)?\\s+Sheet3.*", RegexOptions.Compiled);

    foreach (POITextExtractor extractor in extractors)
    {
        String text = Regex.Replace(extractor.Text, "[\r\t]", "");
        Assert.IsTrue(text.StartsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
        Assert.IsTrue(pattern.IsMatch(text));
    }
}
/// <summary>
/// Extracts InlineStrings.xlsx with formulas-not-results enabled and checks that
/// the text contains each expected number, string, inline string and formula.
/// </summary>
public void TestInlineStrings()
{
    XSSFExcelExtractor extractor = GetExtractor("InlineStrings.xlsx");
    try
    {
        extractor.SetFormulasNotResults(true);
        String text = extractor.Text;

        // Fragments are checked in this order; the first one missing fails the test.
        String[] expectedFragments = new String[]
        {
            // Numbers
            "43",
            "22",
            // Strings
            "ABCDE",
            "Long Text",
            // Inline Strings
            "1st Inline String",
            "And More",
            // Formulas
            "A2",
            "A5-A$2",
        };

        foreach (String fragment in expectedFragments)
        {
            Assert.IsTrue(text.Contains(fragment), "Unable to find expected word in text\n" + text);
        }
    }
    finally
    {
        // Close the extractor even when one of the assertions above fails.
        extractor.Close();
    }
}