Exemple #1
0
        // Verifies that the text extracted from SimpleNormal.xlsx keeps empty
        // cells, which appear in the output as consecutive \t characters.
        public void TestEmptyCells()
        {
            // Expected output for the first sheet: name line, then one line per row.
            String expectedSheet1 =
                "Sheet1\n" +
                "test\t\t1\n" +
                "test 2\t\t2\n" +
                "\t\t3\n" +
                "\t\t4\n" +
                "\t\t5\n" +
                "\t\t6\n";

            // Expected output for the second sheet.
            String expectedSheet2 =
                "Sheet Number 2\n" +
                "This is sheet 2\n" +
                "Stuff\n" +
                "1\t2\t3\t4\t5\t6\n" +
                "1/1/90\n" +
                "10\t\t3\n";

            XSSFExcelExtractor extractor = GetExtractor("SimpleNormal.xlsx");
            String extracted = extractor.Text;

            // Sanity check first, then the exact comparison.
            Assert.IsTrue(extracted.Length > 0);
            Assert.AreEqual(expectedSheet1 + expectedSheet2, extracted);

            extractor.Close();
        }
Exemple #2
0
        // Extracts sample.xlsx under several option combinations (sheet names
        // on/off, formulas vs. calculated results) and compares the output
        // against the exact expected text.
        public void TestGetSimpleText()
        {
            // a very simple file
            XSSFExcelExtractor extractor = GetExtractor("sample.xlsx");
            try
            {
                String text = extractor.Text;

                Assert.IsTrue(text.Length > 0);

                // Check sheet names. Ordinal comparison keeps the check exact and
                // independent of the current culture's handling of "\n".
                Assert.IsTrue(text.StartsWith("Sheet1", StringComparison.Ordinal));
                Assert.IsTrue(text.EndsWith("Sheet3\n", StringComparison.Ordinal));

                // Now without sheet names, only the cell text remains
                extractor.SetIncludeSheetNames(false);
                text = extractor.Text;
                String CHUNK1 =
                    "Lorem\t111\n" +
                    "ipsum\t222\n" +
                    "dolor\t333\n" +
                    "sit\t444\n" +
                    "amet\t555\n" +
                    "consectetuer\t666\n" +
                    "adipiscing\t777\n" +
                    "elit\t888\n" +
                    "Nunc\t999\n";
                // NOTE: the "hello, xssf" lines below contain a literal tab character
                // between the two cell values (not a \t escape); keep it intact.
                String CHUNK2 =
                    "The quick brown fox jumps over the lazy dog\n" +
                    "hello, xssf	hello, xssf\n" +
                    "hello, xssf	hello, xssf\n" +
                    "hello, xssf	hello, xssf\n" +
                    "hello, xssf	hello, xssf\n";

                // Calculated result of the formula cell is shown
                Assert.AreEqual(
                    CHUNK1 +
                    "at\t4995\n" +
                    CHUNK2
                    , text);

                // Now get formulas, not their values
                extractor.SetFormulasNotResults(true);
                text = extractor.Text;
                Assert.AreEqual(
                    CHUNK1 +
                    "at\tSUM(B1:B9)\n" +
                    CHUNK2, text);

                // With sheet names too
                extractor.SetIncludeSheetNames(true);
                text = extractor.Text;
                Assert.AreEqual(
                    "Sheet1\n" +
                    CHUNK1 +
                    "at\tSUM(B1:B9)\n" +
                    "rich test\n" +
                    CHUNK2 +
                    "Sheet3\n"
                    , text);
            }
            finally
            {
                // Release the extractor even when an assertion above fails.
                extractor.Close();
            }
        }
        // Checks that the marker words from 45544.xlsx are absent from the
        // extracted text until cell-comment extraction is switched on, and
        // present afterwards.
        public void TestComments()
        {
            XSSFExcelExtractor extractor = GetExtractor("45544.xlsx");
            try
            {
                String text = extractor.Text;

                // No comments there yet: a failure here means the word WAS found,
                // so the message says so.
                Assert.IsFalse(text.Contains("testdoc"), "Found unexpected word in text\n" + text);
                Assert.IsFalse(text.Contains("test phrase"), "Found unexpected word in text\n" + text);

                // Turn on comment extraction, the words must now be present
                extractor.SetIncludeCellComments(true);
                text = extractor.Text;
                Assert.IsTrue(text.Contains("testdoc"), "Unable to find expected word in text\n" + text);
                Assert.IsTrue(text.Contains("test phrase"), "Unable to find expected word in text\n" + text);
            }
            finally
            {
                // Release the extractor even when an assertion above fails.
                extractor.Close();
            }
        }
        // For each header/footer sample workbook, checks that the extracted
        // text contains both marker words.
        public void TestHeaderFooter()
        {
            String[] files = new String[] {
                "45540_classic_Header.xlsx", "45540_form_Header.xlsx",
                "45540_classic_Footer.xlsx", "45540_form_Footer.xlsx",
            };
            foreach (String sampleName in files)
            {
                XSSFExcelExtractor extractor = GetExtractor(sampleName);
                try
                {
                    String text = extractor.Text;

                    // Both messages name the sample so a failure identifies the file.
                    Assert.IsTrue(text.Contains("testdoc"), "Unable to find expected word in text from " + sampleName + "\n" + text);
                    Assert.IsTrue(text.Contains("test phrase"), "Unable to find expected word in text from " + sampleName + "\n" + text);
                }
                finally
                {
                    // One extractor is opened per sample; close each one before moving on.
                    extractor.Close();
                }
            }
        }
        // Extracts AverageTaxRates.xlsx and checks that the output begins with
        // the sheet name followed by the start of the first row.
        public void TestGetComplexText()
        {
            // A fairly complex file
            XSSFExcelExtractor extractor = GetExtractor("AverageTaxRates.xlsx");
            try
            {
                String text = extractor.Text;

                Assert.IsTrue(text.Length > 0);

                // Might not have all formatting it should do!
                // NOTE(review): an earlier TODO here referred to a "null" in the
                // output; the expected prefix below contains none - confirm the
                // TODO is obsolete.
                // Ordinal comparison keeps the prefix check exact and culture-independent.
                Assert.IsTrue(text.StartsWith(
                                  "Avgtxfull\n" +
                                  "\t(iii) AVERAGE TAX RATES ON ANNUAL",
                                  StringComparison.Ordinal
                                  ));
            }
            finally
            {
                // Release the extractor even when an assertion above fails.
                extractor.Close();
            }
        }
Exemple #6
0
        /// <summary>
        /// Opens the workbook at <c>Context.Path</c> and returns its extracted text.
        /// </summary>
        /// <remarks>
        /// Options are read from <c>Context.Properties</c>:
        /// "IncludeHeaderFooter" and "ShowCalculatedResult" default to false;
        /// "IncludeSheetNames" and "IncludeComments" default to true.
        /// </remarks>
        /// <returns>The text produced by the extractor matching the workbook type.</returns>
        /// <exception cref="FileNotFoundException">The file at <c>Context.Path</c> does not exist.</exception>
        public override string Parse()
        {
            if (!File.Exists(Context.Path))
            {
                throw new FileNotFoundException("File " + Context.Path + " is not found");
            }

            IWorkbook workbook = WorkbookFactory.Create(Context.Path);

            // Options that stay off unless the property is present and true.
            bool extractHeaderFooter =
                Context.Properties.ContainsKey("IncludeHeaderFooter")
                && Utility.IsTrue(Context.Properties["IncludeHeaderFooter"]);
            bool showCalculatedResult =
                Context.Properties.ContainsKey("ShowCalculatedResult")
                && Utility.IsTrue(Context.Properties["ShowCalculatedResult"]);

            // Options that stay on unless the property is present and not true.
            bool includeSheetNames =
                !Context.Properties.ContainsKey("IncludeSheetNames")
                || Utility.IsTrue(Context.Properties["IncludeSheetNames"]);
            bool includeComment =
                !Context.Properties.ContainsKey("IncludeComments")
                || Utility.IsTrue(Context.Properties["IncludeComments"]);

            // The extractors take the inverse flag: formulas instead of results.
            bool formulasNotResults = !showCalculatedResult;

            if (!(workbook is XSSFWorkbook))
            {
                // Anything that is not an XSSF workbook is handled as HSSF.
                ExcelExtractor ole2Extractor = new ExcelExtractor((HSSFWorkbook)workbook);
                ole2Extractor.IncludeHeaderFooter = extractHeaderFooter;
                ole2Extractor.IncludeCellComments = includeComment;
                ole2Extractor.IncludeSheetNames = includeSheetNames;
                ole2Extractor.FormulasNotResults = formulasNotResults;
                return ole2Extractor.Text;
            }

            XSSFExcelExtractor ooxmlExtractor = new XSSFExcelExtractor((XSSFWorkbook)workbook);
            ooxmlExtractor.SetIncludeHeadersFooters(extractHeaderFooter);
            ooxmlExtractor.SetIncludeCellComments(includeComment);
            ooxmlExtractor.SetIncludeSheetNames(includeSheetNames);
            ooxmlExtractor.SetFormulasNotResults(formulasNotResults);
            return ooxmlExtractor.Text;
        }
        // Runs the OOXML extractor (SampleSS.xlsx) and the OLE2 extractor
        // (SampleSS.xls) and applies the same text checks to both outputs.
        public void TestComparedToOLE2()
        {
            // A fairly simple file - ooxml
            XSSFExcelExtractor ooxmlExtractor = GetExtractor("SampleSS.xlsx");

            ExcelExtractor ole2Extractor =
                new ExcelExtractor(HSSFTestDataSamples.OpenSampleWorkbook("SampleSS.xls"));

            POITextExtractor[] extractors =
                new POITextExtractor[] { ooxmlExtractor, ole2Extractor };

            // The pattern is the same for every extractor, so build (and compile) it once.
            // It accepts "13" with or without a ".0..." suffix before the "Sheet3" name.
            Regex pattern = new Regex(".*13(\\.0+)?\\s+Sheet3.*", RegexOptions.Compiled);

            foreach (POITextExtractor extractor in extractors)
            {
                // Strip carriage returns and tabs before the checks below.
                String text = Regex.Replace(extractor.Text, "[\r\t]", "");

                // Ordinal comparison keeps the prefix check exact and culture-independent.
                Assert.IsTrue(text.StartsWith(
                    "First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n",
                    StringComparison.Ordinal));
                Assert.IsTrue(pattern.IsMatch(text));
            }
        }
        // Extracts InlineStrings.xlsx with formulas shown instead of results and
        // checks that every expected fragment occurs in the extracted text.
        public void TestInlineStrings()
        {
            String[] expectedFragments = new String[] {
                // Numbers
                "43", "22",
                // Strings
                "ABCDE", "Long Text",
                // Inline Strings
                "1st Inline String", "And More",
                // Formulas
                "A2", "A5-A$2"
            };

            XSSFExcelExtractor extractor = GetExtractor("InlineStrings.xlsx");
            extractor.SetFormulasNotResults(true);
            String extracted = extractor.Text;

            // Fragments are checked in the order listed above.
            foreach (String fragment in expectedFragments)
            {
                Assert.IsTrue(extracted.Contains(fragment), "Unable to find expected word in text\n" + extracted);
            }
        }