/// <summary>
/// Gathers the pictures found in the current document.
/// </summary>
/// <remarks>
/// Not all documents have all of their images concatenated in the data stream,
/// although MS claims so; the best approach is therefore to scan every
/// character run of the overall range. Once the runs have been scanned, the
/// same list is handed to <c>SearchForPictures</c> together with
/// <c>_dgg.EscherRecords</c>.
/// </remarks>
/// <returns>A list of Picture objects found in the current document.</returns>
public List<Picture> GetAllPictures()
{
    List<Picture> found = new List<Picture>();
    Range overall = _document.GetOverallRange();

    for (int index = 0; index < overall.NumCharacterRuns; index++)
    {
        CharacterRun current = overall.GetCharacterRun(index);
        if (current != null)
        {
            // ExtractPicture may hand back null; only non-null results are kept.
            Picture candidate = ExtractPicture(current, false);
            if (candidate != null)
            {
                found.Add(candidate);
            }
        }
    }

    SearchForPictures(_dgg.EscherRecords, found);
    return found;
}
/// <summary>
/// Checks the text of the main range, the header-story range and the overall
/// range of <c>docAscii</c> on three consecutive passes. The document is
/// written out and read back in after the first pass only, so the second and
/// third passes both inspect the re-read document.
/// </summary>
public void TestContents()
{
    HWPFDocument current = docAscii;

    for (int pass = 0; pass < 3; pass++)
    {
        // Now check the real ranges
        string expectedMain = a_page_1 + page_break + "\r" + a_page_2;
        Assert.AreEqual(expectedMain, current.GetRange().Text);

        string expectedHeaders =
            headerDef + a_header + footerDef + a_footer + endHeaderFooter;
        Assert.AreEqual(expectedHeaders, current.GetHeaderStoryRange().Text);

        string expectedOverall =
            a_page_1 + page_break + "\r" + a_page_2 +
            headerDef + a_header + footerDef + a_footer + endHeaderFooter +
            "\r";
        Assert.AreEqual(expectedOverall, current.GetOverallRange().Text);

        // Write out and read back in again, ready for the next pass.
        // TODO run more than once
        if (pass == 0)
        {
            current = HWPFTestDataSamples.WriteOutAndReadBack(current);
        }
    }
}
/// <summary>
/// Deletes the single occurrence of <c>searchText</c> from the third paragraph
/// of the sample document and checks that the paragraph then reads
/// <c>expectedText2</c> while the section and paragraph counts are unchanged.
/// </summary>
public void TestRangeDeleteOne()
{
    HWPFDocument sample = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile);

    // State before the deletion.
    Range overall = sample.GetOverallRange();
    Assert.AreEqual(1, overall.NumSections);

    Section sectionBefore = overall.GetSection(0);
    Assert.AreEqual(5, sectionBefore.NumParagraphs);

    Paragraph target = sectionBefore.GetParagraph(2);
    string before = target.Text;
    Assert.AreEqual(originalText, before);

    int relativeOffset = before.IndexOf(searchText);
    Assert.AreEqual(192, relativeOffset);

    // Translate the paragraph-relative offset into a document offset and
    // build a range that covers exactly the search text.
    int start = target.StartOffset + relativeOffset;
    int end = start + searchText.Length;
    Range doomed = new Range(start, end, target.GetDocument());
    Assert.AreEqual(searchText, doomed.Text);

    doomed.Delete();

    // We need to let the model re-calculate the range before we evaluate it,
    // so a new range is requested from the document.
    Range afterDelete = sample.GetRange();
    Assert.AreEqual(1, afterDelete.NumSections);

    Section sectionAfter = afterDelete.GetSection(0);
    Assert.AreEqual(5, sectionAfter.NumParagraphs);

    string after = sectionAfter.GetParagraph(2).Text;
    Assert.AreEqual(expectedText2, after);

    // Reading the text of the whole range at this point can lead to an
    // out-of-bounds failure (the original note names it a
    // "StringBuilderOutOfBoundsException"), so the read is exercised here
    // even though there is no assertion on its result.
    Range wholeRange = sample.GetRange();
    after = wholeRange.Text;
}
/// <summary>
/// Checks the text of the main range, the header-story range and the overall
/// range of <c>docUnicode</c> against the expected Unicode fixture strings.
/// </summary>
public void TestContentsUnicode()
{
    // Now check the real ranges
    string expectedMain = u_page_1 + page_break + "\r" + u_page_2;
    Assert.AreEqual(expectedMain, docUnicode.GetRange().Text);

    string expectedHeaders =
        headerDef + u_header + footerDef + u_footer + endHeaderFooter;
    Assert.AreEqual(expectedHeaders, docUnicode.GetHeaderStoryRange().Text);

    string expectedOverall =
        u_page_1 + page_break + "\r" + u_page_2 +
        headerDef + u_header + footerDef + u_footer + endHeaderFooter +
        "\r";
    Assert.AreEqual(expectedOverall, docUnicode.GetOverallRange().Text);
}
/// <summary>
/// Checks the paragraph definitions behind the main range of the Unicode
/// document <c>u</c>: the number of definitions, that the first five fall
/// inside the 408 characters of main text, and that the first three line up
/// with the '\r'-separated lines of <c>u_page_1</c>.
/// </summary>
public void TestUnicodeParagraphDefInitions()
{
    // Length of the main text, as asserted on the range and the section below.
    const int mainTextLength = 408;

    Range mainRange = u.GetRange();
    string[] firstPageLines = u_page_1.Split('\r');
    // Not referenced again below; kept so the split of u_page_2 is still evaluated.
    string[] secondPageLines = u_page_2.Split('\r');

    string expectedMain = u_page_1 + page_break + "\r" + u_page_2;
    Assert.AreEqual(expectedMain, mainRange.Text);
    Assert.AreEqual(mainTextLength, mainRange.Text.Length);

    Assert.AreEqual(1, mainRange.NumSections);
    Assert.AreEqual(1, u.SectionTable.GetSections().Count);

    Section onlySection = mainRange.GetSection(0);
    Assert.AreEqual(expectedMain, onlySection.Text);
    Assert.AreEqual(0, onlySection.StartOffset);
    Assert.AreEqual(mainTextLength, onlySection.EndOffset);

    List<PAPX> definitions = mainRange._paragraphs;
    Assert.AreEqual(35, definitions.Count);

    // Check that the last paragraph ends where it should do
    Assert.AreEqual(531, u.GetOverallRange().Text.Length);
    Assert.AreEqual(530, u.GetCPSplitCalculator().GetHeaderTextboxEnd());
    PropertyNode lastDefinition = (PropertyNode)definitions[34];
    // The check on the last definition's end is disabled in the original:
    // Assert.AreEqual(530, lastDefinition.End);

    // Only care about the first few really though
    PropertyNode[] leading = new PropertyNode[5];
    for (int i = 0; i < leading.Length; i++)
    {
        leading[i] = (PropertyNode)definitions[i];
    }

    // 5 paragraphs should get us to the end of our text
    foreach (PropertyNode node in leading)
    {
        Assert.IsTrue(node.Start < mainTextLength);
        Assert.IsTrue(node.End < mainTextLength);
    }

    // Paragraphs should match with lines; each line contributes its own
    // length plus one for the '\r' that Split removed.
    Assert.AreEqual(0, leading[0].Start);
    int firstLineEnd = firstPageLines[0].Length + 1;
    Assert.AreEqual(firstLineEnd, leading[0].End);

    Assert.AreEqual(firstLineEnd, leading[1].Start);
    int secondLineEnd = firstLineEnd + firstPageLines[1].Length + 1;
    Assert.AreEqual(secondLineEnd, leading[1].End);

    Assert.AreEqual(secondLineEnd, leading[2].Start);
    int thirdLineEnd = secondLineEnd + firstPageLines[2].Length + 1;
    Assert.AreEqual(thirdLineEnd, leading[2].End);
}
/// <summary>
/// Checks the structure of the sample document: section and paragraph counts,
/// the character-run count and text of the first four paragraphs, the
/// start/end offsets of two entries in the paragraph table, and that the
/// text lengths of the range, the section and the five paragraphs agree.
/// </summary>
public void TestDocStructure()
{
    HWPFDocument sample = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile);

    // First, check overall
    Range overall = sample.GetOverallRange();
    Assert.AreEqual(1, overall.NumSections);
    Assert.AreEqual(5, overall.NumParagraphs);

    // Now, onto just the doc bit
    Range body = sample.GetRange();
    Assert.AreEqual(1, body.NumSections);
    Assert.AreEqual(1, sample.SectionTable.GetSections().Count);

    Section onlySection = body.GetSection(0);
    Assert.AreEqual(5, onlySection.NumParagraphs);

    Paragraph introParagraph = onlySection.GetParagraph(0);
    Assert.AreEqual(1, introParagraph.NumCharacterRuns);
    Assert.AreEqual(introText, introParagraph.Text);

    Paragraph fillerParagraph = onlySection.GetParagraph(1);
    Assert.AreEqual(5, fillerParagraph.NumCharacterRuns);
    Assert.AreEqual(fillerText, fillerParagraph.Text);

    PAPX thirdDefinition = (PAPX)sample.ParagraphTable.GetParagraphs()[2];
    Assert.AreEqual(132, thirdDefinition.Start);
    Assert.AreEqual(400, thirdDefinition.End);

    Paragraph originalParagraph = onlySection.GetParagraph(2);
    Assert.AreEqual(5, originalParagraph.NumCharacterRuns);
    Assert.AreEqual(originalText, originalParagraph.Text);

    PAPX fourthDefinition = (PAPX)sample.ParagraphTable.GetParagraphs()[3];
    Assert.AreEqual(400, fourthDefinition.Start);
    Assert.AreEqual(438, fourthDefinition.End);

    Paragraph lastParagraph = onlySection.GetParagraph(3);
    Assert.AreEqual(1, lastParagraph.NumCharacterRuns);
    Assert.AreEqual(lastText, lastParagraph.Text);

    // Check things match on text length
    Assert.AreEqual(439, body.Text.Length);
    Assert.AreEqual(439, onlySection.Text.Length);

    // The five paragraphs together must account for the same length.
    int combinedLength = 0;
    for (int i = 0; i < 5; i++)
    {
        combinedLength += onlySection.GetParagraph(i).Text.Length;
    }
    Assert.AreEqual(439, combinedLength);
}