/**
 * Checks the ordered-list part of Lists.doc: the text of paragraphs 5 to 9
 * and the values GetLvl() and GetIlvl() report for each of them.
 */
public void TestOrderedLists()
{
    const int firstIndex = 5;
    const int lastIndex = 9;

    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Lists.doc");
    Range range = document.GetRange();
    Assert.AreEqual(40, range.NumParagraphs);

    // Expected paragraph text, position 0 corresponds to paragraph firstIndex.
    string[] expectedTexts =
    {
        "Next up is an ordered list:\r",
        "Ordered list 1\r",
        "OL 2\r",
        "OL 3\r",
        "Now for an un-ordered list with a different bullet style:\r"
    };
    for (int offset = 0; offset < expectedTexts.Length; offset++)
    {
        Assert.AreEqual(expectedTexts[offset], range.GetParagraph(firstIndex + offset).Text);
    }

    // Every paragraph in the window reports level 9 ...
    for (int index = firstIndex; index <= lastIndex; index++)
    {
        Assert.AreEqual(9, range.GetParagraph(index).GetLvl());
    }

    // ... and list indentation level 0.
    for (int index = firstIndex; index <= lastIndex; index++)
    {
        Assert.AreEqual(0, range.GetParagraph(index).GetIlvl());
    }
}
/**
 * Runs assertLevels over paragraphs 0 to 11 of Bug49820.doc and then prints
 * the style level, paragraph level and text of every paragraph to the console.
 */
public void Test49820()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug49820.doc");
    Range body = document.GetRange();
    StyleSheet styles = document.GetStyleSheet();

    // Paragraphs 0-9: the two expected values equal the paragraph index.
    for (int n = 0; n <= 9; n++)
    {
        assertLevels(body, styles, n, n, n);
    }
    // The last two paragraphs have their own expectations.
    assertLevels(body, styles, 10, 9, 0);
    assertLevels(body, styles, 11, 9, 4);

    // Console dump of every paragraph.
    int index = 0;
    while (index < body.NumParagraphs)
    {
        Paragraph paragraph = body.GetParagraph(index);
        int levelFromStyle = styles.GetParagraphStyle(paragraph.GetStyleIndex()).GetLvl();
        int levelFromParagraph = paragraph.GetLvl();
        Console.WriteLine("Style level: " + levelFromStyle + ", paragraph level: "
            + levelFromParagraph + ", text: " + paragraph.Text);
        index++;
    }
}
/**
 * Converts a sample Word document to HTML and returns the generated markup.
 *
 * @param sampleFileName name of the sample resource to open
 * @param emulatePictureStorage currently has no effect: the pictures-manager
 *        hook it is meant to enable is still commented out below
 * @return the InnerXml of the document produced by WordToHtmlConverter
 */
private static String GetHtmlText(String sampleFileName, bool emulatePictureStorage)
{
    HWPFDocument hwpfDocument;
    // Release the resource stream as soon as the document has been built;
    // it was previously never closed.
    // NOTE(review): assumes HWPFDocument reads everything it needs in its
    // constructor, as callers that close the stream right after construction do.
    using (System.IO.Stream sampleStream = POIDataSamples
        .GetDocumentInstance().OpenResourceAsStream(sampleFileName))
    {
        hwpfDocument = new HWPFDocument(sampleStream);
    }

    XmlDocument newDocument = new XmlDocument();
    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(newDocument);

    if (emulatePictureStorage)
    {
        //wordToHtmlConverter.SetPicturesManager( new PicturesManager()
        //{
        //    public String SavePicture( byte[] content,
        //            PictureType pictureType, String suggestedName )
        //    {
        //        return suggestedName;
        //    }
        //} );
    }

    wordToHtmlConverter.ProcessDocument(hwpfDocument);
    return wordToHtmlConverter.Document.InnerXml;
}
/**
 * Checks that ThreeColHeadFoot.doc is stored as a single text piece covering
 * characters 0 to 339, both when first opened and after SaveAndReload.
 */
public void TestAsciiParts()
{
    HWPFDocument current = HWPFTestDataSamples.OpenSampleFile("ThreeColHeadFoot.doc");

    // Round 0 inspects the freshly opened document, round 1 the saved and
    // re-loaded copy; the expectations are the same for both.
    for (int round = 0; round < 2; round++)
    {
        if (round == 1)
        {
            current = SaveAndReload(current);
        }

        TextPieceTable table = current.TextTable;

        // All ascii, so stored in one big lump
        Assert.AreEqual(1, table.TextPieces.Count);
        TextPiece piece = (TextPiece)table.TextPieces[0];

        Assert.AreEqual(0, piece.Start);
        Assert.AreEqual(339, piece.End);
        Assert.AreEqual(339, piece.CharacterLength);
        Assert.AreEqual(339, piece.BytesLength);

        string pieceText = piece.GetStringBuilder().ToString();
        Assert.IsTrue(pieceText.StartsWith("This is a sample word document"));
    }
}
/**
 * Checks the structure of the illustrative document: one section with three
 * paragraphs, where paragraph 2 holds originalText split over three
 * character runs and starts at insertionPoint.
 */
public void TestDocStructure()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile);

    Range wholeRange = document.GetRange();
    Assert.AreEqual(1, wholeRange.NumSections);

    Section onlySection = wholeRange.GetSection(0);
    Assert.AreEqual(3, onlySection.NumParagraphs);

    Paragraph target = onlySection.GetParagraph(2);
    Assert.AreEqual(originalText, target.Text);
    Assert.AreEqual(3, target.NumCharacterRuns);

    // Stitch the three runs back together; the result must equal the paragraph text.
    string joined = "";
    for (int run = 0; run < 3; run++)
    {
        joined += target.GetCharacterRun(run).Text;
    }
    Assert.AreEqual(originalText, joined);

    Assert.AreEqual(insertionPoint, target.StartOffset);
}
/**
 * Loads the text of the Word document at the given path into textBody.
 *
 * Paths ending in ".doc" are read with HWPFDocument, paths ending in ".docx"
 * with XWPFDocument and XWPFWordExtractor. The extension is matched with an
 * ordinal, case-insensitive comparison, so "REPORT.DOC" is handled like
 * "report.doc" and the result does not depend on the current culture.
 *
 * @param path location of the document on disk
 */
public void Init(string path)
{
    if (path.EndsWith(".doc", System.StringComparison.OrdinalIgnoreCase))
    {
        HWPFDocument hwpf;
        using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
        {
            hwpf = new HWPFDocument(file);
        }
        this.textBody = hwpf.Text.ToString();
    }
    else if (path.EndsWith(".docx", System.StringComparison.OrdinalIgnoreCase))
    {
        XWPFDocument xwpf;
        using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
        {
            xwpf = new XWPFDocument(file);
        }
        XWPFWordExtractor ex = new XWPFWordExtractor(xwpf);
        this.textBody = ex.Text;
    }

    // NOTE(review): for any other extension textBody keeps whatever value it
    // had before this call; if that is null the line below throws.
    // NOTE(review): as written both Replace calls substitute a character with
    // itself; presumably they once normalised full-width parentheses - confirm
    // against the original file encoding.
    textBody = textBody.Replace("(", "(").Replace(")", ")");
}
/**
 * Checks the trailing "indents" part of Lists.doc: the text of paragraphs
 * 34 to 38 and the GetLvl()/GetIlvl() values of paragraphs 34 to 39.
 */
public void TestIndentedText()
{
    const int firstIndex = 34;
    const int lastIndex = 39;

    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Lists.doc");
    Range range = document.GetRange();
    Assert.AreEqual(40, range.NumParagraphs);

    // Expected paragraph text, position 0 corresponds to paragraph firstIndex.
    string[] expectedTexts =
    {
        "Finally we want some indents, to tell the difference\r",
        "Indented once\r",
        "Indented twice\r",
        "Indented three times\r",
        "The end!\r"
    };
    for (int offset = 0; offset < expectedTexts.Length; offset++)
    {
        Assert.AreEqual(expectedTexts[offset], range.GetParagraph(firstIndex + offset).Text);
    }

    for (int index = firstIndex; index <= lastIndex; index++)
    {
        Assert.AreEqual(9, range.GetParagraph(index).GetLvl());
    }

    for (int index = firstIndex; index <= lastIndex; index++)
    {
        Assert.AreEqual(0, range.GetParagraph(index).GetIlvl());
    }

    // TODO Test the indent
}
/**
 * Emits an auto-numbered endnote: a forward link appended to the given block
 * and a new endnote block (back link plus the note text) that is added to
 * this.endnotes. The number shown comes from internalLinkCounter, which is
 * incremented under objLinkCounter; the noteIndex argument is not used.
 */
protected override void ProcessEndnoteAutonumbered(HWPFDocument wordDocument, int noteIndex, XmlElement block, Range endnoteTextRange)
{
    string linkNumber;
    lock (objLinkCounter)
    {
        linkNumber = (++internalLinkCounter).ToString();
    }

    string forwardName = "endnote_" + linkNumber;
    string backwardName = "endnote_back_" + linkNumber;

    // Reference placed in the running text; its id is the back-link target.
    XmlElement referenceLink = foDocumentFacade.CreateBasicLinkInternal(forwardName);
    referenceLink.AppendChild(CreateNoteInline(linkNumber));
    SetId(referenceLink, backwardName);
    block.AppendChild(referenceLink);

    // The endnote itself: back link first, then the note's characters.
    XmlElement endnoteBlock = foDocumentFacade.CreateBlock();
    XmlElement returnLink = foDocumentFacade.CreateBasicLinkInternal(backwardName);
    returnLink.AppendChild(CreateNoteInline(linkNumber + " "));
    SetId(returnLink, forwardName);
    endnoteBlock.AppendChild(returnLink);

    ProcessCharacters(wordDocument, int.MinValue, endnoteTextRange, endnoteBlock);
    WordToFoUtils.CompactInlines(endnoteBlock);

    this.endnotes.Add(endnoteBlock);
}
/**
 * Checks that testPictures.doc yields seven pictures, that each one suggests
 * a file extension and full file name, and that the extension and MIME type
 * of pictures 0, 1, 3, 4, 5 and 6 match the expected formats.
 */
public void TestDifferentImages()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("testPictures.doc");
    List<Picture> pics = document.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pics);
    Assert.AreEqual(7, pics.Count);

    foreach (Picture picture in pics)
    {
        Assert.IsNotNull(picture.SuggestFileExtension());
        Assert.IsNotNull(picture.SuggestFullFileName());
    }

    // Parallel tables: picture index, expected extension, expected MIME type.
    // Picture 2 is deliberately absent, it is not checked.
    int[] checkedIndexes = { 0, 1, 3, 4, 5, 6 };
    string[] extensions = { "jpg", "jpg", "png", "png", "wmf", "jpg" };
    string[] mimeTypes =
    {
        "image/jpeg", "image/jpeg", "image/png", "image/png", "image/x-wmf", "image/jpeg"
    };

    for (int k = 0; k < checkedIndexes.Length; k++)
    {
        Picture picture = pics[checkedIndexes[k]];
        Assert.AreEqual(extensions[k], picture.SuggestFileExtension());
        Assert.AreEqual(mimeTypes[k], picture.MimeType);
    }
}
/**
 * Checks the single picture found in Bug41898.doc: it must suggest a file
 * extension and name, expose content and raw content, report a size of 4 and
 * both content arrays must start with the unsigned value 0x80000000.
 */
public void TestEmfComplexImage()
{
    // Historical note from the original author: this case was meant to be
    // disabled temporarily because emf_2003_image contains an office drawing
    // object rather than pictures, and should be rewritten once office
    // drawing objects have been revisited.
    const uint expectedLeadingValue = 0x80000000;

    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug41898.doc");
    List<Picture> pics = document.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pics);
    Assert.AreEqual(1, pics.Count);

    Picture only = pics[0];
    Assert.IsNotNull(only.SuggestFileExtension());
    Assert.IsNotNull(only.SuggestFullFileName());

    // This one's tricky
    // TODO: Fix once we've sorted bug #41898
    Assert.IsNotNull(only.GetContent());
    Assert.IsNotNull(only.GetRawContent());

    // These are probably some sort of offset, need to figure them out
    Assert.AreEqual(4, only.Size);
    Assert.AreEqual(expectedLeadingValue, LittleEndian.GetUInt(only.GetContent()));
    Assert.AreEqual(expectedLeadingValue, LittleEndian.GetUInt(only.GetRawContent()));
}
/**
 * Reads saved-by-table.doc, compares its saved-by entries with the expected
 * list, writes the document to memory, reads it back and compares again.
 */
public void TestReadWrite()
{
    // This document is widely available on the internet as "blair.doc".
    // The original author could not strip its content because their version
    // of Word (from Office XP) removes this table when saving.
    HWPFDocument original = HWPFTestDataSamples.OpenSampleFile("saved-by-table.doc");

    // First pass: entries as read from the sample file.
    for (int entry = 0; entry < expected.Count; entry++)
    {
        Assert.AreEqual(
            expected[entry],
            original.GetSavedByTable().GetEntries()[entry],
            "List of saved-by entries was not as expected");
    }

    // Round trip through an in-memory copy.
    MemoryStream written = new MemoryStream();
    original.Write(written);
    Stream rereadSource = new MemoryStream(written.ToArray());
    HWPFDocument roundTripped = new HWPFDocument(rereadSource);

    // Second pass: entries must have survived the write.
    for (int entry = 0; entry < expected.Count; entry++)
    {
        Assert.AreEqual(
            expected[entry],
            roundTripped.GetSavedByTable().GetEntries()[entry],
            "List of saved-by entries was incorrect after writing");
    }
}
/**
 * Replaces searchText with ReplacementText over the whole document range and
 * checks that the section still has five paragraphs, with paragraphs 2 and 3
 * now reading expectedText2 and expectedText3. (The original author
 * describes this as a test of replacing text with Unicode text.)
 */
public void TestRangeReplacementAll()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile);
    Range wholeRange = document.GetRange();

    // State before the replacement.
    Assert.AreEqual(1, wholeRange.NumSections);
    Section onlySection = wholeRange.GetSection(0);
    Assert.AreEqual(5, onlySection.NumParagraphs);
    Assert.AreEqual(originalText, onlySection.GetParagraph(2).Text);

    wholeRange.ReplaceText(searchText, ReplacementText);

    // State after the replacement; the section is fetched again.
    Assert.AreEqual(1, wholeRange.NumSections);
    onlySection = wholeRange.GetSection(0);
    Assert.AreEqual(5, onlySection.NumParagraphs);
    Assert.AreEqual(expectedText2, onlySection.GetParagraph(2).Text);
    Assert.AreEqual(expectedText3, onlySection.GetParagraph(3).Text);
}
/**
 * Resolves the office drawing referenced by a character run, stores its
 * picture data through the pictures manager and forwards the resulting path
 * to the five-argument ProcessDrawnObject overload.
 *
 * Nothing happens when no pictures manager is available or when the drawing
 * has no picture data; a missing drawing is logged as a warning.
 */
protected void ProcessDrawnObject(HWPFDocument doc, CharacterRun characterRun, XmlElement block)
{
    if (GetPicturesManager() == null)
    {
        return;
    }

    // TODO: support headers
    OfficeDrawing drawing = doc.GetOfficeDrawingsMain()
        .GetOfficeDrawingAt(characterRun.StartOffset);
    if (drawing == null)
    {
        logger.Log(POILogger.WARN, "Characters #" + characterRun
            + " references missing drawn object");
        return;
    }

    byte[] data = drawing.GetPictureData();
    if (data == null)
    {
        // usual shape?
        return;
    }

    PictureType detectedType = PictureType.FindMatchingType(data);
    // Suggested name passed to the manager: "s" + start offset + "." + type.
    String savedPath = GetPicturesManager().SavePicture(
        data,
        detectedType,
        "s" + characterRun.StartOffset + "." + detectedType);

    ProcessDrawnObject(doc, characterRun, drawing, savedPath, block);
}
/**
 * Writes the document to an in-memory stream and builds a new HWPFDocument
 * from a copy of the written bytes.
 */
private static HWPFDocument Reload(HWPFDocument hwpfDocument)
{
    MemoryStream sink = new MemoryStream();
    hwpfDocument.Write(sink);

    byte[] serialized = sink.ToArray();
    return new HWPFDocument(new MemoryStream(serialized));
}
/**
 * Checks that HeaderFooterUnicode.doc is stored as three Unicode text pieces
 * with the expected character lengths, byte lengths and start/end offsets,
 * both when first opened and after SaveAndReload.
 */
public void TestUnicodeParts()
{
    // Expectations per piece (A, B, C).
    int[] characterLengths = { 256, 256, 19 };
    int[] byteLengths = { 512, 512, 38 };
    int[] starts = { 0, 256, 512 };
    int[] ends = { 256, 512, 531 };

    HWPFDocument current = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");

    // Round 0 inspects the freshly opened document, round 1 the saved and
    // re-loaded copy; the expectations are the same for both.
    for (int round = 0; round < 2; round++)
    {
        if (round == 1)
        {
            current = SaveAndReload(current);
        }

        TextPieceTable table = current.TextTable;

        // In three bits, split every 512 bytes
        Assert.AreEqual(3, table.TextPieces.Count);
        TextPiece[] pieces =
        {
            (TextPiece)table.TextPieces[0],
            (TextPiece)table.TextPieces[1],
            (TextPiece)table.TextPieces[2]
        };

        foreach (TextPiece piece in pieces)
        {
            Assert.IsTrue(piece.IsUnicode);
        }
        for (int k = 0; k < pieces.Length; k++)
        {
            Assert.AreEqual(characterLengths[k], pieces[k].CharacterLength);
        }
        for (int k = 0; k < pieces.Length; k++)
        {
            Assert.AreEqual(byteLengths[k], pieces[k].BytesLength);
        }
        for (int k = 0; k < pieces.Length; k++)
        {
            Assert.AreEqual(starts[k], pieces[k].Start);
            Assert.AreEqual(ends[k], pieces[k].End);
        }
    }
}
/**
 * Writes a note reference "[n]" (wrapped in zero-width-space constants) into
 * the given block and appends the note itself, prefixed with "^n" and a tab,
 * to the shared notes block, which is created on first use. The number n is
 * taken from noteCounters, which is then incremented under objCounters.
 */
protected void ProcessNote(HWPFDocument wordDocument, XmlElement block, Range noteTextRange)
{
    int number;
    lock (objCounters)
    {
        number = noteCounters;
        noteCounters++;
    }

    // In-text marker.
    block.AppendChild(textDocumentFacade.CreateText(
        UNICODECHAR_ZERO_WIDTH_SPACE + "[" + number + "]" + UNICODECHAR_ZERO_WIDTH_SPACE));

    if (notes == null)
    {
        notes = textDocumentFacade.CreateBlock();
    }

    // Note body: "^n<tab> ", the note characters, then a line break.
    XmlElement noteBlock = textDocumentFacade.CreateBlock();
    notes.AppendChild(noteBlock);

    noteBlock.AppendChild(textDocumentFacade.CreateText("^" + number + "\t "));
    ProcessCharacters(wordDocument, int.MinValue, noteTextRange, noteBlock);
    noteBlock.AppendChild(textDocumentFacade.CreateText("\n"));
}
/**
 * Checks the two pictures of testCroppedPictures.doc: aspect ratios, scaling
 * factors, goal sizes and crop values.
 */
public void TestCroppedPictures()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("testCroppedPictures.doc");
    List<Picture> pics = document.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pics);
    Assert.AreEqual(2, pics.Count);

    // Per picture: AspectRatioX, HorizontalScalingFactor, AspectRatioY, VerticalScalingFactor.
    int[,] scaling =
    {
        { 27, 270, 27, 271 },
        { 76, 764, 68, 685 }
    };

    for (int p = 0; p < 2; p++)
    {
        Picture picture = pics[p];

        Assert.AreEqual(scaling[p, 0], picture.AspectRatioX);
        Assert.AreEqual(scaling[p, 1], picture.HorizontalScalingFactor);
        Assert.AreEqual(scaling[p, 2], picture.AspectRatioY);
        Assert.AreEqual(scaling[p, 3], picture.VerticalScalingFactor);

        // 21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
        Assert.AreEqual(12000, picture.DxaGoal);
        // 15.88 cm / 2.54 cm/inch * 72dpi * 20 = 9000
        Assert.AreEqual(9000, picture.DyaGoal);

        // TODO YK: the second picture is cropped but HWPF reads the crop parameters all zeros
        Assert.AreEqual(0, picture.DxaCropLeft);
        Assert.AreEqual(0, picture.DxaCropRight);
        Assert.AreEqual(0, picture.DyaCropTop);
        Assert.AreEqual(0, picture.DyaCropBottom);
    }
}
/**
 * Verifies the saved-by entries of saved-by-table.doc against the expected
 * list, once straight after opening and once after an in-memory
 * write-and-read round trip.
 */
public void TestReadWrite()
{
    // This document is widely available on the internet as "blair.doc".
    // Its content could not be stripped by the original author: their
    // version of Word (from Office XP) drops this table on save.
    HWPFDocument source = HWPFTestDataSamples.OpenSampleFile("saved-by-table.doc");

    // Check what was just read.
    int position = 0;
    while (position < expected.Count)
    {
        Assert.AreEqual(expected[position], source.GetSavedByTable().GetEntries()[position],
            "List of saved-by entries was not as expected");
        position++;
    }

    // Write the entire document out and read it back in.
    MemoryStream buffer = new MemoryStream();
    source.Write(buffer);
    byte[] writtenBytes = buffer.ToArray();
    Stream copyStream = new MemoryStream(writtenBytes);
    HWPFDocument copy = new HWPFDocument(copyStream);

    // Check again on the copy.
    position = 0;
    while (position < expected.Count)
    {
        Assert.AreEqual(expected[position], copy.GetSavedByTable().GetEntries()[position],
            "List of saved-by entries was incorrect after writing");
        position++;
    }
}
/**
 * Checks the multi-level list part of Lists.doc (paragraphs 12 to 34): the
 * text of every paragraph, that GetLvl() is 9 for each of them, and the
 * GetIlvl() value expected for each paragraph.
 */
public void TestMultiLevelLists()
{
    const int firstIndex = 12;

    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("Lists.doc");
    Range r = doc.GetRange();
    Assert.AreEqual(40, r.NumParagraphs);

    // Expected text; position 0 corresponds to paragraph firstIndex.
    string[] expectedTexts =
    {
        "Multi-level un-ordered list:\r",
        "ML 1:1\r",
        "ML 1:2\r",
        "ML 2:1\r",
        "ML 2:2\r",
        "ML 2:3\r",
        "ML 3:1\r",
        "ML 4:1\r",
        "ML 5:1\r",
        "ML 5:2\r",
        "ML 2:4\r",
        "ML 1:3\r",
        "Multi-level ordered list:\r",
        "OL 1\r",
        "OL 2\r",
        "OL 2.1\r",
        "OL 2.2\r",
        "OL 2.2.1\r",
        "OL 2.2.2\r",
        "OL 2.2.2.1\r",
        "OL 2.2.3\r",
        "OL 3\r",
        "Finally we want some indents, to tell the difference\r"
    };

    // Expected GetIlvl() value, same indexing as expectedTexts.
    int[] expectedIlvls =
    {
        0, 0, 0, 1, 1, 1, 2, 3, 4, 4, 1, 0,
        0, 0, 0, 1, 1, 2, 2, 3, 2, 0, 0
    };

    for (int offset = 0; offset < expectedTexts.Length; offset++)
    {
        Assert.AreEqual(expectedTexts[offset], r.GetParagraph(firstIndex + offset).Text);
    }

    // The loop previously re-checked paragraph 12 on every iteration, so
    // paragraphs 13 to 34 were never verified; check paragraph i.
    for (int i = 12; i <= 34; i++)
    {
        Assert.AreEqual(9, r.GetParagraph(i).GetLvl(), "Unexpected level for paragraph " + i);
    }

    for (int offset = 0; offset < expectedIlvls.Length; offset++)
    {
        Assert.AreEqual(expectedIlvls[offset], r.GetParagraph(firstIndex + offset).GetIlvl());
    }
}
/**
 * Appends an external-graphic element for the given path to the block. The
 * document, character run and office drawing arguments are not used here.
 */
protected override void ProcessDrawnObject(HWPFDocument doc, CharacterRun characterRun, OfficeDrawing officeDrawing, String path, XmlElement block)
{
    block.AppendChild(foDocumentFacade.CreateExternalGraphic(path));
}
/**
 * Checks that the pictures table of header_image.doc reports two pictures.
 */
public void TestPicturesInHeader()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("header_image.doc");

    int pictureCount = document.GetPicturesTable().GetAllPictures().Count;

    Assert.AreEqual(2, pictureCount);
}
/** @link dependency
 * @stereotype instantiate*/
/*# Picture lnkPicture; */

/**
 * Creates a pictures table that stores the references it is given; no other
 * work is done here.
 *
 * @param _document owning document
 * @param _dataStream bytes of the data stream
 * @param _mainStream bytes of the main stream
 * @param fspa FSPA table
 * @param dgg Escher record holder
 */
public PicturesTable(HWPFDocument _document, byte[] _dataStream, byte[] _mainStream, FSPATable fspa, EscherRecordHolder dgg)
{
    // Streams first, then the owning document and the drawing-related tables.
    this._mainStream = _mainStream;
    this._dataStream = _dataStream;
    this._document = _document;
    this._dgg = dgg;
    this._fspa = fspa;
}
/**
 * Checks that empty.doc has no revision mark author table (the getter
 * returns null).
 */
public void TestEmptyDocument()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("empty.doc");

    Assert.IsNull(document.GetRevisionMarkAuthorTable());
}
/**
 * Checks that the text WordToTextConverter extracts from Bug47731.doc
 * contains the expected sentence fragment.
 */
public void TestBug47731()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug47731.doc");

    String extracted = WordToTextConverter.GetText(document);
    bool mentionsSoaking = extracted.Contains("Soak the rice in water for three to four hours");

    Assert.IsTrue(mentionsSoaking);
}
/**
 * Checks that the pictures table of Bug44603.doc reports two pictures.
 */
public void TestPicturesWithTable()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug44603.doc");

    int pictureCount = document.GetPicturesTable().GetAllPictures().Count;

    Assert.AreEqual(2, pictureCount);
}
/**
 * Checks that Bug45877.doc opens with 17 paragraphs and that paragraphs 0
 * and 13 carry the expected text.
 */
public void TestParagraphPAPXNoParent45877()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug45877.doc");

    int paragraphCount = document.GetRange().NumParagraphs;
    Assert.AreEqual(17, paragraphCount);

    // The range is requested again for each lookup, as before.
    Assert.AreEqual(
        "First paragraph\r",
        document.GetRange().GetParagraph(0).Text);
    Assert.AreEqual(
        "After Crashing Part\r",
        document.GetRange().GetParagraph(13).Text);
}
/**
 * Writes the document to a <tt>MemoryStream</tt> and reads it back from a
 * second <tt>MemoryStream</tt> over the written bytes.<p/>
 * Useful for verifying the serialisation round trip.
 */
public static HWPFDocument WriteOutAndReadBack(HWPFDocument original)
{
    MemoryStream sink = new MemoryStream(4096);
    original.Write(sink);

    byte[] written = sink.ToArray();
    MemoryStream source = new MemoryStream(written);
    return new HWPFDocument(source);
}
/**
 * Converts a sample Word document to XSL-FO and returns the generated markup.
 *
 * @param sampleFileName name of the sample resource to open
 * @return the InnerXml of the document produced by WordToFoConverter
 */
private static String getFoText(String sampleFileName)
{
    HWPFDocument hwpfDocument;
    // Release the resource stream as soon as the document has been built;
    // it was previously never closed.
    // NOTE(review): assumes HWPFDocument reads everything it needs in its
    // constructor, as callers that close the stream right after construction do.
    using (System.IO.Stream sampleStream = POIDataSamples
        .GetDocumentInstance().OpenResourceAsStream(sampleFileName))
    {
        hwpfDocument = new HWPFDocument(sampleStream);
    }

    WordToFoConverter wordToFoConverter = new WordToFoConverter(new XmlDocument());
    wordToFoConverter.ProcessDocument(hwpfDocument);
    return wordToFoConverter.Document.InnerXml;
}
/**
 * Writes the document to an in-memory stream and returns a new HWPFDocument
 * read from a copy of the written bytes.
 */
protected HWPFDocument SaveAndReload(HWPFDocument doc)
{
    MemoryStream sink = new MemoryStream();
    doc.Write(sink);

    byte[] written = sink.ToArray();
    MemoryStream source = new MemoryStream(written);
    return new HWPFDocument(source);
}
/**
 * Parses the Word document at Context.Path into a ToxyDocument.
 *
 * Each paragraph of the document range becomes a ToxyParagraph carrying the
 * paragraph text and its style index. The header and footer are filled in
 * only when the "ExtractHeader" / "ExtractFooter" context properties are
 * present and evaluate to true.
 *
 * @return the populated document
 * @throws FileNotFoundException when Context.Path does not exist
 */
public ToxyDocument Parse()
{
    if (!File.Exists(Context.Path))
    {
        throw new FileNotFoundException("File " + Context.Path + " is not found");
    }

    bool extractHeader = false;
    if (Context.Properties.ContainsKey("ExtractHeader"))
    {
        extractHeader = Utility.IsTrue(Context.Properties["ExtractHeader"]);
    }
    bool extractFooter = false;
    if (Context.Properties.ContainsKey("ExtractFooter"))
    {
        extractFooter = Utility.IsTrue(Context.Properties["ExtractFooter"]);
    }

    ToxyDocument rdoc = new ToxyDocument();
    using (FileStream stream = File.OpenRead(Context.Path))
    {
        HWPFDocument worddoc = new HWPFDocument(stream);

        if (extractHeader)
        {
            var headerRange = worddoc.GetHeaderStoryRange();
            if (headerRange != null)
            {
                rdoc.Header = headerRange.Text;
            }
        }
        if (extractFooter)
        {
            // NOTE(review): the footer is filled from the footnote range,
            // not from a footer story - confirm this is intended.
            var footnoteRange = worddoc.GetFootnoteRange();
            if (footnoteRange != null)
            {
                rdoc.Footer = footnoteRange.Text;
            }
        }

        // Fetch the range once instead of twice per loop iteration.
        var body = worddoc.GetRange();
        int paragraphCount = body.NumParagraphs;
        for (int i = 0; i < paragraphCount; i++)
        {
            Paragraph para = body.GetParagraph(i);

            // Character runs are not extracted; only text and style index are copied.
            ToxyParagraph p = new ToxyParagraph();
            p.Text = para.Text;
            p.StyleID = para.GetStyleIndex().ToString();
            rdoc.Paragraphs.Add(p);
        }
    }
    return rdoc;
}
/**
 * Runs the inner-table checks on innertable.doc, first on the document as
 * opened and then on a copy obtained through Reload.
 */
public void TestInnerTable()
{
    HWPFDocument hwpfDocument;
    // The using block closes the stream even when the HWPFDocument
    // constructor throws; the explicit Close() call was skipped in that case.
    using (Stream resourceAsStream = POIDataSamples.GetDocumentInstance()
        .OpenResourceAsStream("innertable.doc"))
    {
        hwpfDocument = new HWPFDocument(resourceAsStream);
    }

    TestInnerTable(hwpfDocument);

    hwpfDocument = Reload(hwpfDocument);
    TestInnerTable(hwpfDocument);
}
/**
 * Checks that the summary-information author of SampleDoc.doc is
 * "Nick Burch" both before and after WriteOutAndRead.
 */
public void TestWriteProperties()
{
    const string expectedAuthor = "Nick Burch";

    HWPFDocument original = HWPFTestDataSamples.OpenSampleFile("SampleDoc.doc");
    Assert.AreEqual(expectedAuthor, original.SummaryInformation.Author);

    // Write and read
    HWPFDocument roundTripped = WriteOutAndRead(original);
    Assert.AreEqual(expectedAuthor, roundTripped.SummaryInformation.Author);
}
/**
 * Checks that paragraph style 8 of Bug49820.doc reports justification 1,
 * both on the document as opened and on a copy obtained through Reload.
 */
public void TestSprmPJc()
{
    HWPFDocument hwpfDocument;
    // The using block closes the stream even when the HWPFDocument
    // constructor throws; the explicit Close() call was skipped in that case.
    using (Stream resourceAsStream = POIDataSamples.GetDocumentInstance()
        .OpenResourceAsStream("Bug49820.doc"))
    {
        hwpfDocument = new HWPFDocument(resourceAsStream);
    }

    Assert.AreEqual(1, hwpfDocument.GetStyleSheet().GetParagraphStyle(8)
        .GetJustification());

    hwpfDocument = Reload(hwpfDocument);

    Assert.AreEqual(1, hwpfDocument.GetStyleSheet().GetParagraphStyle(8)
        .GetJustification());
}
/**
 * Walks every paragraph of the document and, based on the first character
 * of its text, checks the table nesting level: paragraphs starting with
 * '1' to '3' must be in a table at level 2, paragraphs starting with a
 * letter from 'a' up to (but not including) 'z' must be in a table at level 1.
 *
 * @param hwpfDocument document to inspect
 */
private void TestInnerTable(HWPFDocument hwpfDocument)
{
    Range range = hwpfDocument.GetRange();
    for (int p = 0; p < range.NumParagraphs; p++)
    {
        Paragraph paragraph = range.GetParagraph(p);

        // Invariant lower-casing: the result is compared against ASCII
        // ranges, so it must not depend on the current culture (for example
        // the Turkish dotless-i mapping of 'I').
        char first = paragraph.Text.ToLowerInvariant()[0];

        if ('1' <= first && first < '4')
        {
            Assert.IsTrue(paragraph.IsInTable());
            Assert.AreEqual(2, paragraph.GetTableLevel());
        }

        if ('a' <= first && first < 'z')
        {
            Assert.IsTrue(paragraph.IsInTable());
            Assert.AreEqual(1, paragraph.GetTableLevel());
        }
    }
}
/**
 * Captures the header story range of the document and, when the file
 * information block reports both a non-zero CcpHdd and a non-zero PlcfHdd
 * size, builds the PlcfHdd plex from the table stream.
 *
 * @param doc document whose header stories are exposed
 */
public HeaderStories(HWPFDocument doc)
{
    this.headerStories = doc.GetHeaderStoryRange();

    FileInformationBlock fib = doc.GetFileInformationBlock();

    // If there's no PlcfHdd, nothing to do
    bool hasPlcfHdd = fib.GetCcpHdd() != 0 && fib.GetPlcfHddSize() != 0;
    if (hasPlcfHdd)
    {
        // Handle the PlcfHdd
        plcfHdd = new PlexOfCps(
            doc.GetTableStream(),
            fib.GetPlcfHddOffset(),
            fib.GetPlcfHddSize(),
            0);
    }
}
/**
 * Serialises the document into memory and returns a fresh HWPFDocument
 * parsed from the resulting bytes.
 */
protected HWPFDocument SaveAndReload(HWPFDocument doc)
{
    MemoryStream output = new MemoryStream();
    doc.Write(output);

    byte[] savedBytes = output.ToArray();
    return new HWPFDocument(new MemoryStream(savedBytes));
}
/**
 * Appends an image element for the given path to the block. The document,
 * character run and office drawing arguments are not used here.
 */
protected override void ProcessDrawnObject(HWPFDocument doc, CharacterRun characterRun, OfficeDrawing officeDrawing, string path, XmlElement block)
{
    block.AppendChild(htmlDocumentFacade.CreateImage(path));
}
/**
 * Handles an auto-numbered endnote by delegating to ProcessNoteAutonumbered
 * with the note type "end".
 */
protected override void ProcessEndnoteAutonumbered(HWPFDocument wordDocument, int noteIndex, XmlElement block, Range endnoteTextRange)
{
    const string noteType = "end";

    ProcessNoteAutonumbered(wordDocument, noteType, noteIndex, block, endnoteTextRange);
}
/**
 * Emits an auto-numbered note of the given type ("foot" or "end" in the
 * visible callers): a superscript-styled anchor in the running text and a
 * note block (bookmark, a space, and a span holding the note text) appended
 * to the shared notes block, which is created on first use. The displayed
 * number is noteIndex + 1.
 */
private void ProcessNoteAutonumbered(HWPFDocument wordDocument, string type, int noteIndex, XmlElement block, Range noteTextRange)
{
    String label = (noteIndex + 1).ToString();
    String labelCssClass = htmlDocumentFacade.GetOrCreateCssClass("a", "a",
        "vertical-align:super;font-size:smaller;");

    String forwardTarget = type + "note_" + label;
    String backwardTarget = type + "note_back_" + label;

    // Anchor in the text, pointing at the note and named as the back-link target.
    XmlElement reference = htmlDocumentFacade.CreateHyperlink("#" + forwardTarget);
    reference.SetAttribute("name", backwardTarget);
    reference.SetAttribute("class", labelCssClass + " " + type + "noteanchor");
    reference.InnerText = label;
    block.AppendChild(reference);

    // Container for all notes, created lazily.
    if (notes == null)
    {
        notes = htmlDocumentFacade.CreateBlock();
        notes.SetAttribute("class", "notes");
    }

    XmlElement noteBlock = htmlDocumentFacade.CreateBlock();
    noteBlock.SetAttribute("class", type + "note");
    notes.AppendChild(noteBlock);

    // Bookmark that links back to the anchor above.
    XmlElement backReference = htmlDocumentFacade.CreateBookmark(forwardTarget);
    backReference.SetAttribute("href", "#" + backwardTarget);
    backReference.InnerText = label;
    backReference.SetAttribute("class", labelCssClass + " " + type + "noteindex");
    noteBlock.AppendChild(backReference);

    noteBlock.AppendChild(htmlDocumentFacade.CreateText(" "));

    XmlElement textSpan = htmlDocumentFacade.Document.CreateElement("span");
    textSpan.SetAttribute("class", type + "notetext");
    noteBlock.AppendChild(textSpan);

    // Block properties are pushed for the duration of the note text and
    // always popped again, even when processing throws.
    this.blocksProperies.Push(new BlockProperies("", -1));
    try
    {
        ProcessCharacters(wordDocument, int.MinValue, noteTextRange, textSpan);
    }
    finally
    {
        this.blocksProperies.Pop();
    }
}
/**
 * Creates a Word extractor for an already loaded document. The document is
 * passed to the base constructor and also kept in this.doc.
 *
 * @param doc The HWPFDocument to extract from
 */
public WordExtractor(HWPFDocument doc)
    : base(doc)
{
    this.doc = doc;
}
/**
 * Handles an auto-numbered footnote by delegating to ProcessNoteAutonumbered
 * with the note type "foot".
 */
protected override void ProcessFootnoteAutonumbered(HWPFDocument wordDocument, int noteIndex, XmlElement block, Range footnoteTextRange)
{
    const string noteType = "foot";

    ProcessNoteAutonumbered(wordDocument, noteType, noteIndex, block, footnoteTextRange);
}
/**
 * Round-trips the document through memory: writes it to a MemoryStream and
 * parses a new HWPFDocument from the written bytes.
 */
private static HWPFDocument Reload(HWPFDocument hwpfDocument)
{
    MemoryStream output = new MemoryStream();
    hwpfDocument.Write(output);

    MemoryStream input = new MemoryStream(output.ToArray());
    return new HWPFDocument(input);
}
/**
 * Creates a position in the document. The base constructor receives pos for
 * both of its first two arguments and the document as its third.
 *
 * @param doc document the position belongs to
 * @param pos offset passed twice to the base constructor
 */
public DocumentPosition(HWPFDocument doc, int pos)
    : base(pos, pos, doc)
{
}