/// <summary>
/// Opens "testPictures.doc" and checks that it exposes seven pictures, that every
/// picture suggests a file extension and a full file name, and that pictures
/// 0, 1, 3, 4, 5 and 6 report the extension / MIME type pairs listed below.
/// </summary>
public void TestDifferentImages()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("testPictures.doc");
    List<Picture> pictures = document.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pictures);
    Assert.AreEqual(7, pictures.Count);

    // Every picture must be able to propose a name, whatever its type.
    foreach (Picture picture in pictures)
    {
        Assert.IsNotNull(picture.SuggestFileExtension());
        Assert.IsNotNull(picture.SuggestFullFileName());
    }

    // Parallel tables: picture index -> expected extension -> expected MIME type.
    // Index 2 is not checked here.
    int[] checkedIndexes = { 0, 1, 3, 4, 5, 6 };
    string[] expectedExtensions = { "jpg", "jpg", "png", "png", "wmf", "jpg" };
    string[] expectedMimeTypes =
    {
        "image/jpeg", "image/jpeg", "image/png", "image/png", "image/x-wmf", "image/jpeg"
    };

    for (int row = 0; row < checkedIndexes.Length; row++)
    {
        Picture checkedPicture = pictures[checkedIndexes[row]];
        Assert.AreEqual(expectedExtensions[row], checkedPicture.SuggestFileExtension());
        Assert.AreEqual(expectedMimeTypes[row], checkedPicture.MimeType);
    }
}
/// <summary>
/// Opens "testCroppedPictures.doc" and checks the aspect ratios, scaling factors,
/// goal dimensions and crop values reported for its two pictures.
/// </summary>
public void TestCroppedPictures()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("testCroppedPictures.doc");
    List<Picture> pictures = document.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pictures);
    Assert.AreEqual(2, pictures.Count);

    // ---- first picture ----
    Picture first = pictures[0];
    Assert.AreEqual(27, first.AspectRatioX);
    Assert.AreEqual(270, first.HorizontalScalingFactor);
    Assert.AreEqual(27, first.AspectRatioY);
    Assert.AreEqual(271, first.VerticalScalingFactor);
    // 21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
    Assert.AreEqual(12000, first.DxaGoal);
    // 15.88 cm / 2.54 cm/inch * 72dpi * 20 = 9000
    Assert.AreEqual(9000, first.DyaGoal);
    Assert.AreEqual(0, first.DxaCropLeft);
    Assert.AreEqual(0, first.DxaCropRight);
    Assert.AreEqual(0, first.DyaCropTop);
    Assert.AreEqual(0, first.DyaCropBottom);

    // ---- second picture ----
    Picture second = pictures[1];
    Assert.AreEqual(76, second.AspectRatioX);
    Assert.AreEqual(764, second.HorizontalScalingFactor);
    Assert.AreEqual(68, second.AspectRatioY);
    Assert.AreEqual(685, second.VerticalScalingFactor);
    // 21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
    Assert.AreEqual(12000, second.DxaGoal);
    // 15.88 cm / 2.54 cm/inch * 72dpi * 20 = 9000
    Assert.AreEqual(9000, second.DyaGoal);
    // TODO YK: The Picture is cropped but HWPF reads the crop parameters all zeros
    Assert.AreEqual(0, second.DxaCropLeft);
    Assert.AreEqual(0, second.DxaCropRight);
    Assert.AreEqual(0, second.DyaCropTop);
    Assert.AreEqual(0, second.DyaCropBottom);
}
/// <summary>
/// Opens "Bug41898.doc" and checks that its single picture suggests a name and
/// exposes four bytes of content whose little-endian unsigned value is 0x80000000.
/// </summary>
public void TestEmfComplexImage()
{
    // NOTE(review): an earlier remark at this spot said the test was temporarily
    // commented out because the file emf_2003_image contains an office drawing
    // object instead of pictures, and that it should be rewritten once office
    // drawing objects are revisited. The assertions below are nevertheless active.
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug41898.doc");
    List<Picture> pictures = document.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pictures);
    Assert.AreEqual(1, pictures.Count);

    Picture onlyPicture = pictures[0];
    Assert.IsNotNull(onlyPicture.SuggestFileExtension());
    Assert.IsNotNull(onlyPicture.SuggestFullFileName());

    // This one's tricky
    // TODO: Fix once we've sorted bug #41898
    Assert.IsNotNull(onlyPicture.GetContent());
    Assert.IsNotNull(onlyPicture.GetRawContent());

    // These are probably some sort of offset, need to figure them out
    Assert.AreEqual(4, onlyPicture.Size);
    Assert.AreEqual((uint)0x80000000, LittleEndian.GetUInt(onlyPicture.GetContent()));
    Assert.AreEqual((uint)0x80000000, LittleEndian.GetUInt(onlyPicture.GetRawContent()));
}
/// <summary>
/// Checks that the pictures table of "header_image.doc" reports two pictures.
/// </summary>
public void TestPicturesInHeader()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("header_image.doc");

    List<Picture> pictures = document.GetPicturesTable().GetAllPictures();

    Assert.AreEqual(2, pictures.Count);
}
/// <summary>
/// Checks that the pictures table of "Bug44603.doc" reports two pictures.
/// </summary>
public void TestPicturesWithTable()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug44603.doc");

    List<Picture> pictures = document.GetPicturesTable().GetAllPictures();

    Assert.AreEqual(2, pictures.Count);
}
/// <summary>
/// Opens the sample documents named by docAFile and docBFile, checks that each
/// has a pictures table, and checks that they hold seven and two pictures
/// respectively.
/// </summary>
public void TestImageCount()
{
    HWPFDocument firstDocument = HWPFTestDataSamples.OpenSampleFile(docAFile);
    HWPFDocument secondDocument = HWPFTestDataSamples.OpenSampleFile(docBFile);

    // Both tables must exist before anything is read from them.
    Assert.IsNotNull(firstDocument.GetPicturesTable());
    Assert.IsNotNull(secondDocument.GetPicturesTable());

    PicturesTable firstTable = firstDocument.GetPicturesTable();
    PicturesTable secondTable = secondDocument.GetPicturesTable();

    List<Picture> firstPictures = firstTable.GetAllPictures();
    List<Picture> secondPictures = secondTable.GetAllPictures();

    Assert.AreEqual(7, firstPictures.Count);
    Assert.AreEqual(2, secondPictures.Count);
}
/// <summary>
/// Disabled test (the test attribute below is commented out): pending the missing
/// files being uploaded to bug #44937. Compares the content of the single picture
/// in the document named by docDFile with the bytes of the file named by imgDFile.
/// </summary>
//[TestMethod]
public void BROKENtestEscherDrawing()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(docDFile);
    List<Picture> pictures = document.GetPicturesTable().GetAllPictures();

    Assert.AreEqual(1, pictures.Count);

    Picture onlyPicture = pictures[0];
    Assert.IsNotNull(onlyPicture);

    // Length first, then the byte-for-byte comparison.
    byte[] expectedBytes = ReadFile(imgDFile);
    Assert.AreEqual(expectedBytes.Length, onlyPicture.GetContent().Length);
    assertBytesSame(expectedBytes, onlyPicture.GetContent());
}
/// <summary>
/// Checks that "equation.doc" holds exactly one picture, that it is reported as
/// EMF (type, extension and MIME type), and that its suggested file name is "0.emf".
/// </summary>
public void TestEquation()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("equation.doc");
    PicturesTable table = document.GetPicturesTable();

    List<Picture> found = table.GetAllPictures();
    Assert.AreEqual(1, found.Count);

    Picture equation = found[0];
    Assert.IsNotNull(equation);

    // Type, extension and MIME type are all compared against the EMF picture type.
    Assert.AreEqual(PictureType.EMF, equation.SuggestPictureType());
    Assert.AreEqual(PictureType.EMF.Extension, equation.SuggestFileExtension());
    Assert.AreEqual(PictureType.EMF.Mime, equation.MimeType);
    Assert.AreEqual("0.emf", equation.SuggestFullFileName());
}
/// <summary>
/// Checks that the document named by docCFile holds exactly one picture and that
/// its content matches, in length and byte for byte, the file named by imgCFile.
/// </summary>
public void TestCompressedImageData()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(docCFile);
    PicturesTable table = document.GetPicturesTable();
    List<Picture> pictures = table.GetAllPictures();

    Assert.AreEqual(1, pictures.Count);

    Picture onlyPicture = pictures[0];
    Assert.IsNotNull(onlyPicture);

    // Compare against the reference image bytes.
    byte[] expectedBytes = ReadFile(imgCFile);
    Assert.AreEqual(expectedBytes.Length, onlyPicture.GetContent().Length);
    assertBytesSame(expectedBytes, onlyPicture.GetContent());
}
/// <summary>
/// Checks that "two_images.doc" yields exactly two pictures, that each suggests a
/// file extension and a full file name, and that both report the "jpg" extension.
/// </summary>
public void TestTwoImages()
{
    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("two_images.doc");
    List<Picture> pics = doc.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pics);
    // Expected value goes first so that a failure message labels the numbers correctly.
    Assert.AreEqual(2, pics.Count);

    foreach (Picture pic in pics)
    {
        Assert.IsNotNull(pic.SuggestFileExtension());
        Assert.IsNotNull(pic.SuggestFullFileName());
    }

    Picture picA = pics[0];
    Picture picB = pics[1];
    Assert.AreEqual("jpg", picA.SuggestFileExtension());
    // Check the second picture as well rather than asserting the first one twice.
    // NOTE(review): "jpg" is assumed for the second picture because the assertion
    // was duplicated verbatim — confirm against the sample file.
    Assert.AreEqual("jpg", picB.SuggestFileExtension());
}
/// <summary>
/// Opens "FloatingPictures.doc", checks the number of pictures in its pictures
/// table, then walks every character run of every paragraph and counts how many
/// runs carry a regular picture, how many carry an escher picture, and how many
/// merely start with the U+0008 marker without either.
/// </summary>
public void TestFloatingPictures()
{
    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("FloatingPictures.doc");
    PicturesTable pictures = doc.GetPicturesTable();

    // The sample is said to contain 19 images, but some are
    //  duplicate floating ones
    Assert.AreEqual(17, pictures.GetAllPictures().Count);

    int plain8s = 0;
    int escher8s = 0;
    int image1s = 0;

    Range r = doc.GetRange();
    for (int np = 0; np < r.NumParagraphs; np++)
    {
        Paragraph p = r.GetParagraph(np);
        for (int nc = 0; nc < p.NumCharacterRuns; nc++)
        {
            CharacterRun cr = p.GetCharacterRun(nc);
            if (pictures.HasPicture(cr))
            {
                image1s++;
            }
            else if (pictures.HasEscherPicture(cr))
            {
                escher8s++;
            }
            // Ordinal comparison is required here: the default culture-sensitive
            // overload can treat a control character such as U+0008 as ignorable
            // on ICU-based runtimes, which would make the prefix match every run.
            else if (cr.Text.StartsWith("\u0008", System.StringComparison.Ordinal))
            {
                plain8s++;
            }
        }
    }

    // Total is 20, as the 4 escher 8s all reference
    //  the same regular image
    Assert.AreEqual(16, image1s);
    Assert.AreEqual(4, escher8s);
    Assert.AreEqual(0, plain8s);
}
/// <summary>
/// Checks that "vector_image.doc" holds a single picture larger than 128 (as
/// reported by Size) whose content equals the bytes of "vector_image.emf".
/// </summary>
public void TestEmfImage()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("vector_image.doc");
    List<Picture> pictures = document.GetPicturesTable().GetAllPictures();

    Assert.IsNotNull(pictures);
    Assert.AreEqual(1, pictures.Count);

    Picture vector = pictures[0];
    Assert.IsNotNull(vector.SuggestFileExtension());
    Assert.IsNotNull(vector.SuggestFullFileName());
    Assert.IsTrue(vector.Size > 128);

    // Compare the extracted content with the reference file, length first.
    byte[] expectedBytes = POIDataSamples.GetDocumentInstance().ReadFile("vector_image.emf");
    byte[] actualBytes = vector.GetContent();
    Assert.AreEqual(expectedBytes.Length, actualBytes.Length);

    int position = 0;
    while (position < expectedBytes.Length)
    {
        Assert.AreEqual(expectedBytes[position], actualBytes[position]);
        position++;
    }
}
/// <summary>
/// Checks that the document named by docBFile holds two pictures whose contents
/// match, in length and byte for byte, the files named by imgAFile and imgBFile.
/// </summary>
public void TestImageData()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(docBFile);
    PicturesTable table = document.GetPicturesTable();
    List<Picture> pictures = table.GetAllPictures();

    Assert.AreEqual(2, pictures.Count);

    Picture first = pictures[0];
    Picture second = pictures[1];
    Assert.IsNotNull(first);
    Assert.IsNotNull(second);

    // Load both reference images before comparing anything.
    byte[] firstExpected = ReadFile(imgAFile);
    byte[] secondExpected = ReadFile(imgBFile);

    Assert.AreEqual(firstExpected.Length, first.GetContent().Length);
    Assert.AreEqual(secondExpected.Length, second.GetContent().Length);

    assertBytesSame(firstExpected, first.GetContent());
    assertBytesSame(secondExpected, second.GetContent());
}
/// <summary>
/// Processes the character runs of <paramref name="range"/> and emits the result
/// into <paramref name="block"/>.
/// <para>
/// For an <c>HWPFDocument</c>, bookmarks starting inside the range (that are not
/// already on <c>bookmarkStack</c>) and fields starting at a character run are
/// collected as structures first. If any structure advances the position, the
/// range is handled piecewise: the text between structures is processed by
/// recursive calls, bookmarks go to <c>ProcessBookmarks</c> and fields go to
/// <c>ProcessField</c>. Otherwise each character run is handled directly:
/// pictures, special characters, field marks and plain text.
/// </para>
/// </summary>
/// <param name="wordDocument">Document being converted; most structure handling
/// only applies when it is an <c>HWPFDocument</c>.</param>
/// <param name="currentTableLevel">Passed through to the nested processing calls;
/// a trailing BEL mark is stripped from run text unless this is
/// <c>int.MinValue</c>.</param>
/// <param name="range">Range to process; <c>null</c> yields <c>false</c>.</param>
/// <param name="block">Element that receives the generated output.</param>
/// <returns>
/// <c>false</c> when <paramref name="range"/> is <c>null</c>; <c>true</c> when the
/// range was handled through its structures; otherwise whether any processed run
/// contained non-whitespace text.
/// </returns>
/// <exception cref="NullReferenceException">A character run of the range is
/// <c>null</c>.</exception>
/// <exception cref="NotSupportedException">A collected structure is neither a
/// bookmark nor a field.</exception>
protected bool ProcessCharacters(HWPFDocumentCore wordDocument, int currentTableLevel, Range range, XmlElement block)
{
    if (range == null)
    {
        return false;
    }

    bool haveAnyText = false;

    /*
     * In text there can be fields, bookmarks, may be other structures (code
     * below allows extension). Those structures can overlap, so either we
     * should process char-by-char (slow) or find a correct way to
     * reconstruct the structure of range -- sergey
     */
    IList<Structure> structures = new List<Structure>();
    if (wordDocument is HWPFDocument)
    {
        HWPFDocument doc = (HWPFDocument)wordDocument;

        Dictionary<int, List<Bookmark>> rangeBookmarks = doc.GetBookmarks()
            .GetBookmarksStartedBetween(range.StartOffset, range.EndOffset);

        if (rangeBookmarks != null)
        {
            foreach (KeyValuePair<int, List<Bookmark>> kv in rangeBookmarks)
            {
                List<Bookmark> lists = kv.Value;
                foreach (Bookmark bookmark in lists)
                {
                    // Bookmarks currently being processed further up the call chain are skipped.
                    if (!bookmarkStack.Contains(bookmark))
                    {
                        AddToStructures(structures, new Structure(bookmark));
                    }
                }
            }
        }

        // TODO: dead fields?
        for (int c = 0; c < range.NumCharacterRuns; c++)
        {
            CharacterRun characterRun = range.GetCharacterRun(c);
            if (characterRun == null)
            {
                throw new NullReferenceException();
            }

            Field aliveField = ((HWPFDocument)wordDocument).GetFields()
                .GetFieldByStartOffset(FieldsDocumentPart.MAIN, characterRun.StartOffset);
            if (aliveField != null)
            {
                AddToStructures(structures, new Structure(aliveField));
            }
        }
    }

    // Sort the structures with a stable insertion sort using the default comparer
    // (the counterpart of Collections.sort in the Java original). A SortedList keyed
    // by the structure itself would throw ArgumentException as soon as two
    // structures compare equal; here ties simply keep their insertion order.
    List<Structure> ordered = new List<Structure>(structures.Count);
    Comparer<Structure> structureComparer = Comparer<Structure>.Default;
    foreach (Structure candidate in structures)
    {
        int insertAt = ordered.Count;
        while (insertAt > 0 && structureComparer.Compare(ordered[insertAt - 1], candidate) > 0)
        {
            insertAt--;
        }
        ordered.Insert(insertAt, candidate);
    }
    structures = ordered;

    int previous = range.StartOffset;
    foreach (Structure structure in structures)
    {
        // Text between the previous position and this structure is processed on its own.
        if (structure.Start != previous)
        {
            Range subrange = new Range(previous, structure.Start, range);
            ProcessCharacters(wordDocument, currentTableLevel, subrange, block);
        }

        if (structure.StructureObject is Bookmark)
        {
            // other bookmarks with same boundaries
            IList<Bookmark> bookmarks = new List<Bookmark>();
            IEnumerator<List<Bookmark>> iterator = ((HWPFDocument)wordDocument).GetBookmarks()
                .GetBookmarksStartedBetween(structure.Start, structure.Start + 1).Values.GetEnumerator();
            // Only the first list of bookmarks returned for this offset is inspected.
            iterator.MoveNext();
            foreach (Bookmark bookmark in iterator.Current)
            {
                if (bookmark.Start == structure.Start && bookmark.End == structure.End)
                {
                    bookmarks.Add(bookmark);
                }
            }

            // Mark these bookmarks as in progress so nested calls do not pick them up again.
            bookmarkStack.AddRange(bookmarks);
            try
            {
                int end = Math.Min(range.EndOffset, structure.End);
                Range subrange = new Range(structure.Start, end, range);
                ProcessBookmarks(wordDocument, block, subrange, currentTableLevel, bookmarks);
            }
            finally
            {
                bookmarkStack.RemoveAll((e) => { return bookmarks.Contains(e); });
            }
        }
        else if (structure.StructureObject is Field)
        {
            Field field = (Field)structure.StructureObject;
            ProcessField((HWPFDocument)wordDocument, range, currentTableLevel, field, block);
        }
        else
        {
            throw new NotSupportedException("NYI: " + structure.StructureObject.GetType().ToString());
        }

        // Never advance beyond the end of the range being processed.
        previous = Math.Min(range.EndOffset, structure.End);
    }

    if (previous != range.StartOffset)
    {
        // Defensive check: 'previous' is clamped to range.EndOffset in the loop above.
        if (previous > range.EndOffset)
        {
            logger.Log(POILogger.WARN, "Latest structure in ", range, " ended at #" + previous, " after range boundaries [", range.StartOffset + "; " + range.EndOffset, ")");
            return true;
        }

        // Whatever follows the last structure is processed on its own.
        if (previous < range.EndOffset)
        {
            Range subrange = new Range(previous, range.EndOffset, range);
            ProcessCharacters(wordDocument, currentTableLevel, subrange, block);
        }
        return true;
    }

    for (int c = 0; c < range.NumCharacterRuns; c++)
    {
        CharacterRun characterRun = range.GetCharacterRun(c);

        if (characterRun == null)
        {
            throw new NullReferenceException();
        }

        if (wordDocument is HWPFDocument && ((HWPFDocument)wordDocument).GetPicturesTable().HasPicture(characterRun))
        {
            HWPFDocument newFormat = (HWPFDocument)wordDocument;
            Picture picture = newFormat.GetPicturesTable().ExtractPicture(characterRun, true);

            ProcessImage(block, characterRun.Text[0] == 0x01, picture);
            continue;
        }

        string text = characterRun.Text;
        byte[] textByte = System.Text.Encoding.GetEncoding("iso-8859-1").GetBytes(text);
        if (textByte.Length == 0)
        {
            continue;
        }

        if (characterRun.IsSpecialCharacter())
        {
            if (text[0] == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE && (wordDocument is HWPFDocument))
            {
                HWPFDocument doc = (HWPFDocument)wordDocument;
                ProcessNoteAnchor(doc, characterRun, block);
                continue;
            }
            if (text[0] == SPECCHAR_DRAWN_OBJECT && (wordDocument is HWPFDocument))
            {
                HWPFDocument doc = (HWPFDocument)wordDocument;
                ProcessDrawnObject(doc, characterRun, block);
                continue;
            }
            if (characterRun.IsOle2() && (wordDocument is HWPFDocument))
            {
                HWPFDocument doc = (HWPFDocument)wordDocument;
                ProcessOle2(doc, characterRun, block);
                continue;
            }
        }

        if (textByte[0] == FIELD_BEGIN_MARK)
        {
            if (wordDocument is HWPFDocument)
            {
                Field aliveField = ((HWPFDocument)wordDocument).GetFields().GetFieldByStartOffset(
                    FieldsDocumentPart.MAIN, characterRun.StartOffset);
                if (aliveField != null)
                {
                    ProcessField(((HWPFDocument)wordDocument), range, currentTableLevel, aliveField, block);

                    // Skip every run that ends at or before the end of the field.
                    int continueAfter = aliveField.GetFieldEndOffset();
                    while (c < range.NumCharacterRuns && range.GetCharacterRun(c).EndOffset <= continueAfter)
                    {
                        c++;
                    }

                    // Step back one so the loop increment lands on the first run after the field.
                    if (c < range.NumCharacterRuns)
                    {
                        c--;
                    }

                    continue;
                }
            }

            // Resume from whatever index the dead-field handling reports; the run
            // holding the begin mark is skipped either way.
            c = TryDeadField(wordDocument, range, currentTableLevel, c, block);
            continue;
        }
        if (textByte[0] == FIELD_SEPARATOR_MARK)
        {
            // shall not appear without FIELD_BEGIN_MARK
            continue;
        }
        if (textByte[0] == FIELD_END_MARK)
        {
            // shall not appear without FIELD_BEGIN_MARK
            continue;
        }

        if (characterRun.IsSpecialCharacter() || characterRun.IsObj() || characterRun.IsOle2())
        {
            continue;
        }

        // Drop a trailing carriage return, or a trailing BEL mark unless the table
        // level is int.MinValue. Ordinal comparison keeps the check independent of
        // culture-specific collation rules.
        if (text.EndsWith("\r", StringComparison.Ordinal)
            || (text[text.Length - 1] == BEL_MARK && currentTableLevel != int.MinValue))
        {
            text = text.Substring(0, text.Length - 1);
        }

        {
            // line breaks
            StringBuilder stringBuilder = new StringBuilder();
            foreach (char charChar in text)
            {
                if (charChar == 11)
                {
                    // Flush the text gathered so far, then emit the line break.
                    if (stringBuilder.Length > 0)
                    {
                        OutputCharacters(block, characterRun, stringBuilder.ToString());
                        stringBuilder.Length = 0;
                    }
                    ProcessLineBreak(block, characterRun);
                }
                else if (charChar == 30)
                {
                    // Non-breaking hyphens are stored as ASCII 30
                    stringBuilder.Append(UNICODECHAR_NONBREAKING_HYPHEN);
                }
                else if (charChar == 31)
                {
                    // Non-required hyphens to zero-width space
                    stringBuilder.Append(UNICODECHAR_ZERO_WIDTH_SPACE);
                }
                else if (charChar >= 0x20 || charChar == 0x09 || charChar == 0x0A || charChar == 0x0D)
                {
                    // Other characters below 0x20 are dropped.
                    stringBuilder.Append(charChar);
                }
            }
            if (stringBuilder.Length > 0)
            {
                OutputCharacters(block, characterRun, stringBuilder.ToString());
            }
        }

        haveAnyText |= text.Trim().Length != 0;
    }

    return haveAnyText;
}
/// <summary>
/// Opens "word_with_embeded.doc" and checks, paragraph by paragraph, the number of
/// character runs, the paragraph text and which runs carry a picture; then checks
/// the suggested extension, file name and MIME type of the four pictures in the
/// pictures table.
/// </summary>
public void TestEmbededDocumentIcon()
{
    // This file has two embeded excel files, an embeded powerpoint
    //   file and an embeded word file, in that order
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("word_with_embeded.doc");

    // Check we don't break loading the pictures
    document.GetPicturesTable().GetAllPictures();
    PicturesTable table = document.GetPicturesTable();

    // Check the Text, and its embeded images
    Range body = document.GetRange();
    Assert.AreEqual(1, body.NumSections);
    Assert.AreEqual(5, body.NumParagraphs);

    // Expected text of each paragraph, in document order.
    string[] expectedTexts =
    {
        "I have lots of embedded files in me\r",
        "\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r",
        "\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r",
        "\u0013 EMBED PowerPoint.Show.8 \u0014\u0001\u0015\r",
        "\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r"
    };

    // One row per paragraph, one flag per character run: does the run have a picture?
    // The row length doubles as the expected number of character runs.
    bool[][] expectedPictureFlags =
    {
        new bool[] { false, false },
        new bool[] { false, false, false, true, false },
        new bool[] { false, false, false, true, false, false },
        new bool[] { false, false, false, true, false, false },
        new bool[] { false, false, false, true, false, false }
    };

    for (int paragraphIndex = 0; paragraphIndex < expectedTexts.Length; paragraphIndex++)
    {
        Paragraph paragraph = body.GetParagraph(paragraphIndex);
        bool[] flags = expectedPictureFlags[paragraphIndex];

        Assert.AreEqual(flags.Length, paragraph.NumCharacterRuns);
        Assert.AreEqual(expectedTexts[paragraphIndex], paragraph.Text);

        for (int runIndex = 0; runIndex < flags.Length; runIndex++)
        {
            Assert.AreEqual(flags[runIndex], table.HasPicture(paragraph.GetCharacterRun(runIndex)));
        }
    }

    // Look at the pictures table
    List<Picture> pictures = table.GetAllPictures();
    Assert.AreEqual(4, pictures.Count);

    string[] expectedFileNames = { "0", "469", "8c7", "10a8" };
    for (int pictureIndex = 0; pictureIndex < expectedFileNames.Length; pictureIndex++)
    {
        Picture picture = pictures[pictureIndex];
        Assert.AreEqual("", picture.SuggestFileExtension());
        Assert.AreEqual(expectedFileNames[pictureIndex], picture.SuggestFullFileName());
        Assert.AreEqual("image/unknown", picture.MimeType);
    }
}
/// <summary>
/// Smoke test: loading every picture of "ob_is.doc" must not throw.
/// </summary>
public void TestFastSaved3()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("ob_is.doc");

    // The returned list is deliberately ignored; only the absence of an exception matters.
    document.GetPicturesTable().GetAllPictures();
}