/// <summary>
/// Opens the "pageref.doc" sample and checks that it exposes exactly one
/// bookmark, named "userref", whose start and end offsets are 27 and 38.
/// </summary>
public void TestBookmarks()
{
    // Load the sample document and fetch its bookmark collection.
    HWPFDocument sampleDocument = HWPFTestDataSamples.OpenSampleFile("pageref.doc");
    Bookmarks allBookmarks = sampleDocument.GetBookmarks();

    // The sample is expected to contain a single bookmark.
    Assert.AreEqual(1, allBookmarks.Count);

    // Check the name and boundaries of that bookmark.
    Bookmark onlyBookmark = allBookmarks.GetBookmark(0);
    Assert.AreEqual("userref", onlyBookmark.Name);
    Assert.AreEqual(27, onlyBookmark.Start);
    Assert.AreEqual(38, onlyBookmark.End);
}
/// <summary>
/// Processes the character runs of <paramref name="range"/> and emits their
/// content into <paramref name="block"/>.
/// <para>
/// When the document is an <c>HWPFDocument</c>, bookmarks and alive fields that
/// start inside the range are collected as "structures" first. The range is then
/// split around those structures: the text between structures is processed by
/// recursive calls, bookmarks are delegated to <c>ProcessBookmarks</c> and fields
/// to <c>ProcessField</c>. Only when no structure advanced the position past the
/// start of the range are the character runs processed one by one (pictures,
/// note anchors, drawn objects, OLE2 objects, fields, line breaks and plain text).
/// </para>
/// </summary>
/// <param name="wordDocument">Document that owns the range. Bookmark, field,
/// picture, note, drawn-object and OLE2 handling is applied only when this is an
/// <c>HWPFDocument</c>.</param>
/// <param name="currentTableLevel">Current table nesting level; it is passed on
/// to the helpers. A trailing <c>BEL_MARK</c> is stripped from a run's text
/// unless this equals <c>int.MinValue</c>.</param>
/// <param name="range">Range to process; <c>null</c> yields <c>false</c>.</param>
/// <param name="block">Element that receives the generated output.</param>
/// <returns>
/// <c>false</c> when <paramref name="range"/> is <c>null</c>; <c>true</c> when
/// structure processing moved past the start of the range; otherwise whether any
/// processed character run contained non-whitespace text.
/// </returns>
/// <exception cref="NullReferenceException">A character run returned by the
/// range is <c>null</c> (exception type kept for backward compatibility).</exception>
/// <exception cref="NotSupportedException">A collected structure wraps an object
/// that is neither a <c>Bookmark</c> nor a <c>Field</c>.</exception>
protected bool ProcessCharacters(HWPFDocumentCore wordDocument, int currentTableLevel, Range range, XmlElement block)
{
    if (range == null)
    {
        return false;
    }

    // Resolved once: null when the document is not an HWPFDocument, in which
    // case every HWPFDocument-only feature below is skipped.
    HWPFDocument hwpfDocument = wordDocument as HWPFDocument;

    bool haveAnyText = false;

    /*
     * In text there can be fields, bookmarks, maybe other structures (the code
     * below allows extension). Those structures can overlap, so either we
     * process char-by-char (slow) or find a correct way to reconstruct the
     * structure of the range -- sergey
     */
    IList<Structure> structures = new List<Structure>();
    if (hwpfDocument != null)
    {
        Dictionary<int, List<Bookmark>> rangeBookmarks = hwpfDocument.GetBookmarks()
            .GetBookmarksStartedBetween(range.StartOffset, range.EndOffset);
        if (rangeBookmarks != null)
        {
            foreach (KeyValuePair<int, List<Bookmark>> entry in rangeBookmarks)
            {
                foreach (Bookmark bookmark in entry.Value)
                {
                    // Bookmarks on the stack are already being processed by a caller.
                    if (!bookmarkStack.Contains(bookmark))
                    {
                        AddToStructures(structures, new Structure(bookmark));
                    }
                }
            }
        }

        // TODO: dead fields?
        for (int c = 0; c < range.NumCharacterRuns; c++)
        {
            CharacterRun characterRun = range.GetCharacterRun(c);
            if (characterRun == null)
            {
                throw new NullReferenceException();
            }

            Field aliveField = hwpfDocument.GetFields()
                .GetFieldByStartOffset(FieldsDocumentPart.MAIN, characterRun.StartOffset);
            if (aliveField != null)
            {
                AddToStructures(structures, new Structure(aliveField));
            }
        }
    }

    // Stable sort that tolerates structures comparing as equal. Inserting into
    // a SortedList keyed by the structure throws ArgumentException on such ties.
    List<Structure> orderedStructures = SortStructuresStable(structures);

    int previous = range.StartOffset;
    foreach (Structure structure in orderedStructures)
    {
        if (structure.Start != previous)
        {
            // Text between the previous structure (or range start) and this one.
            Range gapRange = new Range(previous, structure.Start, range);
            ProcessCharacters(wordDocument, currentTableLevel, gapRange, block);
        }

        if (structure.StructureObject is Bookmark)
        {
            // Collect every bookmark sharing this structure's boundaries.
            IList<Bookmark> bookmarks = new List<Bookmark>();
            Dictionary<int, List<Bookmark>> startedHere = hwpfDocument.GetBookmarks()
                .GetBookmarksStartedBetween(structure.Start, structure.Start + 1);
            if (startedHere != null)
            {
                foreach (List<Bookmark> candidates in startedHere.Values)
                {
                    foreach (Bookmark bookmark in candidates)
                    {
                        if (bookmark.Start == structure.Start && bookmark.End == structure.End)
                        {
                            bookmarks.Add(bookmark);
                        }
                    }
                }
            }

            if (bookmarks.Count == 0)
            {
                // NOTE(review): the lookup is expected to return at least the
                // structure's own bookmark. If it does not, fall back to that
                // bookmark so it is still pushed on bookmarkStack while its
                // sub-range is processed -- confirm against Bookmarks implementation.
                bookmarks.Add((Bookmark)structure.StructureObject);
            }

            bookmarkStack.AddRange(bookmarks);
            try
            {
                int end = Math.Min(range.EndOffset, structure.End);
                Range bookmarkRange = new Range(structure.Start, end, range);
                ProcessBookmarks(wordDocument, block, bookmarkRange, currentTableLevel, bookmarks);
            }
            finally
            {
                // Always pop what was pushed, even if processing failed.
                bookmarkStack.RemoveAll(e => bookmarks.Contains(e));
            }
        }
        else if (structure.StructureObject is Field)
        {
            Field field = (Field)structure.StructureObject;
            ProcessField(hwpfDocument, range, currentTableLevel, field, block);
        }
        else
        {
            throw new NotSupportedException("NYI: " + structure.StructureObject.GetType().ToString());
        }

        // Never advance past the end of the range being processed.
        previous = Math.Min(range.EndOffset, structure.End);
    }

    if (previous != range.StartOffset)
    {
        // Defensive: 'previous' is clamped to range.EndOffset above, so this
        // branch is not expected to trigger.
        if (previous > range.EndOffset)
        {
            logger.Log(POILogger.WARN, "Latest structure in ", range, " ended at #" + previous, " after range boundaries [", range.StartOffset + "; " + range.EndOffset, ")");
            return true;
        }

        if (previous < range.EndOffset)
        {
            // Remaining text after the last structure.
            Range tailRange = new Range(previous, range.EndOffset, range);
            ProcessCharacters(wordDocument, currentTableLevel, tailRange, block);
        }

        return true;
    }

    // Resolved once instead of once per character run.
    System.Text.Encoding latin1 = System.Text.Encoding.GetEncoding("iso-8859-1");

    for (int c = 0; c < range.NumCharacterRuns; c++)
    {
        CharacterRun characterRun = range.GetCharacterRun(c);
        if (characterRun == null)
        {
            throw new NullReferenceException();
        }

        if (hwpfDocument != null && hwpfDocument.GetPicturesTable().HasPicture(characterRun))
        {
            Picture picture = hwpfDocument.GetPicturesTable().ExtractPicture(characterRun, true);
            ProcessImage(block, characterRun.Text[0] == 0x01, picture);
            continue;
        }

        string text = characterRun.Text;
        byte[] textByte = latin1.GetBytes(text);
        if (textByte.Length == 0)
        {
            continue;
        }

        if (characterRun.IsSpecialCharacter())
        {
            if (text[0] == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE && hwpfDocument != null)
            {
                ProcessNoteAnchor(hwpfDocument, characterRun, block);
                continue;
            }

            if (text[0] == SPECCHAR_DRAWN_OBJECT && hwpfDocument != null)
            {
                ProcessDrawnObject(hwpfDocument, characterRun, block);
                continue;
            }

            if (characterRun.IsOle2() && hwpfDocument != null)
            {
                ProcessOle2(hwpfDocument, characterRun, block);
                continue;
            }
        }

        if (textByte[0] == FIELD_BEGIN_MARK)
        {
            if (hwpfDocument != null)
            {
                Field aliveField = hwpfDocument.GetFields()
                    .GetFieldByStartOffset(FieldsDocumentPart.MAIN, characterRun.StartOffset);
                if (aliveField != null)
                {
                    ProcessField(hwpfDocument, range, currentTableLevel, aliveField, block);

                    // Skip every run that ends inside the field just processed.
                    int continueAfter = aliveField.GetFieldEndOffset();
                    while (c < range.NumCharacterRuns
                           && range.GetCharacterRun(c).EndOffset <= continueAfter)
                    {
                        c++;
                    }

                    // Compensate for the loop's own c++ when runs remain.
                    if (c < range.NumCharacterRuns)
                    {
                        c--;
                    }

                    continue;
                }
            }

            // TryDeadField returns the run index to resume from; it equals c
            // when nothing was skipped.
            c = TryDeadField(wordDocument, range, currentTableLevel, c, block);
            continue;
        }

        if (textByte[0] == FIELD_SEPARATOR_MARK || textByte[0] == FIELD_END_MARK)
        {
            // shall not appear without FIELD_BEGIN_MARK
            continue;
        }

        if (characterRun.IsSpecialCharacter() || characterRun.IsObj() || characterRun.IsOle2())
        {
            continue;
        }

        // 'text' is non-empty here (its encoded form has at least one byte).
        // Ordinal character comparison avoids culture-sensitive EndsWith
        // semantics for a control character.
        char lastChar = text[text.Length - 1];
        if (lastChar == '\r' || (lastChar == BEL_MARK && currentTableLevel != int.MinValue))
        {
            text = text.Substring(0, text.Length - 1);
        }

        // Emit the text, splitting on vertical tabs (line breaks) and mapping
        // the special hyphen codes; other control characters are dropped.
        StringBuilder pending = new StringBuilder();
        foreach (char ch in text)
        {
            if (ch == 11)
            {
                if (pending.Length > 0)
                {
                    OutputCharacters(block, characterRun, pending.ToString());
                    pending.Length = 0;
                }

                ProcessLineBreak(block, characterRun);
            }
            else if (ch == 30)
            {
                // Non-breaking hyphens are stored as ASCII 30
                pending.Append(UNICODECHAR_NONBREAKING_HYPHEN);
            }
            else if (ch == 31)
            {
                // Non-required hyphens to zero-width space
                pending.Append(UNICODECHAR_ZERO_WIDTH_SPACE);
            }
            else if (ch >= 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D)
            {
                pending.Append(ch);
            }
        }

        if (pending.Length > 0)
        {
            OutputCharacters(block, characterRun, pending.ToString());
            pending.Length = 0;
        }

        haveAnyText |= text.Trim().Length != 0;
    }

    return haveAnyText;
}

/// <summary>
/// Returns the given structures in ascending order of the default
/// <c>Structure</c> comparer, keeping the original relative order of
/// structures that compare as equal.
/// </summary>
/// <param name="structures">Structures to order; the list is not modified.</param>
/// <returns>A new, stably sorted list.</returns>
private static List<Structure> SortStructuresStable(IList<Structure> structures)
{
    IComparer<Structure> comparer = Comparer<Structure>.Default;
    List<Structure> sorted = new List<Structure>(structures.Count);

    // Insertion sort: each element is placed after every element that is
    // less than or equal to it, which keeps ties in insertion order.
    foreach (Structure candidate in structures)
    {
        int insertAt = sorted.Count;
        while (insertAt > 0 && comparer.Compare(sorted[insertAt - 1], candidate) > 0)
        {
            insertAt--;
        }

        sorted.Insert(insertAt, candidate);
    }

    return sorted;
}