Exemple #1
0
        /// <summary>
        /// Opens the "pageref.doc" sample and verifies that it exposes exactly one
        /// bookmark, named "userref", starting at offset 27 and ending at offset 38.
        /// </summary>
        public void TestBookmarks()
        {
            HWPFDocument sample = HWPFTestDataSamples.OpenSampleFile("pageref.doc");
            Bookmarks found = sample.GetBookmarks();
            Assert.AreEqual(1, found.Count);

            // Fetched only after the count check, so an unexpected count fails first.
            Bookmark only = found.GetBookmark(0);
            Assert.AreEqual("userref", only.Name);
            Assert.AreEqual(27, only.Start);
            Assert.AreEqual(38, only.End);
        }
Exemple #2
0
        /// <summary>
        /// Processes the characters of <paramref name="range"/> and emits the result into
        /// <paramref name="block"/>.
        /// </summary>
        /// <remarks>
        /// When the document is an <c>HWPFDocument</c>, bookmarks and live fields that start
        /// inside the range are collected first. If any were found, the range is split around
        /// them: the text between structures is handled by recursive calls, bookmarks go to
        /// <c>ProcessBookmarks</c> and fields to <c>ProcessField</c>. Only when no structure
        /// moved the cursor off the range start are the character runs processed one by one
        /// (pictures, note anchors, drawn objects, OLE2 objects, fields, plain text).
        /// </remarks>
        /// <param name="wordDocument">Document that owns the range.</param>
        /// <param name="currentTableLevel">
        /// Table nesting level handed on to nested processing. When it equals
        /// <c>int.MinValue</c> a trailing <c>BEL_MARK</c> is not stripped from run text.
        /// </param>
        /// <param name="range">Range to process; <c>null</c> yields <c>false</c>.</param>
        /// <param name="block">Element that receives the generated output.</param>
        /// <returns>
        /// <c>false</c> for a <c>null</c> range; <c>true</c> when the range was handled through
        /// its structures; otherwise <c>true</c> only if at least one run contained
        /// non-whitespace text.
        /// </returns>
        /// <exception cref="NullReferenceException">A character run of the range is null.</exception>
        /// <exception cref="NotSupportedException">A collected structure is neither a bookmark nor a field.</exception>
        protected bool ProcessCharacters(HWPFDocumentCore wordDocument, int currentTableLevel, Range range, XmlElement block)
        {
            if (range == null)
            {
                return false;
            }

            // Null unless the document is an HWPFDocument; bookmarks, fields and pictures
            // are only looked up through this reference.
            HWPFDocument hwpfDocument = wordDocument as HWPFDocument;

            /*
             * In text there can be fields, bookmarks, may be other structures (code
             * below allows extension). Those structures can overlap, so either we
             * should process char-by-char (slow) or find a correct way to
             * reconstruct the structure of range -- sergey
             */
            IList<Structure> structures = new List<Structure>();

            if (hwpfDocument != null)
            {
                Dictionary<int, List<Bookmark>> rangeBookmarks = hwpfDocument.GetBookmarks()
                    .GetBookmarksStartedBetween(range.StartOffset, range.EndOffset);

                if (rangeBookmarks != null)
                {
                    foreach (KeyValuePair<int, List<Bookmark>> kv in rangeBookmarks)
                    {
                        foreach (Bookmark bookmark in kv.Value)
                        {
                            // Bookmarks on the stack are already being processed by a caller.
                            if (!bookmarkStack.Contains(bookmark))
                            {
                                AddToStructures(structures, new Structure(bookmark));
                            }
                        }
                    }
                }

                // TODO: dead fields?
                for (int c = 0; c < range.NumCharacterRuns; c++)
                {
                    CharacterRun characterRun = range.GetCharacterRun(c);
                    if (characterRun == null)
                    {
                        throw new NullReferenceException("Character run #" + c + " of the range is null");
                    }

                    Field aliveField = hwpfDocument.GetFields()
                        .GetFieldByStartOffset(FieldsDocumentPart.MAIN, characterRun.StartOffset);
                    if (aliveField != null)
                    {
                        AddToStructures(structures, new Structure(aliveField));
                    }
                }
            }

            // Stable insertion sort with the default comparer. Unlike SortedList.Add this
            // does not throw when two structures compare as equal; equal structures keep
            // their collection order.
            List<Structure> ordered = new List<Structure>(structures.Count);
            Comparer<Structure> comparer = Comparer<Structure>.Default;
            foreach (Structure candidate in structures)
            {
                int insertAt = ordered.Count;
                while (insertAt > 0 && comparer.Compare(ordered[insertAt - 1], candidate) > 0)
                {
                    insertAt--;
                }
                ordered.Insert(insertAt, candidate);
            }

            int previous = range.StartOffset;

            foreach (Structure structure in ordered)
            {
                if (structure.Start != previous)
                {
                    // Text between the previous structure (or range start) and this one.
                    Range subrange = new Range(previous, structure.Start, range);
                    ProcessCharacters(wordDocument, currentTableLevel, subrange, block);
                }

                if (structure.StructureObject is Bookmark)
                {
                    // other bookmarks with same boundaries
                    IList<Bookmark> bookmarks = new List<Bookmark>();
                    Dictionary<int, List<Bookmark>> startedHere = hwpfDocument.GetBookmarks()
                        .GetBookmarksStartedBetween(structure.Start, structure.Start + 1);

                    if (startedHere != null)
                    {
                        foreach (List<Bookmark> sameStart in startedHere.Values)
                        {
                            if (sameStart != null)
                            {
                                foreach (Bookmark bookmark in sameStart)
                                {
                                    if (bookmark.Start == structure.Start &&
                                        bookmark.End == structure.End)
                                    {
                                        bookmarks.Add(bookmark);
                                    }
                                }
                            }

                            // Only the first list is inspected, as before; a missing or
                            // empty lookup result now simply yields no bookmarks.
                            break;
                        }
                    }

                    bookmarkStack.AddRange(bookmarks);
                    try
                    {
                        int end = Math.Min(range.EndOffset, structure.End);
                        Range subrange = new Range(structure.Start, end, range);

                        ProcessBookmarks(wordDocument, block, subrange,
                                         currentTableLevel, bookmarks);
                    }
                    finally
                    {
                        bookmarkStack.RemoveAll(e => bookmarks.Contains(e));
                    }
                }
                else if (structure.StructureObject is Field)
                {
                    Field field = (Field)structure.StructureObject;
                    ProcessField(hwpfDocument, range, currentTableLevel, field, block);
                }
                else
                {
                    throw new NotSupportedException("NYI: " + structure.StructureObject.GetType().ToString());
                }

                previous = Math.Min(range.EndOffset, structure.End);
            }

            if (previous != range.StartOffset)
            {
                // Defensive check: previous is clamped to range.EndOffset above.
                if (previous > range.EndOffset)
                {
                    logger.Log(POILogger.WARN, "Latest structure in ", range,
                               " ended at #" + previous, " after range boundaries [",
                               range.StartOffset + "; " + range.EndOffset,
                               ")");
                    return true;
                }

                if (previous < range.EndOffset)
                {
                    // Remaining text after the last structure.
                    Range subrange = new Range(previous, range.EndOffset, range);
                    ProcessCharacters(wordDocument, currentTableLevel, subrange,
                                      block);
                }
                return true;
            }

            // Resolved once instead of once per character run.
            System.Text.Encoding latin1 = System.Text.Encoding.GetEncoding("iso-8859-1");
            bool haveAnyText = false;

            for (int c = 0; c < range.NumCharacterRuns; c++)
            {
                CharacterRun characterRun = range.GetCharacterRun(c);

                if (characterRun == null)
                {
                    throw new NullReferenceException("Character run #" + c + " of the range is null");
                }

                if (hwpfDocument != null && hwpfDocument.GetPicturesTable().HasPicture(characterRun))
                {
                    Picture picture = hwpfDocument.GetPicturesTable().ExtractPicture(characterRun, true);

                    ProcessImage(block, characterRun.Text[0] == 0x01, picture);
                    continue;
                }

                string text = characterRun.Text;
                byte[] textByte = latin1.GetBytes(text);
                if (textByte.Length == 0)
                {
                    continue;
                }

                if (characterRun.IsSpecialCharacter())
                {
                    if (text[0] == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE && hwpfDocument != null)
                    {
                        ProcessNoteAnchor(hwpfDocument, characterRun, block);
                        continue;
                    }
                    if (text[0] == SPECCHAR_DRAWN_OBJECT && hwpfDocument != null)
                    {
                        ProcessDrawnObject(hwpfDocument, characterRun, block);
                        continue;
                    }
                    if (characterRun.IsOle2() && hwpfDocument != null)
                    {
                        ProcessOle2(hwpfDocument, characterRun, block);
                        continue;
                    }
                }

                if (textByte[0] == FIELD_BEGIN_MARK)
                {
                    if (hwpfDocument != null)
                    {
                        Field aliveField = hwpfDocument.GetFields().GetFieldByStartOffset(
                            FieldsDocumentPart.MAIN, characterRun.StartOffset);
                        if (aliveField != null)
                        {
                            ProcessField(hwpfDocument, range,
                                         currentTableLevel, aliveField, block);

                            // Skip every run that ends inside the field just processed.
                            int continueAfter = aliveField.GetFieldEndOffset();
                            while (c < range.NumCharacterRuns &&
                                   range.GetCharacterRun(c).EndOffset <= continueAfter)
                            {
                                c++;
                            }

                            // Compensate for the loop's own c++ so the next run is not skipped.
                            if (c < range.NumCharacterRuns)
                            {
                                c--;
                            }

                            continue;
                        }
                    }

                    // TryDeadField returns the run index to resume from; assigning it
                    // unconditionally is equivalent to the former "if changed" check.
                    c = TryDeadField(wordDocument, range,
                                     currentTableLevel, c, block);
                    continue;
                }

                if (textByte[0] == FIELD_SEPARATOR_MARK || textByte[0] == FIELD_END_MARK)
                {
                    // shall not appear without FIELD_BEGIN_MARK
                    continue;
                }

                if (characterRun.IsSpecialCharacter() || characterRun.IsObj() ||
                    characterRun.IsOle2())
                {
                    continue;
                }

                // Ordinal comparison: the check is for the literal CR character and must
                // not depend on the current culture's collation rules.
                if (text.EndsWith("\r", StringComparison.Ordinal) ||
                    (text[text.Length - 1] == BEL_MARK && currentTableLevel != int.MinValue))
                {
                    text = text.Substring(0, text.Length - 1);
                }

                {
                    // line breaks
                    StringBuilder stringBuilder = new StringBuilder();
                    foreach (char charChar in text)
                    {
                        if (charChar == 11)
                        {
                            // Flush pending text, then emit the line break itself.
                            if (stringBuilder.Length > 0)
                            {
                                OutputCharacters(block, characterRun,
                                                 stringBuilder.ToString());
                                stringBuilder.Length = 0;
                            }
                            ProcessLineBreak(block, characterRun);
                        }
                        else if (charChar == 30)
                        {
                            // Non-breaking hyphens are stored as ASCII 30
                            stringBuilder.Append(UNICODECHAR_NONBREAKING_HYPHEN);
                        }
                        else if (charChar == 31)
                        {
                            // Non-required hyphens to zero-width space
                            stringBuilder.Append(UNICODECHAR_ZERO_WIDTH_SPACE);
                        }
                        else if (charChar >= 0x20 || charChar == 0x09 ||
                                 charChar == 0x0A || charChar == 0x0D)
                        {
                            // Remaining control characters are dropped.
                            stringBuilder.Append(charChar);
                        }
                    }
                    if (stringBuilder.Length > 0)
                    {
                        OutputCharacters(block, characterRun,
                                         stringBuilder.ToString());
                        stringBuilder.Length = 0;
                    }
                }

                haveAnyText |= text.Trim().Length != 0;
            }

            return haveAnyText;
        }