Exemple #1
0
 /**
  * Determines whether the specified CharacterRun is a special-character run
  * (and not an OBJ, OLE2 or data run) whose text begins with the U+0008 marker.
  *
  * The prefix test is ordinal on purpose: a culture-sensitive comparison may
  * treat control characters such as U+0008 as ignorable (for example with ICU
  * collation on .NET 5+), which would make the test succeed for any text.
  *
  * @param run character run to inspect
  * @return true when every condition above holds, false otherwise
  */
 public bool HasEscherPicture(CharacterRun run)
 {
     if (run.IsSpecialCharacter() && !run.IsObj() && !run.IsOle2() && !run.IsData()
         && run.Text.StartsWith("\u0008", System.StringComparison.Ordinal))
     {
         return(true);
     }
     return(false);
 }
Exemple #2
0
 /**
  * Reports whether the given CharacterRun refers to a picture.
  *
  * Only special-character runs that are not OBJ, OLE2 or data runs are
  * considered, and only when their text is exactly U+0001 or U+0001 followed
  * by U+0015. For such runs the answer is whatever IsBlockContainsImage
  * reports for the run's picture offset.
  *
  * @param run character run to inspect
  * @return true when the run qualifies and its picture offset holds an image
  */
 public bool HasPicture(CharacterRun run)
 {
     bool eligible = run.IsSpecialCharacter() && !run.IsObj() && !run.IsOle2() && !run.IsData();
     if (!eligible)
     {
         return(false);
     }

     // The picture marker has to be alone in its run, or be followed only by
     // the end-of-special marker
     string runText    = run.Text;
     bool   markerOnly = "\u0001".Equals(runText) || "\u0001\u0015".Equals(runText);

     return(markerOnly && IsBlockContainsImage(run.GetPicOffset()));
 }
Exemple #3
0
        /// <summary>
        /// Renders one field, dispatching on its numeric type: 37 (page reference),
        /// 58 (embedded object) and 88 (hyperlink) have dedicated handling. A field
        /// of any other type, or one whose instruction text does not match the
        /// expected pattern, is reported with a warning and its second subrange is
        /// written out as ordinary characters. An embedded-object field without a
        /// separator mark is only logged; nothing is written for it.
        /// </summary>
        /// <param name="wordDocument">document the field belongs to</param>
        /// <param name="parentRange">range containing the field</param>
        /// <param name="currentTableLevel">table level, handed on unchanged to the processing calls</param>
        /// <param name="field">field to render</param>
        /// <param name="currentBlock">XML element that receives the output</param>
        protected void ProcessField(HWPFDocument wordDocument, Range parentRange,
                                    int currentTableLevel, Field field, XmlElement currentBlock)
        {
            switch (field.Type)
            {
            case 37:     // page reference
            {
                Range instruction = field.FirstSubrange(parentRange);
                if (instruction == null)
                {
                    break;
                }

                string instructionText = instruction.Text;
                Match  pagerefMatch    = new Regex("[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*")
                                         .Match(instructionText);
                if (!pagerefMatch.Success)
                {
                    break;
                }

                // first capture group is the reference target
                string target = pagerefMatch.Groups[1].Value;
                ProcessPageref(wordDocument, currentBlock,
                               field.SecondSubrange(parentRange),
                               currentTableLevel, target);
                return;
            }

            case 58:     // Embedded Object
            {
                if (!field.HasSeparator())
                {
                    logger.Log(POILogger.WARN, parentRange + " contains " + field
                               + " with 'Embedded Object' but without separator mark");
                    return;
                }

                CharacterRun separatorRun = field.GetMarkSeparatorCharacterRun(parentRange);
                if (!separatorRun.IsOle2())
                {
                    // OLE2 is the only embedded-object kind handled here
                    break;
                }

                // when no OLE output was produced, emit the field value instead
                if (!ProcessOle2(wordDocument, separatorRun, currentBlock))
                {
                    ProcessCharacters(wordDocument, currentTableLevel,
                                      field.SecondSubrange(parentRange), currentBlock);
                }
                return;
            }

            case 88:     // hyperlink
            {
                Range instruction = field.FirstSubrange(parentRange);
                if (instruction == null)
                {
                    break;
                }

                string instructionText = instruction.Text;
                Match  hyperlinkMatch  = new Regex("[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*")
                                         .Match(instructionText);
                if (!hyperlinkMatch.Success)
                {
                    break;
                }

                // first capture group is the quoted link target
                string target = hyperlinkMatch.Groups[1].Value;
                ProcessHyperlink(wordDocument, currentBlock,
                                 field.SecondSubrange(parentRange),
                                 currentTableLevel, target);
                return;
            }
            }

            // Reached for unknown field types and for known types that could not be handled above
            logger.Log(POILogger.WARN, parentRange + " contains " + field
                       + " with unsupported type or format");
            ProcessCharacters(wordDocument, currentTableLevel,
                              field.SecondSubrange(parentRange), currentBlock);
        }
Exemple #4
0
        /// <summary>
        /// Writes the content of <paramref name="range"/> into <paramref name="block"/>.
        /// When the document is an HWPFDocument, bookmarks and fields starting inside
        /// the range are collected first; the range is then split around them and each
        /// piece is processed recursively. If that pass did not move past the start of
        /// the range, the character runs are processed one by one: pictures, note
        /// anchors, drawn objects, OLE2 objects and fields are dispatched to their
        /// handlers, and remaining text is emitted with line breaks and special
        /// hyphen characters translated.
        /// </summary>
        /// <param name="wordDocument">document being converted</param>
        /// <param name="currentTableLevel">table level; int.MinValue disables stripping of a trailing BEL_MARK</param>
        /// <param name="range">range to process; null is accepted and yields false</param>
        /// <param name="block">XML element that receives the output</param>
        /// <returns>
        /// false when <paramref name="range"/> is null; true when structure processing
        /// advanced past the start of the range; otherwise whether any emitted run
        /// contained non-whitespace text.
        /// </returns>
        /// <exception cref="NullReferenceException">a character run of the range is null</exception>
        /// <exception cref="NotSupportedException">a collected structure is neither a bookmark nor a field</exception>
        protected bool ProcessCharacters(HWPFDocumentCore wordDocument, int currentTableLevel, Range range, XmlElement block)
        {
            if (range == null)
            {
                return(false);
            }

            // Bookmarks, fields, pictures and OLE2 handling all require the HWPFDocument
            // API; this is null for any other document type.
            HWPFDocument hwpfDocument = wordDocument as HWPFDocument;

            bool haveAnyText = false;

            /*
             * In text there can be fields, bookmarks, may be other structures (code
             * below allows extension). Those structures can overlap, so either we
             * should process char-by-char (slow) or find a correct way to
             * reconstruct the structure of range -- sergey
             */
            List <Structure> structures = new List <Structure>();

            if (hwpfDocument != null)
            {
                Dictionary <int, List <Bookmark> > rangeBookmarks = hwpfDocument.GetBookmarks()
                                                                    .GetBookmarksStartedBetween(range.StartOffset, range.EndOffset);

                if (rangeBookmarks != null)
                {
                    foreach (KeyValuePair <int, List <Bookmark> > kv in rangeBookmarks)
                    {
                        foreach (Bookmark bookmark in kv.Value)
                        {
                            // bookmarks on the stack are already being processed by an outer call
                            if (!bookmarkStack.Contains(bookmark))
                            {
                                AddToStructures(structures, new Structure(bookmark));
                            }
                        }
                    }
                }

                // TODO: dead fields?
                for (int c = 0; c < range.NumCharacterRuns; c++)
                {
                    CharacterRun characterRun = range.GetCharacterRun(c);
                    if (characterRun == null)
                    {
                        throw new NullReferenceException();
                    }
                    Field aliveField = hwpfDocument.GetFields()
                                       .GetFieldByStartOffset(FieldsDocumentPart.MAIN,
                                                              characterRun.StartOffset);
                    if (aliveField != null)
                    {
                        AddToStructures(structures, new Structure(aliveField));
                    }
                }
            }

            // Order the structures with their natural comparison. Sorting in place
            // (rather than going through a keyed sorted collection) also tolerates
            // structures that compare as equal.
            structures.Sort();

            int previous = range.StartOffset;

            foreach (Structure structure in structures)
            {
                if (structure.Start != previous)
                {
                    // plain content between the previous structure and this one
                    Range subrange = new Range(previous, structure.Start, range);
                    ProcessCharacters(wordDocument, currentTableLevel, subrange, block);
                }

                if (structure.StructureObject is Bookmark)
                {
                    // other bookmarks with same boundaries
                    IList <Bookmark> bookmarks = new List <Bookmark>();
                    using (IEnumerator <List <Bookmark> > iterator = hwpfDocument.GetBookmarks().GetBookmarksStartedBetween(structure.Start, structure.Start + 1).Values.GetEnumerator())
                    {
                        // NOTE(review): assumes the lookup returns at least one entry for
                        // the start offset of a collected bookmark -- confirm
                        iterator.MoveNext();
                        foreach (Bookmark bookmark in iterator.Current)
                        {
                            if (bookmark.Start == structure.Start &&
                                bookmark.End == structure.End)
                            {
                                bookmarks.Add(bookmark);
                            }
                        }
                    }

                    // keep nested calls from collecting these bookmarks again
                    bookmarkStack.AddRange(bookmarks);
                    try
                    {
                        int   end      = Math.Min(range.EndOffset, structure.End);
                        Range subrange = new Range(structure.Start, end, range);

                        ProcessBookmarks(wordDocument, block, subrange,
                                         currentTableLevel, bookmarks);
                    }
                    finally
                    {
                        bookmarkStack.RemoveAll((e) => { return(bookmarks.Contains(e)); });
                    }
                }
                else if (structure.StructureObject is Field)
                {
                    Field field = (Field)structure.StructureObject;
                    ProcessField(hwpfDocument, range, currentTableLevel, field, block);
                }
                else
                {
                    throw new NotSupportedException("NYI: " + structure.StructureObject.GetType().ToString());
                }

                // never move past the end of the range being processed
                previous = Math.Min(range.EndOffset, structure.End);
            }

            if (previous != range.StartOffset)
            {
                // Defensive check: previous is clamped with Math.Min above
                if (previous > range.EndOffset)
                {
                    logger.Log(POILogger.WARN, "Latest structure in ", range,
                               " ended at #" + previous, " after range boundaries [",
                               range.StartOffset + "; " + range.EndOffset,
                               ")");
                    return(true);
                }

                if (previous < range.EndOffset)
                {
                    // remaining content after the last structure
                    Range subrange = new Range(previous, range.EndOffset, range);
                    ProcessCharacters(wordDocument, currentTableLevel, subrange,
                                      block);
                }
                return(true);
            }

            // Used only to look at the first byte of each run's text and to detect empty text
            System.Text.Encoding latin1 = System.Text.Encoding.GetEncoding("iso-8859-1");

            for (int c = 0; c < range.NumCharacterRuns; c++)
            {
                CharacterRun characterRun = range.GetCharacterRun(c);

                if (characterRun == null)
                {
                    throw new NullReferenceException();
                }

                if (hwpfDocument != null && hwpfDocument.GetPicturesTable().HasPicture(characterRun))
                {
                    Picture picture = hwpfDocument.GetPicturesTable().ExtractPicture(characterRun, true);

                    ProcessImage(block, characterRun.Text[0] == 0x01, picture);
                    continue;
                }

                string text     = characterRun.Text;
                byte[] textByte = latin1.GetBytes(text);
                if (textByte.Length == 0)
                {
                    continue;
                }

                if (characterRun.IsSpecialCharacter())
                {
                    if (text[0] == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE && hwpfDocument != null)
                    {
                        ProcessNoteAnchor(hwpfDocument, characterRun, block);
                        continue;
                    }
                    if (text[0] == SPECCHAR_DRAWN_OBJECT && hwpfDocument != null)
                    {
                        ProcessDrawnObject(hwpfDocument, characterRun, block);
                        continue;
                    }
                    if (characterRun.IsOle2() && hwpfDocument != null)
                    {
                        ProcessOle2(hwpfDocument, characterRun, block);
                        continue;
                    }
                }
                if (textByte[0] == FIELD_BEGIN_MARK)
                {
                    if (hwpfDocument != null)
                    {
                        Field aliveField = hwpfDocument.GetFields().GetFieldByStartOffset(
                            FieldsDocumentPart.MAIN, characterRun.StartOffset);
                        if (aliveField != null)
                        {
                            ProcessField(hwpfDocument, range,
                                         currentTableLevel, aliveField, block);

                            // skip every run that ends inside the field
                            int fieldRunIndex = c;
                            int continueAfter = aliveField.GetFieldEndOffset();
                            while (c < range.NumCharacterRuns &&
                                   range.GetCharacterRun(c).EndOffset <= continueAfter)
                            {
                                c++;
                            }

                            // Step back one so the loop increment lands on the first run
                            // ending after the field. If nothing was skipped, do not step
                            // back: the same run would otherwise be processed again forever.
                            if (c < range.NumCharacterRuns && c > fieldRunIndex)
                            {
                                c--;
                            }

                            continue;
                        }
                    }

                    // not an alive field: TryDeadField returns the run index to continue from
                    c = TryDeadField(wordDocument, range,
                                     currentTableLevel, c, block);
                    continue;
                }
                if (textByte[0] == FIELD_SEPARATOR_MARK)
                {
                    // shall not appear without FIELD_BEGIN_MARK
                    continue;
                }
                if (textByte[0] == FIELD_END_MARK)
                {
                    // shall not appear without FIELD_BEGIN_MARK
                    continue;
                }

                if (characterRun.IsSpecialCharacter() || characterRun.IsObj() ||
                    characterRun.IsOle2())
                {
                    continue;
                }

                // Drop a trailing carriage return, or a trailing BEL_MARK unless the table
                // level is int.MinValue. The comparison is ordinal so that it is an exact
                // character test, independent of culture-specific collation rules.
                if (text.EndsWith("\r", StringComparison.Ordinal) ||
                    (text[text.Length - 1] == BEL_MARK && currentTableLevel != int.MinValue))
                {
                    text = text.Substring(0, text.Length - 1);
                }

                // line breaks
                StringBuilder stringBuilder = new StringBuilder();
                foreach (char charChar in text)
                {
                    if (charChar == 11)
                    {
                        // flush what has been collected so far, then emit the break
                        if (stringBuilder.Length > 0)
                        {
                            OutputCharacters(block, characterRun,
                                             stringBuilder.ToString());
                            stringBuilder.Length = 0;
                        }
                        ProcessLineBreak(block, characterRun);
                    }
                    else if (charChar == 30)
                    {
                        // Non-breaking hyphens are stored as ASCII 30
                        stringBuilder.Append(UNICODECHAR_NONBREAKING_HYPHEN);
                    }
                    else if (charChar == 31)
                    {
                        // Non-required hyphens to zero-width space
                        stringBuilder.Append(UNICODECHAR_ZERO_WIDTH_SPACE);
                    }
                    else if (charChar >= 0x20 || charChar == 0x09 ||
                             charChar == 0x0A || charChar == 0x0D)
                    {
                        // other control characters are dropped
                        stringBuilder.Append(charChar);
                    }
                }
                if (stringBuilder.Length > 0)
                {
                    OutputCharacters(block, characterRun,
                                     stringBuilder.ToString());
                }

                haveAnyText |= text.Trim().Length != 0;
            }

            return(haveAnyText);
        }