Beispiel #1
0
        /// <summary>
        /// Converts a Word document to text in one call.
        /// </summary>
        /// <remarks>
        /// Creates a <see cref="WordToTextConverter"/> over a new <see cref="XmlDocument"/>,
        /// runs <c>ProcessDocument</c> on the supplied document and returns whatever
        /// the converter's instance <c>GetText()</c> yields.
        /// </remarks>
        /// <param name="wordDocument">The Word document to run through the converter.</param>
        /// <returns>The text reported by the converter after processing.</returns>
        public static String GetText(HWPFDocumentCore wordDocument)
        {
            // The converter is constructed over a fresh, empty XML document.
            XmlDocument targetDocument = new XmlDocument();
            WordToTextConverter converter = new WordToTextConverter(targetDocument);

            converter.ProcessDocument(wordDocument);

            String extractedText = converter.GetText();
            return extractedText;
        }
Beispiel #2
0
        /// <summary>
        /// Handles an embedded OLE2 object by appending its text to <paramref name="block"/>.
        /// </summary>
        /// <remarks>
        /// Only entries that are a <see cref="DirectoryNode"/> containing a
        /// "WordDocument" entry are handled. Their text is obtained through
        /// <c>WordToTextConverter.GetText</c> and appended as a text node with
        /// <c>UNICODECHAR_ZERO_WIDTH_SPACE</c> on both sides.
        /// </remarks>
        /// <param name="wordDocument">The document being converted; not read by this method.</param>
        /// <param name="block">The element that receives the extracted text node.</param>
        /// <param name="entry">The OLE2 entry to inspect.</param>
        /// <returns>
        /// <c>true</c> when text was appended to <paramref name="block"/>;
        /// <c>false</c> when the entry is not a directory node or has no "WordDocument" entry.
        /// </returns>
        protected override bool ProcessOle2(HWPFDocument wordDocument, XmlElement block, Entry entry)
        {
            // A single 'as' conversion replaces the former is-check plus two casts.
            DirectoryNode directoryNode = entry as DirectoryNode;
            if (directoryNode == null)
            {
                return false;
            }

            // Embedded Word documents are supported directly, without any
            // external extractor factory.
            if (directoryNode.HasEntry("WordDocument"))
            {
                String text = WordToTextConverter.GetText(directoryNode);
                block.AppendChild(textDocumentFacade
                                  .CreateText(UNICODECHAR_ZERO_WIDTH_SPACE + text
                                              + UNICODECHAR_ZERO_WIDTH_SPACE));
                return true;
            }

            // TODO: Not completed. The Java original falls back to
            // org.apache.poi.extractor.ExtractorFactory here: it calls
            // createExtractor(directoryNode) and then getText() via reflection,
            // logging a warning or error and returning false when no extractor
            // is available or extraction fails. That fallback has not been
            // ported, so every other OLE object is reported as unhandled.
            return false;
        }
 /// <summary>
 /// Runs the given Word document through a new <see cref="WordToTextConverter"/>
 /// (constructed over a new <see cref="XmlDocument"/>) and returns the text
 /// the converter reports afterwards.
 /// </summary>
 /// <param name="wordDocument">The Word document to process.</param>
 /// <returns>The result of the converter's instance <c>GetText()</c> call.</returns>
 public static String GetText(HWPFDocumentCore wordDocument)
 {
     WordToTextConverter textConverter = new WordToTextConverter(new XmlDocument());
     textConverter.ProcessDocument(wordDocument);

     String plainText = textConverter.GetText();
     return plainText;
 }