Example #1
0
    /// <summary>
    /// Reads <c>filename.doc</c>, parses it with an <c>AutoDetectParser</c>, serializes the
    /// resulting SAX events as indented HTML and writes the bytes to <c>C:\toHtml\text.html</c>.
    /// </summary>
    /// <remarks>
    /// The output encoding is the one reported by the encoding detector for the input bytes.
    /// When the detector reports nothing, UTF-8 is used.
    /// </remarks>
    public void Convert()
    {
        byte[] file = Files.toByteArray(new File(@"filename.doc"));
        var inputStream = new ByteArrayInputStream(file);

        // Detect the source encoding so the HTML output can declare the same one.
        // NOTE(review): the same stream is handed to the parser below; this relies on
        // detect() leaving the stream at its original position - confirm.
        var metaData = new Metadata();
        EncodingDetector encodingDetector = new UniversalEncodingDetector();
        var detected = encodingDetector.detect(inputStream, metaData);

        // Fall back to UTF-8 rather than UTF-32: UTF-32 is not a permitted encoding for
        // HTML documents and browsers do not decode it.
        string encodingName = detected != null ? detected.toString() : "UTF-8";

        // Identity transformer configured to serialize incoming SAX events as HTML.
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        SAXTransformerFactory factory = (SAXTransformerFactory)TransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        var transformer = handler.getTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "html");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, encodingName);
        handler.setResult(new StreamResult(output));

        // The parser's events pass through the image-rewriting handler before serialization.
        ContentHandler imageRewriting = new ImageRewritingContentHandler(handler);

        // Register the extractor the parser uses for embedded documents.
        ParseContext context = new ParseContext();
        context.set(typeof(EmbeddedDocumentExtractor), new FileEmbeddedDocumentEtractor());

        AutoDetectParser tikaParser = new AutoDetectParser();
        tikaParser.parse(inputStream, imageRewriting, new Metadata(), context);

        byte[] array = output.toByteArray();
        System.IO.File.WriteAllBytes(@"C:\toHtml\text.html", array);
    }
Example #2
0
        /// <summary>
        /// Creates a transformer handler that serializes SAX events using the "text" output
        /// method and directs the result into a new <c>StringWriter</c> stored in
        /// <c>_outputWriter</c>.
        /// </summary>
        /// <returns>The configured handler, with its result already attached.</returns>
        private TransformerHandler getTransformerHandler()
        {
            var saxFactory = (SAXTransformerFactory)TransformerFactory.newInstance();
            TransformerHandler handler = saxFactory.newTransformerHandler();

            // Configure the serializer once through a cached transformer reference.
            var serializer = handler.getTransformer();
            serializer.setOutputProperty(OutputKeys.METHOD, "text");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");

            // Each call replaces the writer field with a fresh writer for this handler's output.
            _outputWriter = new StringWriter();
            var sink = new StreamResult(_outputWriter);
            handler.setResult(sink);

            return handler;
        }