/**
 * Loads the Word document stored in the given file and returns the text
 * produced for it by GetText(HWPFDocumentCore).
 *
 * @param docFile
 *            File handed to AbstractWordUtils.LoadDoc.
 * @return The text produced for the loaded document.
 */
public static String GetText(string docFile)
{
    return GetText(AbstractWordUtils.LoadDoc(docFile));
}
/**
 * Handles a field that has no dedicated processing: logs every character
 * run between the begin and end marks at WARN level, then outputs the part
 * between the separator and end marks as ordinary characters.
 *
 * @param beginMark
 *            Index of the first character run of the field.
 * @param separatorMark
 *            Index of the character run separating code and value.
 * @param endMark
 *            Index of the last character run of the field.
 */
protected void ProcessDeadField(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range range, int currentTableLevel, int beginMark, int separatorMark, int endMark)
{
    StringBuilder message = new StringBuilder("Unsupported field type: \n");
    int runIndex = beginMark;
    while (runIndex <= endMark)
    {
        message.Append("\t").Append(range.GetCharacterRun(runIndex)).Append("\n");
        runIndex++;
    }
    logger.Log(POILogger.WARN, message);

    // The value starts one character after the separator run begins and
    // stops where the end run begins.
    int valueStart = range.GetCharacterRun(separatorMark).StartOffset + 1;
    int valueEnd = range.GetCharacterRun(endMark).StartOffset;
    Range deadFieldValueSubrange = new Range(valueStart, valueEnd, range);

    // just output field value, if there is at least one run holding it
    bool hasValueRuns = separatorMark + 1 < endMark;
    if (!hasValueRuns)
    {
        return;
    }

    ProcessCharacters(wordDocument, currentTableLevel, deadFieldValueSubrange, currentBlock);
}
/**
 * Writes a table as text: one row element per table row, with cells
 * separated by a tab and every row terminated by a line feed.
 *
 * @param flow
 *            Element that receives the generated row elements.
 * @param table
 *            Table to convert.
 */
protected override void ProcessTable(HWPFDocumentCore wordDocument, XmlElement flow, Table table)
{
    int rowCount = table.NumRows;
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        TableRow row = table.GetRow(rowIndex);
        XmlElement rowElement = textDocumentFacade.CreateTableRow();

        int cellCount = row.NumCells();
        for (int cellIndex = 0; cellIndex < cellCount; cellIndex++)
        {
            TableCell cell = row.GetCell(cellIndex);
            XmlElement cellElement = textDocumentFacade.CreateTableCell();

            // Every cell except the first is preceded by a tab.
            bool isFirstCell = cellIndex == 0;
            if (!isFirstCell)
            {
                cellElement.AppendChild(textDocumentFacade.CreateText("\t"));
            }

            ProcessCharacters(wordDocument, table.TableLevel, cell, cellElement);
            rowElement.AppendChild(cellElement);
        }

        rowElement.AppendChild(textDocumentFacade.CreateText("\n"));
        flow.AppendChild(rowElement);
    }
}
/**
 * Represents a page break in text output as a block holding a single line
 * feed, appended to the given flow element.
 */
protected override void ProcessPageBreak(HWPFDocumentCore wordDocument, XmlElement flow)
{
    XmlElement lineFeedBlock = textDocumentFacade.CreateBlock();
    lineFeedBlock.AppendChild(textDocumentFacade.CreateText("\n"));

    flow.AppendChild(lineFeedBlock);
}
/**
 * Marks a page break by setting break-after="page". The attribute goes on
 * the last child of the flow when that child is an element that does not
 * carry the attribute yet; otherwise a new block is appended to the flow
 * and receives the attribute.
 */
protected override void ProcessPageBreak(HWPFDocumentCore wordDocument, XmlElement flow)
{
    XmlNodeList children = flow.ChildNodes;
    XmlElement lastElement = children.Count > 0
        ? children[children.Count - 1] as XmlElement
        : null;

    bool canReuseLast = lastElement != null && !lastElement.HasAttribute("break-after");

    XmlElement target;
    if (canReuseLast)
    {
        target = lastElement;
    }
    else
    {
        target = foDocumentFacade.CreateBlock();
        flow.AppendChild(target);
    }

    target.SetAttribute("break-after", "page");
}
/**
 * Outputs a page reference as plain characters: the text range is written
 * straight into the current block and the pageref target is not used.
 */
protected override void ProcessPageref(
    HWPFDocumentCore wordDocument,
    XmlElement currentBlock,
    Range textRange,
    int currentTableLevel,
    String pageref)
{
    ProcessCharacters(wordDocument, currentTableLevel, textRange, currentBlock);
}
/**
 * Wraps the range in nested inline elements, one per bookmark whose id
 * "bookmark_" + name could be assigned through SetId, then processes the
 * characters of the range inside the innermost element.
 *
 * @param range
 *            Characters covered by the bookmarks; nothing is output for
 *            them when this is null.
 * @param rangeBookmarks
 *            Bookmarks to represent, outermost first.
 */
protected override void ProcessBookmarks(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range range, int currentTableLevel, IList<Bookmark> rangeBookmarks)
{
    XmlElement innermost = currentBlock;

    foreach (Bookmark bookmark in rangeBookmarks)
    {
        XmlElement inline = foDocumentFacade.CreateInline();

        // make sure ID used once: an empty fo:inline without an "id"
        // attribute makes no sense in the DOM, so it is not attached
        if (!SetId(inline, "bookmark_" + bookmark.Name))
        {
            continue;
        }

        innermost.AppendChild(inline);
        innermost = inline;
    }

    if (range == null)
    {
        return;
    }

    ProcessCharacters(wordDocument, currentTableLevel, range, innermost);
}
/**
 * Converts a single section directly into the HTML body: the section style
 * is registered on the body with the "b" class prefix and the section's
 * paragraphs are appended to the body.
 *
 * NOTE(review): the name starts with a lowercase letter, unlike the
 * ProcessSingleSection call made from ProcessDocument -- presumably this
 * was meant to replace that method; confirm before relying on it.
 */
protected void processSingleSection(HWPFDocumentCore wordDocument, Section section)
{
    string sectionStyle = GetSectionStyle(section);
    htmlDocumentFacade.AddStyleClass(htmlDocumentFacade.Body, "b", sectionStyle);

    ProcessParagraphes(wordDocument, htmlDocumentFacade.Body, section, int.MinValue);
}
/**
 * Converts a whole document. Summary information is processed first; any
 * exception raised while doing so is logged at WARN level and conversion
 * continues. A document with exactly one section goes through
 * ProcessSingleSection, any other through ProcessDocumentPart, and
 * AfterProcess runs afterwards in both cases.
 *
 * @param wordDocument
 *            Document to convert.
 */
public void ProcessDocument(HWPFDocumentCore wordDocument)
{
    try
    {
        NPOI.HPSF.SummaryInformation info = wordDocument.SummaryInformation;
        if (info != null)
        {
            ProcessDocumentInformation(info);
        }
    }
    catch (Exception exc)
    {
        logger.Log(POILogger.WARN, "Unable to process document summary information: ", exc, exc);
    }

    Range documentRange = wordDocument.GetRange();
    if (documentRange.NumSections == 1)
    {
        ProcessSingleSection(wordDocument, documentRange.GetSection(0));
    }
    else
    {
        ProcessDocumentPart(wordDocument, documentRange);
    }

    AfterProcess();
}
/**
 * Runs a WordToTextConverter, backed by a fresh XmlDocument, over the
 * given document and returns the converter's text.
 *
 * @param wordDocument
 *            Document to convert.
 * @return Result of the converter's GetText().
 */
public static String GetText(HWPFDocumentCore wordDocument)
{
    XmlDocument target = new XmlDocument();
    WordToTextConverter converter = new WordToTextConverter(target);

    converter.ProcessDocument(wordDocument);

    return converter.GetText();
}
/**
 * Loads the Word document stored in the given file and converts it with a
 * WordToFoConverter backed by a fresh XmlDocument.
 *
 * @param docFile
 *            File handed to WordToFoUtils.LoadDoc.
 * @return The converter's Document after processing.
 */
public static XmlDocument Process(string docFile)
{
    HWPFDocumentCore source = WordToFoUtils.LoadDoc(docFile);

    XmlDocument target = new XmlDocument();
    WordToFoConverter converter = new WordToFoConverter(target);
    converter.ProcessDocument(source);

    return converter.Document;
}
/**
 * Processes every section of the range in order, passing each section's
 * zero-based index to ProcessSection.
 *
 * @param range
 *            Range whose sections are converted.
 */
protected virtual void ProcessDocumentPart(HWPFDocumentCore wordDocument, Range range)
{
    int sectionIndex = 0;
    while (sectionIndex < range.NumSections)
    {
        ProcessSection(wordDocument, range.GetSection(sectionIndex), sectionIndex);
        sectionIndex++;
    }
}
/**
 * Converts a paragraph into an HTML paragraph element appended to the
 * parent. The font of the first character run is added to the paragraph
 * style and pushed as the current block properties while the characters
 * are processed. A paragraph without character runs yields an empty
 * element with no style class.
 *
 * @param bulletText
 *            List bullet text placed before the paragraph characters;
 *            skipped when null or empty.
 */
protected override void ProcessParagraph(HWPFDocumentCore wordDocument, XmlElement parentElement,
    int currentTableLevel, Paragraph paragraph, string bulletText)
{
    XmlElement paragraphElement = htmlDocumentFacade.CreateParagraph();
    parentElement.AppendChild(paragraphElement);

    StringBuilder css = new StringBuilder();
    WordToHtmlUtils.AddParagraphProperties(paragraph, css);

    if (paragraph.NumCharacterRuns == 0)
    {
        return;
    }

    // Defaults apply when the first character run is unavailable.
    String blockFontName = string.Empty;
    int blockFontSize = -1;

    CharacterRun firstRun = paragraph.GetCharacterRun(0);
    if (firstRun != null)
    {
        Triplet triplet = GetCharacterRunTriplet(firstRun);
        blockFontSize = firstRun.GetFontSize() / 2;
        blockFontName = triplet.fontName;
        WordToHtmlUtils.AddFontFamily(blockFontName, css);
        WordToHtmlUtils.AddFontSize(blockFontSize, css);
    }
    blocksProperies.Push(new BlockProperies(blockFontName, blockFontSize));

    try
    {
        if (!string.IsNullOrEmpty(bulletText))
        {
            paragraphElement.AppendChild(htmlDocumentFacade.CreateText(bulletText));
        }

        ProcessCharacters(wordDocument, currentTableLevel, paragraph, paragraphElement);
    }
    finally
    {
        // Always restore the enclosing block's properties.
        blocksProperies.Pop();
    }

    if (css.Length > 0)
    {
        htmlDocumentFacade.AddStyleClass(paragraphElement, "p", css.ToString());
    }

    WordToHtmlUtils.CompactSpans(paragraphElement);
}
/**
 * Loads the Word document stored in the given file and converts it to
 * HTML.
 *
 * The conversion calls were previously commented out, so the method loaded
 * the file and then returned an untouched, empty XmlDocument. They are
 * restored here so the returned document is the one the converter was
 * constructed with and ran against.
 *
 * @param docFile
 *            File handed to WordToHtmlUtils.LoadDoc.
 * @return The XmlDocument passed to the converter, after processing.
 */
public static XmlDocument Process(string docFile)
{
    HWPFDocumentCore wordDocument = WordToHtmlUtils.LoadDoc(docFile);

    XmlDocument xmlDoc = new XmlDocument();
    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(xmlDoc);
    wordToHtmlConverter.ProcessDocument(wordDocument);

    return xmlDoc;
}
/**
 * Converts a section into a block holding its paragraphs followed by a
 * line feed, and appends that block to the text document body.
 *
 * @param s
 *            Section index; not used by this implementation.
 */
protected override void ProcessSection(HWPFDocumentCore wordDocument,
    NPOI.HWPF.UserModel.Section section, int s)
{
    XmlElement sectionBlock = textDocumentFacade.CreateBlock();

    ProcessParagraphes(wordDocument, sectionBlock, section, int.MinValue);
    sectionBlock.AppendChild(textDocumentFacade.CreateText("\n"));

    textDocumentFacade.Body.AppendChild(sectionBlock);
}
/**
 * Loads the Word document stored in the given file and converts it with a
 * WordToTextConverter backed by a fresh XmlDocument.
 *
 * @param docFile
 *            File handed to AbstractWordUtils.LoadDoc.
 * @return The converter's Document after processing.
 */
public static XmlDocument Process(string docFile)
{
    HWPFDocumentCore source = AbstractWordUtils.LoadDoc(docFile);

    XmlDocument target = new XmlDocument();
    WordToTextConverter converter = new WordToTextConverter(target);
    converter.ProcessDocument(source);

    return converter.Document;
}
/**
 * Converts a section into a block element appended to the HTML body. The
 * section style is registered on the block with the "d" class prefix
 * before the section's paragraphs are processed into it.
 *
 * @param s
 *            Section index; not used by this implementation.
 */
protected override void ProcessSection(HWPFDocumentCore wordDocument, Section section, int s)
{
    XmlElement sectionBlock = htmlDocumentFacade.CreateBlock();
    string sectionStyle = GetSectionStyle(section);
    htmlDocumentFacade.AddStyleClass(sectionBlock, "d", sectionStyle);

    htmlDocumentFacade.Body.AppendChild(sectionBlock);

    ProcessParagraphes(wordDocument, sectionBlock, section, int.MinValue);
}
/**
 * Outputs a page reference as a hyperlink to "#" + pageref appended to the
 * current block; the text range, when present, becomes the link content.
 *
 * @param textRange
 *            Characters shown as link text; the link stays empty when
 *            this is null.
 */
protected override void ProcessPageref(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range textRange, int currentTableLevel, string pageref)
{
    XmlElement anchor = htmlDocumentFacade.CreateHyperlink("#" + pageref);
    currentBlock.AppendChild(anchor);

    if (textRange == null)
    {
        return;
    }

    ProcessCharacters(wordDocument, currentTableLevel, textRange, anchor);
}
/**
 * Converts a paragraph into a text paragraph element made of the bullet
 * text, the paragraph characters and a trailing line feed, then appends
 * the element to the parent.
 *
 * @param bulletText
 *            List bullet text written before the paragraph characters.
 */
protected override void ProcessParagraph(HWPFDocumentCore wordDocument, XmlElement parentElement,
    int currentTableLevel, Paragraph paragraph, String bulletText)
{
    XmlElement paragraphElement = textDocumentFacade.CreateParagraph();

    XmlNode bulletNode = textDocumentFacade.CreateText(bulletText);
    paragraphElement.AppendChild(bulletNode);

    ProcessCharacters(wordDocument, currentTableLevel, paragraph, paragraphElement);

    XmlNode lineFeedNode = textDocumentFacade.CreateText("\n");
    paragraphElement.AppendChild(lineFeedNode);

    parentElement.AppendChild(paragraphElement);
}
/**
 * Outputs a hyperlink as text: the link characters are written into the
 * current block, followed by the target in parentheses. Zero-width spaces
 * are placed just inside the parentheses and around every '/' of the
 * target.
 *
 * @param textRange
 *            Characters shown as the link text.
 * @param hyperlink
 *            Link target appended in parentheses.
 */
protected override void ProcessHyperlink(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range textRange, int currentTableLevel, String hyperlink)
{
    ProcessCharacters(wordDocument, currentTableLevel, textRange, currentBlock);

    // String.Replace matches literally (it is not a regex), so the search
    // text must be the plain "/". The former "\\/" looked for a backslash
    // followed by a slash and therefore never matched a link target.
    String breakableTarget = hyperlink.Replace(
        "/",
        UNICODECHAR_ZERO_WIDTH_SPACE + "/" + UNICODECHAR_ZERO_WIDTH_SPACE);

    currentBlock.AppendChild(textDocumentFacade.CreateText(
        " (" + UNICODECHAR_ZERO_WIDTH_SPACE
        + breakableTarget
        + UNICODECHAR_ZERO_WIDTH_SPACE + ")"));
}
/**
 * Used to construct a Range from a document. This is generally used to
 * create a Range that spans the whole document, or at least one whole part
 * of the document (eg main text, header, comment). The section, paragraph
 * and character tables and the text are taken from the document, and the
 * range has no parent.
 *
 * @param start
 *            Starting character offset of the range.
 * @param end
 *            Ending character offset of the range.
 * @param doc
 *            The HWPFDocument the range is based on.
 */
public Range(int start, int end, HWPFDocumentCore doc)
{
    _start = start;
    _end = end;
    _parent = null;

    _doc = doc;
    _sections = doc.SectionTable.GetSections();
    _paragraphs = doc.ParagraphTable.GetParagraphs();
    _characters = doc.CharacterTable.GetTextRuns();
    _text = doc.Text;

    SanityCheckStartEnd();
}
/**
 * Used to create Ranges that are children of other Ranges. The document,
 * its section, paragraph and character lists and its text are shared with
 * the parent.
 *
 * @param start
 *            Starting character offset of the range.
 * @param end
 *            Ending character offset of the range.
 * @param parent
 *            The parent this range belongs to.
 */
internal Range(int start, int end, Range parent)
{
    _parent = parent;
    _start = start;
    _end = end;

    _doc = parent._doc;
    _text = parent._text;
    _sections = parent._sections;
    _paragraphs = parent._paragraphs;
    _characters = parent._characters;

    SanityCheckStartEnd();
    SanityCheck();
}
/**
 * Outputs a hyperlink as an external basic link appended to the current
 * block; the text range, when present, becomes the link content.
 *
 * @param textRange
 *            Characters shown as link text; the link stays empty when
 *            this is null.
 * @param hyperlink
 *            Link target passed to CreateBasicLinkExternal.
 */
protected override void ProcessHyperlink(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range textRange, int currentTableLevel, String hyperlink)
{
    XmlElement externalLink = foDocumentFacade.CreateBasicLinkExternal(hyperlink);
    currentBlock.AppendChild(externalLink);

    if (textRange == null)
    {
        return;
    }

    ProcessCharacters(wordDocument, currentTableLevel, textRange, externalLink);
}
/**
 * Outputs a page reference as an internal basic link to
 * "bookmark_" + pageref appended to the current block; the text range,
 * when present, becomes the link content.
 *
 * @param textRange
 *            Characters shown as link text; the link stays empty when
 *            this is null.
 */
protected override void ProcessPageref(HWPFDocumentCore hwpfDocument, XmlElement currentBlock,
    Range textRange, int currentTableLevel, String pageref)
{
    String targetId = "bookmark_" + pageref;
    XmlElement internalLink = foDocumentFacade.CreateBasicLinkInternal(targetId);
    currentBlock.AppendChild(internalLink);

    if (textRange == null)
    {
        return;
    }

    ProcessCharacters(hwpfDocument, currentTableLevel, textRange, internalLink);
}
/**
 * Wraps the range in nested bookmark elements, one per bookmark, and
 * processes the characters of the range inside the innermost element.
 *
 * The method used to end with an unconditional
 * "throw new NotImplementedException()" after all of the work above had
 * already been done, so every range with bookmarks failed after the DOM
 * had been modified. That leftover throw is removed.
 *
 * @param range
 *            Characters covered by the bookmarks; nothing is output for
 *            them when this is null.
 * @param rangeBookmarks
 *            Bookmarks to represent, outermost first.
 */
protected override void ProcessBookmarks(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range range, int currentTableLevel, IList<Bookmark> rangeBookmarks)
{
    XmlElement parent = currentBlock;
    foreach (Bookmark bookmark in rangeBookmarks)
    {
        XmlElement bookmarkElement = htmlDocumentFacade.CreateBookmark(bookmark.Name);
        parent.AppendChild(bookmarkElement);
        // Each bookmark element nests inside the previous one.
        parent = bookmarkElement;
    }

    if (range != null)
    {
        ProcessCharacters(wordDocument, currentTableLevel, range, parent);
    }
}
/**
 * Converts a section into its own page sequence: a page master is created
 * for the section, the section's paragraphs are processed into the
 * "xsl-region-body" flow, and any collected endnotes are appended to that
 * flow, after which the endnote list is cleared.
 *
 * @param sectionCounter
 *            Section index passed on to CreatePageMaster.
 */
protected override void ProcessSection(HWPFDocumentCore wordDocument,
    NPOI.HWPF.UserModel.Section section, int sectionCounter)
{
    String pageMasterName = CreatePageMaster(section, "page", sectionCounter);
    XmlElement sequence = foDocumentFacade.AddPageSequence(pageMasterName);
    XmlElement bodyFlow = foDocumentFacade.AddFlowToPageSequence(sequence, "xsl-region-body");

    ProcessParagraphes(wordDocument, bodyFlow, section, int.MinValue);

    if (endnotes == null || endnotes.Count == 0)
    {
        return;
    }

    foreach (XmlElement endnote in endnotes)
    {
        bodyFlow.AppendChild(endnote);
    }
    endnotes.Clear();
}
/**
 * Converts a paragraph into a block appended to the parent element. Bullet
 * text, when present, is written into its own inline before the paragraph
 * characters. A leader is appended when neither the bullet text nor the
 * characters produced any text. A paragraph without character runs yields
 * an empty block.
 *
 * The result of ProcessCharacters is now OR-ed into the "have text" flag.
 * It used to be assigned, which discarded the bullet-text contribution, so
 * a paragraph with bullet text but no other text received a leader.
 *
 * @param bulletText
 *            List bullet text; skipped when null or empty.
 */
protected override void ProcessParagraph(HWPFDocumentCore hwpfDocument, XmlElement parentFopElement,
    int currentTableLevel, Paragraph paragraph, String bulletText)
{
    XmlElement block = foDocumentFacade.CreateBlock();
    parentFopElement.AppendChild(block);

    WordToFoUtils.SetParagraphProperties(paragraph, block);

    int charRuns = paragraph.NumCharacterRuns;
    if (charRuns == 0)
    {
        return;
    }

    bool haveAnyText = false;

    if (!string.IsNullOrEmpty(bulletText))
    {
        XmlElement inline = foDocumentFacade.CreateInline();
        block.AppendChild(inline);

        XmlText textNode = foDocumentFacade.CreateText(bulletText);
        inline.AppendChild(textNode);

        // Whitespace-only bullet text does not count as text.
        haveAnyText |= bulletText.Trim().Length != 0;
    }

    // "|=" on bool always evaluates its right-hand side, so the characters
    // are processed even when the bullet text already set the flag.
    haveAnyText |= ProcessCharacters(hwpfDocument, currentTableLevel, paragraph, block);

    if (!haveAnyText)
    {
        XmlElement leader = foDocumentFacade.CreateLeader();
        block.AppendChild(leader);
    }

    WordToFoUtils.CompactInlines(block);
}
/**
 * Looks up the main-document field starting at the given offset and, when
 * found, processes it through ProcessField.
 *
 * @param startOffset
 *            Offset used to look the field up.
 * @return The processed field, or null when the document is not an
 *         HWPFDocument or no field starts at the offset.
 */
protected Field ProcessDeadField(HWPFDocumentCore wordDocument, Range charactersRange,
    int currentTableLevel, int startOffset, XmlElement currentBlock)
{
    HWPFDocument hwpfDocument = wordDocument as HWPFDocument;
    if (hwpfDocument == null)
    {
        return null;
    }

    Field field = hwpfDocument.GetFields()
        .GetFieldByStartOffset(FieldsDocumentPart.MAIN, startOffset);

    if (field != null)
    {
        ProcessField(hwpfDocument, charactersRange, currentTableLevel, field, currentBlock);
    }

    return field;
}
/**
 * Converts a paragraph into an HTML paragraph element appended to the
 * parent. The font of the first character run is added to the paragraph
 * style and pushed as the current block properties while the characters
 * are processed. A paragraph without character runs yields an empty
 * element with no style class.
 *
 * @param bulletText
 *            List bullet text placed before the paragraph characters;
 *            skipped when null or empty.
 */
protected override void ProcessParagraph(HWPFDocumentCore wordDocument, XmlElement parentElement,
    int currentTableLevel, Paragraph paragraph, string bulletText)
{
    XmlElement paragraphElement = htmlDocumentFacade.CreateParagraph();
    parentElement.AppendChild(paragraphElement);

    StringBuilder css = new StringBuilder();
    WordToHtmlUtils.AddParagraphProperties(paragraph, css);

    if (paragraph.NumCharacterRuns == 0)
    {
        return;
    }

    // Defaults apply when the first character run is unavailable.
    String blockFontName = string.Empty;
    int blockFontSize = -1;

    CharacterRun firstRun = paragraph.GetCharacterRun(0);
    if (firstRun != null)
    {
        Triplet triplet = GetCharacterRunTriplet(firstRun);
        blockFontSize = firstRun.GetFontSize() / 2;
        blockFontName = triplet.fontName;
        WordToHtmlUtils.AddFontFamily(blockFontName, css);
        WordToHtmlUtils.AddFontSize(blockFontSize, css);
    }
    blocksProperies.Push(new BlockProperies(blockFontName, blockFontSize));

    try
    {
        if (!string.IsNullOrEmpty(bulletText))
        {
            paragraphElement.AppendChild(htmlDocumentFacade.CreateText(bulletText));
        }

        ProcessCharacters(wordDocument, currentTableLevel, paragraph, paragraphElement);
    }
    finally
    {
        // Always restore the enclosing block's properties.
        blocksProperies.Pop();
    }

    if (css.Length > 0)
    {
        htmlDocumentFacade.AddStyleClass(paragraphElement, "p", css.ToString());
    }

    WordToHtmlUtils.CompactSpans(paragraphElement);
}
/**
 * Wraps the range in nested bookmark elements, one per bookmark, and
 * processes the characters of the range inside the innermost element.
 *
 * The method used to end with an unconditional
 * "throw new NotImplementedException()" after all of the work above had
 * already been done, so every range with bookmarks failed after the DOM
 * had been modified. That leftover throw is removed.
 *
 * @param range
 *            Characters covered by the bookmarks; nothing is output for
 *            them when this is null.
 * @param rangeBookmarks
 *            Bookmarks to represent, outermost first.
 */
protected override void ProcessBookmarks(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range range, int currentTableLevel, IList<Bookmark> rangeBookmarks)
{
    XmlElement parent = currentBlock;
    foreach (Bookmark bookmark in rangeBookmarks)
    {
        XmlElement bookmarkElement = htmlDocumentFacade.CreateBookmark(bookmark.Name);
        parent.AppendChild(bookmarkElement);
        // Each bookmark element nests inside the previous one.
        parent = bookmarkElement;
    }

    if (range != null)
    {
        ProcessCharacters(wordDocument, currentTableLevel, range, parent);
    }
}
/**
 * Loads a Word document from a stream: the stream is passed to
 * HWPFDocumentCore.VerifyAndBuildPOIFS and the result is handed to the
 * matching LoadDoc overload.
 *
 * @param inputStream
 *            Stream containing the document.
 * @return The loaded document.
 */
public static HWPFDocumentCore LoadDoc(Stream inputStream)
{
    var fileSystem = HWPFDocumentCore.VerifyAndBuildPOIFS(inputStream);
    return LoadDoc(fileSystem);
}
/**
 * Converts a table into an XSL-FO table appended to the flow. Rows flagged
 * as table header go into the table header, all others into the table
 * body. Continuation cells of a vertical merge and cells spanning zero
 * columns are skipped. A table that ends up without body rows is not
 * appended; a warning is logged instead.
 *
 * The former pre-pass that computed a maximum column count was removed:
 * its result was never read.
 *
 * @param flow
 *            Element that receives the generated table.
 * @param table
 *            Table to convert.
 */
protected override void ProcessTable(HWPFDocumentCore wordDocument, XmlElement flow, Table table)
{
    XmlElement tableHeader = foDocumentFacade.CreateTableHeader();
    XmlElement tableBody = foDocumentFacade.CreateTableBody();

    int[] tableCellEdges = WordToHtmlUtils.BuildTableCellEdgesArray(table);
    int tableRows = table.NumRows;

    for (int r = 0; r < tableRows; r++)
    {
        TableRow tableRow = table.GetRow(r);

        XmlElement tableRowElement = foDocumentFacade.CreateTableRow();
        WordToFoUtils.SetTableRowProperties(tableRow, tableRowElement);

        // index of current element in tableCellEdges[]
        int currentEdgeIndex = 0;
        int rowCells = tableRow.NumCells();
        for (int c = 0; c < rowCells; c++)
        {
            TableCell tableCell = tableRow.GetCell(c);

            // A continuation cell of a vertical merge produces no output,
            // but the edge index still has to move past it.
            if (tableCell.IsVerticallyMerged() && !tableCell.IsFirstVerticallyMerged())
            {
                currentEdgeIndex += getTableCellEdgesIndexSkipCount(table, r,
                    tableCellEdges, currentEdgeIndex, c, tableCell);
                continue;
            }

            XmlElement tableCellElement = foDocumentFacade.CreateTableCell();
            WordToFoUtils.SetTableCellProperties(tableRow, tableCell, tableCellElement,
                r == 0, r == tableRows - 1, c == 0, c == rowCells - 1);

            int colSpan = GetNumberColumnsSpanned(tableCellEdges, currentEdgeIndex, tableCell);
            currentEdgeIndex += colSpan;

            if (colSpan == 0)
            {
                continue;
            }

            if (colSpan != 1)
            {
                tableCellElement.SetAttribute("number-columns-spanned", colSpan.ToString());
            }

            int rowSpan = GetNumberRowsSpanned(table, r, c, tableCell);
            if (rowSpan > 1)
            {
                tableCellElement.SetAttribute("number-rows-spanned", rowSpan.ToString());
            }

            ProcessParagraphes(wordDocument, tableCellElement, tableCell, table.TableLevel);

            // An otherwise empty cell gets an empty block as its content.
            if (!tableCellElement.HasChildNodes)
            {
                tableCellElement.AppendChild(foDocumentFacade.CreateBlock());
            }

            tableRowElement.AppendChild(tableCellElement);
        }

        // Rows left without any cell are dropped.
        if (tableRowElement.HasChildNodes)
        {
            if (tableRow.isTableHeader())
            {
                tableHeader.AppendChild(tableRowElement);
            }
            else
            {
                tableBody.AppendChild(tableRowElement);
            }
        }
    }

    XmlElement tableElement = foDocumentFacade.CreateTable();
    tableElement.SetAttribute("table-layout", "fixed");

    if (tableHeader.HasChildNodes)
    {
        tableElement.AppendChild(tableHeader);
    }

    if (tableBody.HasChildNodes)
    {
        tableElement.AppendChild(tableBody);
        flow.AppendChild(tableElement);
    }
    else
    {
        logger.Log(POILogger.WARN, "Table without body starting on offset "
            + table.StartOffset + " -- " + table.EndOffset);
    }
}
/**
 * Runs the base ProcessDocumentPart over the range and then AfterProcess.
 *
 * NOTE(review): the name starts with a lowercase letter, so this method
 * does not override ProcessDocumentPart -- presumably an override was
 * intended; confirm before relying on it.
 */
public void processDocumentPart(
    HWPFDocumentCore wordDocument,
    Range range)
{
    base.ProcessDocumentPart(wordDocument, range);

    AfterProcess();
}
/**
 * Represents a page break in HTML output as a line break appended to the
 * given flow element.
 */
protected override void ProcessPageBreak(HWPFDocumentCore wordDocument, XmlElement flow)
{
    XmlNode lineBreak = htmlDocumentFacade.CreateLineBreak();
    flow.AppendChild(lineBreak);
}
/**
 * Outputs a page reference as a hyperlink to "#" + pageref appended to the
 * current block; the text range, when present, becomes the link content.
 *
 * @param textRange
 *            Characters shown as link text; the link stays empty when
 *            this is null.
 */
protected override void ProcessPageref(HWPFDocumentCore wordDocument, XmlElement currentBlock,
    Range textRange, int currentTableLevel, string pageref)
{
    XmlElement anchor = htmlDocumentFacade.CreateHyperlink("#" + pageref);
    currentBlock.AppendChild(anchor);

    if (textRange == null)
    {
        return;
    }

    ProcessCharacters(wordDocument, currentTableLevel, textRange, anchor);
}
/**
 * Converts a table into an HTML table appended to the flow. Rows flagged
 * as table header go into the table header and use header cells, all
 * others go into the table body. Continuation cells of a vertical merge
 * and cells spanning zero columns are skipped. A table that ends up
 * without body rows is not appended; a warning is logged instead.
 *
 * The former pre-pass that computed a maximum column count was removed:
 * its result was never read.
 *
 * @param flow
 *            Element that receives the generated table.
 * @param table
 *            Table to convert.
 */
protected override void ProcessTable(HWPFDocumentCore wordDocument, XmlElement flow, Table table)
{
    XmlElement tableHeader = htmlDocumentFacade.CreateTableHeader();
    XmlElement tableBody = htmlDocumentFacade.CreateTableBody();

    int[] tableCellEdges = WordToHtmlUtils.BuildTableCellEdgesArray(table);
    int tableRows = table.NumRows;

    for (int r = 0; r < tableRows; r++)
    {
        TableRow tableRow = table.GetRow(r);

        XmlElement tableRowElement = htmlDocumentFacade.CreateTableRow();
        StringBuilder tableRowStyle = new StringBuilder();
        WordToHtmlUtils.AddTableRowProperties(tableRow, tableRowStyle);

        // index of current element in tableCellEdges[]
        int currentEdgeIndex = 0;
        int rowCells = tableRow.NumCells();
        for (int c = 0; c < rowCells; c++)
        {
            TableCell tableCell = tableRow.GetCell(c);

            // A continuation cell of a vertical merge produces no output,
            // but the edge index still has to move past it.
            if (tableCell.IsVerticallyMerged() && !tableCell.IsFirstVerticallyMerged())
            {
                currentEdgeIndex += getTableCellEdgesIndexSkipCount(table, r,
                    tableCellEdges, currentEdgeIndex, c, tableCell);
                continue;
            }

            XmlElement tableCellElement;
            if (tableRow.isTableHeader())
            {
                tableCellElement = htmlDocumentFacade.CreateTableHeaderCell();
            }
            else
            {
                tableCellElement = htmlDocumentFacade.CreateTableCell();
            }

            StringBuilder tableCellStyle = new StringBuilder();
            WordToHtmlUtils.AddTableCellProperties(tableRow, tableCell,
                r == 0, r == tableRows - 1, c == 0, c == rowCells - 1, tableCellStyle);

            int colSpan = GetNumberColumnsSpanned(tableCellEdges, currentEdgeIndex, tableCell);
            currentEdgeIndex += colSpan;

            if (colSpan == 0)
            {
                continue;
            }

            if (colSpan != 1)
            {
                tableCellElement.SetAttribute("colspan", colSpan.ToString());
            }

            int rowSpan = GetNumberRowsSpanned(table, r, c, tableCell);
            if (rowSpan > 1)
            {
                tableCellElement.SetAttribute("rowspan", rowSpan.ToString());
            }

            ProcessParagraphes(wordDocument, tableCellElement, tableCell, 0 /*table.TableLevel Todo: */);

            // An otherwise empty cell gets an empty paragraph as its content.
            if (!tableCellElement.HasChildNodes)
            {
                tableCellElement.AppendChild(htmlDocumentFacade.CreateParagraph());
            }

            if (tableCellStyle.Length > 0)
            {
                htmlDocumentFacade.AddStyleClass(tableCellElement,
                    tableCellElement.LocalName, tableCellStyle.ToString());
            }

            tableRowElement.AppendChild(tableCellElement);
        }

        if (tableRowStyle.Length > 0)
        {
            tableRowElement.SetAttribute("class",
                htmlDocumentFacade.GetOrCreateCssClass("tr", "r", tableRowStyle.ToString()));
        }

        if (tableRow.isTableHeader())
        {
            tableHeader.AppendChild(tableRowElement);
        }
        else
        {
            tableBody.AppendChild(tableRowElement);
        }
    }

    XmlElement tableElement = htmlDocumentFacade.CreateTable();
    tableElement.SetAttribute("class",
        htmlDocumentFacade.GetOrCreateCssClass(tableElement.LocalName, "t",
            "table-layout:fixed;border-collapse:collapse;border-spacing:0;"));

    if (tableHeader.HasChildNodes)
    {
        tableElement.AppendChild(tableHeader);
    }

    if (tableBody.HasChildNodes)
    {
        tableElement.AppendChild(tableBody);
        flow.AppendChild(tableElement);
    }
    else
    {
        logger.Log(POILogger.WARN, "Table without body starting at [",
            table.StartOffset.ToString(), "; ", table.EndOffset.ToString(), ")");
    }
}