/// <summary>
/// Loads the document identified by <paramref name="docFile"/> through
/// <c>AbstractWordUtils.LoadDoc</c> and returns the result of the
/// <c>GetText</c> overload that accepts the loaded document.
/// </summary>
/// <param name="docFile">Value handed unchanged to <c>AbstractWordUtils.LoadDoc</c> (presumably a file path; confirm against that helper).</param>
/// <returns>The string produced by <c>GetText</c> for the loaded document.</returns>
public static String GetText(string docFile)
{
    // Keep the explicitly typed local so the same GetText overload is selected.
    HWPFDocumentCore loadedDocument = AbstractWordUtils.LoadDoc(docFile);
    String extractedText = GetText(loadedDocument);
    return extractedText;
}
/// <summary>
/// Loads the document identified by <paramref name="docFile"/>, runs a new
/// <c>WordToTextConverter</c> (backed by a fresh <c>XmlDocument</c>) over it
/// and returns the converter's <c>Document</c>.
/// </summary>
/// <param name="docFile">Value handed unchanged to <c>AbstractWordUtils.LoadDoc</c> (presumably a file path; confirm against that helper).</param>
/// <returns>The <c>Document</c> property of the converter after processing.</returns>
public static XmlDocument Process(string docFile)
{
    HWPFDocumentCore loadedDocument = AbstractWordUtils.LoadDoc(docFile);

    XmlDocument outputDocument = new XmlDocument();
    WordToTextConverter converter = new WordToTextConverter(outputDocument);
    converter.ProcessDocument(loadedDocument);

    // Return what the converter exposes rather than the local instance,
    // exactly as the converter's contract dictates.
    return converter.Document;
}
/// <summary>
/// Normalizes the font description held in <paramref name="original"/>.
/// First, trailing style words in the font name are rewritten: a trailing
/// " Regular" is removed, and the Cyrillic (Russian) style suffixes are
/// replaced by " Bold", " Bold Italic" and " Italic". Then the known
/// "Times ..." variants are collapsed to the plain name "Times" with the
/// <c>bold</c> / <c>italic</c> flags set to match the variant.
/// </summary>
/// <param name="original">Triplet to normalize; it is modified in place.</param>
/// <returns>The same <paramref name="original"/> instance.</returns>
public Triplet Update(Triplet original)
{
    // Cyrillic style words as they appear at the end of localized font names.
    const string RussianBold = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439";
    const string RussianBoldItalic = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432";
    const string RussianItalic = " \u041A\u0443\u0440\u0441\u0438\u0432";

    if (!string.IsNullOrEmpty(original.fontName))
    {
        String fontName = original.fontName;

        // Suffix tests are ordinal: font names are identifiers, so the match
        // must not vary with the current culture or globalization mode
        // (the single-argument EndsWith is culture-sensitive).
        // The checks run sequentially on purpose: each one sees the result
        // of the previous rewrite.
        if (fontName.EndsWith(" Regular", StringComparison.Ordinal))
        {
            fontName = AbstractWordUtils.SubstringBeforeLast(fontName, " Regular");
        }
        if (fontName.EndsWith(RussianBold, StringComparison.Ordinal))
        {
            fontName = AbstractWordUtils.SubstringBeforeLast(fontName, RussianBold) + " Bold";
        }
        if (fontName.EndsWith(RussianBoldItalic, StringComparison.Ordinal))
        {
            fontName = AbstractWordUtils.SubstringBeforeLast(fontName, RussianBoldItalic) + " Bold Italic";
        }
        if (fontName.EndsWith(RussianItalic, StringComparison.Ordinal))
        {
            fontName = AbstractWordUtils.SubstringBeforeLast(fontName, RussianItalic) + " Italic";
        }

        original.fontName = fontName;
    }

    // Re-check: the rewrite above may have left an empty name.
    if (!string.IsNullOrEmpty(original.fontName))
    {
        // Once a variant matches, the name becomes "Times", which equals none
        // of the other literals, so an else-if chain is equivalent to
        // independent checks.
        // NOTE(review): a name ending in " Regular" was already rewritten
        // above, so the "Times Regular" literal presumably never matches
        // here; kept for parity - confirm against SubstringBeforeLast.
        if ("Times Regular".Equals(original.fontName) || "Times-Regular".Equals(original.fontName))
        {
            original.fontName = "Times";
            original.bold = false;
            original.italic = false;
        }
        else if ("Times Bold".Equals(original.fontName) || "Times-Bold".Equals(original.fontName))
        {
            original.fontName = "Times";
            original.bold = true;
            original.italic = false;
        }
        else if ("Times Italic".Equals(original.fontName) || "Times-Italic".Equals(original.fontName))
        {
            original.fontName = "Times";
            original.bold = false;
            original.italic = true;
        }
        else if ("Times Bold Italic".Equals(original.fontName) || "Times-BoldItalic".Equals(original.fontName))
        {
            original.fontName = "Times";
            original.bold = true;
            original.italic = true;
        }
    }

    return original;
}
/// <summary>
/// Iterates over the paragraphs of <paramref name="range"/> and dispatches
/// each one: paragraphs belonging to a table at a different level than
/// <paramref name="currentTableLevel"/> are handed to <c>ProcessTable</c> as
/// a whole table, a paragraph whose text is exactly the form-feed character
/// triggers <c>ProcessPageBreak</c>, and every non-skipped paragraph is
/// passed to <c>ProcessParagraph</c> together with its list label (empty
/// when the paragraph has no list reference or no list tables exist).
/// </summary>
/// <param name="wordDocument">Document being converted; also the source of the list tables.</param>
/// <param name="flow">Element forwarded to the table / page-break / paragraph handlers.</param>
/// <param name="range">Range whose paragraphs are processed.</param>
/// <param name="currentTableLevel">Table nesting level the caller is currently processing.</param>
/// <exception cref="InvalidOperationException">
/// A table paragraph reports a table level lower than <paramref name="currentTableLevel"/>.
/// </exception>
protected void ProcessParagraphes(HWPFDocumentCore wordDocument, XmlElement flow, Range range, int currentTableLevel)
{
    ListTables listTables = wordDocument.GetListTables();
    int total = range.NumParagraphs;

    for (int index = 0; index < total; index++)
    {
        Paragraph current = range.GetParagraph(index);

        if (current.IsInTable())
        {
            int level = current.GetTableLevel();
            if (level != currentTableLevel)
            {
                if (level < currentTableLevel)
                {
                    throw new InvalidOperationException(
                        "Trying to process table cell with higher level ("
                        + level
                        + ") than current table level ("
                        + currentTableLevel
                        + ") as inner table part");
                }

                Table nestedTable = range.GetTable(current);
                ProcessTable(wordDocument, flow, nestedTable);

                // Skip the table's paragraphs; the "- 1" compensates for the
                // loop's own increment.
                index += nestedTable.NumParagraphs - 1;
                continue;
            }
        }

        // A page break is emitted, and the paragraph is still processed below.
        if (current.Text.Equals("\u000c"))
        {
            ProcessPageBreak(wordDocument, flow);
        }

        // List reference of this paragraph; 0 means "not part of a list".
        int listInfo = current.GetIlfo();
        String label = string.Empty;

        if (listInfo != 0)
        {
            if (listTables != null)
            {
                ListFormatOverride formatOverride = listTables.GetOverride(listInfo);
                label = AbstractWordUtils.GetBulletText(listTables, current, formatOverride.GetLsid());
            }
            else
            {
                // The paragraph points at a list, but the file has no list
                // tables: warn and fall back to an empty label.
                logger.Log(POILogger.WARN,
                    "Paragraph #" + current.StartOffset + "-" + current.EndOffset
                    + " has reference to list structure #" + listInfo
                    + ", but listTables not defined in file");
            }
        }

        ProcessParagraph(wordDocument, flow, currentTableLevel, current, label);
    }
}
/// <summary>
/// Loads the document stored under <paramref name="root"/> through
/// <c>AbstractWordUtils.LoadDoc</c> and returns the result of the
/// <c>GetText</c> overload that accepts the loaded document.
/// </summary>
/// <param name="root">Directory node handed unchanged to <c>AbstractWordUtils.LoadDoc</c>.</param>
/// <returns>The string produced by <c>GetText</c> for the loaded document.</returns>
public static String GetText(DirectoryNode root)
{
    // Keep the explicitly typed local so the same GetText overload is selected.
    HWPFDocumentCore loadedDocument = AbstractWordUtils.LoadDoc(root);
    String extractedText = GetText(loadedDocument);
    return extractedText;
}