Example #1
0
        /// <summary>
        /// Loads a Word document through <c>AbstractWordUtils.LoadDoc</c> and returns
        /// whatever the <c>GetText</c> overload taking the loaded document produces.
        /// </summary>
        /// <param name="docFile">
        /// Identifies the document to load; forwarded unchanged to
        /// <c>AbstractWordUtils.LoadDoc</c>.
        /// </param>
        /// <returns>The result of <c>GetText</c> for the loaded document.</returns>
        public static String GetText(string docFile)
        {
            // The explicitly typed local pins which GetText overload is selected.
            HWPFDocumentCore loadedDocument = AbstractWordUtils.LoadDoc(docFile);
            return GetText(loadedDocument);
        }
Example #2
0
        /// <summary>
        /// Loads a Word document, runs it through a new <c>WordToTextConverter</c>
        /// that was created over an empty <see cref="XmlDocument"/>, and returns the
        /// converter's <c>Document</c>.
        /// </summary>
        /// <param name="docFile">
        /// Identifies the document to load; forwarded unchanged to
        /// <c>AbstractWordUtils.LoadDoc</c>.
        /// </param>
        /// <returns>The <c>Document</c> property of the converter after processing.</returns>
        public static XmlDocument Process(string docFile)
        {
            // Load first: if loading fails, no converter is ever constructed.
            HWPFDocumentCore source = AbstractWordUtils.LoadDoc(docFile);

            XmlDocument output = new XmlDocument();
            WordToTextConverter converter = new WordToTextConverter(output);
            converter.ProcessDocument(source);

            return converter.Document;
        }
Example #3
0
        /// <summary>
        /// Normalizes the font description held by <paramref name="original"/> and
        /// returns it.
        /// </summary>
        /// <remarks>
        /// Two passes are applied:
        /// <list type="number">
        /// <item><description>
        /// When <c>fontName</c> is not null or empty, a trailing " Regular" is removed
        /// and trailing Russian style words are replaced by " Bold", " Bold Italic"
        /// or " Italic".
        /// </description></item>
        /// <item><description>
        /// The names "Times Regular"/"Times-Regular", "Times Bold"/"Times-Bold",
        /// "Times Italic"/"Times-Italic" and "Times Bold Italic"/"Times-BoldItalic"
        /// are collapsed to "Times" with the <c>bold</c> and <c>italic</c> flags set
        /// accordingly.
        /// </description></item>
        /// </list>
        /// All suffix tests use <see cref="StringComparison.Ordinal"/> so the result
        /// does not depend on the current culture.
        /// </remarks>
        /// <param name="original">The font description to normalize; its fields are assigned in place.</param>
        /// <returns>The same <paramref name="original"/> value after normalization.</returns>
        public Triplet Update(Triplet original)
        {
            const string regularSuffix       = " Regular";
            const string russianBoldSuffix   = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439";
            const string russianItalicSuffix = " \u041A\u0443\u0440\u0441\u0438\u0432";
            const string russianBoldItalicSuffix = russianBoldSuffix + russianItalicSuffix;

            if (!string.IsNullOrEmpty(original.fontName))
            {
                String fontName = original.fontName;

                if (fontName.EndsWith(regularSuffix, StringComparison.Ordinal))
                {
                    fontName = AbstractWordUtils.SubstringBeforeLast(fontName, regularSuffix);
                }

                if (fontName.EndsWith(russianBoldSuffix, StringComparison.Ordinal))
                {
                    fontName = AbstractWordUtils.SubstringBeforeLast(fontName, russianBoldSuffix)
                               + " Bold";
                }

                if (fontName.EndsWith(russianBoldItalicSuffix, StringComparison.Ordinal))
                {
                    fontName = AbstractWordUtils.SubstringBeforeLast(fontName, russianBoldItalicSuffix)
                               + " Bold Italic";
                }

                // Must stay after the bold-italic test: a bold-italic name also ends
                // with the italic word, but by now it has been rewritten to end with
                // " Bold Italic" and therefore no longer matches here.
                if (fontName.EndsWith(russianItalicSuffix, StringComparison.Ordinal))
                {
                    fontName = AbstractWordUtils.SubstringBeforeLast(fontName, russianItalicSuffix)
                               + " Italic";
                }

                original.fontName = fontName;
            }

            // A null or empty name matches no case label, so no extra guard is needed.
            // The cases are mutually exclusive; each one rewrites the name to "Times".
            // NOTE(review): "Times Regular" presumably never reaches this point because
            // the first pass strips " Regular" — confirm against SubstringBeforeLast.
            switch (original.fontName)
            {
                case "Times Regular":
                case "Times-Regular":
                    original.fontName = "Times";
                    original.bold     = false;
                    original.italic   = false;
                    break;

                case "Times Bold":
                case "Times-Bold":
                    original.fontName = "Times";
                    original.bold     = true;
                    original.italic   = false;
                    break;

                case "Times Italic":
                case "Times-Italic":
                    original.fontName = "Times";
                    original.bold     = false;
                    original.italic   = true;
                    break;

                case "Times Bold Italic":
                case "Times-BoldItalic":
                    original.fontName = "Times";
                    original.bold     = true;
                    original.italic   = true;
                    break;
            }

            return original;
        }
Example #4
0
        /// <summary>
        /// Walks the paragraphs of <paramref name="range"/> in order and dispatches
        /// each one to table, page-break and paragraph processing.
        /// </summary>
        /// <remarks>
        /// <para>
        /// A paragraph that is in a table whose level differs from
        /// <paramref name="currentTableLevel"/> is handled as the start of a table:
        /// the table is obtained with <c>range.GetTable</c>, passed to
        /// <c>ProcessTable</c>, and the walk resumes after the table's paragraphs.
        /// </para>
        /// <para>
        /// Any other paragraph is passed to <c>ProcessParagraph</c>. If its text is
        /// exactly the form-feed character, <c>ProcessPageBreak</c> is called first.
        /// When the paragraph's ilfo value is non-zero, the label comes from
        /// <c>AbstractWordUtils.GetBulletText</c>; if the document has no list tables,
        /// a warning is logged and an empty label is used instead.
        /// </para>
        /// </remarks>
        /// <param name="wordDocument">Document that supplies the list tables and is forwarded to the processing methods.</param>
        /// <param name="flow">Element forwarded to the processing methods.</param>
        /// <param name="range">Range whose paragraphs are processed.</param>
        /// <param name="currentTableLevel">Table level the caller is currently processing.</param>
        /// <exception cref="InvalidOperationException">
        /// A paragraph is in a table whose level is lower than
        /// <paramref name="currentTableLevel"/>.
        /// </exception>
        protected void ProcessParagraphes(HWPFDocumentCore wordDocument,
                                          XmlElement flow, Range range, int currentTableLevel)
        {
            ListTables listTables     = wordDocument.GetListTables();
            int        paragraphCount = range.NumParagraphs;

            int index = 0;
            while (index < paragraphCount)
            {
                Paragraph paragraph = range.GetParagraph(index);

                if (paragraph.IsInTable())
                {
                    int tableLevel = paragraph.GetTableLevel();

                    if (tableLevel != currentTableLevel)
                    {
                        if (tableLevel < currentTableLevel)
                        {
                            // NOTE(review): the message says "higher" although the
                            // condition tests for a lower level — confirm the wording.
                            throw new InvalidOperationException(
                                      "Trying to process table cell with higher level ("
                                      + tableLevel
                                      + ") than current table level ("
                                      + currentTableLevel
                                      + ") as inner table part");
                        }

                        Table table = range.GetTable(paragraph);
                        ProcessTable(wordDocument, flow, table);

                        // Jump over every paragraph that belongs to the table.
                        index += table.NumParagraphs;
                        continue;
                    }
                }

                if (paragraph.Text.Equals("\u000c"))
                {
                    ProcessPageBreak(wordDocument, flow);
                }

                int    listInfo = paragraph.GetIlfo();
                String label    = string.Empty;

                if (listInfo != 0)
                {
                    if (listTables == null)
                    {
                        logger.Log(POILogger.WARN,
                                   "Paragraph #" + paragraph.StartOffset + "-"
                                   + paragraph.EndOffset
                                   + " has reference to list structure #"
                                   + listInfo
                                   + ", but listTables not defined in file");
                    }
                    else
                    {
                        ListFormatOverride listFormatOverride = listTables.GetOverride(listInfo);

                        label = AbstractWordUtils.GetBulletText(listTables,
                                                                paragraph, listFormatOverride.GetLsid());
                    }
                }

                ProcessParagraph(wordDocument, flow, currentTableLevel,
                                 paragraph, label);
                index++;
            }
        }
Example #5
0
        /// <summary>
        /// Loads a Word document from <paramref name="root"/> through
        /// <c>AbstractWordUtils.LoadDoc</c> and returns whatever the <c>GetText</c>
        /// overload taking the loaded document produces.
        /// </summary>
        /// <param name="root">
        /// Directory node forwarded unchanged to <c>AbstractWordUtils.LoadDoc</c>.
        /// </param>
        /// <returns>The result of <c>GetText</c> for the loaded document.</returns>
        public static String GetText(DirectoryNode root)
        {
            // The explicitly typed local pins which GetText overload is selected.
            HWPFDocumentCore loadedDocument = AbstractWordUtils.LoadDoc(root);
            return GetText(loadedDocument);
        }