Exemple #1
0
        /// <summary>
        /// Loads a PDF document, moves the inlines of every paragraph into the first
        /// paragraph, saves the result and opens both the source and the result file.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/merge-paragraphs-in-pdf-document-net-csharp-vb.php
        /// </remarks>
        static void MergeParagraphs()
        {
            string sourcePath = @"..\..\example.pdf";
            string resultPath = @"Result.pdf";

            DocumentCore dc = DocumentCore.Load(sourcePath);

            // The first paragraph is the merge target; all other paragraphs are folded into it.
            Paragraph target = dc.GetChildElements(true, ElementType.Paragraph).First() as Paragraph;

            // Insertion always starts right after the target's own inlines. Because the
            // remaining paragraphs are visited last-to-first, each earlier paragraph ends
            // up in front of the later ones and the reading order is kept.
            int anchor = target.Inlines.Count;

            var others = dc.GetChildElements(true, ElementType.Paragraph)
                           .Reverse()
                           .Where(element => element != target);

            foreach (Paragraph source in others)
            {
                int insertAt = anchor;
                foreach (Inline inline in source.Inlines)
                {
                    target.Inlines.Insert(insertAt, inline.Clone(true));
                    insertAt++;
                }

                // The inlines were cloned into the target, so the source content is deleted.
                source.Content.Delete();
            }

            dc.Save(resultPath);

            // Open the source and the result for comparison.
            foreach (string path in new[] { sourcePath, resultPath })
            {
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(path)
                {
                    UseShellExecute = true
                });
            }
        }
Exemple #2
0
        /// <summary>
        /// Prints the content of every paragraph of a document, followed by the content
        /// of every Run whose font color is red, then waits for a key press.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-content-net-csharp-vb.php
        /// </remarks>
        public static void GetContent()
        {
            // Input document.
            string documentPath = @"..\..\example.docx";
            DocumentCore dc = DocumentCore.Load(documentPath);

            StringBuilder report = new StringBuilder();

            // 'Content' exposes an element's content as a ContentRange;
            // its string form is written to the report, one line per paragraph.
            foreach (Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
            {
                report.AppendFormat("Paragraph: {0}", paragraph.Content.ToString()).AppendLine();
            }

            // Same for Runs, but only those with a red font color.
            foreach (Run run in dc.GetChildElements(true, ElementType.Run))
            {
                if (!(run.CharacterFormat.FontColor == Color.Red))
                {
                    continue;
                }

                report.AppendFormat("Red color: {0}", run.Content.ToString()).AppendLine();
            }

            Console.WriteLine(report.ToString());
            Console.ReadKey();
        }
 /// <summary>
 /// Loads the files "Page - 1.pdf" .. "Page - N.pdf" from <c>folderPath</c>. For each page it
 /// collects all table rows, refreshes <c>date</c> and <c>adoptionDate</c> from the page text
 /// and hands the rows to <c>ParseAndGetInfo</c> together with the 1-based page number.
 /// </summary>
 /// <param name="pagesCount">Number of page files to process.</param>
 public void LoadPages(int pagesCount)
 {
     for (int pageIndex = 0; pageIndex < pagesCount; pageIndex++)
     {
         int pageNumber = pageIndex + 1;

         List <SautinSoft.Document.Tables.TableRow> rows = new List <SautinSoft.Document.Tables.TableRow>();
         DocumentCore dc = DocumentCore.Load(folderPath + @"\Page - " + pageNumber.ToString() + ".pdf");

         // Gather every table row of the page.
         foreach (SautinSoft.Document.Tables.TableRow row in dc.GetChildElements(true, ElementType.TableRow))
         {
             rows.Add(row);
         }

         // Only the first section is inspected: 'date' is a 10-character slice (from offset 4)
         // of the text of its second-to-last block, with line breaks removed.
         foreach (SautinSoft.Document.Section section in dc.GetChildElements(true, ElementType.Section))
         {
             string blockText = section.Blocks[section.Blocks.Count - 2].Content.ToString();
             date = blockText.Replace("\r\n", "").Substring(4, 10);
             break;
         }

         // 'adoptionDate' comes from the 8th inline of the first paragraph containing the marker
         // text (Russian for "Date of acceptance by the authorized bank").
         foreach (SautinSoft.Document.Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
         {
             if (!paragraph.Content.ToString().Contains("Дата принятия уполномоченным банком"))
             {
                 continue;
             }

             adoptionDate = paragraph.Inlines[7].Content.ToString();
             break;
         }

         ParseAndGetInfo(rows, pageNumber);
     }
 }
Exemple #4
0
        /// <summary>
        /// Loads a DOCX document, gives every cell of its first table a dashed black outside
        /// border and a "#FFCC00" (yellow) background, saves the result as PDF and opens
        /// both the source and the result file.
        /// </summary>
        public static void ModifyTable()
        {
            string sourcePath = @"..\..\..\..\..\..\Testing Files\table.docx";
            string destPath   = Path.ChangeExtension(sourcePath, ".modified.pdf");

            DocumentCore dc = DocumentCore.Load(sourcePath);

            // The first table found anywhere in the document.
            Table table = (Table)dc.GetChildElements(true, ElementType.Table).First();

            // Restyle the cells row by row.
            for (int rowIndex = 0; rowIndex < table.Rows.Count; rowIndex++)
            {
                var cells = table.Rows[rowIndex].Cells;
                for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
                {
                    var format = cells[cellIndex].CellFormat;
                    format.Borders.SetBorders(MultipleBorderTypes.Outside, BorderStyle.Dashed, Color.Black, 1);
                    format.BackgroundColor = new Color("#FFCC00");
                }
            }

            // Write the modified document as PDF.
            PdfSaveOptions saveOptions = new PdfSaveOptions();
            dc.Save(destPath, saveOptions);

            // Show the source and the destination documents.
            foreach (string path in new[] { sourcePath, destPath })
            {
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(path)
                {
                    UseShellExecute = true
                });
            }
        }
Exemple #5
0
        /// <summary>
        /// Builds a new document with one paragraph that hosts a FormDropDown field
        /// with three items, saves it as "Advanced.pdf" and opens the file.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/advanced.php
        /// </remarks>
        static void FormDropDown()
        {
            string filePath = @"Advanced.pdf";

            // Start from an empty document.
            DocumentCore dc = new DocumentCore();

            // Append the host paragraph, then look it up again in the document tree.
            dc.Content.End.Insert(new Paragraph(dc, "The paragraph with FormDropDown element: ").Content);
            Paragraph hostParagraph = dc.GetChildElements(true, ElementType.Paragraph).FirstOrDefault() as Paragraph;

            // A FormDropDown field is configured through its form data.
            Field dropDownField = new Field(dc, FieldType.FormDropDown);
            FormDropDownData dropDown = dropDownField.FormData as FormDropDownData;

            foreach (string item in new[] { "First Item", "Second Item", "Third Item" })
            {
                dropDown.Items.Add(item);
            }

            // Zero-based index 2 selects "Third Item".
            dropDown.SelectedItemIndex = 2;

            hostParagraph.Inlines.Add(dropDown.Field);

            // Save the document.
            dc.Save(filePath);

            // Open the result for demonstration purposes.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(filePath);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
        /// <summary>
        /// Click handler: resets the data table, lets the user pick a .docx file, collects the
        /// first line of every non-empty paragraph and shows the table built from them in
        /// <c>dataGridView1</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void buttonGetDocument_Click(object sender, EventArgs e)
        {
            SetDefaultDataTable();

            // OpenFileDialog is IDisposable; the using block releases it on every exit path.
            using (OpenFileDialog opf = new OpenFileDialog())
            {
                opf.Filter = "Word 2007 Documents (*.docx)|*.docx";
                if (opf.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                string       filename = opf.FileName; // Path to the chosen .docx file.
                DocumentCore dc       = DocumentCore.Load(filename);

                // Text of every non-empty paragraph of the document, in document order.
                List <string> elements = new List <string>();
                foreach (Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
                {
                    // Splitting by "\r\n" did not work (per the original author), so the text
                    // is split by '\r' and everything after the first part is dropped.
                    string text = paragraph.Content.ToString().Split('\r')[0];

                    // Skip empty entries; anything else (fields included) is kept.
                    if (text != "")
                    {
                        elements.Add(text);
                    }
                }

                // The first two entries are passed on separately as the id and data field names.
                // With fewer than two entries there is nothing to convert, so the default
                // table set above stays in place instead of failing on the index access.
                if (elements.Count < 2)
                {
                    return;
                }

                string fieldId   = elements[0];
                string fieldData = elements[1];

                // Drop the two leading entries; the rest is what gets converted.
                elements.RemoveRange(0, 2);

                ToTable   converter = new ToTable();
                DataTable table     = converter.ConvertToTable(elements, fieldId, fieldData, GetPathAndName(filename)[1]);
                dataGridView1.DataSource = table;
            }
        }
Exemple #7
0
        /// <summary>
        /// Turns every bold Run of a document into an italic, non-bold Run with a yellow
        /// background, saves the result as PDF and opens both the source and the result file.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/manipulation.php
        /// </remarks>
        static void Manipulation()
        {
            string sourcePath = @"..\..\example.docx";
            string resultPath = @"Result-file.pdf";

            DocumentCore dc = DocumentCore.Load(sourcePath);

            foreach (Run run in dc.GetChildElements(true, ElementType.Run))
            {
                var format = run.CharacterFormat;

                // Runs that are not bold stay untouched.
                if (format.Bold != true)
                {
                    continue;
                }

                format.Bold            = false;
                format.Italic          = true;
                format.BackgroundColor = Color.Yellow;
            }

            dc.Save(resultPath);

            // Open the source and the result for comparison.
            foreach (string path in new[] { sourcePath, resultPath })
            {
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(path)
                {
                    UseShellExecute = true
                });
            }
        }
Exemple #8
0
        /// <summary>
        /// Extracts the pictures of a PDF document into the document's own folder as
        /// "Picture1.&lt;format&gt;", "Picture2.&lt;format&gt;", ... and opens every saved file.
        /// </summary>
        public static void ExtractPictures()
        {
            // Path to the document to extract pictures from.
            string filePath = @"..\..\..\..\..\..\Testing Files\example.pdf";

            // The pictures are stored next to the document.
            DirectoryInfo imageDirectory = new DirectoryInfo(Path.GetDirectoryName(filePath));
            string        imageTemplate  = "Picture";

            // Extracted images, without duplicates.
            List <ImageData> imageInventory = new List <ImageData>();

            // Load the document.
            DocumentCore dc = DocumentCore.Load(filePath);

            foreach (Picture pict in dc.GetChildElements(true, ElementType.Picture))
            {
                // Read the candidate's length once instead of once per comparison.
                long candidateLength = pict.ImageData.GetStream().Length;

                // NOTE(review): duplicates are detected by stream length only, so two different
                // images with the same byte size are treated as one; confirm this is acceptable.
                if (!imageInventory.Exists(img => img.GetStream().Length == candidateLength))
                {
                    imageInventory.Add(pict.ImageData);
                }
            }

            // Save and show all images.
            for (int i = 0; i < imageInventory.Count; i++)
            {
                ImageData image = imageInventory[i];

                // Invariant lower-casing keeps the file extension independent of the current culture.
                string extension = image.Format.ToString().ToLowerInvariant();
                string imagePath = Path.Combine(imageDirectory.FullName, String.Format("{0}{1}.{2}", imageTemplate, i + 1, extension));

                File.WriteAllBytes(imagePath, image.GetStream().ToArray());

                // UseShellExecute is set explicitly because it defaults to false on .NET Core / .NET 5+,
                // where starting a non-executable file would otherwise fail.
                System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(imagePath)
                {
                    UseShellExecute = true
                });
            }
        }
Exemple #9
0
        /// <summary>
        /// Removes every hyperlink object from a DOCX document, saves the result
        /// as a PDF 1.4 file and opens it.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-delete-url-csharp-vb-net.php
        /// </remarks>
        public static void DeleteHyperlinksObjects()
        {
            string inpFile = @"..\..\Hyperlinks example.docx";
            string outFile = @"Result - Delete Hyperlinks completely.pdf";

            // Open the source document.
            DocumentCore dc = DocumentCore.Load(inpFile);

            // Walk the hyperlinks in reverse order and remove each one from its parent collection.
            foreach (Hyperlink link in dc.GetChildElements(true, ElementType.Hyperlink).Reverse())
            {
                link.ParentCollection.Remove(link);
            }

            // Save the document in PDF format.
            PdfSaveOptions saveOptions = new PdfSaveOptions();
            saveOptions.Compliance = PdfCompliance.PDF_14;
            dc.Save(outFile, saveOptions);

            // Open the result for demonstration purposes.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(outFile);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
Exemple #10
0
        /// <summary>
        /// Points every hyperlink of a DOCX document to one custom address, saves the
        /// result as a PDF 1.4 file and opens it.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-replace-url-csharp-vb-net.php
        /// </remarks>
        public static void ReplaceHyperlinksURL()
        {
            string inpFile = @"..\..\Hyperlinks example.docx";
            string outFile = @"Result - URL.pdf";

            // The address every hyperlink will get.
            string customURL = "https://www.sautinsoft.com";

            // Open the source document.
            DocumentCore dc = DocumentCore.Load(inpFile);

            // Overwrite the address of each hyperlink.
            foreach (Hyperlink link in dc.GetChildElements(true, ElementType.Hyperlink))
            {
                link.Address = customURL;
            }

            // Save the document in PDF format.
            PdfSaveOptions saveOptions = new PdfSaveOptions();
            saveOptions.Compliance = PdfCompliance.PDF_14;
            dc.Save(outFile, saveOptions);

            // Open the result for demonstration purposes.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(outFile);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
Exemple #11
0
        /// <summary>
        /// Loads "toc.docx", updates its table of entries and the page numbers, saves the
        /// document as "TOC_Updated.docx" and shows the result.
        /// </summary>
        public static void TOC_Update()
        {
            // DocumentCore.Serial = "put your serial here";

            string sourcePath = @"..\..\..\..\..\..\Testing Files\toc.docx";
            string resultPath = @"..\..\..\..\..\..\Testing Files\TOC_Updated.docx";

            // Load the document directly; no empty document needs to be created first.
            DocumentCore dc = DocumentCore.Load(sourcePath);

            // Update TOC (TOC can be updated only after all document content is added).
            var toc = (TableOfEntries)dc.GetChildElements(true, ElementType.TableOfEntries).FirstOrDefault();

            // FirstOrDefault() returns null when the document has no table of entries;
            // in that case there is nothing to update and the step is skipped.
            if (toc != null)
            {
                toc.Update();
            }

            // Update TOC's page numbers: requesting the paginator with UpdateFields = true
            // updates them as a side effect, so the returned paginator itself is not needed.
            dc.GetPaginator(new PaginatorOptions()
            {
                UpdateFields = true
            });

            // Save DOCX to a file and show it.
            dc.Save(resultPath);
            ShowResult(resultPath);
        }
Exemple #12
0
        /// <summary>
        /// Replaces <c>ParagraphList</c> with a new list holding every paragraph of the given document.
        /// </summary>
        /// <param name="dc">Document whose paragraphs are collected.</param>
        /// <returns>The newly filled <c>ParagraphList</c>.</returns>
        public List <Paragraph> GetParagraphs(DocumentCore dc)
        {
            ParagraphList = new List <Paragraph>();

            var paragraphs = dc.GetChildElements(true, ElementType.Paragraph);
            foreach (Paragraph paragraph in paragraphs)
            {
                ParagraphList.Add(paragraph);
            }

            return ParagraphList;
        }
Exemple #13
0
        /// <summary>
        /// Counts the sections, paragraphs, runs and fields, and inlines of a DOCX document, plus
        /// the elements of its first section, prints the numbers and waits for a key press.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/iteration-in-element-collection-net-csharp-vb.php
        /// </remarks>
        static void IterationElement()
        {
            DocumentCore dc = DocumentCore.Load(@"..\..\Parsing.docx", LoadOptions.DocxDefault);

            // Counts over the whole document.
            int sectionCount      = dc.Sections.Count;
            int paragraphCount    = dc.GetChildElements(true, ElementType.Paragraph).Count();
            int runAndFieldCount  = dc.GetChildElements(true, ElementType.Run, ElementType.Field).Count();
            int inlineCount       = dc.GetChildElements(true).OfType <Inline>().Count();

            // Count restricted to the first section.
            int firstSectionCount = dc.Sections[0].GetChildElements(true).Count();

            StringBuilder report = new StringBuilder()
                .AppendLine("File has:")
                .AppendLine(sectionCount + " section")
                .AppendLine(paragraphCount + " paragraphs")
                .AppendLine(runAndFieldCount + " runs and fields")
                .AppendLine(inlineCount + " inlines")
                .AppendLine("First section contains " + firstSectionCount + " elements");

            Console.WriteLine(report.ToString());
            Console.ReadKey();
        }
Exemple #14
0
        /// <summary>
        /// Prints the content of every paragraph that is part of a list, then waits for a key press.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/elementcollection-linq.php
        /// </remarks>
        static void ShowLists()
        {
            DocumentCore dc = DocumentCore.Load(@"..\..\example.docx");

            // Only Paragraph elements are requested, so every element can be cast before filtering.
            var listParagraphs = dc.GetChildElements(true, ElementType.Paragraph)
                                   .Cast <Paragraph>()
                                   .Where(candidate => candidate.ListFormat.IsList);

            foreach (Paragraph listItem in listParagraphs)
            {
                Console.WriteLine(listItem.Content.ToString());
            }

            Console.ReadKey();
        }
Exemple #15
0
        /// <summary>
        /// Loads a DOCX document, writes the content of each paragraph to the console
        /// and waits for a key press.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-paragraphs-from-docx-document-net-csharp-vb.php
        /// </remarks>
        static void GetParagraphs()
        {
            DocumentCore dc = DocumentCore.Load(@"..\..\example.docx");

            // One console line per paragraph.
            foreach (Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
            {
                string text = paragraph.Content.ToString();
                Console.WriteLine(text);
            }

            Console.ReadKey();
        }
Exemple #16
0
        /// <summary>
        /// Loads a DOCX document, adds a 1 x 1 floating shape as the signature line, and saves
        /// the document as a digitally signed PDF that uses a picture as the visible signature.
        /// The resulting PDF is opened afterwards.
        /// </summary>
        public static void SingleDigitalSignature()
        {
            // Input document and signed output.
            string loadPath = @"C:\workspace\PDFDigitalSign\Resource\digitalsignature.docx";
            string savePath = @"C:\workspace\PDFDigitalSign\Resource\Result1.pdf";

            DocumentCore dc = DocumentCore.Load(loadPath);

            // Shape for the digital signature: floating, 0 mm from the left margin
            // and 0 mm from the top margin, sized 1 x 1.
            HorizontalPosition shapeLeft = new HorizontalPosition(0f, LengthUnit.Millimeter, HorizontalPositionAnchor.LeftMargin);
            VerticalPosition   shapeTop  = new VerticalPosition(0f, LengthUnit.Millimeter, VerticalPositionAnchor.TopMargin);
            Shape signatureShape = new Shape(dc, Layout.Floating(shapeLeft, shapeTop, new Size(1, 1)));

            // Keep the shape in front of the text and give its outline no fill.
            FloatingLayout shapeLayout = (FloatingLayout)signatureShape.Layout;
            shapeLayout.WrappingStyle = WrappingStyle.InFrontOfText;
            signatureShape.Outline.Fill.SetEmpty();

            // The shape is hosted by the first paragraph of the document.
            Paragraph firstPar = dc.GetChildElements(true).OfType <Paragraph>().FirstOrDefault();
            firstPar.Inlines.Add(signatureShape);

            // Signature picture: 20 x 10 mm, anchored to the page,
            // 4.5 cm horizontally and 14.5 cm vertically.
            Picture signaturePict = new Picture(dc, @"C:\workspace\PDFDigitalSign\Resource\sign1.png");
            HorizontalPosition pictLeft = new HorizontalPosition(4.5, LengthUnit.Centimeter, HorizontalPositionAnchor.Page);
            VerticalPosition   pictTop  = new VerticalPosition(14.5, LengthUnit.Centimeter, VerticalPositionAnchor.Page);
            signaturePict.Layout = Layout.Floating(pictLeft, pictTop, new Size(20, 10, LengthUnit.Millimeter));

            PdfSaveOptions options = new PdfSaveOptions();
            var signature = options.DigitalSignature;

            // Certificate (*.pfx) and its password.
            signature.CertificatePath     = @"C:\workspace\PDFDigitalSign\Resource\sautinsoft.pfx";
            signature.CertificatePassword = "******";

            // Descriptive signature details.
            signature.Location    = "World Wide Web";
            signature.Reason      = "Test Signature 1";
            signature.ContactInfo = "*****@*****.**";

            // Signature line (the shape) and signature image (the picture).
            signature.SignatureLine = signatureShape;
            signature.Signature     = signaturePict;

            dc.Save(savePath, options);

            // Open the signed PDF.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(savePath);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
Exemple #17
0
        /// <summary>
        /// Replaces every text hyperlink of a DOCX document by its display inlines, with Runs
        /// colored red and not underlined, then saves the result as DOCX and opens it.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-replace-url-csharp-vb-net.php
        /// </remarks>
        public static void ReplaceHyperlinksByText()
        {
            // Only the font color and underline of the Runs are changed;
            // the remaining formatting (font family, size and so on) is left as it is.
            string inpFile = @"..\..\Hyperlinks example.docx";
            string outFile = @"Result - Replace By Text.docx";

            // Open the source document.
            DocumentCore dc = DocumentCore.Load(inpFile);

            // The hyperlinks are visited in reverse order because each one is removed
            // from its parent collection and replaced by plain inlines.
            foreach (Hyperlink link in dc.GetChildElements(true, ElementType.Hyperlink).Reverse())
            {
                InlineCollection displayInlines = link.DisplayInlines;

                // Skip hyperlinks whose display content does not start with a text Run.
                bool startsWithText = displayInlines != null && displayInlines.Count > 0 && displayInlines[0] is Run;
                if (!startsWithText)
                {
                    continue;
                }

                // Remember where the hyperlink sits, then take it out of its parent collection.
                InlineCollection siblings = link.ParentCollection;
                int position = siblings.IndexOf(link);
                siblings.RemoveAt(position);

                // Put clones of the display inlines into the freed position, in order.
                for (int offset = 0; offset < displayInlines.Count; offset++)
                {
                    var inline = displayInlines[offset];

                    // Text gets a red font color and loses the underline before it is cloned.
                    Run run = inline as Run;
                    if (run != null)
                    {
                        run.CharacterFormat.FontColor      = Color.Red;
                        run.CharacterFormat.UnderlineStyle = UnderlineType.None;
                    }

                    siblings.Insert(position + offset, inline.Clone(true));
                }
            }

            // Save the document back.
            dc.Save(outFile);

            // Open the result for demonstration purposes.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(outFile);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
Exemple #18
0
        /// <summary>
        /// Replaces every hyperlink of a DOCX document by its display inlines, with Runs set to
        /// the Auto font color and not underlined, then saves the result as DOCX and opens it.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-delete-url-csharp-vb-net.php
        /// </remarks>
        public static void DeleteHyperlinksURL()
        {
            // 'Hyperlink.Address' may not be set to an empty value (per the original sample's
            // note), so each Hyperlink object is removed and its text inlines are inserted instead.
            string inpFile = @"..\..\Hyperlinks example.docx";
            string outFile = @"Result - delete links and preserve text.docx";

            // Open the source document.
            DocumentCore dc = DocumentCore.Load(inpFile);

            // The hyperlinks are visited in reverse order because each one is removed
            // from its parent collection and replaced by plain inlines.
            foreach (Hyperlink link in dc.GetChildElements(true, ElementType.Hyperlink).Reverse())
            {
                // Remember where the hyperlink sits in its parent collection.
                InlineCollection siblings = link.ParentCollection;
                int position = siblings.IndexOf(link);

                // The hyperlink's display content.
                InlineCollection displayInlines = link.DisplayInlines;

                // Take the hyperlink out of the parent collection.
                siblings.RemoveAt(position);

                // Put clones of the display inlines into the freed position, in order.
                for (int offset = 0; offset < displayInlines.Count; offset++)
                {
                    var inline = displayInlines[offset];

                    // Auto font color (black in most cases) and no underline. Drop this part
                    // to keep the formatting the hyperlink text had.
                    Run run = inline as Run;
                    if (run != null)
                    {
                        run.CharacterFormat.FontColor      = Color.Auto;
                        run.CharacterFormat.UnderlineStyle = UnderlineType.None;
                    }

                    siblings.Insert(position + offset, inline.Clone(true));
                }
            }

            // Save the document back.
            dc.Save(outFile);

            // Open the result for demonstration purposes.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(outFile);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
Exemple #19
0
        /// <summary>
        /// Loads a DOCX document, writes the text of each Run to the console
        /// and waits for a key press.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-text-from-docx-document-net-csharp-vb.php
        /// </remarks>
        static void GetText()
        {
            DocumentCore dc = DocumentCore.Load(@"..\..\example.docx");

            // One console line per Run element of the document.
            var runs = dc.GetChildElements(true, ElementType.Run);
            foreach (Run textRun in runs)
            {
                Console.WriteLine(textRun.Text);
            }

            Console.ReadKey();
        }
Exemple #20
0
        /// <summary>
        /// Keeps only the rows of the first table whose third cell holds the birth date of a
        /// person aged 90 or more; every other row except row 0 is removed. The result is
        /// saved as PDF and opened.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-find-text-from-table-net-csharp-vb.php
        /// </remarks>
        public static void FindTextFromTable()
        {
            int longLiverMinYears = 90;

            string inpFile = @"..\..\example.docx";
            string outFile = Path.ChangeExtension(inpFile, ".pdf");

            // Load a document with a table containing various persons with different age.
            DocumentCore dc = DocumentCore.Load(inpFile);

            // Find the first table in the document.
            Table table = (Table)dc.GetChildElements(true, ElementType.Table).First();

            // Birth dates are parsed as en-US. The culture does not change between rows,
            // so it is created once instead of on every iteration.
            CultureInfo dateCulture = CultureInfo.CreateSpecificCulture("en-US");

            // Walk the rows from the end so RemoveAt never shifts a row that is still to be
            // visited. Row 0 is never examined (presumably the header row; verify).
            for (int r = table.Rows.Count - 1; r > 0; r--)
            {
                // Take the 3rd cell with the birth date.
                TableCell tc = table.Rows[r].Cells[2];

                // A row counts as a long-liver row only when the date parses
                // and the calculated age reaches the threshold.
                DateTime birthDate;
                bool isLongLiver =
                    DateTime.TryParse(tc.Content.ToString(), dateCulture, DateTimeStyles.None, out birthDate)
                    && CalculateAge(birthDate) >= longLiverMinYears;

                // Remove the row if it doesn't contain a long-liver.
                if (!isLongLiver)
                {
                    table.Rows.RemoveAt(r);
                }
            }

            // Save the document as PDF.
            dc.Save(outFile, new PdfSaveOptions());

            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile)
            {
                UseShellExecute = true
            });
        }
Exemple #21
0
        /// <summary>
        /// Loads a scanned PDF with OCR enabled (OCR is delegated to <c>PerformOCRTesseract</c>),
        /// makes the recognized text visible, saves the result as DOCX and opens it.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/ocr-load-scanned-pdf-using-tesseract-and-save-as-docx-net-csharp-vb.php
        /// </remarks>
        static void LoadScannedPdf()
        {
            DocumentCore.Serial = "12345";

            // The sample expects a scanned PDF with English, Russian and Vietnamese text.
            //
            // Preparation:
            // 1. Download eng.traineddata, rus.traineddata and vie.traineddata from
            //    https://github.com/tesseract-ocr/tessdata_fast (good and fast) or
            //    https://github.com/tesseract-ocr/tessdata_best (best and slow).
            // 2. Copy the three files into the folder "tessdata" in the project root.
            // 3. Make sure the folder "tessdata" also contains the file "pdf.ttf".

            string inpFile = @"..\..\scan.pdf";
            string outFile = "Result.docx";

            PdfLoadOptions loadOptions = new PdfLoadOptions();
            loadOptions.PreserveEmbeddedFonts = true;

            // Switch OCR on and route it through PerformOCRTesseract;
            // all Tesseract parameters can be specified inside that method.
            var ocr = loadOptions.OCROptions;
            ocr.OCRMode = OCRMode.Enabled;
            ocr.Method  = PerformOCRTesseract;

            DocumentCore dc = DocumentCore.Load(inpFile, loadOptions);

            // Tesseract returns its OCR result with invisible text (per the original sample's note),
            // so every Run gets a black font color, 100 scaling and 0 spacing.
            foreach (Run run in dc.GetChildElements(true, ElementType.Run))
            {
                var format = run.CharacterFormat;
                format.FontColor = SautinSoft.Document.Color.Black;
                format.Scaling   = 100;
                format.Spacing   = 0;
            }

            dc.Save(outFile);

            // Open the result for demonstration purposes.
            System.Diagnostics.ProcessStartInfo viewer = new System.Diagnostics.ProcessStartInfo(outFile);
            viewer.UseShellExecute = true;
            System.Diagnostics.Process.Start(viewer);
        }
        /// <summary>
        /// Loads an existing document, appends a signature picture to its last section and saves it
        /// as a PDF carrying a digital signature.
        /// </summary>
        public static void DigitalSignature()
        {
            // Document to load.
            string loadPath = @"C:\Users\admin\Desktop\Test\Test\Test.pdf";
            DocumentCore document = DocumentCore.Load(loadPath);

            // First shape found anywhere in the document; it serves as the signature placeholder.
            // (A signature line inserted with MS Word -> Insert tab -> Signature Line has the default
            // description 'Microsoft Office Signature Line...'.)
            ShapeBase placeholder = document.GetChildElements(true).OfType<ShapeBase>().FirstOrDefault();

            // Picture standing in for the handwritten signature.
            Picture signature = new Picture(document, "C:\\Users\\admin\\Desktop\\Test\\Test\\signature.png");

            // Floating layout anchored to the page: horizontal offset 4.5 cm, vertical offset -4.5 cm,
            // keeping the picture's current size.
            var horizontal = new HorizontalPosition(4.5, LengthUnit.Centimeter, HorizontalPositionAnchor.Page);
            var vertical = new VerticalPosition(-4.5, LengthUnit.Centimeter, VerticalPositionAnchor.Page);
            signature.Layout = Layout.Floating(horizontal, vertical, signature.Layout.Size);

            // The picture is placed in a new paragraph at the end of the last section.
            Paragraph signatureParagraph = new Paragraph(document, signature);
            document.Sections.Last().Blocks.Add(signatureParagraph);

            PdfSaveOptions options = new PdfSaveOptions();
            var digitalSignature = options.DigitalSignature;

            // Certificate (*.pfx) and its password.
            digitalSignature.CertificatePath = "C:\\Users\\admin\\Desktop\\Test\\Test\\xyz.pfx";
            digitalSignature.CertificatePassword = "******";

            // Additional information stored with the signature.
            digitalSignature.Location = "World Wide Web";
            digitalSignature.Reason = "Document.Net by SautiSoft";
            digitalSignature.ContactInfo = "*****@*****.**";

            // Placeholder where the signature is visualized, and its visual representation.
            digitalSignature.SignatureLine = placeholder;
            digitalSignature.Signature = signature;

            // NOTE(review): loadPath already ends in ".pdf", so ChangeExtension returns the same
            // path and the signed PDF is written over the input file - confirm this is intended.
            string savePath = Path.ChangeExtension(loadPath, ".pdf");
            document.Save(savePath, options);

            ShowResult(savePath);
        }
Exemple #23
0
        /// <summary>
        /// Collects every word-like token from the text of all runs in the document.
        /// </summary>
        /// <param name="dc">Document whose runs are scanned.</param>
        /// <returns>The matched words in the order they were encountered.</returns>
        private string[] ReadWords(DocumentCore dc)
        {
            var collected = new List <string>();

            foreach (Run run in dc.GetChildElements(true, ElementType.Run))
            {
                // A word is a maximal sequence of word characters and apostrophes
                // delimited by word boundaries.
                foreach (Match hit in Regex.Matches(run.Text, @"\b[\w']+\b"))
                {
                    collected.Add(hit.Value);
                }
            }

            return collected.ToArray();
        }
Exemple #24
0
        /// <summary>
        /// Highlights every center-aligned paragraph of a DOCX document with a yellow background
        /// and saves the result as a new file.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/find-paragraphs-in-docx-document-net-csharp-vb.php
        /// </remarks>
        static void FindParagraph()
        {
            string sourcePath = @"..\..\example.docx";
            string resultPath = @"Result.docx";

            DocumentCore document = DocumentCore.Load(sourcePath);

            foreach (Paragraph paragraph in document.GetChildElements(true, ElementType.Paragraph))
            {
                // Only paragraphs aligned by center are marked.
                if (paragraph.ParagraphFormat.Alignment != HorizontalAlignment.Center)
                {
                    continue;
                }

                paragraph.ParagraphFormat.BackgroundColor = Color.Yellow;
            }

            document.Save(resultPath);

            // Open the result with the shell's default application.
            var startInfo = new System.Diagnostics.ProcessStartInfo(resultPath);
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }
Exemple #25
0
        /// <summary>
        /// Converts a PDF made of scanned images to Word by extracting the hidden text layer.
        /// Works only when the PDF carries hidden text on top of the images.
        /// </summary>
        /// <remarks>
        /// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/from-customers-scanned-pdf-to-word-in-csharp-vb-net.php
        /// </remarks>
        static void ScannedPdfToWord()
        {
            // Many PDF documents that look scanned also carry a hidden text layer which
            // duplicates the content of the images so that 'find' keeps working.
            //
            // Approach:
            //   1. Load the PDF showing the hidden text and skipping all images.
            //   2. Switch the font color of all text to black.
            //   3. Save the document as DOCX.
            string scannedPdfPath = @"..\..\Scanned.pdf";
            string docxPath = @"Result.docx";

            PdfLoadOptions loadOptions = new PdfLoadOptions();
            loadOptions.PreserveEmbeddedFonts = true;
            loadOptions.PreserveImages = false;
            loadOptions.ShowInvisibleText = true;

            DocumentCore document = DocumentCore.Load(scannedPdfPath, loadOptions);

            document.DefaultCharacterFormat.FontColor = Color.Black;

            foreach (Paragraph paragraph in document.GetChildElements(true, ElementType.Paragraph))
            {
                // Recolor each run of the paragraph; other inline kinds are left untouched.
                foreach (Inline inline in paragraph.Inlines)
                {
                    Run run = inline as Run;
                    if (run != null)
                    {
                        run.CharacterFormat.FontColor = Color.Black;
                    }
                }

                // The paragraph mark has its own character format.
                paragraph.CharacterFormatForParagraphMark.FontColor = Color.Black;
            }

            document.Save(docxPath);

            // Open the result for demonstration purposes.
            var startInfo = new System.Diagnostics.ProcessStartInfo(docxPath);
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }
Exemple #26
0
        /// <summary>
        /// Click handler: loads a fixed DOCX file and writes the content of each table row
        /// found in it to the console.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Button_Click_1(object sender, RoutedEventArgs e)
        {
            FileInfo pathToDocx = new FileInfo(@"C:\Users\Faizi\Desktop\zzzz.docx");

            // Parse the DOCX document.
            DocumentCore docx = DocumentCore.Load(pathToDocx.FullName);

            // Walk all table rows at any depth and print the content of each one.
            foreach (var row in docx.GetChildElements(true, ElementType.TableRow))
            {
                Console.WriteLine(row.Content.ToString());
            }
        }
Exemple #27
0
        /// <summary>
        /// Loads the blank document, adds a floating signature-line shape to its first paragraph
        /// and saves the document as a digitally signed PDF that shows the signature picture.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <exception cref="InvalidOperationException">
        /// The loaded document contains no paragraph that could host the signature shape.
        /// </exception>
        static void Main(string[] args)
        {
            Console.WriteLine("Processing...............");

            DocumentCore dcObj = DocumentCore.Load(blankdoc);

            // Floating shape at the top-left margin corner that acts as the signature line.
            Shape signatureShape = new Shape(dcObj, Layout.Floating(new HorizontalPosition(0f, LengthUnit.Millimeter, HorizontalPositionAnchor.LeftMargin),
                                                                    new VerticalPosition(0f, LengthUnit.Millimeter, VerticalPositionAnchor.TopMargin), new Size(3, 3)));

            ((FloatingLayout)signatureShape.Layout).WrappingStyle = WrappingStyle.InFrontOfText;
            signatureShape.Outline.Fill.SetEmpty();

            // The shape has to live inside a paragraph. FirstOrDefault returns null for a document
            // without paragraphs, so fail with a clear message instead of a NullReferenceException.
            Paragraph firstPar = dcObj.GetChildElements(true).OfType <Paragraph>().FirstOrDefault();
            if (firstPar == null)
            {
                throw new InvalidOperationException("The document contains no paragraph to place the signature shape into.");
            }

            firstPar.Inlines.Add(signatureShape);

            // Picture used as the visual representation of the digital signature.
            Picture signaturePict = new Picture(dcObj, sign);

            signaturePict.Layout = Layout.Floating(
                new HorizontalPosition(2.5, LengthUnit.Centimeter, HorizontalPositionAnchor.Page),
                new VerticalPosition(4.5, LengthUnit.Centimeter, VerticalPositionAnchor.Page),
                new Size(10, 5, LengthUnit.Centimeter));

            PdfSaveOptions options = new PdfSaveOptions();

            // Certificate (*.pfx) and its password.
            options.DigitalSignature.CertificatePath     = @"..\..\sautinsoft.pfx";
            options.DigitalSignature.CertificatePassword = "******";

            // Placeholder shape and the picture visualized in it.
            options.DigitalSignature.SignatureLine = signatureShape;
            options.DigitalSignature.Signature     = signaturePict;

            dcObj.Save(resultpdf, options);

            // Open the signed PDF with the shell's default application.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(resultpdf)
            {
                UseShellExecute = true
            });
        }
Exemple #28
0
        /// <summary>
        /// Prints the size of the first picture in a document in several units, scales the picture
        /// by 1.5 and saves the document as a new file.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-and-change-picture-size-in-docx-csharp-vb-net.php
        /// If the document contains no picture, a message is printed and nothing is saved.
        /// </remarks>
        public static void GetAndChangePictureSize()
        {
            // Path to the document whose first picture is inspected, and the output path.
            string inpFile = @"..\..\example.docx";
            string outFile = "Result.docx";

            // Load the document.
            DocumentCore dc = DocumentCore.Load(inpFile);

            // Get the first picture of the document. FirstOrDefault yields null when the document
            // has no pictures, so bail out instead of dereferencing null.
            Picture pict = dc.GetChildElements(true, ElementType.Picture).FirstOrDefault() as Picture;
            if (pict == null)
            {
                Console.WriteLine("The document does not contain any pictures.");
                return;
            }

            // Physical size of the picture; the conversions below treat it as points.
            Size size = pict.Layout.Size;

            Console.WriteLine("The 1st picture has this size:\r\n");
            Console.WriteLine("W: {0}, H: {1} (In points)", size.Width, size.Height);
            Console.WriteLine("W: {0}, H: {1} (In pixels)", LengthUnitConverter.Convert(size.Width, LengthUnit.Point, LengthUnit.Pixel),
                              LengthUnitConverter.Convert(size.Height, LengthUnit.Point, LengthUnit.Pixel));
            Console.WriteLine("W: {0:F2}, H: {1:F2} (In mm)", LengthUnitConverter.Convert(size.Width, LengthUnit.Point, LengthUnit.Millimeter),
                              LengthUnitConverter.Convert(size.Height, LengthUnit.Point, LengthUnit.Millimeter));
            Console.WriteLine("W: {0:F2}, H: {1:F2} (In cm)", LengthUnitConverter.Convert(size.Width, LengthUnit.Point, LengthUnit.Centimeter),
                              LengthUnitConverter.Convert(size.Height, LengthUnit.Point, LengthUnit.Centimeter));
            Console.WriteLine("W: {0:F2}, H: {1:F2} (In inches)", LengthUnitConverter.Convert(size.Width, LengthUnit.Point, LengthUnit.Inch),
                              LengthUnitConverter.Convert(size.Height, LengthUnit.Point, LengthUnit.Inch));

            Console.WriteLine("\r\nNow let\'s increase the picture size in x1.5 times. Press any key ...");
            Console.ReadKey();

            // Note: the physical picture data is not changed, the picture is only scaled/stretched.
            pict.Layout.Size = new Size(size.Width * 1.5, size.Height * 1.5);

            // Save the document as a new docx file.
            dc.Save(outFile);

            // Open the result for demonstration purposes.
            System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(outFile)
            {
                UseShellExecute = true
            });
        }
        /// <summary>
        /// Extracts the text of a document located under the web root, one line per paragraph.
        /// </summary>
        /// <param name="fileName">
        /// Path appended as-is to the web root path; no separator is inserted, so it presumably
        /// has to start with one - verify against the callers.
        /// </param>
        /// <returns>
        /// The concatenated text of all runs of each paragraph, with a line break after every paragraph.
        /// </returns>
        public string GetText(string fileName)
        {
            // NOTE(review): if fileName can come from a request, validate it before use so that
            // it cannot point outside the web root.
            var filename = _hostingEnvironment.WebRootPath + fileName;

            FileInfo pathToDocx = new FileInfo(filename);

            // Parse the document and collect all text from it.
            DocumentCore docx = DocumentCore.Load(pathToDocx.FullName);

            StringBuilder text = new StringBuilder();

            foreach (Paragraph par in docx.GetChildElements(true, ElementType.Paragraph))
            {
                // Append the text of every run inside the paragraph, at any depth.
                foreach (Run run in par.GetChildElements(true, ElementType.Run))
                {
                    text.Append(run.Text);
                }

                // Terminate the paragraph with a line break.
                text.AppendLine();
            }

            return text.ToString();
        }
Exemple #30
0
        /// <summary>
        /// Loads an existing DOCX document, counts the 'Run' objects of every paragraph and appends
        /// a highlighted marker stating that count to the end of the paragraph.
        /// </summary>
        /// <remarks>
        /// Details: https://sautinsoft.com/products/document/help/net/developer-guide/run-element-text-in-docx-document-net-csharp-vb.php
        /// </remarks>
        static void CalculateRuns()
        {
            string sourcePath = @"..\..\example.docx";
            string resultPath = @"Result-file.docx";

            DocumentCore document = DocumentCore.Load(sourcePath);

            foreach (Paragraph paragraph in document.GetChildElements(true, ElementType.Paragraph))
            {
                // Count the runs before the marker run is inserted.
                int runCount = par_RunCount(paragraph);

                // Marker appearance: black 10-size text on a yellow background.
                var markerFormat = new CharacterFormat();
                markerFormat.BackgroundColor = Color.Yellow;
                markerFormat.Size = 10;
                markerFormat.FontColor = Color.Black;

                Run marker = new Run(document, "<<This paragraph contains " + runCount + " Run(s)>>", markerFormat);

                // Append the marker at the very end of the paragraph content.
                paragraph.Content.End.Insert(marker.Content);
            }

            document.Save(resultPath);

            // Open the result with the shell's default application.
            var startInfo = new System.Diagnostics.ProcessStartInfo(resultPath);
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }

        // Returns the number of Run elements contained in the paragraph at any depth.
        private static int par_RunCount(Paragraph paragraph)
        {
            return paragraph.GetChildElements(true, ElementType.Run).Count();
        }