/// <summary>
/// Moves the inlines of every paragraph of an existing PDF document into its first paragraph.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/merge-paragraphs-in-pdf-document-net-csharp-vb.php
/// </remarks>
static void MergeParagraphs()
{
    string inpFile = @"..\..\example.pdf";
    string outFile = @"Result.pdf";

    DocumentCore dc = DocumentCore.Load(inpFile);

    // The paragraph that receives clones of the inlines of all the others.
    Paragraph target = dc.GetChildElements(true, ElementType.Paragraph).First() as Paragraph;

    // Every donor paragraph starts inserting at this same position. Donors are visited
    // last-to-first, so each earlier paragraph ends up in front of the later ones that
    // were already inserted and the original order of the text is kept.
    int appendAt = target.Inlines.Count;

    var donors = dc.GetChildElements(true, ElementType.Paragraph)
                   .Reverse()
                   .Where(p => p != target);

    foreach (Paragraph donor in donors)
    {
        int insertAt = appendAt;
        foreach (Inline piece in donor.Inlines)
        {
            target.Inlines.Insert(insertAt, piece.Clone(true));
            insertAt++;
        }

        // The donor's inlines have been cloned into the target; delete the donor's content.
        donor.Content.Delete();
    }

    dc.Save(outFile);

    // Open the source and the result with the shell's associated application.
    var showInput = new System.Diagnostics.ProcessStartInfo(inpFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showInput);
    var showOutput = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showOutput);
}
/// <summary>
/// Prints the content of every paragraph, and of every red-colored Run, of a document.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-content-net-csharp-vb.php
/// </remarks>
public static void GetContent()
{
    // Path to an input document.
    string documentPath = @"..\..\example.docx";
    DocumentCore dc = DocumentCore.Load(documentPath);

    StringBuilder report = new StringBuilder();

    // One line per paragraph; 'Content' is converted to text with ToString().
    foreach (Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
    {
        string paragraphText = paragraph.Content.ToString();
        report.AppendFormat("Paragraph: {0}", paragraphText).AppendLine();
    }

    // One line per Run whose font color equals Color.Red.
    foreach (Run run in dc.GetChildElements(true, ElementType.Run))
    {
        if (run.CharacterFormat.FontColor == Color.Red)
        {
            string runText = run.Content.ToString();
            report.AppendFormat("Red color: {0}", runText).AppendLine();
        }
    }

    Console.WriteLine(report.ToString());

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Loads the files "Page - 1.pdf" .. "Page - N.pdf" from <c>folderPath</c>. For each page it collects the
/// table rows, stores two extracted strings in the <c>date</c> and <c>adoptionDate</c> members (both declared
/// outside this method) and passes the rows to <c>ParseAndGetInfo</c>.
/// </summary>
/// <param name="pagesCount">Number of page files to process; page numbers start at 1.</param>
public void LoadPages(int pagesCount)
{
    for (int pageIndex = 0; pageIndex < pagesCount; pageIndex++)
    {
        int pageNumber = pageIndex + 1;

        List<SautinSoft.Document.Tables.TableRow> rowContent = new List<SautinSoft.Document.Tables.TableRow>();
        DocumentCore dc = DocumentCore.Load(folderPath + @"\Page - " + pageNumber.ToString() + ".pdf");

        // Collect every table row of the page.
        foreach (SautinSoft.Document.Tables.TableRow tableRow in dc.GetChildElements(true, ElementType.TableRow))
        {
            rowContent.Add(tableRow);
        }

        // Only the first section is inspected: take the second-to-last block, strip line breaks
        // and keep the 10 characters starting at offset 4.
        foreach (SautinSoft.Document.Section section in dc.GetChildElements(true, ElementType.Section))
        {
            var block = section.Blocks[section.Blocks.Count - 2];
            string flattened = block.Content.ToString().Replace("\r\n", "");
            date = flattened.Substring(4, 10);
            break;
        }

        // The first paragraph containing the marker text supplies the adoption date from its
        // inline at index 7.
        foreach (SautinSoft.Document.Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
        {
            if (!paragraph.Content.ToString().Contains("Дата принятия уполномоченным банком"))
            {
                continue;
            }

            adoptionDate = paragraph.Inlines[7].Content.ToString();
            break;
        }

        ParseAndGetInfo(rowContent, pageNumber);
    }
}
/// <summary>
/// Restyles every cell of the first table of a document and saves the result as PDF.
/// </summary>
public static void ModifyTable()
{
    string sourcePath = @"..\..\..\..\..\..\Testing Files\table.docx";
    string destPath = Path.ChangeExtension(sourcePath, ".modified.pdf");

    // Load a document with a table and pick the first table in it.
    DocumentCore dc = DocumentCore.Load(sourcePath);
    Table table = (Table)dc.GetChildElements(true, ElementType.Table).First();

    // Give each cell dashed black outside borders (width 1) and the background color #FFCC00.
    for (int rowIndex = 0; rowIndex < table.Rows.Count; rowIndex++)
    {
        var row = table.Rows[rowIndex];
        for (int cellIndex = 0; cellIndex < row.Cells.Count; cellIndex++)
        {
            TableCell cell = row.Cells[cellIndex];
            cell.CellFormat.Borders.SetBorders(MultipleBorderTypes.Outside, BorderStyle.Dashed, Color.Black, 1);
            cell.CellFormat.BackgroundColor = new Color("#FFCC00");
        }
    }

    // Save the document as PDF.
    PdfSaveOptions saveOptions = new PdfSaveOptions();
    dc.Save(destPath, saveOptions);

    // Show the source and the destination documents.
    var showSource = new System.Diagnostics.ProcessStartInfo(sourcePath) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showSource);
    var showResult = new System.Diagnostics.ProcessStartInfo(destPath) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Builds a new document whose single paragraph ends with a FormDropDown field, and saves it as PDF.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/advanced.php
/// </remarks>
static void FormDropDown()
{
    string filePath = @"Advanced.pdf";

    // Start from an empty document and append one introductory paragraph.
    DocumentCore dc = new DocumentCore();
    Paragraph intro = new Paragraph(dc, "The paragraph with FormDropDown element: ");
    dc.Content.End.Insert(intro.Content);

    // Fetch the paragraph back from the document tree; the field is added to that instance.
    Paragraph par = dc.GetChildElements(true, ElementType.Paragraph).FirstOrDefault() as Paragraph;

    // Create the drop-down field and fill in its items.
    FormDropDownData field = new Field(dc, FieldType.FormDropDown).FormData as FormDropDownData;
    foreach (string item in new[] { "First Item", "Second Item", "Third Item" })
    {
        field.Items.Add(item);
    }

    // Index 2 selects "Third Item".
    field.SelectedItemIndex = 2;

    par.Inlines.Add(field.Field);

    // Save our document.
    dc.Save(filePath);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(filePath) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Lets the user pick a .docx file, reads the first line of each non-empty paragraph and shows the
/// table produced by <c>ToTable.ConvertToTable</c> in <c>dataGridView1</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void buttonGetDocument_Click(object sender, EventArgs e)
{
    SetDefaultDataTable();

    // OpenFileDialog is IDisposable; release it as soon as the handler is done with it.
    using (OpenFileDialog opf = new OpenFileDialog())
    {
        opf.Filter = "Word 2007 Documents (*.docx)|*.docx";
        if (opf.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        string filename = opf.FileName; // Path to the .docx file.
        DocumentCore dc = DocumentCore.Load(filename);

        // All non-empty elements of the .docx, in document order.
        // Per the original author's note: every first element is an Id and every second a date.
        List<string> elements = new List<string>();
        foreach (Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
        {
            // Splitting on "\r\n" did not work, so split on '\r' and drop the tail.
            string head = paragraph.Content.ToString().Split('\r')[0];

            // Skip paragraphs whose first line is empty (this includes fields).
            if (head != "")
            {
                elements.Add(head);
            }
        }

        // The first two entries are the header fields. Without both of them there is nothing
        // to convert, so leave the grid as SetDefaultDataTable() left it instead of throwing
        // ArgumentOutOfRangeException inside a UI event handler.
        if (elements.Count < 2)
        {
            return;
        }

        string fieldId = elements[0];
        string fieldData = elements[1];

        // Drop the two header entries; the remainder is the data passed to the converter.
        elements.RemoveRange(0, 2);

        ToTable converter = new ToTable();
        DataTable table = converter.ConvertToTable(elements, fieldId, fieldData, GetPathAndName(opf.FileName)[1]);
        dataGridView1.DataSource = table;
    }
}
/// <summary>
/// Turns every bold Run of a document into a non-bold, italic Run with a yellow background,
/// then saves the document as PDF.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/manipulation.php
/// </remarks>
static void Manipulation()
{
    string filePath = @"..\..\example.docx";
    DocumentCore dc = DocumentCore.Load(filePath);
    string filePathResult = @"Result-file.pdf";

    foreach (Run run in dc.GetChildElements(true, ElementType.Run))
    {
        // Only bold runs are restyled.
        if (run.CharacterFormat.Bold != true)
        {
            continue;
        }

        var format = run.CharacterFormat;
        format.Bold = false;
        format.Italic = true;
        format.BackgroundColor = Color.Yellow;
    }

    dc.Save(filePathResult);

    // Open the source and the result with the shell's associated application.
    var showSource = new System.Diagnostics.ProcessStartInfo(filePath) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showSource);
    var showResult = new System.Diagnostics.ProcessStartInfo(filePathResult) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Extracts every distinct picture from a PDF document, writes each one next to the document as
/// "Picture{n}.{format}" and opens it.
/// </summary>
public static void ExtractPictures()
{
    // Path to a document where to extract pictures.
    string filePath = @"..\..\..\..\..\..\Testing Files\example.pdf";

    // Directory to store extracted pictures: the directory of the document itself.
    DirectoryInfo imageDirectory = new DirectoryInfo(Path.GetDirectoryName(filePath));
    string imageTemplate = "Picture";

    // Extracted images and, at the same index, their raw bytes.
    List<ImageData> imageInventory = new List<ImageData>();
    List<byte[]> imageBytes = new List<byte[]>();

    // Load the document.
    DocumentCore dc = DocumentCore.Load(filePath);

    // Extract all images from the document, skipping duplicates.
    foreach (Picture pict in dc.GetChildElements(true, ElementType.Picture))
    {
        // Read the picture once instead of re-opening its stream for every comparison.
        byte[] candidate = pict.ImageData.GetStream().ToArray();

        // Compare the actual bytes: equal length alone does not make two pictures identical.
        bool alreadyKnown = imageBytes.Exists(known => HaveSameBytes(known, candidate));
        if (!alreadyKnown)
        {
            imageInventory.Add(pict.ImageData);
            imageBytes.Add(candidate);
        }
    }

    // Save and show all images.
    for (int i = 0; i < imageInventory.Count; i++)
    {
        // Invariant lower-casing keeps the extension stable under any current culture
        // (for example the Turkish dotless 'i').
        string extension = imageInventory[i].Format.ToString().ToLowerInvariant();
        string imagePath = Path.Combine(imageDirectory.FullName, String.Format("{0}{1}.{2}", imageTemplate, i + 1, extension));
        File.WriteAllBytes(imagePath, imageBytes[i]);

        // UseShellExecute must be set explicitly: on .NET Core / .NET 5+ it defaults to false,
        // and starting a non-executable file such as an image would then fail.
        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(imagePath) { UseShellExecute = true });
    }
}

/// <summary>
/// Returns true when both arrays have the same length and the same byte at every position.
/// </summary>
private static bool HaveSameBytes(byte[] left, byte[] right)
{
    if (left.Length != right.Length)
    {
        return false;
    }

    for (int i = 0; i < left.Length; i++)
    {
        if (left[i] != right[i])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Removes every hyperlink object from a DOCX document and saves the result as PDF.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-delete-url-csharp-vb-net.php
/// </remarks>
public static void DeleteHyperlinksObjects()
{
    // Input: a DOCX document with hyperlinks. Output: a PDF without them.
    string inpFile = @"..\..\Hyperlinks example.docx";
    string outFile = @"Result - Delete Hyperlinks completely.pdf";

    // Let's open our document.
    DocumentCore dc = DocumentCore.Load(inpFile);

    // Visit the hyperlinks in reverse order and detach each one from its parent collection.
    var hyperlinks = dc.GetChildElements(true, ElementType.Hyperlink).Reverse();
    foreach (Hyperlink link in hyperlinks)
    {
        link.ParentCollection.Remove(link);
    }

    // Save the document in PDF format with PDF_14 compliance.
    PdfSaveOptions saveOptions = new PdfSaveOptions() { Compliance = PdfCompliance.PDF_14 };
    dc.Save(outFile, saveOptions);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Points every hyperlink of a DOCX document at one custom address and saves the result as PDF.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-replace-url-csharp-vb-net.php
/// </remarks>
public static void ReplaceHyperlinksURL()
{
    // Input: a DOCX document with hyperlinks. Output: a PDF where all of them share one URL.
    string inpFile = @"..\..\Hyperlinks example.docx";
    string outFile = @"Result - URL.pdf";

    // The address every hyperlink will receive.
    string customURL = "https://www.sautinsoft.com";

    // Let's open our document.
    DocumentCore dc = DocumentCore.Load(inpFile);

    // Overwrite the address of each hyperlink.
    var hyperlinks = dc.GetChildElements(true, ElementType.Hyperlink);
    foreach (Hyperlink link in hyperlinks)
    {
        link.Address = customURL;
    }

    // Save the document in PDF format with PDF_14 compliance.
    PdfSaveOptions saveOptions = new PdfSaveOptions() { Compliance = PdfCompliance.PDF_14 };
    dc.Save(outFile, saveOptions);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Loads a DOCX document, refreshes its table of contents and page numbers, saves the result
/// and passes the saved path to <c>ShowResult</c>.
/// </summary>
/// <exception cref="System.InvalidOperationException">The loaded document contains no table of contents.</exception>
public static void TOC_Update()
{
    // DocumentCore.Serial = "put your serial here";

    string sourcePath = @"..\..\..\..\..\..\Testing Files\toc.docx";
    string resultPath = @"..\..\..\..\..\..\Testing Files\TOC_Updated.docx";

    // Load the document directly; no empty DocumentCore has to be created first.
    DocumentCore dc = DocumentCore.Load(sourcePath);

    // Update TOC (TOC can be updated only after all document content is added).
    var toc = (TableOfEntries)dc.GetChildElements(true, ElementType.TableOfEntries).FirstOrDefault();
    if (toc == null)
    {
        // Fail with a descriptive error rather than a NullReferenceException on toc.Update().
        throw new System.InvalidOperationException("The document does not contain a table of contents to update.");
    }

    toc.Update();

    // Update TOC's page numbers.
    // Page numbers are automatically updated in that case.
    dc.GetPaginator(new PaginatorOptions() { UpdateFields = true });

    // Save DOCX to a file.
    dc.Save(resultPath);
    ShowResult(resultPath);
}
/// <summary>
/// Replaces <c>ParagraphList</c> with a new list holding every paragraph found in the given
/// document, and returns that list.
/// </summary>
/// <param name="dc">Document whose paragraphs are collected.</param>
/// <returns>The list that was stored in <c>ParagraphList</c>.</returns>
public List<Paragraph> GetParagraphs(DocumentCore dc)
{
    ParagraphList = new List<Paragraph>();

    var paragraphs = dc.GetChildElements(true, ElementType.Paragraph);
    foreach (Paragraph paragraph in paragraphs)
    {
        ParagraphList.Add(paragraph);
    }

    return ParagraphList;
}
/// <summary>
/// Counts sections, paragraphs, runs and fields, and inlines in a DOCX document and prints the totals.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/iteration-in-element-collection-net-csharp-vb.php
/// </remarks>
static void IterationElement()
{
    DocumentCore dc = DocumentCore.Load(@"..\..\Parsing.docx", LoadOptions.DocxDefault);

    int sectionCount = dc.Sections.Count;

    // Typed queries over the whole document tree.
    int paragraphCount = dc.GetChildElements(true, ElementType.Paragraph).Count();
    int runAndFieldCount = dc.GetChildElements(true, ElementType.Run, ElementType.Field).Count();

    // Everything deriving from Inline, whatever its concrete element type.
    int inlineCount = dc.GetChildElements(true).OfType<Inline>().Count();

    // All elements returned for the first section only.
    int firstSectionElements = dc.Sections[0].GetChildElements(true).Count();

    StringBuilder report = new StringBuilder()
        .AppendLine("File has:")
        .AppendLine(sectionCount + " section")
        .AppendLine(paragraphCount + " paragraphs")
        .AppendLine(runAndFieldCount + " runs and fields")
        .AppendLine(inlineCount + " inlines")
        .AppendLine("First section contains " + firstSectionElements + " elements");

    Console.WriteLine(report.ToString());

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Prints the content of every paragraph whose list format reports that it is a list item.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/elementcollection-linq.php
/// </remarks>
static void ShowLists()
{
    string filePath = @"..\..\example.docx";
    DocumentCore dc = DocumentCore.Load(filePath);

    foreach (Paragraph candidate in dc.GetChildElements(true, ElementType.Paragraph))
    {
        // Skip paragraphs that are not part of a list.
        if (!candidate.ListFormat.IsList)
        {
            continue;
        }

        Console.WriteLine(candidate.Content.ToString());
    }

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Loads an existing DOCX document and writes the content of each paragraph to the console.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-paragraphs-from-docx-document-net-csharp-vb.php
/// </remarks>
static void GetParagraphs()
{
    string filePath = @"..\..\example.docx";
    DocumentCore dc = DocumentCore.Load(filePath);

    var paragraphs = dc.GetChildElements(true, ElementType.Paragraph);
    foreach (Paragraph paragraph in paragraphs)
    {
        string paragraphText = paragraph.Content.ToString();
        Console.WriteLine(paragraphText);
    }

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Loads a DOCX document, adds a shape as the signature line and a picture as the visible
/// signature, and saves the document as a digitally signed PDF.
/// </summary>
public static void SingleDigitalSignature()
{
    // Path to a loadable document and to the signed result.
    string loadPath = @"C:\workspace\PDFDigitalSign\Resource\digitalsignature.docx";
    string savePath = @"C:\workspace\PDFDigitalSign\Resource\Result1.pdf";

    DocumentCore dc = DocumentCore.Load(loadPath);

    // Shape used as the signature line: floating, offset 0 mm from the left and top margins,
    // size 1 x 1, in front of the text and with an empty outline fill.
    HorizontalPosition shapeLeft = new HorizontalPosition(0f, LengthUnit.Millimeter, HorizontalPositionAnchor.LeftMargin);
    VerticalPosition shapeTop = new VerticalPosition(0f, LengthUnit.Millimeter, VerticalPositionAnchor.TopMargin);
    Shape signatureShape = new Shape(dc, Layout.Floating(shapeLeft, shapeTop, new Size(1, 1)));
    ((FloatingLayout)signatureShape.Layout).WrappingStyle = WrappingStyle.InFrontOfText;
    signatureShape.Outline.Fill.SetEmpty();

    // Find the first paragraph and insert the shape inside it.
    Paragraph firstPar = dc.GetChildElements(true).OfType<Paragraph>().FirstOrDefault();
    firstPar.Inlines.Add(signatureShape);

    // Picture shown as the signature: floating, 4.5 cm horizontal and 14.5 cm vertical offset
    // (both anchored to the page), sized 20 x 10 mm.
    Picture signaturePict = new Picture(dc, @"C:\workspace\PDFDigitalSign\Resource\sign1.png");
    HorizontalPosition pictLeft = new HorizontalPosition(4.5, LengthUnit.Centimeter, HorizontalPositionAnchor.Page);
    VerticalPosition pictTop = new VerticalPosition(14.5, LengthUnit.Centimeter, VerticalPositionAnchor.Page);
    signaturePict.Layout = Layout.Floating(pictLeft, pictTop, new Size(20, 10, LengthUnit.Millimeter));

    PdfSaveOptions options = new PdfSaveOptions();
    var signing = options.DigitalSignature;

    // Path to the certificate (*.pfx) and its password.
    signing.CertificatePath = @"C:\workspace\PDFDigitalSign\Resource\sautinsoft.pfx";
    signing.CertificatePassword = "******";

    // Additional information stored with the signature.
    signing.Location = "World Wide Web";
    signing.Reason = "Test Signature 1";
    signing.ContactInfo = "*****@*****.**";

    // Placeholder and visual representation of the signature.
    signing.SignatureLine = signatureShape;
    signing.Signature = signaturePict;

    dc.Save(savePath, options);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(savePath) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Replaces every text hyperlink of a DOCX document by its display text, colored red and not underlined.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-replace-url-csharp-vb-net.php
/// </remarks>
public static void ReplaceHyperlinksByText()
{
    // Input: a DOCX document with hyperlinks.
    // Output: the same document where each text hyperlink became plain red text; the remaining
    // formatting (font family, size and so on) is left as it was.
    string inpFile = @"..\..\Hyperlinks example.docx";
    string outFile = @"Result - Replace By Text.docx";

    // Let's open our document.
    DocumentCore dc = DocumentCore.Load(inpFile);

    // Visit the hyperlinks in reverse order while they are removed and replaced.
    var hyperlinks = dc.GetChildElements(true, ElementType.Hyperlink).Reverse();
    foreach (Hyperlink link in hyperlinks)
    {
        // Only hyperlinks whose first display inline is a Run (text) are processed.
        if (link.DisplayInlines == null || link.DisplayInlines.Count <= 0 || !(link.DisplayInlines[0] is Run))
        {
            continue;
        }

        // Remember where the hyperlink sits in its parent and what it displays.
        InlineCollection owner = link.ParentCollection;
        int position = owner.IndexOf(link);
        InlineCollection displayed = link.DisplayInlines;

        // Take the hyperlink out of its parent collection.
        owner.RemoveAt(position);

        // Put clones of the display inlines at the position the hyperlink occupied.
        for (int offset = 0; offset < displayed.Count; offset++)
        {
            // Text runs get the red font color and lose the underline before being cloned.
            Run textRun = displayed[offset] as Run;
            if (textRun != null)
            {
                textRun.CharacterFormat.FontColor = Color.Red;
                textRun.CharacterFormat.UnderlineStyle = UnderlineType.None;
            }

            owner.Insert(position + offset, displayed[offset].Clone(true));
        }
    }

    // Save our document back.
    dc.Save(outFile);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Removes every hyperlink from a DOCX document while keeping its display text in place.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/hyperlinks-delete-url-csharp-vb-net.php
/// </remarks>
public static void DeleteHyperlinksURL()
{
    // Input: a DOCX document with hyperlinks. Output: the same document with plain text instead.
    // Note from the original sample: 'Hyperlink.Address' cannot be made empty, so each
    // 'Hyperlink' object is removed and its 'Inline' text objects are inserted in its place.
    string inpFile = @"..\..\Hyperlinks example.docx";
    string outFile = @"Result - delete links and preserve text.docx";

    // Let's open our document.
    DocumentCore dc = DocumentCore.Load(inpFile);

    // Visit the hyperlinks in reverse order while they are removed and replaced.
    var hyperlinks = dc.GetChildElements(true, ElementType.Hyperlink).Reverse();
    foreach (Hyperlink link in hyperlinks)
    {
        // Remember where the hyperlink sits in its parent and what it displays.
        InlineCollection owner = link.ParentCollection;
        int position = owner.IndexOf(link);
        InlineCollection displayed = link.DisplayInlines;

        // Take the hyperlink out of its parent collection.
        owner.RemoveAt(position);

        // Put clones of the display inlines at the position the hyperlink occupied.
        for (int offset = 0; offset < displayed.Count; offset++)
        {
            // Text runs get the Auto font color and lose the underline before being cloned.
            // Drop these assignments to keep the formatting the hyperlink had.
            Run textRun = displayed[offset] as Run;
            if (textRun != null)
            {
                textRun.CharacterFormat.FontColor = Color.Auto;
                textRun.CharacterFormat.UnderlineStyle = UnderlineType.None;
            }

            owner.Insert(position + offset, displayed[offset].Clone(true));
        }
    }

    // Save the document back.
    dc.Save(outFile);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Writes the text of every Run element of a DOCX document to the console, one Run per line.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-text-from-docx-document-net-csharp-vb.php
/// </remarks>
static void GetText()
{
    string filePath = @"..\..\example.docx";
    DocumentCore dc = DocumentCore.Load(filePath);

    // Get all Run elements from the document.
    var runs = dc.GetChildElements(true, ElementType.Run);
    foreach (Run textRun in runs)
    {
        Console.WriteLine(textRun.Text);
    }

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Removes from the first table of a document every row (except row 0) whose third cell does not
/// hold the birth date of a person aged at least 90, then saves the document as PDF.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/from-customers-find-text-from-table-net-csharp-vb.php
/// </remarks>
public static void FindTextFromTable()
{
    int longLiverMinYears = 90;
    string inpFile = @"..\..\example.docx";
    string outFile = Path.ChangeExtension(inpFile, ".pdf");

    // Load a document with a table containing various persons with different age.
    DocumentCore dc = DocumentCore.Load(inpFile);

    // Find the first table in the document.
    Table table = (Table)dc.GetChildElements(true, ElementType.Table).First();

    // Birth dates are parsed with the en-US culture.
    CultureInfo dateCulture = CultureInfo.CreateSpecificCulture("en-US");

    // Walk the rows from the end so that removing one never shifts a row not yet visited.
    // The loop stops before index 0, so the first row is never examined or removed.
    for (int rowIndex = table.Rows.Count - 1; rowIndex > 0; rowIndex--)
    {
        // The third cell holds the birth date.
        TableCell birthCell = table.Rows[rowIndex].Cells[2];

        // A row is kept only when the date parses and the computed age reaches the threshold.
        DateTime birthDate;
        bool keepRow = DateTime.TryParse(birthCell.Content.ToString(), dateCulture, DateTimeStyles.None, out birthDate)
                       && CalculateAge(birthDate) >= longLiverMinYears;

        if (!keepRow)
        {
            table.Rows.RemoveAt(rowIndex);
        }
    }

    // Save the document as PDF.
    PdfSaveOptions saveOptions = new PdfSaveOptions();
    dc.Save(outFile, saveOptions);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Loads a scanned PDF document with OCR delegated to <c>PerformOCRTesseract</c> and saves the
/// recognized result as a DOCX document.
/// </summary>
/// <remarks>
/// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/ocr-load-scanned-pdf-using-tesseract-and-save-as-docx-net-csharp-vb.php
/// </remarks>
static void LoadScannedPdf()
{
    DocumentCore.Serial = "12345";

    // The scanned PDF contains text in English, Russian and Vietnamese.
    // Preparation described by the original sample:
    // 1. Download the data files eng.traineddata, rus.traineddata and vie.traineddata
    //    from (good and fast): https://github.com/tesseract-ocr/tessdata_fast
    //    or (best and slow): https://github.com/tesseract-ocr/tessdata_best
    // 2. Copy these files to the folder "tessdata" in the project root.
    // 3. Be sure that the folder "tessdata" also contains the "pdf.ttf" file.
    string inpFile = @"..\..\scan.pdf";
    string outFile = "Result.docx";

    // Enable OCR and keep embedded fonts. All Tesseract parameters are specified inside the
    // OCR callback method.
    PdfLoadOptions loadOptions = new PdfLoadOptions();
    loadOptions.OCROptions.OCRMode = OCRMode.Enabled;
    loadOptions.PreserveEmbeddedFonts = true;
    loadOptions.OCROptions.Method = PerformOCRTesseract;

    DocumentCore dc = DocumentCore.Load(inpFile, loadOptions);

    // Per the original sample, Tesseract returns its OCR result as a PDF with invisible text.
    // Make it visible: black font color, 100 scaling and 0 spacing for every Run.
    var runs = dc.GetChildElements(true, ElementType.Run);
    foreach (Run recognized in runs)
    {
        var format = recognized.CharacterFormat;
        format.FontColor = SautinSoft.Document.Color.Black;
        format.Scaling = 100;
        format.Spacing = 0;
    }

    dc.Save(outFile);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Loads an existing document and saves it as a PDF document with a digital signature.
/// </summary>
public static void DigitalSignature()
{
    // Path to a loadable document.
    string loadPath = @"C:\Users\admin\Desktop\Test\Test\Test.pdf";
    DocumentCore dc = DocumentCore.Load(loadPath);

    // The first ShapeBase found in the document is used as the signature line.
    // Per the original sample: a signature line added in MS Word (Insert tab -> Signature Line)
    // has the description 'Microsoft Office Signature Line...' by default.
    ShapeBase signatureLine = dc.GetChildElements(true).OfType<ShapeBase>().FirstOrDefault();

    // This picture symbolizes a handwritten signature.
    Picture signature = new Picture(dc, @"C:\Users\admin\Desktop\Test\Test\signature.png");

    // Floating layout that keeps the picture's own size: 4.5 cm horizontal and -4.5 cm vertical
    // offset, both anchored to the page.
    // NOTE(review): presumably intended as an offset relative to the signature line - confirm
    // against the library documentation.
    HorizontalPosition offsetX = new HorizontalPosition(4.5, LengthUnit.Centimeter, HorizontalPositionAnchor.Page);
    VerticalPosition offsetY = new VerticalPosition(-4.5, LengthUnit.Centimeter, VerticalPositionAnchor.Page);
    signature.Layout = Layout.Floating(offsetX, offsetY, signature.Layout.Size);

    // Append the picture, wrapped in a new paragraph, to the last section.
    Paragraph signatureParagraph = new Paragraph(dc, signature);
    dc.Sections.Last().Blocks.Add(signatureParagraph);

    PdfSaveOptions options = new PdfSaveOptions();
    var signing = options.DigitalSignature;

    // Path to the certificate (*.pfx) and the password of the certificate.
    signing.CertificatePath = @"C:\Users\admin\Desktop\Test\Test\xyz.pfx";
    signing.CertificatePassword = "******";

    // Additional information about the certificate.
    signing.Location = "World Wide Web";
    signing.Reason = "Document.Net by SautiSoft";
    signing.ContactInfo = "*****@*****.**";

    // Placeholder where the signature should be visualized, and its visual representation.
    signing.SignatureLine = signatureLine;
    signing.Signature = signature;

    // NOTE(review): loadPath already ends in ".pdf", so this yields the same path and the
    // signed result is saved over the file that was loaded - confirm this is intended.
    string savePath = Path.ChangeExtension(loadPath, ".pdf");
    dc.Save(savePath, options);

    ShowResult(savePath);
}
/// <summary>
/// Collects, in document order, every match of the pattern <c>\b[\w']+\b</c> found in the text of
/// each Run element of the document.
/// </summary>
/// <param name="dc">Document whose Run elements are scanned.</param>
/// <returns>The matched strings as an array.</returns>
private string[] ReadWords(DocumentCore dc)
{
    List<string> collected = new List<string>();

    foreach (Run textRun in dc.GetChildElements(true, ElementType.Run))
    {
        // Word characters and apostrophes delimited by word boundaries.
        MatchCollection hits = Regex.Matches(textRun.Text, @"\b[\w']+\b");
        foreach (Match hit in hits)
        {
            collected.Add(hit.Value);
        }
    }

    return collected.ToArray();
}
/// <summary>
/// Gives every center-aligned paragraph of a DOCX document a yellow background and saves the result.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/find-paragraphs-in-docx-document-net-csharp-vb.php
/// </remarks>
static void FindParagraph()
{
    string filePath = @"..\..\example.docx";
    string fileResult = @"Result.docx";

    DocumentCore dc = DocumentCore.Load(filePath);

    foreach (Paragraph candidate in dc.GetChildElements(true, ElementType.Paragraph))
    {
        // Leave paragraphs with any other alignment untouched.
        if (candidate.ParagraphFormat.Alignment != HorizontalAlignment.Center)
        {
            continue;
        }

        candidate.ParagraphFormat.BackgroundColor = Color.Yellow;
    }

    dc.Save(fileResult);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(fileResult) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Converts a PDF document made of scanned images to Word. It works only if the PDF document
/// contains a hidden text layer atop of the images.
/// </summary>
/// <remarks>
/// Details: https://www.sautinsoft.com/products/document/help/net/developer-guide/from-customers-scanned-pdf-to-word-in-csharp-vb-net.php
/// </remarks>
static void ScannedPdfToWord()
{
    // Background from the original sample: many PDF documents that look scanned also carry a
    // hidden text layer duplicating the scanned images, added so that 'find' works.
    // Steps:
    // 1. Load the PDF showing invisible text and skipping all images.
    // 2. Change the font color to black for all of the text.
    // 3. Save the document as DOCX.
    string inpFile = @"..\..\Scanned.pdf";
    string outFile = @"Result.docx";

    PdfLoadOptions loadOptions = new PdfLoadOptions()
    {
        PreserveEmbeddedFonts = true,
        PreserveImages = false,
        ShowInvisibleText = true,
    };

    DocumentCore dc = DocumentCore.Load(inpFile, loadOptions);

    // Black as the document default...
    dc.DefaultCharacterFormat.FontColor = Color.Black;

    // ...and explicitly on every Run and on every paragraph mark.
    foreach (Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
    {
        foreach (Inline inline in paragraph.Inlines)
        {
            Run textRun = inline as Run;
            if (textRun != null)
            {
                textRun.CharacterFormat.FontColor = Color.Black;
            }
        }

        paragraph.CharacterFormatForParagraphMark.FontColor = Color.Black;
    }

    dc.Save(outFile);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Loads a fixed DOCX file and writes the content of each of its table rows to the console.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button_Click_1(object sender, RoutedEventArgs e)
{
    FileInfo pathToDocx = new FileInfo(@"C:\Users\Faizi\Desktop\zzzz.docx");

    // Parse the docx document.
    DocumentCore docx = DocumentCore.Load(pathToDocx.FullName);

    // The query selects table rows (not paragraphs); print the content of each one.
    foreach (var row in docx.GetChildElements(true, ElementType.TableRow))
    {
        Console.WriteLine(row.Content.ToString());
    }
}
/// <summary>
/// Loads the document at <c>blankdoc</c>, adds a signature-line shape and a signature picture
/// (loaded from <c>sign</c>), and saves a digitally signed PDF to <c>resultpdf</c>.
/// These three members are declared outside this method.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.WriteLine("Processing...............");

    DocumentCore dcObj = DocumentCore.Load(blankdoc);

    // Shape used as the signature line: floating, offset 0 mm from the left and top margins,
    // size 3 x 3, in front of the text and with an empty outline fill.
    HorizontalPosition shapeLeft = new HorizontalPosition(0f, LengthUnit.Millimeter, HorizontalPositionAnchor.LeftMargin);
    VerticalPosition shapeTop = new VerticalPosition(0f, LengthUnit.Millimeter, VerticalPositionAnchor.TopMargin);
    Shape signatureShape = new Shape(dcObj, Layout.Floating(shapeLeft, shapeTop, new Size(3, 3)));
    ((FloatingLayout)signatureShape.Layout).WrappingStyle = WrappingStyle.InFrontOfText;
    signatureShape.Outline.Fill.SetEmpty();

    // The shape is added to the first paragraph of the document.
    Paragraph firstPar = dcObj.GetChildElements(true).OfType<Paragraph>().FirstOrDefault();
    firstPar.Inlines.Add(signatureShape);

    // Picture shown as the signature: floating, 2.5 cm horizontal and 4.5 cm vertical offset
    // (both anchored to the page), sized 10 x 5 cm.
    Picture signaturePict = new Picture(dcObj, sign);
    HorizontalPosition pictLeft = new HorizontalPosition(2.5, LengthUnit.Centimeter, HorizontalPositionAnchor.Page);
    VerticalPosition pictTop = new VerticalPosition(4.5, LengthUnit.Centimeter, VerticalPositionAnchor.Page);
    signaturePict.Layout = Layout.Floating(pictLeft, pictTop, new Size(10, 5, LengthUnit.Centimeter));

    // Certificate, placeholder and visual representation of the digital signature.
    PdfSaveOptions options = new PdfSaveOptions();
    var signing = options.DigitalSignature;
    signing.CertificatePath = @"..\..\sautinsoft.pfx";
    signing.CertificatePassword = "******";
    signing.SignatureLine = signatureShape;
    signing.Signature = signaturePict;

    dcObj.Save(resultpdf, options);

    // Open the result with the shell's associated application.
    var showResult = new System.Diagnostics.ProcessStartInfo(resultpdf) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Prints the size of the first picture of a document in several units, scales the picture
/// by 1.5 and saves the document to a new file.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/get-and-change-picture-size-in-docx-csharp-vb-net.php
/// </remarks>
public static void GetAndChangePictureSize()
{
    // Path to the document that contains the picture, and to the result.
    string inpFile = @"..\..\example.docx";
    string outFile = "Result.docx";

    // Load the document.
    DocumentCore dc = DocumentCore.Load(inpFile);

    // Get the size of the first picture from the document.
    Picture pict = dc.GetChildElements(true, ElementType.Picture).FirstOrDefault() as Picture;
    Size size = pict.Layout.Size;
    var width = size.Width;
    var height = size.Height;

    // The values are reported as points first and then converted from points to each other unit.
    Console.WriteLine("The 1st picture has this size:\r\n");
    Console.WriteLine("W: {0}, H: {1} (In points)", width, height);
    Console.WriteLine("W: {0}, H: {1} (In pixels)",
        LengthUnitConverter.Convert(width, LengthUnit.Point, LengthUnit.Pixel),
        LengthUnitConverter.Convert(height, LengthUnit.Point, LengthUnit.Pixel));
    Console.WriteLine("W: {0:F2}, H: {1:F2} (In mm)",
        LengthUnitConverter.Convert(width, LengthUnit.Point, LengthUnit.Millimeter),
        LengthUnitConverter.Convert(height, LengthUnit.Point, LengthUnit.Millimeter));
    Console.WriteLine("W: {0:F2}, H: {1:F2} (In cm)",
        LengthUnitConverter.Convert(width, LengthUnit.Point, LengthUnit.Centimeter),
        LengthUnitConverter.Convert(height, LengthUnit.Point, LengthUnit.Centimeter));
    Console.WriteLine("W: {0:F2}, H: {1:F2} (In inches)",
        LengthUnitConverter.Convert(width, LengthUnit.Point, LengthUnit.Inch),
        LengthUnitConverter.Convert(height, LengthUnit.Point, LengthUnit.Inch));

    Console.WriteLine("\r\nNow let's increase the picture size in x1.5 times. Press any key ...");
    Console.ReadKey();

    // Only the layout size is changed: the picture is scaled/stretched, its image data is not touched.
    pict.Layout.Size = new Size(width * 1.5, height * 1.5);

    // Save the document as a new docx file.
    dc.Save(outFile);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(outFile) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}
/// <summary>
/// Loads the document located at the web root path followed by <paramref name="fileName"/> and
/// returns the text of its Run elements, one line per paragraph.
/// </summary>
/// <param name="fileName">Path appended as-is to <c>_hostingEnvironment.WebRootPath</c>.</param>
/// <returns>The concatenated Run text of each paragraph, each followed by a line break.</returns>
public string GetText(string fileName)
{
    // NOTE(review): fileName is concatenated to the web root without validation. If it can come
    // from a request, confirm it cannot escape the web root (for example via "..").
    var fullName = _hostingEnvironment.WebRootPath + fileName;
    FileInfo documentFile = new FileInfo(fullName);

    // Parse the docx document and gather all text from it.
    DocumentCore docx = DocumentCore.Load(documentFile.FullName);
    StringBuilder collected = new StringBuilder();

    // NOTE(review): 'Paragr' is not declared in this block - presumably an alias for the
    // SautinSoft Paragraph type defined elsewhere in the file; confirm.
    foreach (Paragr paragraph in docx.GetChildElements(true, ElementType.Paragraph))
    {
        var runs = paragraph.GetChildElements(true, ElementType.Run);
        foreach (Run textRun in runs)
        {
            collected.Append(textRun.Text);
        }

        // End the paragraph's text with a line break.
        collected.AppendLine();
    }

    return collected.ToString();
}
/// <summary>
/// Loads an existing DOCX document and appends to every paragraph a highlighted note stating how
/// many Run elements that paragraph contains.
/// </summary>
/// <remarks>
/// Details: https://sautinsoft.com/products/document/help/net/developer-guide/run-element-text-in-docx-document-net-csharp-vb.php
/// </remarks>
static void CalculateRuns()
{
    string filePath = @"..\..\example.docx";
    DocumentCore dc = DocumentCore.Load(filePath);
    string filePathResult = @"Result-file.docx";

    foreach (Paragraph paragraph in dc.GetChildElements(true, ElementType.Paragraph))
    {
        // Count the runs before the note is inserted.
        int runCount = paragraph.GetChildElements(true, ElementType.Run).Count();

        // The note: black text of size 10 on a yellow background.
        CharacterFormat noteFormat = new CharacterFormat()
        {
            BackgroundColor = Color.Yellow,
            Size = 10,
            FontColor = Color.Black
        };
        string noteText = "<<This paragraph contains " + runCount.ToString() + " Run(s)>>";
        Run note = new Run(dc, noteText, noteFormat);

        // Insert the note at the end of the paragraph's content.
        paragraph.Content.End.Insert(note.Content);
    }

    dc.Save(filePathResult);

    // Open the result for demonstration purposes.
    var showResult = new System.Diagnostics.ProcessStartInfo(filePathResult) { UseShellExecute = true };
    System.Diagnostics.Process.Start(showResult);
}